Tutorial 2: mouse olfactory bulb generated by Stereo-seq

This tutorial demonstrates the analysis of spatial transcriptomics data from the mouse olfactory bulb, generated using Stereo-seq technology. The dataset is publicly available through Zenodo.

For quality control purposes, we preprocessed the data by filtering out spots located outside the main tissue region following the STAGATE method[1]. The curated dataset containing only the valid tissue spots can be accessed via shared Google Drive repository.

Loading and Preparing Data

import pandas as pd  
import numpy as np  
import scanpy as sc  
import anndata as ad  
import matplotlib.pyplot as plt  
from sklearn.metrics import adjusted_rand_score  
from sklearn.metrics.cluster import normalized_mutual_info_score as nmi  
from SEPAR_model import SEPAR  

import os
filepath = 'dataset/Stereo/Dataset1_LiuLongQi_MouseOlfactoryBulb'
counts_file = os.path.join(filepath, 'RNAcounts.h5ad')
coor_file = os.path.join(filepath, 'position.tsv')
# Load spatial coordinates  
coor_df = pd.read_csv(coor_file, sep='\t')  
coor_df.index = coor_df.index.map(lambda x: 'Spot_'+str(x+1))  
coor_df = coor_df.loc[:, ['x','y']]  

# Read count data  
adata = sc.read_h5ad(filename=counts_file)  
adata.var_names_make_unique()  

# Add spatial coordinates  
adata.obsm["spatial"] = coor_df.to_numpy()[:,[1,0]]  

# Quality control  
sc.pp.calculate_qc_metrics(adata, inplace=True)  

used_barcode = pd.read_csv('dataset/Stereo/Dataset1_LiuLongQi_MouseOlfactoryBulb/used_barcodes.txt',sep='\t', header=None)
used_barcode = used_barcode[0]
adata = adata[used_barcode,]
# Initialize SEPAR  
separ = SEPAR(adata, n_cluster=8)  

Data Preprocessing

# Preprocess the data  
separ.preprocess(min_cells=50)  

# Compute spatial graph  
separ.compute_graph()  

# Select features using Moran's I  
separ.select_morani(nslt=2000)  

# Compute weights  
separ.compute_weight(n_cluster=10)  

After filtering:  (19109, 14376)

png

Counting moran's i ...

Finish selecting

1. Running SEPAR Algorithm

# Run SEPAR algorithm  
separ.separ_algorithm(  
    r=30,              # Number of spatial patterns  
    alpha=0.8,         # Graph regularization weight  
    beta=0.05,         # Sparsity penalty weight (previously l1)  
    gamma=0.5          # Pattern orthogonality weight (previously lam)  
)

Processing iterations: 100%|██████████| 100/100 [02:11<00:00,  1.32s/it]

2. Identifying Pattern-Specific Genes

# Identify pattern-specific genes  
pattern_genes = separ.identify_pattern_specific_genes(  
    n_patterns=30,    # Number of patterns to consider  
    threshold=0.3     # Threshold for gene-pattern association  
)  

Pattern Visualization and Pattern-specific Genes

First, let’s visualize the identified spatial patterns and analyze their characteristics:

# Visualize all spatial patterns
sim_slt = separ.sim_res(separ.Wpn, separ.Hpn, separ.Xt.T)
sim_argsort = np.argsort(-sim_slt)

num_patterns = 30
plt.figure(dpi=200, figsize=(24, 7))
for i in range(30):
    ii = sim_argsort[i]
    plt.subplot(3, np.int32(num_patterns/3), i + 1)
    plt.scatter(separ.loc[:, 0], separ.loc[:, 1], 
                c=separ.Wpn[:, ii].reshape(-1, 1), 
                s=1.2, cmap='Reds')
    plt.axis('off')
    plt.title(f'Pattern {i + 1}: {int(separ.genes_per_pattern[ii])} genes', 
              fontsize=12)
plt.tight_layout()
plt.show()

png

# Create and display sorted pattern-specific genes table  
print("\nTop Pattern-Specific Genes (Sorted by Pattern Significance):")  
print("-" * 100)  
print(f"{'Pattern':11} | {'Significance':11} | {'#Genes':8} | {'Top Genes'}")  
print("-" * 100)  

for rank, pattern_idx in enumerate(sim_argsort[:30]):  
    genes = pattern_genes[pattern_idx]  
    if len(genes) == 0:  
        gene_str = "None"  
    else:  
        gene_str = ", ".join(genes[:10])  
        if len(genes) > 10:  
            gene_str += "..."  
    
    print(f"Pattern {rank+1:<3} | {sim_slt[pattern_idx]:.4f}      | {len(genes):<8} | {gene_str}")  
print("-" * 100)  

Top Pattern-Specific Genes (Sorted by Pattern Significance):
----------------------------------------------------------------------------------------------------
Pattern     | Significance | #Genes   | Top Genes
----------------------------------------------------------------------------------------------------
Pattern 1   | 0.9338      | 753      | Npas4, Cpne6, Necab3, Egr4, Slc35d3, Hpcal4, Shisa8, Cdkn1a, Fam126a, Dusp14...
Pattern 2   | 0.8867      | 101      | Nrgn, Cartpt, Ctxn3, Prkcg, Dll1, C130073E24Rik, Camk4, Lamp5, Necab2, Gm45341...
Pattern 3   | 0.8674      | 108      | Clca3a1, Agt, Cebpd, Serpina3n, Gm34838, Matn4, Omp, S100a5, Kctd12, Hmgcs2...
Pattern 4   | 0.8625      | 183      | Shisa3, Lbhd2, Kcnk15, Nefm, Nefh, Spp1, Npr1, Tbx21, Lrrtm1, Nrn1...
Pattern 5   | 0.8586      | 204      | Nppa, Trh, AI593442, Cnr1, Calb1, Insm1, Gm11549, Gpx3, Fibcd1, Syndig1l...
Pattern 6   | 0.7912      | 32       | Fmod, Ogn, Nov, Slc6a20a, Slc13a4, Igf2, Mgp, Slc6a13, Col1a2, Aebp1...
Pattern 7   | 0.6884      | 17       | Igfbpl1, Prokr2, Naaa, Carhsp1, Sox11, Mobp, Id4, Prr18, Ednrb, Mbp...
Pattern 8   | 0.6852      | 33       | Barhl2, Cbln4, Cck, Coch, Adcyap1, Olfr115, Tmem40, Nptx1, Mafa, Cdhr1...
Pattern 9   | 0.4574      | 13       | Tmsb4x, Gm3839, Gm32401, Gramd1c, Gm3764, Gm10800, Clec18a, Caps2, Gm33838, Gm16863...
Pattern 10  | 0.4347      | 2        | Vip, Sst
Pattern 11  | 0.3793      | 1        | Palmd
Pattern 12  | 0.3730      | 5        | Hba-a2, Hbb-bt, Hba-a1, Hbb-bs, Acta2
Pattern 13  | 0.2760      | 0        | None
Pattern 14  | 0.2665      | 1        | Nme7
Pattern 15  | 0.2405      | 0        | None
Pattern 16  | 0.2232      | 0        | None
Pattern 17  | 0.2211      | 0        | None
Pattern 18  | 0.1995      | 0        | None
Pattern 19  | 0.1995      | 0        | None
Pattern 20  | 0.1954      | 0        | None
Pattern 21  | 0.1823      | 0        | None
Pattern 22  | 0.1805      | 0        | None
Pattern 23  | 0.1801      | 0        | None
Pattern 24  | 0.1801      | 0        | None
Pattern 25  | 0.1795      | 1        | Cdk8
Pattern 26  | 0.1714      | 0        | None
Pattern 27  | 0.1641      | 0        | None
Pattern 28  | 0.1573      | 0        | None
Pattern 29  | 0.1550      | 1        | Tmsb4x
Pattern 30  | 0.1541      | 0        | None
----------------------------------------------------------------------------------------------------

3. Spatially Variable Genes (SVGs) Analysis

# Get SVG results  
svg_results = separ.recognize_svgs(err_tol=0.7)

Visualize some top and bottom SVGs:

gene_ranking = svg_results['gene_ranking']
# Create a single figure with subplots for top SVGs and bottom non-SVGs  
plt.figure(figsize=(8.5/3*10, 5), dpi=80)  
plt.rcParams['font.size'] = 8  
# Plot top 10 SVGs  
for i in range(10):  
    plt.subplot(2, 10, i+1)  
    gene_idx = gene_ranking[i]  
    gene_name = adata.var_names[gene_idx]  
    gene_exp = separ.adata[:, gene_idx].X.toarray().flatten()  
    
    scatter = plt.scatter(separ.adata.obsm['spatial'][:, 0],   
                        separ.adata.obsm['spatial'][:, 1],  
                        c=gene_exp, s=5, cmap='Reds')  
    plt.title(f'Top SVG {i+1}: {gene_name}')  
    plt.axis('off')  
    plt.colorbar(scatter)  

# Plot bottom 10 non-SVGs  
for i in range(10):  
    plt.subplot(2, 10, i+11)  
    gene_idx = gene_ranking[-(i+1)]  
    gene_name = adata.var_names[gene_idx]  
    gene_exp = separ.adata[:, gene_idx].X.toarray().flatten()  
    
    scatter = plt.scatter(separ.adata.obsm['spatial'][:, 0],   
                        separ.adata.obsm['spatial'][:, 1],  
                        c=gene_exp, s=5, cmap='Reds')  
    plt.title(f'Bottom non-SVG {i+1}: {gene_name}')  
    plt.axis('off')  
    plt.colorbar(scatter)  

plt.tight_layout()  
plt.show()

png

5. Performing Clustering

# Perform clustering
cluster_res = separ.clustering(n_cluster=10, N1=16, N2=4)

Clustering Results Visualization

# Create visualization  
fig, ax = plt.subplots(figsize=(6, 5), dpi=100)  
scatter = ax.scatter(separ.loc[:, 0], separ.loc[:, 1],   
                    c=separ.labelres, s=1, cmap='tab10')  
plt.title("SEPAR Spatial Domains", fontsize=14)  
plt.axis('off')  

# Add legend  
legend = ax.legend(*scatter.legend_elements(),  
                  title="Clusters",  
                  bbox_to_anchor=(1.05, 0.5),  
                  loc='center left')  

plt.subplots_adjust(right=0.75)  
plt.show()  

png

References

[1] Dong, Kangning, and Shihua Zhang. “Deciphering spatial domains from spatially resolved transcriptomics with an adaptive graph attention auto-encoder.” Nature Communications 13.1 (2022): 1-12.