Tutorial 2: mouse olfactory bulb generated by Stereo-seq

This tutorial demonstrates the analysis of spatial transcriptomics data from the mouse olfactory bulb, generated using Stereo-seq technology. The dataset is publicly available through Zenodo.

For quality control purposes, we preprocessed the data by filtering out spots located outside the main tissue region following the STAGATE method[1]. The curated dataset containing only the valid tissue spots can be accessed via shared Google Drive repository.


Loading and Preparing Data

import pandas as pd  
import numpy as np  
import scanpy as sc  
import anndata as ad  
import matplotlib.pyplot as plt  
from sklearn.metrics import adjusted_rand_score  
from sklearn.metrics.cluster import normalized_mutual_info_score as nmi  
from SEPAR_model import SEPAR  
import os
filepath = 'dataset/Stereo/Dataset1_LiuLongQi_MouseOlfactoryBulb'
counts_file = os.path.join(filepath, 'RNAcounts.h5ad')
coor_file = os.path.join(filepath, 'position.tsv')
# Load spatial coordinates  
coor_df = pd.read_csv(coor_file, sep='\t')  
coor_df.index = coor_df.index.map(lambda x: 'Spot_'+str(x+1))  
coor_df = coor_df.loc[:, ['x','y']]  

# Read count data  
adata = sc.read_h5ad(filename=counts_file)  
adata.var_names_make_unique()  

# Add spatial coordinates  
adata.obsm["spatial"] = coor_df.to_numpy()[:,[1,0]]  

# Quality control  
sc.pp.calculate_qc_metrics(adata, inplace=True)  

used_barcode = pd.read_csv('dataset/Stereo/Dataset1_LiuLongQi_MouseOlfactoryBulb/used_barcodes.txt',sep='\t', header=None)
used_barcode = used_barcode[0]
adata = adata[used_barcode,]
# Initialize SEPAR  
separ = SEPAR(adata, n_cluster=8)  

Data Preprocessing

# Preprocess the data  
separ.preprocess(min_cells=50)  

# Compute spatial graph  
separ.compute_graph()  

# Select features using Moran's I  
separ.select_morani(nslt=2000)  

# Compute weights  
separ.compute_weight(n_cluster=10)  
After filtering:  (19109, 14376)

png

Counting moran's i ...

Finish selecting

1. Running SEPAR Algorithm

# Run SEPAR algorithm  
separ.separ_algorithm(  
    r=30,              # Number of spatial patterns  
    alpha=0.8,         # Graph regularization weight  
    beta=0.05,         # Sparsity penalty weight (previously l1)  
    gamma=0.5          # Pattern orthogonality weight (previously lam)  
)
Processing iterations: 100%|██████████| 100/100 [02:11<00:00,  1.32s/it]

2. Identifying Pattern-Specific Genes

# Identify pattern-specific genes  
pattern_genes = separ.identify_pattern_specific_genes(  
    n_patterns=30,    # Number of patterns to consider  
    threshold=0.3     # Threshold for gene-pattern association  
)  

Pattern Visualization and Pattern-specific Genes

First, let’s visualize the identified spatial patterns and analyze their characteristics:

# Visualize all spatial patterns
sim_slt = separ.sim_res(separ.Wpn, separ.Hpn, separ.Xt.T)
sim_argsort = np.argsort(-sim_slt)

num_patterns = 30
plt.figure(dpi=200, figsize=(24, 7))
for i in range(30):
    ii = sim_argsort[i]
    plt.subplot(3, np.int32(num_patterns/3), i + 1)
    plt.scatter(separ.loc[:, 0], separ.loc[:, 1], 
                c=separ.Wpn[:, ii].reshape(-1, 1), 
                s=1.2, cmap='Reds')
    plt.axis('off')
    plt.title(f'Pattern {i + 1}: {int(separ.genes_per_pattern[ii])} genes', 
              fontsize=12)
plt.tight_layout()
plt.show()

png

# Create and display sorted pattern-specific genes table  
print("\nTop Pattern-Specific Genes (Sorted by Pattern Significance):")  
print("-" * 100)  
print(f"{'Pattern':11} | {'Significance':11} | {'#Genes':8} | {'Top Genes'}")  
print("-" * 100)  

for rank, pattern_idx in enumerate(sim_argsort[:30]):  
    genes = pattern_genes[pattern_idx]  
    if len(genes) == 0:  
        gene_str = "None"  
    else:  
        gene_str = ", ".join(genes[:10])  
        if len(genes) > 10:  
            gene_str += "..."  
    
    print(f"Pattern {rank+1:<3} | {sim_slt[pattern_idx]:.4f}      | {len(genes):<8} | {gene_str}")  
print("-" * 100)  
Top Pattern-Specific Genes (Sorted by Pattern Significance):
----------------------------------------------------------------------------------------------------
Pattern     | Significance | #Genes   | Top Genes
----------------------------------------------------------------------------------------------------
Pattern 1   | 0.9338      | 753      | Npas4, Cpne6, Necab3, Egr4, Slc35d3, Hpcal4, Shisa8, Cdkn1a, Fam126a, Dusp14...
Pattern 2   | 0.8867      | 101      | Nrgn, Cartpt, Ctxn3, Prkcg, Dll1, C130073E24Rik, Camk4, Lamp5, Necab2, Gm45341...
Pattern 3   | 0.8674      | 108      | Clca3a1, Agt, Cebpd, Serpina3n, Gm34838, Matn4, Omp, S100a5, Kctd12, Hmgcs2...
Pattern 4   | 0.8625      | 183      | Shisa3, Lbhd2, Kcnk15, Nefm, Nefh, Spp1, Npr1, Tbx21, Lrrtm1, Nrn1...
Pattern 5   | 0.8586      | 204      | Nppa, Trh, AI593442, Cnr1, Calb1, Insm1, Gm11549, Gpx3, Fibcd1, Syndig1l...
Pattern 6   | 0.7912      | 32       | Fmod, Ogn, Nov, Slc6a20a, Slc13a4, Igf2, Mgp, Slc6a13, Col1a2, Aebp1...
Pattern 7   | 0.6884      | 17       | Igfbpl1, Prokr2, Naaa, Carhsp1, Sox11, Mobp, Id4, Prr18, Ednrb, Mbp...
Pattern 8   | 0.6852      | 33       | Barhl2, Cbln4, Cck, Coch, Adcyap1, Olfr115, Tmem40, Nptx1, Mafa, Cdhr1...
Pattern 9   | 0.4574      | 13       | Tmsb4x, Gm3839, Gm32401, Gramd1c, Gm3764, Gm10800, Clec18a, Caps2, Gm33838, Gm16863...
Pattern 10  | 0.4347      | 2        | Vip, Sst
Pattern 11  | 0.3793      | 1        | Palmd
Pattern 12  | 0.3730      | 5        | Hba-a2, Hbb-bt, Hba-a1, Hbb-bs, Acta2
Pattern 13  | 0.2760      | 0        | None
Pattern 14  | 0.2665      | 1        | Nme7
Pattern 15  | 0.2405      | 0        | None
Pattern 16  | 0.2232      | 0        | None
Pattern 17  | 0.2211      | 0        | None
Pattern 18  | 0.1995      | 0        | None
Pattern 19  | 0.1995      | 0        | None
Pattern 20  | 0.1954      | 0        | None
Pattern 21  | 0.1823      | 0        | None
Pattern 22  | 0.1805      | 0        | None
Pattern 23  | 0.1801      | 0        | None
Pattern 24  | 0.1801      | 0        | None
Pattern 25  | 0.1795      | 1        | Cdk8
Pattern 26  | 0.1714      | 0        | None
Pattern 27  | 0.1641      | 0        | None
Pattern 28  | 0.1573      | 0        | None
Pattern 29  | 0.1550      | 1        | Tmsb4x
Pattern 30  | 0.1541      | 0        | None
----------------------------------------------------------------------------------------------------

3. Spatially Variable Genes (SVGs) Analysis

# Get SVG results  
svg_results = separ.recognize_svgs(err_tol=0.7)

Visualize some top and bottom SVGs:

gene_ranking = svg_results['gene_ranking']
# Create a single figure with subplots for top SVGs and bottom non-SVGs  
plt.figure(figsize=(8.5/3*10, 5), dpi=80)  
plt.rcParams['font.size'] = 8  
# Plot top 10 SVGs  
for i in range(10):  
    plt.subplot(2, 10, i+1)  
    gene_idx = gene_ranking[i]  
    gene_name = adata.var_names[gene_idx]  
    gene_exp = separ.adata[:, gene_idx].X.toarray().flatten()  
    
    scatter = plt.scatter(separ.adata.obsm['spatial'][:, 0],   
                        separ.adata.obsm['spatial'][:, 1],  
                        c=gene_exp, s=5, cmap='Reds')  
    plt.title(f'Top SVG {i+1}: {gene_name}')  
    plt.axis('off')  
    plt.colorbar(scatter)  

# Plot bottom 10 non-SVGs  
for i in range(10):  
    plt.subplot(2, 10, i+11)  
    gene_idx = gene_ranking[-(i+1)]  
    gene_name = adata.var_names[gene_idx]  
    gene_exp = separ.adata[:, gene_idx].X.toarray().flatten()  
    
    scatter = plt.scatter(separ.adata.obsm['spatial'][:, 0],   
                        separ.adata.obsm['spatial'][:, 1],  
                        c=gene_exp, s=5, cmap='Reds')  
    plt.title(f'Bottom non-SVG {i+1}: {gene_name}')  
    plt.axis('off')  
    plt.colorbar(scatter)  

plt.tight_layout()  
plt.show()

png

4. Gene Expression Refinement

adata_refined = separ.get_refined_expression()

Evaluating Refinement Performance

First, we use Moran’s I statistic to quantitatively evaluate the improvement in spatial patterns. Moran’s I measures spatial autocorrelation, with higher values indicating stronger spatial patterns. The violin plot below compares the distribution of Moran’s I values between raw and refined expression data across all genes:

# Calculate Moran's I for both raw and refined data  
morani = sc.metrics.morans_i(separ.adata)  
morani_refine = sc.metrics.morans_i(adata_refined)  

# Create DataFrame for violin plot  
data_violin = pd.DataFrame({  
    'Moran\'s I': np.concatenate([morani, morani_refine]),  
    'Condition': ['Raw'] * len(morani) + ['Refined'] * len(morani_refine)  
})  

import seaborn as sns
# Create violin plot  
plt.rcParams['font.size'] = 16  
plt.figure(figsize=(3, 5.5))  
sns.violinplot(x='Condition', y='Moran\'s I', data=data_violin, palette='Set2')  
plt.ylabel('Moran\'s I')  
plt.xlabel('')  
plt.show()  

png

Single Gene Visualization

To demonstrate the refinement effect at individual gene level, we visualize the spatial expression pattern of a single gene. The comparison shows how SEPAR preserves and enhances genuine spatial patterns while reducing technical noise:

def plot_gene_refinement(gene_name, separ, adata_refined):  
    """  
    Visualize raw and refined expression patterns for a specific gene.  
    
    Parameters  
    ----------  
    gene_name : str  
        Name of the gene to visualize  
    separ : SEPAR object  
        SEPAR object containing raw data  
    adata_refined : AnnData  
        Refined expression data from get_refined_expression()  
    """  
    # Get gene index and expression values  
    gene_idx = separ.adata.var_names.get_loc(gene_name)  
    raw_exp = separ.adata[:, gene_idx].X.toarray().flatten()  
    refined_exp = adata_refined.X[:, gene_idx]  

    # Create comparison plot  
    plt.rcParams['font.size'] = 16  
    plt.figure(dpi=100, figsize=(7/1.2, 4/1.2))  
    
    # Set main title  
    plt.suptitle(gene_name, fontsize=18)  
    
    # Plot raw expression  
    plt.subplot(1, 2, 1)  
    plt.scatter(separ.loc[:, 0], separ.loc[:, 1],  
                c=raw_exp, s=0.3, cmap='Reds', rasterized=True)  
    plt.axis('off')  
    plt.title('Raw', fontsize=16)  
    
    # Plot refined expression  
    plt.subplot(1, 2, 2)  
    plt.scatter(separ.loc[:, 0], separ.loc[:, 1],  
                c=refined_exp, s=0.3, cmap='Reds', rasterized=True)  
    plt.axis('off')  
    plt.title('Refined', fontsize=16)  
    
    plt.tight_layout(pad=0)  
    plt.show()  

plot_gene_refinement("Gad1", separ, adata_refined)

png

plot_gene_refinement("Tbx21", separ, adata_refined) 

png

plot_gene_refinement("Omp", separ, adata_refined) 

png

5. Performing Clustering

# Perform clustering
cluster_res = separ.clustering(n_cluster=10, N1=16, N2=4)

Clustering Results Visualization

# Create visualization  
fig, ax = plt.subplots(figsize=(6, 5), dpi=100)  
scatter = ax.scatter(separ.loc[:, 0], separ.loc[:, 1],   
                    c=separ.labelres, s=1, cmap='tab10')  
plt.title("SEPAR Spatial Domains", fontsize=14)  
plt.axis('off')  

# Add legend  
legend = ax.legend(*scatter.legend_elements(),  
                  title="Clusters",  
                  bbox_to_anchor=(1.05, 0.5),  
                  loc='center left')  

plt.subplots_adjust(right=0.75)  
plt.show()  

png

References

[1] Dong, Kangning, and Shihua Zhang. “Deciphering spatial domains from spatially resolved transcriptomics with an adaptive graph attention auto-encoder.” Nature Communications 13.1 (2022): 1-12.