# Tutorial 2: mouse olfactory bulb generated by Stereo-seq This tutorial demonstrates the analysis of spatial transcriptomics data from the mouse olfactory bulb, generated using Stereo-seq technology. The dataset is publicly available through [Zenodo](https://doi.org/10.5281/zenodo.8356092). For quality control purposes, we preprocessed the data by filtering out spots located outside the main tissue region following the STAGATE method[1]. The curated dataset containing only the valid tissue spots can be accessed via [shared Google Drive repository](https://drive.google.com/drive/folders/10lhz5VY7YfvHrtV40MwaqLmWz56U9eBP?usp=sharing). --- ## Loading and Preparing Data ```python import pandas as pd import numpy as np import scanpy as sc import anndata as ad import matplotlib.pyplot as plt from sklearn.metrics import adjusted_rand_score from sklearn.metrics.cluster import normalized_mutual_info_score as nmi from SEPAR_model import SEPAR ``` ```python import os filepath = 'dataset/Stereo/Dataset1_LiuLongQi_MouseOlfactoryBulb' counts_file = os.path.join(filepath, 'RNAcounts.h5ad') coor_file = os.path.join(filepath, 'position.tsv') # Load spatial coordinates coor_df = pd.read_csv(coor_file, sep='\t') coor_df.index = coor_df.index.map(lambda x: 'Spot_'+str(x+1)) coor_df = coor_df.loc[:, ['x','y']] # Read count data adata = sc.read_h5ad(filename=counts_file) adata.var_names_make_unique() # Add spatial coordinates adata.obsm["spatial"] = coor_df.to_numpy()[:,[1,0]] # Quality control sc.pp.calculate_qc_metrics(adata, inplace=True) used_barcode = pd.read_csv('dataset/Stereo/Dataset1_LiuLongQi_MouseOlfactoryBulb/used_barcodes.txt',sep='\t', header=None) used_barcode = used_barcode[0] adata = adata[used_barcode,] # Initialize SEPAR separ = SEPAR(adata, n_cluster=8) ``` ## Data Preprocessing ```python # Preprocess the data separ.preprocess(min_cells=50) # Compute spatial graph separ.compute_graph() # Select features using Moran's I separ.select_morani(nslt=2000) # Compute weights separ.compute_weight(n_cluster=10) ``` After filtering: (19109, 14376) ![png](Tutorial2_files/Tutorial2_4_1.png) Counting moran's i ... Finish selecting ## 1. Running SEPAR Algorithm ```python # Run SEPAR algorithm separ.separ_algorithm( r=30, # Number of spatial patterns alpha=0.8, # Graph regularization weight beta=0.05, # Sparsity penalty weight (previously l1) gamma=0.5 # Pattern orthogonality weight (previously lam) ) ``` Processing iterations: 100%|██████████| 100/100 [02:11<00:00, 1.32s/it] ## 2. Identifying Pattern-Specific Genes ```python # Identify pattern-specific genes pattern_genes = separ.identify_pattern_specific_genes( n_patterns=30, # Number of patterns to consider threshold=0.3 # Threshold for gene-pattern association ) ``` ### Pattern Visualization and Pattern-specific Genes First, let's visualize the identified spatial patterns and analyze their characteristics: ```python # Visualize all spatial patterns sim_slt = separ.sim_res(separ.Wpn, separ.Hpn, separ.Xt.T) sim_argsort = np.argsort(-sim_slt) num_patterns = 30 plt.figure(dpi=200, figsize=(24, 7)) for i in range(30): ii = sim_argsort[i] plt.subplot(3, np.int32(num_patterns/3), i + 1) plt.scatter(separ.loc[:, 0], separ.loc[:, 1], c=separ.Wpn[:, ii].reshape(-1, 1), s=1.2, cmap='Reds') plt.axis('off') plt.title(f'Pattern {i + 1}: {int(separ.genes_per_pattern[ii])} genes', fontsize=12) plt.tight_layout() plt.show() ``` ![png](Tutorial2_files/Tutorial2_10_0.png) ```python # Create and display sorted pattern-specific genes table print("\nTop Pattern-Specific Genes (Sorted by Pattern Significance):") print("-" * 100) print(f"{'Pattern':11} | {'Significance':11} | {'#Genes':8} | {'Top Genes'}") print("-" * 100) for rank, pattern_idx in enumerate(sim_argsort[:30]): genes = pattern_genes[pattern_idx] if len(genes) == 0: gene_str = "None" else: gene_str = ", ".join(genes[:10]) if len(genes) > 10: gene_str += "..." print(f"Pattern {rank+1:<3} | {sim_slt[pattern_idx]:.4f} | {len(genes):<8} | {gene_str}") print("-" * 100) ``` Top Pattern-Specific Genes (Sorted by Pattern Significance): ---------------------------------------------------------------------------------------------------- Pattern | Significance | #Genes | Top Genes ---------------------------------------------------------------------------------------------------- Pattern 1 | 0.9338 | 753 | Npas4, Cpne6, Necab3, Egr4, Slc35d3, Hpcal4, Shisa8, Cdkn1a, Fam126a, Dusp14... Pattern 2 | 0.8867 | 101 | Nrgn, Cartpt, Ctxn3, Prkcg, Dll1, C130073E24Rik, Camk4, Lamp5, Necab2, Gm45341... Pattern 3 | 0.8674 | 108 | Clca3a1, Agt, Cebpd, Serpina3n, Gm34838, Matn4, Omp, S100a5, Kctd12, Hmgcs2... Pattern 4 | 0.8625 | 183 | Shisa3, Lbhd2, Kcnk15, Nefm, Nefh, Spp1, Npr1, Tbx21, Lrrtm1, Nrn1... Pattern 5 | 0.8586 | 204 | Nppa, Trh, AI593442, Cnr1, Calb1, Insm1, Gm11549, Gpx3, Fibcd1, Syndig1l... Pattern 6 | 0.7912 | 32 | Fmod, Ogn, Nov, Slc6a20a, Slc13a4, Igf2, Mgp, Slc6a13, Col1a2, Aebp1... Pattern 7 | 0.6884 | 17 | Igfbpl1, Prokr2, Naaa, Carhsp1, Sox11, Mobp, Id4, Prr18, Ednrb, Mbp... Pattern 8 | 0.6852 | 33 | Barhl2, Cbln4, Cck, Coch, Adcyap1, Olfr115, Tmem40, Nptx1, Mafa, Cdhr1... Pattern 9 | 0.4574 | 13 | Tmsb4x, Gm3839, Gm32401, Gramd1c, Gm3764, Gm10800, Clec18a, Caps2, Gm33838, Gm16863... Pattern 10 | 0.4347 | 2 | Vip, Sst Pattern 11 | 0.3793 | 1 | Palmd Pattern 12 | 0.3730 | 5 | Hba-a2, Hbb-bt, Hba-a1, Hbb-bs, Acta2 Pattern 13 | 0.2760 | 0 | None Pattern 14 | 0.2665 | 1 | Nme7 Pattern 15 | 0.2405 | 0 | None Pattern 16 | 0.2232 | 0 | None Pattern 17 | 0.2211 | 0 | None Pattern 18 | 0.1995 | 0 | None Pattern 19 | 0.1995 | 0 | None Pattern 20 | 0.1954 | 0 | None Pattern 21 | 0.1823 | 0 | None Pattern 22 | 0.1805 | 0 | None Pattern 23 | 0.1801 | 0 | None Pattern 24 | 0.1801 | 0 | None Pattern 25 | 0.1795 | 1 | Cdk8 Pattern 26 | 0.1714 | 0 | None Pattern 27 | 0.1641 | 0 | None Pattern 28 | 0.1573 | 0 | None Pattern 29 | 0.1550 | 1 | Tmsb4x Pattern 30 | 0.1541 | 0 | None ---------------------------------------------------------------------------------------------------- ## 3. Spatially Variable Genes (SVGs) Analysis ```python # Get SVG results svg_results = separ.recognize_svgs(err_tol=0.7) ``` Visualize some top and bottom SVGs: ```python gene_ranking = svg_results['gene_ranking'] # Create a single figure with subplots for top SVGs and bottom non-SVGs plt.figure(figsize=(8.5/3*10, 5), dpi=80) plt.rcParams['font.size'] = 8 # Plot top 10 SVGs for i in range(10): plt.subplot(2, 10, i+1) gene_idx = gene_ranking[i] gene_name = adata.var_names[gene_idx] gene_exp = separ.adata[:, gene_idx].X.toarray().flatten() scatter = plt.scatter(separ.adata.obsm['spatial'][:, 0], separ.adata.obsm['spatial'][:, 1], c=gene_exp, s=5, cmap='Reds') plt.title(f'Top SVG {i+1}: {gene_name}') plt.axis('off') plt.colorbar(scatter) # Plot bottom 10 non-SVGs for i in range(10): plt.subplot(2, 10, i+11) gene_idx = gene_ranking[-(i+1)] gene_name = adata.var_names[gene_idx] gene_exp = separ.adata[:, gene_idx].X.toarray().flatten() scatter = plt.scatter(separ.adata.obsm['spatial'][:, 0], separ.adata.obsm['spatial'][:, 1], c=gene_exp, s=5, cmap='Reds') plt.title(f'Bottom non-SVG {i+1}: {gene_name}') plt.axis('off') plt.colorbar(scatter) plt.tight_layout() plt.show() ``` ![png](Tutorial2_files/Tutorial2_15_0.png) ## 4. Gene Expression Refinement ```python adata_refined = separ.get_refined_expression() ``` ### Evaluating Refinement Performance First, we use Moran's I statistic to quantitatively evaluate the improvement in spatial patterns. Moran's I measures spatial autocorrelation, with higher values indicating stronger spatial patterns. The violin plot below compares the distribution of Moran's I values between raw and refined expression data across all genes: ```python # Calculate Moran's I for both raw and refined data morani = sc.metrics.morans_i(separ.adata) morani_refine = sc.metrics.morans_i(adata_refined) # Create DataFrame for violin plot data_violin = pd.DataFrame({ 'Moran\'s I': np.concatenate([morani, morani_refine]), 'Condition': ['Raw'] * len(morani) + ['Refined'] * len(morani_refine) }) import seaborn as sns # Create violin plot plt.rcParams['font.size'] = 16 plt.figure(figsize=(3, 5.5)) sns.violinplot(x='Condition', y='Moran\'s I', data=data_violin, palette='Set2') plt.ylabel('Moran\'s I') plt.xlabel('') plt.show() ``` ![png](Tutorial2_files/Tutorial2_19_0.png) ### Single Gene Visualization To demonstrate the refinement effect at individual gene level, we visualize the spatial expression pattern of a single gene. The comparison shows how SEPAR preserves and enhances genuine spatial patterns while reducing technical noise: ```python def plot_gene_refinement(gene_name, separ, adata_refined): """ Visualize raw and refined expression patterns for a specific gene. Parameters ---------- gene_name : str Name of the gene to visualize separ : SEPAR object SEPAR object containing raw data adata_refined : AnnData Refined expression data from get_refined_expression() """ # Get gene index and expression values gene_idx = separ.adata.var_names.get_loc(gene_name) raw_exp = separ.adata[:, gene_idx].X.toarray().flatten() refined_exp = adata_refined.X[:, gene_idx] # Create comparison plot plt.rcParams['font.size'] = 16 plt.figure(dpi=100, figsize=(7/1.2, 4/1.2)) # Set main title plt.suptitle(gene_name, fontsize=18) # Plot raw expression plt.subplot(1, 2, 1) plt.scatter(separ.loc[:, 0], separ.loc[:, 1], c=raw_exp, s=0.3, cmap='Reds', rasterized=True) plt.axis('off') plt.title('Raw', fontsize=16) # Plot refined expression plt.subplot(1, 2, 2) plt.scatter(separ.loc[:, 0], separ.loc[:, 1], c=refined_exp, s=0.3, cmap='Reds', rasterized=True) plt.axis('off') plt.title('Refined', fontsize=16) plt.tight_layout(pad=0) plt.show() plot_gene_refinement("Gad1", separ, adata_refined) ``` ![png](Tutorial2_files/Tutorial2_21_0.png) ```python plot_gene_refinement("Tbx21", separ, adata_refined) ``` ![png](Tutorial2_files/Tutorial2_22_0.png) ```python plot_gene_refinement("Omp", separ, adata_refined) ``` ![png](Tutorial2_files/Tutorial2_23_0.png) ## 5. Performing Clustering ```python # Perform clustering cluster_res = separ.clustering(n_cluster=10, N1=16, N2=4) ``` ### Clustering Results Visualization ```python # Create visualization fig, ax = plt.subplots(figsize=(6, 5), dpi=100) scatter = ax.scatter(separ.loc[:, 0], separ.loc[:, 1], c=separ.labelres, s=1, cmap='tab10') plt.title("SEPAR Spatial Domains", fontsize=14) plt.axis('off') # Add legend legend = ax.legend(*scatter.legend_elements(), title="Clusters", bbox_to_anchor=(1.05, 0.5), loc='center left') plt.subplots_adjust(right=0.75) plt.show() ``` ![png](Tutorial2_files/Tutorial2_27_0.png) ## References [1] Dong, Kangning, and Shihua Zhang. “Deciphering spatial domains from spatially resolved transcriptomics with an adaptive graph attention auto-encoder.” Nature Communications 13.1 (2022): 1-12.