Spatial dense data construction with SpatialZ
Import packages and set configurations
[1]:
import warnings
warnings.filterwarnings("ignore")
[2]:
# Import SpatialZ package
from SpatialZ import *
[3]:
import scanpy as sc
import numpy as np
import pandas as pd
import anndata as ad
import os
import matplotlib.pyplot as plt
Data preprocessing
[4]:
def merge_h5ad_files(directory):
h5ad_files = [f for f in os.listdir(directory) if f.endswith('.h5ad')]
adatas = []
for file in h5ad_files:
adata = sc.read_h5ad(os.path.join(directory, file))
adatas.append(adata)
merged_adata = ad.concat(adatas, join='outer', merge='same')
return merged_adata
[5]:
directory_path = './data/merfish'
adata_all = merge_h5ad_files(directory_path)
[6]:
adata_all
[6]:
AnnData object with n_obs × n_vars = 28317 × 155
obs: 'cell_class', 'neuron_class', 'domain', 'Region', 'annotation', 'slice_id'
obsm: 'spatial'
[7]:
np.unique(adata_all.obs['slice_id'])
[7]:
array(['0.04', '0.09', '0.14', '0.19', '0.24'], dtype=object)
[8]:
all_section_order = np.unique(adata_all.obs['slice_id'])
all_section_order
[8]:
array(['0.04', '0.09', '0.14', '0.19', '0.24'], dtype=object)
[9]:
z_values = {slice_id: 400 * (i + 1) for i, slice_id in enumerate(all_section_order)}
adata_all.obs['Z'] = adata_all.obs['slice_id'].map(z_values)
[10]:
adata_all.obs['X'] = adata_all.obsm['spatial'][:, 0]
adata_all.obs['Y'] = adata_all.obsm['spatial'][:, 1]
[11]:
# Visualization on original sparsely sampling data
fig = plt.figure(figsize=(6, 6))
ax1 = fig.add_subplot(111, projection='3d')
for it, label in enumerate(all_section_order):
temp_Coor = adata_all.obs[adata_all.obs['slice_id'] == label]
temp_xd = temp_Coor['X']
temp_yd = temp_Coor['Y']
temp_zd = temp_Coor['Z']
ax1.scatter3D(temp_xd, temp_yd, temp_zd, s=1, marker="o", label=label)
ax1.set_xlabel('')
ax1.set_ylabel('')
ax1.set_zlabel('')
ax1.set_xticklabels([])
ax1.set_yticklabels([])
ax1.set_zticklabels([])
plt.legend(bbox_to_anchor=(1, 0.8), markerscale=1, frameon=False)
ax1.elev = 45
ax1.azim = -20
plt.show()
[12]:
adatas_id_list = ['0.04', '0.09', '0.14', '0.19', '0.24']
adatas_id_list
[12]:
['0.04', '0.09', '0.14', '0.19', '0.24']
[13]:
adata_list = [adata_all[adata_all.obs['slice_id'] == section_id].copy() for section_id in adatas_id_list]
adata_list
[13]:
[AnnData object with n_obs × n_vars = 5488 × 155
obs: 'cell_class', 'neuron_class', 'domain', 'Region', 'annotation', 'slice_id', 'Z', 'X', 'Y'
obsm: 'spatial',
AnnData object with n_obs × n_vars = 5557 × 155
obs: 'cell_class', 'neuron_class', 'domain', 'Region', 'annotation', 'slice_id', 'Z', 'X', 'Y'
obsm: 'spatial',
AnnData object with n_obs × n_vars = 5926 × 155
obs: 'cell_class', 'neuron_class', 'domain', 'Region', 'annotation', 'slice_id', 'Z', 'X', 'Y'
obsm: 'spatial',
AnnData object with n_obs × n_vars = 5803 × 155
obs: 'cell_class', 'neuron_class', 'domain', 'Region', 'annotation', 'slice_id', 'Z', 'X', 'Y'
obsm: 'spatial',
AnnData object with n_obs × n_vars = 5543 × 155
obs: 'cell_class', 'neuron_class', 'domain', 'Region', 'annotation', 'slice_id', 'Z', 'X', 'Y'
obsm: 'spatial']
[14]:
adata_all.obs
[14]:
| cell_class | neuron_class | domain | Region | annotation | slice_id | Z | X | Y | |
|---|---|---|---|---|---|---|---|---|---|
| Unnamed: 0 | |||||||||
| 1148x-733 | Microglia | NaN | MPA | MPA | MPA | 0.19 | 1600 | -893.764412 | -759.632252 |
| 1150x-713 | OD Mature 1 | NaN | MPA | MPA | MPA | 0.19 | 1600 | -892.355504 | -739.512027 |
| 1154x-761 | Pericytes | NaN | MPA | MPA | MPA | 0.19 | 1600 | -887.772490 | -787.484829 |
| 1162x-823 | Pericytes | NaN | MPA | MPA | MPA | 0.19 | 1600 | -879.402558 | -849.248706 |
| 1174x-690 | Microglia | NaN | MPA | MPA | MPA | 0.19 | 1600 | -868.628006 | -716.737968 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| -1431x4324 | Inhibitory | I-19 | BST | BST | BST | 0.04 | 400 | 717.016661 | 594.801919 |
| -1347x4422 | Inhibitory | I-11 | BST | BST | BST | 0.04 | 400 | 800.626757 | 692.290678 |
| -1365x4412 | Inhibitory | I-9 | BST | BST | BST | 0.04 | 400 | 782.569763 | 683.022899 |
| -1285x4407 | Inhibitory | I-1 | BST | BST | BST | 0.04 | 400 | 862.682919 | 678.065584 |
| -1246x4494 | Inhibitory | I-9 | BST | BST | BST | 0.04 | 400 | 902.229527 | 764.451035 |
28317 rows × 9 columns
[15]:
adata_all.X
[15]:
array([[0. , 0. , 0. , ..., 0.00976636, 0.00435998,
0. ],
[0. , 0. , 2.3637214 , ..., 0. , 0. ,
0. ],
[0. , 0. , 2.376603 , ..., 0.04077056, 0.01528226,
0. ],
...,
[0. , 0. , 0. , ..., 0. , 0.00727418,
0.00530444],
[0. , 0.6277586 , 3.1387577 , ..., 0. , 0.01738001,
0.02959162],
[0. , 0. , 0. , ..., 0. , 0.0042025 ,
0.00865513]], dtype=float32)
[16]:
number = 3
number_list = [number] * (len(adatas_id_list) - 1)
Running SpatialZ to generate multiple slices from a list of tissue sections
[17]:
import time
start_time = time.time()
adatas = Generate_multiple_slices(adata_list= adata_list,
num_sim_list = number_list,
adatas_id_list= adatas_id_list ,
save_path= './output',
device='cuda:0',
nb_iter_max=1000,
cell_type_key='cell_class',
k_sam=50,
syn_mode= 'default',
Beta = 1,
add_obs_list=['neuron_class', 'domain', 'Region', 'annotation'],
include_raw=True)
end_time = time.time()
Generating simulations for slices: 0%| | 0/4 [00:00<?, ?it/s]
Begin to generate spatial coordinates......
coordinate initialization time: 1.09 seconds
Iteration 0: Loss = 5.544589519500732
Ot optimization time: 7.88 seconds
Begin to determine cell types......
Cell type determination time: 5.62 seconds
Begin to transfer the attribute......
Transfer the attribute time: 0.13 seconds
Begin to calculate micro-environment measurement......
Micro-environment measurement time: 16.67 seconds
Begin to synthesize gene expression......
Gene expression synthesis time: 29.08 seconds
Completed 0.04-0.09-1 generated and saved!
Begin to generate spatial coordinates......
coordinate initialization time: 0.00 seconds
Iteration 0: Loss = 7.194208145141602
Ot optimization time: 7.17 seconds
Begin to determine cell types......
Cell type determination time: 5.84 seconds
Begin to transfer the attribute......
Transfer the attribute time: 0.16 seconds
Begin to calculate micro-environment measurement......
Micro-environment measurement time: 19.30 seconds
Begin to synthesize gene expression......
Gene expression synthesis time: 29.54 seconds
Completed 0.04-0.09-2 generated and saved!
Begin to generate spatial coordinates......
coordinate initialization time: 0.00 seconds
Iteration 0: Loss = 5.730836868286133
Ot optimization time: 7.05 seconds
Begin to determine cell types......
Cell type determination time: 6.11 seconds
Begin to transfer the attribute......
Transfer the attribute time: 0.14 seconds
Begin to calculate micro-environment measurement......
Micro-environment measurement time: 17.63 seconds
Begin to synthesize gene expression......
Generating simulations for slices: 25%|██▌ | 1/4 [03:03<09:10, 183.54s/it]
Gene expression synthesis time: 30.02 seconds
Completed 0.04-0.09-3 generated and saved!
Begin to generate spatial coordinates......
coordinate initialization time: 0.00 seconds
Iteration 0: Loss = 6.906851768493652
Ot optimization time: 7.26 seconds
Begin to determine cell types......
Cell type determination time: 5.79 seconds
Begin to transfer the attribute......
Transfer the attribute time: 0.14 seconds
Begin to calculate micro-environment measurement......
Micro-environment measurement time: 19.13 seconds
Begin to synthesize gene expression......
Gene expression synthesis time: 30.41 seconds
Completed 0.09-0.14-1 generated and saved!
Begin to generate spatial coordinates......
coordinate initialization time: 0.00 seconds
Iteration 0: Loss = 8.83037281036377
Ot optimization time: 7.31 seconds
Begin to determine cell types......
Cell type determination time: 5.84 seconds
Begin to transfer the attribute......
Transfer the attribute time: 0.14 seconds
Begin to calculate micro-environment measurement......
Micro-environment measurement time: 17.87 seconds
Begin to synthesize gene expression......
Gene expression synthesis time: 31.17 seconds
Completed 0.09-0.14-2 generated and saved!
Begin to generate spatial coordinates......
coordinate initialization time: 0.00 seconds
Iteration 0: Loss = 6.560633659362793
Ot optimization time: 7.31 seconds
Begin to determine cell types......
Cell type determination time: 5.88 seconds
Begin to transfer the attribute......
Transfer the attribute time: 0.14 seconds
Begin to calculate micro-environment measurement......
Micro-environment measurement time: 16.26 seconds
Begin to synthesize gene expression......
Generating simulations for slices: 50%|█████ | 2/4 [06:09<06:09, 184.99s/it]
Gene expression synthesis time: 31.24 seconds
Completed 0.09-0.14-3 generated and saved!
Begin to generate spatial coordinates......
coordinate initialization time: 0.00 seconds
Iteration 0: Loss = 4.106593608856201
Ot optimization time: 7.44 seconds
Begin to determine cell types......
Cell type determination time: 6.02 seconds
Begin to transfer the attribute......
Transfer the attribute time: 0.14 seconds
Begin to calculate micro-environment measurement......
Micro-environment measurement time: 18.66 seconds
Begin to synthesize gene expression......
Gene expression synthesis time: 31.74 seconds
Completed 0.14-0.19-1 generated and saved!
Begin to generate spatial coordinates......
coordinate initialization time: 0.00 seconds
Iteration 0: Loss = 5.262857437133789
Ot optimization time: 7.41 seconds
Begin to determine cell types......
Cell type determination time: 5.96 seconds
Begin to transfer the attribute......
Transfer the attribute time: 0.15 seconds
Begin to calculate micro-environment measurement......
Micro-environment measurement time: 19.50 seconds
Begin to synthesize gene expression......
Gene expression synthesis time: 32.16 seconds
Completed 0.14-0.19-2 generated and saved!
Begin to generate spatial coordinates......
coordinate initialization time: 0.00 seconds
Iteration 0: Loss = 4.242251873016357
Ot optimization time: 7.41 seconds
Begin to determine cell types......
Cell type determination time: 6.08 seconds
Begin to transfer the attribute......
Transfer the attribute time: 0.14 seconds
Begin to calculate micro-environment measurement......
Micro-environment measurement time: 18.72 seconds
Begin to synthesize gene expression......
Generating simulations for slices: 75%|███████▌ | 3/4 [09:22<03:08, 188.50s/it]
Gene expression synthesis time: 31.00 seconds
Completed 0.14-0.19-3 generated and saved!
Begin to generate spatial coordinates......
coordinate initialization time: 0.00 seconds
Iteration 0: Loss = 4.385869979858398
Ot optimization time: 7.26 seconds
Begin to determine cell types......
Cell type determination time: 5.99 seconds
Begin to transfer the attribute......
Transfer the attribute time: 0.14 seconds
Begin to calculate micro-environment measurement......
Micro-environment measurement time: 18.17 seconds
Begin to synthesize gene expression......
Gene expression synthesis time: 31.33 seconds
Completed 0.19-0.24-1 generated and saved!
Begin to generate spatial coordinates......
coordinate initialization time: 0.00 seconds
Iteration 0: Loss = 5.350555419921875
Ot optimization time: 7.21 seconds
Begin to determine cell types......
Cell type determination time: 5.77 seconds
Begin to transfer the attribute......
Transfer the attribute time: 0.14 seconds
Begin to calculate micro-environment measurement......
Micro-environment measurement time: 19.99 seconds
Begin to synthesize gene expression......
Gene expression synthesis time: 30.36 seconds
Completed 0.19-0.24-2 generated and saved!
Begin to generate spatial coordinates......
coordinate initialization time: 0.00 seconds
Iteration 0: Loss = 4.210763454437256
Ot optimization time: 7.18 seconds
Begin to determine cell types......
Cell type determination time: 5.78 seconds
Begin to transfer the attribute......
Transfer the attribute time: 0.14 seconds
Begin to calculate micro-environment measurement......
Micro-environment measurement time: 16.36 seconds
Begin to synthesize gene expression......
Generating simulations for slices: 100%|██████████| 4/4 [12:27<00:00, 187.00s/it]
Gene expression synthesis time: 29.85 seconds
Completed 0.19-0.24-3 generated and saved!
[18]:
elapsed_time = end_time - start_time
print(f"Time taken to create all data: {elapsed_time:.6f} seconds")
Time taken to create all data: 748.315443 seconds
[19]:
adatas
[19]:
AnnData object with n_obs × n_vars = 96717 × 155
obs: 'cell_class', 'neuron_class', 'domain', 'Region', 'annotation', 'slice_id', 'Z', 'X', 'Y', 'data_type'
obsm: 'spatial'
[20]:
np.unique(adatas.obs['slice_id'])
[20]:
array(['0.04', '0.04-0.09-1', '0.04-0.09-2', '0.04-0.09-3', '0.09',
'0.09-0.14-1', '0.09-0.14-2', '0.09-0.14-3', '0.14', '0.14-0.19-1',
'0.14-0.19-2', '0.14-0.19-3', '0.19', '0.19-0.24-1', '0.19-0.24-2',
'0.19-0.24-3', '0.24'], dtype=object)
[21]:
np.unique(adatas.obs['data_type'])
[21]:
array(['real', 'synthetic'], dtype=object)
[22]:
adatas.write_h5ad('./dense_data/merfish_interp3.h5ad')
Downstream analysis
[23]:
z_values = {slice_id: 100 * (i + 1) for i, slice_id in enumerate(np.unique(adatas.obs['slice_id']))}
adatas.obs['Z'] = adatas.obs['slice_id'].map(z_values)
[24]:
adatas.obs['X'] = adatas.obsm['spatial'][:, 0]
adatas.obs['Y'] = adatas.obsm['spatial'][:, 1]
[25]:
# Visualization on SpatialZ-reconstructed densely spatial data
fig = plt.figure(figsize=(6, 6))
ax1 = fig.add_subplot(111, projection='3d')
for it, label in enumerate(np.unique(adatas.obs['slice_id'])):
temp_Coor = adatas.obs[adatas.obs['slice_id'] == label]
temp_xd = temp_Coor['X']
temp_yd = temp_Coor['Y']
temp_zd = temp_Coor['Z']
ax1.scatter3D(temp_xd, temp_yd, temp_zd, s=1, marker="o", label=label)
ax1.set_xlabel('')
ax1.set_ylabel('')
ax1.set_zlabel('')
ax1.set_xticklabels([])
ax1.set_yticklabels([])
ax1.set_zticklabels([])
plt.legend(bbox_to_anchor=(1, 0.8), markerscale=1, frameon=False)
ax1.elev = 45
ax1.azim = -20
plt.show()
[26]:
sc.tl.pca(adatas, svd_solver='arpack')
sc.pp.neighbors(adatas)
sc.tl.umap(adatas,min_dist=0.8)
UMAP on the SpatialZ-reconstructed dense spatial data colored by different slices
[27]:
import scanpy as sc
import matplotlib.pyplot as plt
# Define the color palette for regions
slice_colors = {
'0.04': '#8B0000',
'0.04-0.09-1': '#4B0082',
'0.04-0.09-2': '#000080',
'0.04-0.09-3': '#228B22',
'0.09': '#556B2F',
'0.09-0.14-1': '#BDB76B',
'0.09-0.14-2': '#008B8B',
'0.09-0.14-3': '#006400',
'0.14': '#FF7AA3',
'0.14-0.19-1': '#D54151',
'0.14-0.19-2': '#F46C44',
'0.14-0.19-3': '#78A02D',
'0.19': '#549745',
'0.19-0.24-1': '#386DAA',
'0.19-0.24-2': '#86D1F8',
'0.19-0.24-3': '#B185DC',
'0.24': '#7051B1',
}
# Create a large figure to accommodate the subplots
fig, axs = plt.subplots(4, 5, figsize=(30, 20)) # Adjust figsize as needed
# Flatten the array of axes to simplify accessing them in the loop
axs = axs.flatten()
# Define the exact order of slice_ids as specified
slice_ids_order = [
'0.04', '0.04-0.09-1', '0.04-0.09-2', '0.04-0.09-3', '0.09',
'0.09', '0.09-0.14-1', '0.09-0.14-2', '0.09-0.14-3', '0.14',
'0.14', '0.14-0.19-1', '0.14-0.19-2', '0.14-0.19-3', '0.19',
'0.19', '0.19-0.24-1', '0.19-0.24-2', '0.19-0.24-3', '0.24'
]
# Loop through each specified slice_id and plot
for i, slice_id in enumerate(slice_ids_order):
use_adata = adatas[adatas.obs['slice_id'] == slice_id]
title = f'Slice ID: {slice_id}'
ax = sc.pl.umap(adatas, color='slice_id', size=10, title=title,
groups=[slice_id],
palette=slice_colors,
legend_loc='none', frameon=False, show=False, ax=axs[i])
axs[i].set_xlabel('')
axs[i].set_ylabel('')
axs[i].set_axis_off()
plt.tight_layout() # Adjust layout to
Cell type distribution on the UMAP in the SpatialZ-reconstructed densely spatial data
[28]:
ct_colors = {
'Astrocyte': '#bcbd22',
'Endothelial 1': '#ff7f0e',
'Endothelial 2': '#1f77b4',
'Endothelial 3': '#9467bd',
'Ependymal': '#17becf',
'Excitatory': '#e377c2',
'Inhibitory': '#2ca02c',
'Microglia': '#ff9896',
'Pericytes': '#8c564b',
'OD Immature 1': '#f7b6d2',
'OD Immature 2': '#c49c94',
'OD Mature 1': '#aec7e8',
'OD Mature 2': '#ffbb78',
'OD Mature 3': '#98df8a',
'OD Mature 4': '#7f7f7f'
}
# Create a large figure to accommodate the subplots
fig, axs = plt.subplots(4, 5, figsize=(25, 20)) # Adjust figsize as needed
# Flatten the array of axes to simplify accessing them in the loop
axs = axs.flatten()
# Define the exact order of slice_ids as specified
slice_ids_order = [
'0.04', '0.04-0.09-1', '0.04-0.09-2', '0.04-0.09-3', '0.09',
'0.09', '0.09-0.14-1', '0.09-0.14-2', '0.09-0.14-3', '0.14',
'0.14', '0.14-0.19-1', '0.14-0.19-2', '0.14-0.19-3', '0.19',
'0.19', '0.19-0.24-1', '0.19-0.24-2', '0.19-0.24-3', '0.24'
]
# Loop through each specified slice_id and plot
for i, slice_id in enumerate(slice_ids_order):
use_adata = adatas[adatas.obs['slice_id'] == slice_id]
title = f'Slice ID: {slice_id}'
ax = sc.pl.umap(use_adata, color='cell_class', size=10, title=title, palette=ct_colors,
legend_loc=None, frameon=False, show=False, ax=axs[i])
axs[i].set_xlabel('')
axs[i].set_ylabel('')
axs[i].set_axis_off()
plt.tight_layout() # Adjust layout to minimize overlap
plt.show() # Display the plot
Molecular-defined cell type visualization under the spatial context
[29]:
ct_colors = {
'Astrocyte': '#bcbd22',
'Endothelial 1': '#ff7f0e',
'Endothelial 2': '#1f77b4',
'Endothelial 3': '#9467bd',
'Ependymal': '#17becf',
'Excitatory': '#e377c2',
'Inhibitory': '#2ca02c',
'Microglia': '#ff9896',
'Pericytes': '#8c564b',
'OD Immature 1': '#f7b6d2',
'OD Immature 2': '#c49c94',
'OD Mature 1': '#aec7e8',
'OD Mature 2': '#ffbb78',
'OD Mature 3': '#98df8a',
'OD Mature 4': '#7f7f7f'
}
# Create a large figure to accommodate the subplots
fig, axs = plt.subplots(4, 5, figsize=(25, 20)) # Adjust figsize as needed
# Flatten the array of axes to simplify accessing them in the loop
axs = axs.flatten()
# Define the exact order of slice_ids as specified
slice_ids_order = [
'0.04', '0.04-0.09-1', '0.04-0.09-2', '0.04-0.09-3', '0.09',
'0.09', '0.09-0.14-1', '0.09-0.14-2', '0.09-0.14-3', '0.14',
'0.14', '0.14-0.19-1', '0.14-0.19-2', '0.14-0.19-3', '0.19',
'0.19', '0.19-0.24-1', '0.19-0.24-2', '0.19-0.24-3', '0.24'
]
# Loop through each specified slice_id and plot
for i, slice_id in enumerate(slice_ids_order):
use_adata = adatas[adatas.obs['slice_id'] == slice_id]
title = f'Slice ID: {slice_id}'
# Use Scanpy's plotting function
ax = sc.pl.embedding(use_adata, basis='spatial', color='cell_class', title=title, palette=ct_colors,
size=30, alpha=1, show=False, legend_loc='none', ax=axs[i])
axs[i].set_xlabel('')
axs[i].set_ylabel('')
axs[i].set_aspect('equal')
axs[i].set_axis_off()
plt.tight_layout() # Adjust layout to minimize overlap
plt.show() # Display the plot
Transferred label visualization under the spatial context
[30]:
# Define the color palette for regions
label_colors = {
'PVH': '#A2C7E5',
'PVT': '#C47BB4',
'BST': '#B099C9',
'fx': '#75AC74',
'V3': '#ED9860',
'PV': '#549745',
'MPA': '#DAB6D1',
'MPN': '#847FB5'
}
# Create a large figure to accommodate the subplots
fig, axs = plt.subplots(4, 5, figsize=(25, 20)) # Adjust figsize as needed
# Flatten the array of axes to simplify accessing them in the loop
axs = axs.flatten()
# Define the exact order of slice_ids as specified
slice_ids_order = [
'0.04', '0.04-0.09-1', '0.04-0.09-2', '0.04-0.09-3', '0.09',
'0.09', '0.09-0.14-1', '0.09-0.14-2', '0.09-0.14-3', '0.14',
'0.14', '0.14-0.19-1', '0.14-0.19-2', '0.14-0.19-3', '0.19',
'0.19', '0.19-0.24-1', '0.19-0.24-2', '0.19-0.24-3', '0.24'
]
# Loop through each specified slice_id and plot
for i, slice_id in enumerate(slice_ids_order):
use_adata = adatas[adatas.obs['slice_id'] == slice_id]
title = f'Slice ID: {slice_id}'
# Use Scanpy's plotting function
ax = sc.pl.embedding(use_adata, basis='spatial', color='Region', title=title, palette=label_colors,
size=30, alpha=1, show=False, legend_loc='none', ax=axs[i])
axs[i].set_xlabel('')
axs[i].set_ylabel('')
axs[i].set_aspect('equal')
axs[i].set_axis_off()
plt.tight_layout() # Adjust layout to minimize overlap
plt.show() # Display the plot