slideSeq Dataset - Part 2. Clustering and Spatial Domains

The slideSeq data to run this tutorial can be found here

Clustering analysis

library(Giotto)
#data loading
bead_positions <- fread(file="2019_slideseq_cerebellum/cell_locations/BeadLocationsForR.csv")
expr_matrix<-fread(file="2019_slideseq_cerebellum/raw_data/MappedDGEForR.csv")
expr_mat = as.matrix(expr_matrix[,-1]);rownames(expr_mat) = expr_matrix$Row
Slide_test <- createGiottoObject(raw_exprs = expr_mat, spatial_locs = bead_positions[,.(xcoord, ycoord)])

#normal giotto steps
#filtering of genes and cells, removal of mitochondrial genes
filterCombinations(Slide_test, expression_thresholds = c(1, 1), gene_det_in_min_cells = c(20, 20, 20), min_det_genes_per_cell = c(20, 32, 100))
visPlot(gobject = Slide_test, point_size=0.5)
Slide_test<-filterGiotto(gobject=Slide_test, gene_det_in_min_cells=20, min_det_genes_per_cell=20)
non_mito_genes = grep(pattern = 'mt-', Slide_test@gene_ID, value = T, invert = T)
non_mito_or_blood_genes = grep(pattern = 'Hb[ab]', non_mito_genes, value = T, invert = T)
Slide_test = subsetGiotto(gobject = Slide_test, gene_ids = non_mito_or_blood_genes)
visPlot(gobject = Slide_test, point_size=0.5)
dim(Slide_test@raw_exprs)

#dimensional reduction
Slide_test <- normalizeGiotto(gobject = Slide_test, scalefactor = 10000, verbose = T)
Slide_test <- addStatistics(gobject = Slide_test)
Slide_test <- calculateHVG(gobject = Slide_test, method = 'cov_groups', zscore_threshold = 0.5, nr_expression_groups = 10)
gene_metadata = fDataDT(Slide_test)
featgenes = gene_metadata[hvg == 'yes' & perc_cells > 0.5 & mean_expr_det > 1]$gene_ID
featgenes
featgenes = gene_metadata[hvg == 'yes']$gene_ID
featgenes
Slide_test <- runPCA(gobject = Slide_test, expression_values = 'custom', genes_to_use = featgenes, scale_unit = F)
Slide_test <- adjustGiottoMatrix(gobject = Slide_test, expression_values = c('normalized'), batch_columns = NULL, covariate_columns = c('nr_genes', 'total_expr'),  return_gobject = TRUE, update_slot = c('custom'))
Slide_test <- runPCA(gobject = Slide_test, expression_values = 'custom', genes_to_use = featgenes, scale_unit = F)
signPCA(Slide_test, genes_to_use = featgenes, scale_unit = F, scree_ylim = c(0,0.3))
plotPCA(gobject=Slide_test)
Slide_test <- runUMAP(Slide_test, dimensions_to_use=1:9, n_components=2)
plotUMAP(gobject=Slide_test, point_size=1)

#leiden clustering
Slide_test<-createNearestNetwork(gobject=Slide_test, dimensions_to_use=1:9, k=20)
Slide_test<-doLeidenCluster(gobject=Slide_test, resolution=0.5, n_iterations=10, name="leiden", python_path="/n/app/python/3.6.0/bin/python3")
plotUMAP(gobject=Slide_test, cell_color="leiden", point_size=1, plot_method="ggplot")
plotMetaDataHeatmap(Slide_test, expression_values="custom", metadata_cols=c("leiden"))
markers_scarn=findMarkers_one_vs_all(gobject=Slide_test, method="scran", expression_values="normalized", cluster_column="leiden", min_genes=5)
markergenes_scran = unique(markers_scarn[, head(.SD, 8), by="cluster_ID"][["gene_ID"]])
plotMetaDataHeatmap(Slide_test, expression_values="normalized", metadata_cols=c("leiden"), selected_genes=markergenes_scran)

Filtering (filterCombinations)
Responsive image
After genes and cells filtering, visPlot
Responsive image
calculateHVG
Responsive image
significant PCs (signPCA)
Responsive image
plotUMAP
Responsive image
Leiden clustering
Responsive image
plotMetaHeatmap
Responsive image

Spatial analysis

spatial_genes<-calculate_spatial_genes_python(gobject=Slide_test, expression_values="norm", python_path="/n/app/python/3.6.0/bin/python3", rbp_p=0.95, examine_top=0.05)
Slide_test<-createSpatialNetwork(gobject=Slide_test, k=7, maximum_distance=100, minimum_k=1)
visPlot(gobject=Slide_test, show_network=T, sdimx="sdimx", sdimy="sdimy", point_size=1, network_color="blue")

Top spatial genes
Responsive image
createSpatialNetwork
Responsive image
Spatial expression patterns of a few top genes
Responsive image

Spatial domains (Hidden markov random field, HMRF)

#select top 250 genes from spatial_genes
sp<-spatial_genes[1:250]

#start HMRF
HMRF<-doHMRF(gobject=Slide_test, expression_values="normalized", spatial_genes=sp, k=20, betas=c(0,1,20), output_folder="result_k20", python_path="/n/app/python/3.6.0/bin/python3", zscore="rowcol", tolerance=1e-5)

#add HMRF results
source("~/addHMRF.R")
Slide_test<-addHMRF2(gobject=Slide_test, name="test",betas=c(0,1.0,20), betas_to_add=c(0, 10, 11, 14), hmrf_name="hmrf", python_path="/n/app/python/3.6.0/bin/python3", output_data="result_k20/result.spatial.zscore", k=20)
visPlot(gobject=Slide_test, cell_color="hmrf_k20_b.14", point_size=2)
visPlot(gobject=Slide_test, cell_color="hmrf_k20_b.0", point_size=2)

#cluster relationship
showClusterHeatmap(gobject=Slide_test, cluster_column="hmrf_k20_b.14")

#visualize selected clusters
visPlot(gobject = Slide_test, plot_method = 'ggplot', sdimx = 'sdimx', sdimy = 'sdimy', cell_color = 'hmrf_k20_b.14', point_size=2, select_cell_groups = c(1, 12, 3, 7, 8))
visPlot(gobject = Slide_test, plot_method = 'ggplot', sdimx = 'sdimx', sdimy = 'sdimy', cell_color = 'hmrf_k20_b.14', point_size=2, select_cell_groups = c(10, 19))
visPlot(gobject = Slide_test, plot_method = 'ggplot', sdimx = 'sdimx', sdimy = 'sdimy', cell_color = 'hmrf_k20_b.14', point_size=2, select_cell_groups = c(14, 16, 4, 5, 9))
visPlot(gobject = Slide_test, plot_method = 'ggplot', sdimx = 'sdimx', sdimy = 'sdimy', cell_color = 'hmrf_k20_b.14', point_size=2, select_cell_groups = c(15, 17, 18))
visPlot(gobject = Slide_test, plot_method = 'ggplot', sdimx = 'sdimx', sdimy = 'sdimy', cell_color = 'hmrf_k20_b.14', point_size=2, select_cell_groups = c(11, 13, 2, 20, 6))

visPlot (HMRF, k20, b14)
Responsive image
visPlot (No spatial clustering, purely clustering based on expression)
Responsive image
showClusterHeatmap
Responsive image
visPlot (selected HMRF clusters 1 12 3 7 8)
Responsive image
visPlot (selected HMRF clusters 10 19)
Responsive image
visPlot (selected HMRF clusters 14 16 4 5 9)
Responsive image
visPlot (selected HMRF clusters 15 17 18)
Responsive image
visPlot (selected HMRF clusters 11 13 2 20 6)
Responsive image