142 lines
4.7 KiB
Python
142 lines
4.7 KiB
Python
|
|
import math
|
|
|
|
import matplotlib.pyplot as plt
|
|
from matplotlib.collections import LineCollection
|
|
import matplotlib as mpl
|
|
import numpy as np
|
|
import squidpy as sq
|
|
import scipy
|
|
import spatialdata as sd
|
|
from spatialdata_io.experimental import to_legacy_anndata
|
|
from graph_tool.all import *
|
|
|
|
from src import centrality
|
|
from src import plot
|
|
from src import fitting
|
|
|
|
|
|
def merfish():
|
|
"""
|
|
Merfish dataset from `squidpy`.
|
|
"""
|
|
adata = sq.datasets.merfish()
|
|
adata = adata[adata.obs.Bregma == -9].copy()
|
|
return adata
|
|
|
|
|
|
def random_graph(n=5000, seed=None):
|
|
"""
|
|
Uniformly random point cloud generation.
|
|
`n` [int] Number of points to generate. Default 5000 seems like a good starting point in point density and corresponding runtime for the subsequent calculations.
|
|
@return [numpy.ndarray] Array of shape(n, 2) containing the coordinates for each point of the generated point cloud.
|
|
"""
|
|
if seed is None:
|
|
import secrets
|
|
seed = secrets.randbits(128)
|
|
rng = np.random.default_rng(seed=seed)
|
|
return rng.random((n, 2)), seed
|
|
|
|
|
|
def spatial_graph(adata):
|
|
"""
|
|
Generate the spatial graph using delaunay for the given `adata`.
|
|
`adata` will contain the calculated spatial graph contents in the keys
|
|
adata.obsm['spatial']` in case the `adata` is created from a dataset of *squidpy*.
|
|
@return [Graph] generated networkx graph from adata.obsp['spatial_distances']
|
|
"""
|
|
g, pos = graph_tool.generation.triangulation(adata, type="delaunay")
|
|
g.vp["pos"] = pos
|
|
weight = g.new_edge_property("double")
|
|
for e in g.edges():
|
|
weight[e] = math.sqrt(sum(map(abs, pos[e.source()].a - pos[e.target()].a)))**2
|
|
g.ep["weight"] = weight
|
|
return g, weight
|
|
|
|
|
|
def plot_graph_nodes(g, centralities, fig, ax, name):
|
|
pos = g.vp["pos"]
|
|
x = []
|
|
y = []
|
|
for v in g.vertices():
|
|
ver = pos[v]
|
|
x.append(ver[0])
|
|
y.append(ver[1])
|
|
|
|
sc = ax.scatter(x, y, s=1, c=centralities, cmap=plt.cm.plasma)
|
|
ax.set_title(name)
|
|
fig.colorbar(sc, ax=ax)
|
|
|
|
|
|
def plot_relationship_nodes(g, vp, convex_hull, fig, ax, name):
|
|
quantification = plot.quantification_data(g, vp, convex_hull)
|
|
|
|
# optimize model's piece-wise linear function
|
|
d = quantification[:, 0]
|
|
C = quantification[:, 1]
|
|
m_opt, c0_opt, b_opt, aic_opt = fitting.fit_piece_wise_linear(d, C, M=100)
|
|
|
|
# TODO
|
|
# should this be part of the plotting function itself, it should not be necessary for me to do this
|
|
d_curve = np.linspace(min(d), max(d), 500)
|
|
C_curve = np.piecewise(
|
|
d_curve,
|
|
[d_curve <= b_opt, d_curve > b_opt],
|
|
[lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt]
|
|
)
|
|
|
|
ax.set_title(name)
|
|
ax.set_xlabel('Distance to Bounding-Box')
|
|
ax.set_ylabel('Centrality')
|
|
ax.scatter(quantification[:, 0], quantification[:, 1], c=quantification[:, 1], cmap=plt.cm.plasma, s=0.2)
|
|
ax.plot(d_curve, C_curve, color='g', linewidth=1, label=f"m = {m_opt} | b = {b_opt} | c = {c0_opt}")
|
|
ax.legend()
|
|
|
|
|
|
def plot_degree_distribution(g, fig, ax, name):
|
|
ax.set_title(name)
|
|
ax.set_ylabel('Degree')
|
|
ax.set_xlabel('# Occurances')
|
|
degrees = g.get_total_degrees(list(g.vertices()))
|
|
bins = degrees.max() - degrees.min()
|
|
counts, bins, patches = ax.hist(degrees, bins=bins, orientation='horizontal')
|
|
|
|
# Label the percentages below the x-axis...
|
|
# bin_centers = 0.5 * np.diff(bins) + bins[:-1]
|
|
# for count, x in zip(counts, bin_centers):
|
|
# # Label the percentages
|
|
# percent = '%0.000f%%' % (100 * float(count) / counts.sum())
|
|
# ax.annotate(percent, xy=(x, 0), xycoords=('data', 'axes fraction'),
|
|
# xytext=(0, -18), textcoords='offset points', va='top', ha='center')
|
|
|
|
|
|
points, seed = random_graph(n=2000, seed=231533135843957409942915332448253409428)
|
|
g, weight = spatial_graph(points)
|
|
# adata = merfish()
|
|
# g, weight = spatial_graph(adata.obsm['spatial'])
|
|
g = GraphView(g)
|
|
|
|
# relationship with betweenness scoring for both node and edges
|
|
vp = pagerank(g, weight=weight)
|
|
vp.a = np.nan_to_num(vp.a) # correct floating point values
|
|
|
|
# plot graph
|
|
fig = plt.figure(figsize=(16, 9), layout='constrained')
|
|
fig.suptitle(f"Artical (n = 2000 | seed {seed})", fontsize=16)
|
|
ax1, ax2 = fig.subplots(1, 2)
|
|
|
|
# relationship with betweenness scoring for both node and edges
|
|
vp = pagerank(g, weight=weight)
|
|
vp.a = np.nan_to_num(vp.a) # correct floating point values
|
|
|
|
# normalize centrality values
|
|
min_val, max_val = vp.a.min(), vp.a.max()
|
|
vp.a = (vp.a - min_val) / (max_val - min_val)
|
|
|
|
# compare relationships
|
|
convex_hull = centrality.convex_hull(g)
|
|
plot_degree_distribution(g, fig, ax2, "Degree Distribution")
|
|
plot_relationship_nodes(g, vp, convex_hull, fig, ax1, "Pagerank Centrality with fitted model")
|
|
|
|
fig.savefig(f"degree_distribution_vs_pagerank_artifical.pdf", format='pdf')
|