Files
boundary-aware-centrality/degree_distribution_pagerank.py
2026-04-26 16:59:15 +02:00

142 lines
4.7 KiB
Python

import math
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
import matplotlib as mpl
import numpy as np
import squidpy as sq
import scipy
import spatialdata as sd
from spatialdata_io.experimental import to_legacy_anndata
from graph_tool.all import *
from src import centrality
from src import plot
from src import fitting
def merfish():
"""
Merfish dataset from `squidpy`.
"""
adata = sq.datasets.merfish()
adata = adata[adata.obs.Bregma == -9].copy()
return adata
def random_graph(n=5000, seed=None):
"""
Uniformly random point cloud generation.
`n` [int] Number of points to generate. Default 5000 seems like a good starting point in point density and corresponding runtime for the subsequent calculations.
@return [numpy.ndarray] Array of shape(n, 2) containing the coordinates for each point of the generated point cloud.
"""
if seed is None:
import secrets
seed = secrets.randbits(128)
rng = np.random.default_rng(seed=seed)
return rng.random((n, 2)), seed
def spatial_graph(adata):
"""
Generate the spatial graph using delaunay for the given `adata`.
`adata` will contain the calculated spatial graph contents in the keys
adata.obsm['spatial']` in case the `adata` is created from a dataset of *squidpy*.
@return [Graph] generated networkx graph from adata.obsp['spatial_distances']
"""
g, pos = graph_tool.generation.triangulation(adata, type="delaunay")
g.vp["pos"] = pos
weight = g.new_edge_property("double")
for e in g.edges():
weight[e] = math.sqrt(sum(map(abs, pos[e.source()].a - pos[e.target()].a)))**2
g.ep["weight"] = weight
return g, weight
def plot_graph_nodes(g, centralities, fig, ax, name):
pos = g.vp["pos"]
x = []
y = []
for v in g.vertices():
ver = pos[v]
x.append(ver[0])
y.append(ver[1])
sc = ax.scatter(x, y, s=1, c=centralities, cmap=plt.cm.plasma)
ax.set_title(name)
fig.colorbar(sc, ax=ax)
def plot_relationship_nodes(g, vp, convex_hull, fig, ax, name):
quantification = plot.quantification_data(g, vp, convex_hull)
# optimize model's piece-wise linear function
d = quantification[:, 0]
C = quantification[:, 1]
m_opt, c0_opt, b_opt, aic_opt = fitting.fit_piece_wise_linear(d, C, M=100)
# TODO
# should this be part of the plotting function itself, it should not be necessary for me to do this
d_curve = np.linspace(min(d), max(d), 500)
C_curve = np.piecewise(
d_curve,
[d_curve <= b_opt, d_curve > b_opt],
[lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt]
)
ax.set_title(name)
ax.set_xlabel('Distance to Bounding-Box')
ax.set_ylabel('Centrality')
ax.scatter(quantification[:, 0], quantification[:, 1], c=quantification[:, 1], cmap=plt.cm.plasma, s=0.2)
ax.plot(d_curve, C_curve, color='g', linewidth=1, label=f"m = {m_opt} | b = {b_opt} | c = {c0_opt}")
ax.legend()
def plot_degree_distribution(g, fig, ax, name):
ax.set_title(name)
ax.set_ylabel('Degree')
ax.set_xlabel('# Occurances')
degrees = g.get_total_degrees(list(g.vertices()))
bins = degrees.max() - degrees.min()
counts, bins, patches = ax.hist(degrees, bins=bins, orientation='horizontal')
# Label the percentages below the x-axis...
# bin_centers = 0.5 * np.diff(bins) + bins[:-1]
# for count, x in zip(counts, bin_centers):
# # Label the percentages
# percent = '%0.000f%%' % (100 * float(count) / counts.sum())
# ax.annotate(percent, xy=(x, 0), xycoords=('data', 'axes fraction'),
# xytext=(0, -18), textcoords='offset points', va='top', ha='center')
points, seed = random_graph(n=2000, seed=231533135843957409942915332448253409428)
g, weight = spatial_graph(points)
# adata = merfish()
# g, weight = spatial_graph(adata.obsm['spatial'])
g = GraphView(g)
# relationship with betweenness scoring for both node and edges
vp = pagerank(g, weight=weight)
vp.a = np.nan_to_num(vp.a) # correct floating point values
# plot graph
fig = plt.figure(figsize=(16, 9), layout='constrained')
fig.suptitle(f"Artical (n = 2000 | seed {seed})", fontsize=16)
ax1, ax2 = fig.subplots(1, 2)
# relationship with betweenness scoring for both node and edges
vp = pagerank(g, weight=weight)
vp.a = np.nan_to_num(vp.a) # correct floating point values
# normalize centrality values
min_val, max_val = vp.a.min(), vp.a.max()
vp.a = (vp.a - min_val) / (max_val - min_val)
# compare relationships
convex_hull = centrality.convex_hull(g)
plot_degree_distribution(g, fig, ax2, "Degree Distribution")
plot_relationship_nodes(g, vp, convex_hull, fig, ax1, "Pagerank Centrality with fitted model")
fig.savefig(f"degree_distribution_vs_pagerank_artifical.pdf", format='pdf')