add node vs edge centrality comparison for betweenness

The generated illustration shows that the differences between the edge-based and node-based scores are very small: both produce essentially the same overall shape, so the choice would not make any difference for the model or its resulting outcomes. This allows me to focus on node-based centralities.
This commit is contained in:
2026-04-26 11:12:45 +02:00
parent 3acf54a000
commit c0d0e25ca2
+139
View File
@@ -0,0 +1,139 @@
import math
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
import matplotlib as mpl
import numpy as np
import squidpy as sq
import scipy
import spatialdata as sd
from spatialdata_io.experimental import to_legacy_anndata
from graph_tool.all import *
from src import centrality
from src import plot
from src import fitting
def merfish():
    """
    Load the Merfish dataset shipped with `squidpy` and keep a single slice.

    Only the observations whose `Bregma` value equals -9 are retained.

    @return Copy of the dataset restricted to the `Bregma == -9` observations.
    """
    full_dataset = sq.datasets.merfish()
    in_slice = full_dataset.obs.Bregma == -9
    return full_dataset[in_slice].copy()
def random_graph(n=5000, seed=None):
    """
    Uniformly random point cloud generation.

    `n` [int] Number of points to generate. Default 5000 seems like a good
        starting point in point density and corresponding runtime for the
        subsequent calculations.
    `seed` [int | None] Seed for the random generator. When `None`, a fresh
        128 bit seed is drawn so that the run can still be reproduced from the
        returned value.
    @return [tuple] `(points, seed)` where `points` is a numpy.ndarray of
        shape (n, 2) holding the coordinates of each generated point and
        `seed` is the seed that was actually used.
    """
    if seed is None:
        # Imported lazily: only needed when the caller supplies no seed.
        from secrets import randbits
        seed = randbits(128)
    generator = np.random.default_rng(seed=seed)
    points = generator.random((n, 2))
    return points, seed
def spatial_graph(adata):
    """
    Generate the spatial graph using a delaunay triangulation of `adata`.

    `adata` Point coordinates handed directly to
        `graph_tool.generation.triangulation` (the script below passes
        `adata.obsm['spatial']` of a *squidpy* dataset).
    @return [tuple] `(g, weight)` where `g` is the generated graph-tool graph
        and `weight` is its edge property map ("double") holding the Euclidean
        length of every edge. The vertex positions are stored in
        `g.vp["pos"]` and the weights in `g.ep["weight"]`.
    """
    g, pos = graph_tool.generation.triangulation(adata, type="delaunay")
    g.vp["pos"] = pos
    weight = g.new_edge_property("double")
    for e in g.edges():
        # Euclidean edge length. The previous expression
        # `sqrt(sum(|d|))**2` collapsed to the Manhattan distance.
        delta = pos[e.source()].a - pos[e.target()].a
        weight[e] = math.sqrt(float(np.dot(delta, delta)))
    g.ep["weight"] = weight
    return g, weight
def plot_graph_edges(g, centralities, fig, ax, name):
    """
    Draw every edge of `g` on `ax`, coloured by its centrality score.

    `g` Graph whose vertex property `pos` holds the vertex coordinates.
    `centralities` Array of edge scores, addressed by edge index (e.g. the
        `.a` array of an edge property map).
    `fig` Figure that receives the colorbar.
    `ax` Axes to draw on.
    `name` Title of the axes.
    """
    pos = g.vp["pos"]
    edge_index = g.edge_index
    norm = mpl.colors.Normalize(vmin=centralities.min(), vmax=centralities.max())
    cmap = plt.cm.plasma.resampled(g.num_edges())

    segments = []
    scores = []
    for e in g.edges():
        src = pos[e.source()]
        tgt = pos[e.target()]
        segments.append([(src[0], src[1]), (tgt[0], tgt[1])])
        # Look the score up by edge index: the iteration order of `g.edges()`
        # is not guaranteed to follow the edge index order used by the array.
        scores.append(centralities[edge_index[e]])

    # One collection for all segments instead of one collection per edge.
    lines = LineCollection(segments, colors=cmap(norm(scores)), linewidths=0.5)
    ax.add_collection(lines)
    # Make sure the view limits cover the added segments.
    ax.autoscale_view()
    ax.set_title(name)
    fig.colorbar(plt.cm.ScalarMappable(norm=norm, cmap=cmap), ax=ax)
def plot_graph_nodes(g, centralities, fig, ax, name):
    """
    Scatter the vertices of `g` on `ax`, coloured by their centrality score.

    `g` Graph whose vertex property `pos` holds the vertex coordinates.
    `centralities` Per-vertex scores passed as the scatter colours.
    `fig` Figure that receives the colorbar.
    `ax` Axes to draw on.
    `name` Title of the axes.
    """
    pos = g.vp["pos"]
    coordinates = [pos[v] for v in g.vertices()]
    x = [point[0] for point in coordinates]
    y = [point[1] for point in coordinates]
    sc = ax.scatter(x, y, s=1, c=centralities, cmap=plt.cm.plasma)
    ax.set_title(name)
    fig.colorbar(sc, ax=ax)
def plot_relationship_nodes(g, vp, convex_hull, fig, ax, name):
    """
    Scatter the node quantification data returned by
    `plot.quantification_data` on `ax`.

    Column 0 of the quantification data is drawn on the x axis, column 1 on
    the y axis; column 1 also determines the point colour.

    `g`, `vp`, `convex_hull` Forwarded unchanged to `plot.quantification_data`.
    `fig` Unused; kept for a signature parallel to the other plot helpers.
    `ax` Axes to draw on.
    `name` Title of the axes.
    """
    data = plot.quantification_data(g, vp, convex_hull)
    distance = data[:, 0]
    score = data[:, 1]
    ax.set_title(name)
    ax.set_xlabel('Distance to Bounding-Box')
    ax.set_ylabel('Centrality')
    ax.scatter(distance, score, c=score, cmap=plt.cm.plasma, s=0.2)
def plot_relationship_edges(g, ep, convex_hull, fig, ax, name):
    """
    Scatter the edge quantification data returned by
    `plot.quantification_data_edges` on `ax`.

    Column 0 of the quantification data is drawn on the x axis, column 1 on
    the y axis; column 1 also determines the point colour.

    `g`, `ep`, `convex_hull` Forwarded unchanged to
        `plot.quantification_data_edges`.
    `fig` Unused; kept for a signature parallel to the other plot helpers.
    `ax` Axes to draw on.
    `name` Title of the axes.
    """
    data = plot.quantification_data_edges(g, ep, convex_hull)
    distance = data[:, 0]
    score = data[:, 1]
    ax.set_title(name)
    ax.set_xlabel('Distance to Bounding-Box')
    ax.set_ylabel('Centrality')
    ax.scatter(distance, score, c=score, cmap=plt.cm.plasma, s=0.2)
# Alternative input: a uniformly random point cloud instead of the Merfish data.
# points, seed = random_graph(n=3000)
# g, weight = spatial_graph(points)
adata = merfish()
g, weight = spatial_graph(adata.obsm['spatial'])
g = GraphView(g)

# plot graph: three rows (location, histogram, relationship),
# left column for node scores and right column for edge scores
fig = plt.figure(figsize=(15, 18), layout='constrained')
fig.suptitle("Merfish", fontsize=16)
row1, row2, row3 = fig.subplots(3, 2)
ax1, ax2 = row1
ax3, ax4 = row2
ax5, ax6 = row3

# relationship with betweenness scoring for both node and edges
vp, ep = betweenness(g, weight=weight)
# replace NaN/inf entries with finite numbers before plotting
vp.a = np.nan_to_num(vp.a)
ep.a = np.nan_to_num(ep.a)

# compare location of centrality scores
plot_graph_nodes(g, vp.a, fig, ax1, "Node Betweenness centrality")
plot_graph_edges(g, ep.a, fig, ax2, "Edge Betweenness centrality")

# compare relative amount of centrality scores
ax3.hist(vp.a, bins=50)
ax3.set_xlabel('Centrality score')
ax3.set_ylabel('# Occurrences')
ax4.hist(ep.a, bins=50)
ax4.set_xlabel('Centrality score')
ax4.set_ylabel('# Occurrences')

# compare relationships
convex_hull = centrality.convex_hull(g)
plot_relationship_nodes(g, vp, convex_hull, fig, ax5, "Node Betweenness relationship")
# The edge panel previously carried the copy-pasted "Node ..." title.
plot_relationship_edges(g, ep, convex_hull, fig, ax6, "Edge Betweenness relationship")

fig.savefig("node_vs_edge_betweenness_centrality_merfish.pdf", format='pdf')