Files
boundary-aware-centrality/comparison.py
T
2026-04-16 07:20:19 +02:00

238 lines
7.8 KiB
Python

import math
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
import numpy as np
import squidpy as sq
import scipy
from graph_tool.all import *
from src import centrality
from src import plot
from src import fitting
def merfish():
    """
    Load the Merfish dataset from `squidpy`, restricted to one slice.

    Only the observations whose `Bregma` value equals -9 are kept.
    @return The filtered dataset as an independent copy (not a view of the full dataset).
    """
    full_dataset = sq.datasets.merfish()
    in_slice = full_dataset.obs.Bregma == -9
    return full_dataset[in_slice].copy()
def mibitof():
    """
    Load the Mibitof dataset from `squidpy`.

    @return The dataset exactly as handed back by `sq.datasets.mibitof()`.
    """
    return sq.datasets.mibitof()
def degree(g, weight):
    """
    Degree centrality: the number of neighbours of every vertex.

    `g` Graph to evaluate.
    `weight` Unused; accepted so that every centrality measure can be called as `method(g, weight=...)`.
    @return [VertexPropertyMap] "double" property map holding the neighbour count of each vertex.
    """
    counts = g.new_vertex_property("double")
    for vertex in g.vertices():
        counts[vertex] = len(g.get_all_neighbours(vertex))
    return counts
def leverage(g, weight):
    """
    Leverage centrality: how a vertex's degree compares to the degrees of its neighbours.

    For a vertex with degree k_i the value is the mean of
    (k_i - k_j) / (k_i + k_j) over all of its neighbours j.

    `g` Graph to evaluate.
    `weight` Unused; accepted so that every centrality measure can be called as `method(g, weight=...)`.
    @return [VertexPropertyMap] "double" property map with the leverage of each vertex. Vertices without neighbours are never assigned and keep the map's initial value.
    """
    scores = g.new_vertex_property("double")
    for vertex in g.vertices():
        adjacent = g.get_all_neighbours(vertex)
        own_degree = len(adjacent)
        if own_degree == 0:
            # Isolated vertex (observed in the mibitof data): nothing to
            # average over, and dividing by the degree would fail.
            continue
        total = 0.0
        for other in adjacent:
            other_degree = len(g.get_all_neighbours(other))
            total += (own_degree - other_degree) / (own_degree + other_degree)
        scores[vertex] = total / own_degree
    return scores
def laplacian(g, weight):
    """
    Laplacian centrality: relative drop of the Laplacian energy when a vertex is removed.

    The Laplacian energy of a graph is the sum of the squared eigenvalues of
    its (weighted) Laplacian matrix. The centrality of a vertex `v` is
    (E(g) - E(g without v)) / E(g).

    `g` Graph to evaluate. It must carry an internal edge property named "weight", because the reduced copies are evaluated with their own copy of that property.
    `weight` Edge property map used for the Laplacian of the full graph.
    @return [VertexPropertyMap] "double" property map with the centrality of each vertex. If the full graph has zero energy the map is returned with its initial values.
    """
    def energy(graph, edge_weight):
        lap = graph_tool.spectral.laplacian(graph, weight=edge_weight)
        # The sum of the squared eigenvalues of a square matrix equals
        # trace(L @ L) = sum_ij L_ij * L_ji. Evaluating that directly on the
        # sparse matrix avoids a dense eigen-decomposition for every vertex
        # and yields a real number instead of a complex one.
        return float(lap.multiply(lap.T).sum())

    vp = g.new_vertex_property("double")
    full_energy = energy(g, weight)
    if full_energy == 0:
        # No energy to lose (e.g. a graph without edges); avoid dividing by zero.
        return vp
    for v in g.vertices():
        # Work on a copy so that `g` itself is never modified.
        reduced = g.copy()
        reduced.remove_vertex(v, True)
        reduced_energy = energy(reduced, reduced.ep["weight"])
        vp[v] = (full_energy - reduced_energy) / full_energy
    return vp
def random_graph(n=5000, seed=None):
    """
    Uniformly random point cloud generation.

    `n` [int] Number of points to generate. Default 5000 seems like a good starting point in point density and corresponding runtime for the subsequent calculations.
    `seed` Seed for the random number generator. If `None`, a fresh 128 bit seed is drawn.
    @return [tuple] `(points, seed)`: `points` is a numpy.ndarray of shape (n, 2) containing the coordinates for each point of the generated point cloud, `seed` is the seed that was actually used, so the cloud can be reproduced.
    """
    if seed is None:
        from secrets import randbits
        seed = randbits(128)
    generator = np.random.default_rng(seed=seed)
    cloud = generator.random((n, 2))
    return cloud, seed
def spatial_graph(adata):
    """
    Generate the spatial graph of a point set using a delaunay triangulation.

    `adata` Point coordinates handed to `graph_tool.generation.triangulation`. The callers in this file pass the (n, 2) array of a random point cloud; for a *squidpy* dataset pass `adata.obsm['spatial']`.
    @return [tuple] `(g, weight)`: `g` is the triangulation graph with the vertex positions stored as internal vertex property "pos" and the edge lengths as internal edge property "weight"; `weight` is that same edge property map.
    """
    g, pos = graph_tool.generation.triangulation(adata, type="delaunay")
    g.vp["pos"] = pos
    weight = g.new_edge_property("double")
    for e in g.edges():
        delta = pos[e.source()].a - pos[e.target()].a
        # Euclidean edge length. The square has to be applied to every
        # component before summing; squaring the square root of the summed
        # absolute differences would give the Manhattan distance instead.
        weight[e] = math.sqrt(sum(float(d) * float(d) for d in delta))
    g.ep["weight"] = weight
    return g, weight
def apply(g, seed, weight, convex_hull, ax, method, method_name):
    """
    Evaluate one centrality measure on `g`, fit the piece-wise linear model and plot it.

    `g` Graph the centrality is computed on.
    `seed` Unused; kept so that existing callers keep working.
    `weight` Edge property map forwarded to `method` as `weight=`.
    `convex_hull` Convex hull of `g`, forwarded to `plot.quantification_data`.
    `ax` Axis the result is drawn on.
    `method` Centrality function, invoked as `method(g, weight=weight)`.
    `method_name` Label of the measure. It decides how the return value of `method` is unpacked and is forwarded to the plot.
    @return None. The vertex values returned by `method` are normalised in place and the plot is drawn on `ax`.
    """
    # calculate centrality values; some measures return more than the vertex map
    result = method(g, weight=weight)
    if method_name == "Betweeness":  # spelling as passed by the callers
        vp, _edge_values = result
    elif method_name == "Eigenvector":
        _eigenvalue, vp = result
    elif method_name == "Hits":
        _eigenvalue, vp, _hub_centrality = result
    else:
        vp = result
    vp.a = np.nan_to_num(vp.a)  # replace NaN/inf by finite numbers

    # min-max normalization to [0, 1]
    min_val, max_val = vp.a.min(), vp.a.max()
    span = max_val - min_val
    if span > 0:
        vp.a = (vp.a - min_val) / span
    else:
        # All vertices share one value: dividing by the zero range would
        # turn every entry into NaN, so map them to 0 instead.
        vp.a = np.zeros_like(vp.a)

    # generate model based on convex hull and associated centrality values
    quantification = plot.quantification_data(g, vp, convex_hull)

    # optimize model's piece-wise linear function
    d = quantification[:, 0]
    C = quantification[:, 1]
    m_opt, c0_opt, b_opt, aic_opt = fitting.fit_piece_wise_linear(d, C)

    # TODO: sampling the fitted curve arguably belongs into the plotting function
    # Linear with slope `m_opt` and offset `c0_opt` up to `b_opt`, constant afterwards.
    d_curve = np.linspace(np.min(d), np.max(d), 500)
    C_curve = np.piecewise(
        d_curve,
        [d_curve <= b_opt, d_curve > b_opt],
        [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt],
    )

    # plot model containing modeled piece-wise linear function
    plot.quantification_plot(ax, quantification, d_curve, C_curve, method_name, aic_opt)
def draw_graph(G, ax, name):
    """
    Draw the vertices and edges of `G` on `ax`.

    `G` Graph whose internal vertex property "pos" holds the coordinates of each vertex.
    `ax` Axis to draw on.
    `name` Title of the axis.
    @return None.
    """
    pos = G.vp["pos"]

    # vertices
    x = []
    y = []
    for v in G.vertices():
        point = pos[v]
        x.append(point[0])
        y.append(point[1])

    # edges: gather every segment first and add them as a single collection;
    # one collection per edge creates thousands of artists and is very slow
    segments = []
    for e in G.edges():
        start = pos[e.source()]
        end = pos[e.target()]
        segments.append([(start[0], start[1]), (end[0], end[1])])
    if segments:
        ax.add_collection(LineCollection(segments, colors=['k'], linewidths=0.1))

    ax.scatter(x, y, s=1)
    ax.set_title(name)
#
# - Create a random point cloud and calculate a triangulation on it
# - For that graph calculate the convex hull
# - Draw the graph with the convex hull and save it
# - For each centrality measure
#   - create a fresh figure and apply the centrality measure to its axis
#   - save the resulting model plot to its own file
#
N_POINTS = 3000
FIGSIZE = (15, 12)

points, seed = random_graph(n=N_POINTS)
# To run on a squidpy dataset instead of the random cloud:
#   adata = merfish()
#   g, weight = spatial_graph(adata.obsm['spatial'])
g, weight = spatial_graph(points)
g = GraphView(g)

# NOTE only relevant for `mibitof`: its triangulation contains one duplicated
# point that ends up as an isolated vertex. Remove that vertex and rebuild the
# edge weights before computing the centralities.

# calculate convex hull
convex_hull = centrality.convex_hull(g)

# plot graph
fig_graph, ax_graph = plt.subplots(figsize=FIGSIZE)
draw_graph(g, ax_graph, f"Artifical (n={N_POINTS})\n(seed = {seed})")
fig_graph.savefig(f"Comparison_node_artificial_{N_POINTS}_graph.svg", format='svg')
plt.close(fig_graph)  # release the figure once it is written

# (centrality function, label handed to `apply`, file name part)
# Labels and file name parts are kept as they were so existing outputs keep their names.
MEASURES = (
    (closeness, "Closeness", "closeness"),
    (betweenness, "Betweeness", "betweenness"),
    (pagerank, "PageRank", "pagerank"),
    (eigenvector, "Eigenvector", "eigenvector"),
    (hits, "Hits", "hits"),
    (katz, "Katz", "katz"),
    (degree, "Degree", "degree"),
    (leverage, "Leverage", "leverage"),
    (laplacian, "Laplacian", "laplacian"),
)

for method, method_name, file_part in MEASURES:
    fig, ax = plt.subplots(figsize=FIGSIZE)
    apply(g, None, weight, convex_hull, ax, method, method_name)
    fig.savefig(f"Comparison_node_{file_part}_artifical_{N_POINTS}.svg", format='svg')
    plt.close(fig)  # avoid keeping every large figure alive until the script ends