From 72c9790165a66290ddb64db16c73b52abaf33364 Mon Sep 17 00:00:00 2001
From: Yves Biener <yves.biener@gmx.de>
Date: Tue, 31 Mar 2026 13:10:42 +0200
Subject: [PATCH] add: model comparison between original and sub graph

---
 diff_model_comparison.py | 148 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 148 insertions(+)
 create mode 100644 diff_model_comparison.py

diff --git a/diff_model_comparison.py b/diff_model_comparison.py
new file mode 100644
index 0000000..2494c23
--- /dev/null
+++ b/diff_model_comparison.py
@@ -0,0 +1,148 @@
+import math
+
+import matplotlib.pyplot as plt
+import numpy as np
+from graph_tool.all import *
+
+from src import centrality
+from src import plot
+from src import fitting
+
+
+def leverage(g, weight):
+    """Return a vertex property map holding the leverage centrality of each vertex.
+
+    l_i = (1 / k_i) * sum over neighbours j of (k_i - k_j) / (k_i + k_j), where k
+    is the neighbour count. `weight` is unused; it is accepted for a uniform call.
+    """
+    vp = g.new_vertex_property("double")
+    for v in g.vertices():
+        neighbours = g.get_all_neighbours(v)
+        ki = len(neighbours)
+        kjs = [len(g.get_all_neighbours(nv)) for nv in neighbours]
+        # a vertex without neighbours would divide by zero below; its leverage
+        # is defined as 0.0 instead
+        vp[v] = sum((ki - kj) / (ki + kj) for kj in kjs) / ki if ki else 0.0
+    return vp
+
+
+def random_graph(n=5000, seed=None):
+    """
+    Generate a uniformly random 2D point cloud.
+    `n` [int] Number of points to draw (default 5000). `seed` [int | None] Seed for the numpy generator; when None a random 128 bit seed is drawn via `secrets`.
+    @return [tuple] `(points, seed)`: `points` is a numpy.ndarray of shape (n, 2) drawn with `Generator.random`, `seed` is the seed that was actually used (so a run can be reproduced).
+    """
+    from secrets import randbits
+    used_seed = randbits(128) if seed is None else seed
+    generator = np.random.default_rng(seed=used_seed)
+    points = generator.random((n, 2))
+    return points, used_seed
+
+
+def sub_spatial_graph(adata, percentage):
+    """Build the spatial graph of the points of `adata` inside a centred square.
+
+    The square is centred at (0.5, 0.5) with side length `percentage`; each axis
+    bound is exclusive below and inclusive above. Returns `spatial_graph(...)`.
+    """
+    lo, hi = 0.5 - 0.5 * percentage, 0.5 + 0.5 * percentage
+    # collect in a list: np.append in a loop copies the array each time (O(n^2))
+    inside = [(p[0], p[1]) for p in adata if lo < p[0] <= hi and lo < p[1] <= hi]
+    return spatial_graph(np.array(inside, dtype=float).reshape(len(inside), 2))
+
+
+def spatial_graph(adata):
+    """
+    Generate the spatial graph for the point array `adata` using a delaunay
+    triangulation (`graph_tool.generation.triangulation`). The vertex positions
+    returned by the triangulation are stored in the vertex property `g.vp["pos"]`.
+    @return [tuple] `(g, weight)`: the triangulation graph and an edge property map ("double") holding the euclidean length of each edge.
+    """
+    g, pos = graph_tool.generation.triangulation(adata, type="delaunay")
+    g.vp["pos"] = pos
+    weight = g.new_edge_property("double")
+    for e in g.edges():
+        weight[e] = float(np.linalg.norm(pos[e.source()].a - pos[e.target()].a))  # was sqrt(sum(|d|))**2, i.e. the manhattan distance
+    return g, weight
+
+
+def apply(g, weight, convex_hull, ax, method, method_name):
+    """Fit and plot the piece-wise linear model of one centrality measure.
+
+    `method` is called as `method(g, weight=weight)`; `method_name` labels the
+    plot and selects how the return value is unpacked. The centrality values
+    are min-max normalized, turned into quantification data against
+    `convex_hull`, fitted, and drawn onto `ax`.
+    """
+    # pick the vertex property map out of the method specific return value
+    if method_name == "Betweeness":
+        vp, _ = method(g, weight=weight)
+    elif method_name == "Eigenvector":
+        _, vp = method(g, weight=weight)
+    elif method_name == "Hits":
+        _, vp, _ = method(g, weight=weight)
+    else:
+        vp = method(g, weight=weight)
+    vp.a = np.nan_to_num(vp.a) # correct floating point values
+
+    # normalization; a constant measure has zero span, map it to all zeros
+    # instead of dividing by zero (which would fill `vp` with NaN)
+    min_val, max_val = vp.a.min(), vp.a.max()
+    vp.a = (vp.a - min_val) / ((max_val - min_val) or 1.0)
+
+    # quantification data: column 0 is used as `d`, column 1 as `C` below
+    quantification = plot.quantification_data(g, vp, convex_hull)
+    d, C = quantification[:, 0], quantification[:, 1]
+    m_opt, c0_opt, b_opt, aic_opt = fitting.fit_piece_wise_linear(d, C)
+
+    # TODO: sampling the fitted curve should arguably live in the plotting function
+    d_curve = np.linspace(min(d), max(d), 500)
+    # linear `m_opt * x + c0_opt` up to the breakpoint `b_opt`, constant afterwards
+    C_curve = np.piecewise(d_curve, [d_curve <= b_opt, d_curve > b_opt],
+                           [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt])
+    plot.quantification_plot(ax, quantification, d_curve, C_curve, method_name, aic_opt)
+
+
+#
+# Script part: compare the fitted models of the full graph and of a sub graph.
+# - Create a random point cloud and calculate a triangulation on it
+# - Calculate the convex hull of that graph and draw the graph together with it
+# - Fit and plot every centrality measure of `measures` (first row of the grid)
+# - Repeat both steps for `sub_spatial_graph(points, 0.5)` (second row of the grid)
+# - Save the two point cloud drawings and the model grid as svg files
+#
+measures = ((pagerank, "PageRank"), (leverage, "Leverage"))
+
+points, seed = random_graph(n=5000)
+g, weight = spatial_graph(points)
+g = GraphView(g)
+convex_hull = centrality.convex_hull(g)
+
+# full graph with its convex hull; a fresh property map is passed so that no centrality measure is drawn
+fig_graph, ax_graph = plt.subplots(figsize=(15, 12))
+plot.graph_plot(
+    fig_graph, ax_graph, g, g.new_vertex_property("double"), convex_hull,
+    f"Pointcloud (seed: {seed})",
+)
+fig_graph.savefig("point_cloud_diff_comparison_5000_pagerank_leverage.svg", format='svg')
+
+# 2x2 model grid: first row for the full graph, second row for the sub graph
+fig = plt.figure(figsize=(15, 12))
+full_row, sub_row = fig.subplots(2, 2)
+for axis, (method, label) in zip(full_row, measures):
+    apply(g, weight, convex_hull, axis, method, label)
+
+g_sub, weight_sub = sub_spatial_graph(points, 0.5)
+g_sub = GraphView(g_sub)
+convex_hull = centrality.convex_hull(g_sub)
+
+# sub graph with its own convex hull, again without any centrality measure
+fig_graph, ax_graph = plt.subplots(figsize=(15, 12))
+plot.graph_plot(
+    fig_graph, ax_graph, g_sub, g_sub.new_vertex_property("double"), convex_hull,
+    "Pointcloud (50% of original)",
+)
+fig_graph.savefig("point_cloud_diff_comparison_5000_sub_pagerank_leverage.svg", format='svg')
+for axis, (method, label) in zip(sub_row, measures):
+    apply(g_sub, weight_sub, convex_hull, axis, method, label)
+fig.savefig("model_diff_comparison_5000_pagerank_leverage.svg", format='svg')