From ead3d70c3541be9fb77192c962b6db42fc23e459 Mon Sep 17 00:00:00 2001
From: Yves Biener <yves.biener@gmx.de>
Date: Sat, 21 Mar 2026 21:16:18 +0100
Subject: [PATCH] WIP diff centrality scores

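Check whether the model correction reliably predicts the "expected"
outcome, i.e. whether the corrected centrality scores of the extracted
sub graph match the scores of the same region in the original graph.

Enable the betweenness computation alongside closeness, run the
comparison for both measures, and plot the per-vertex difference
(original minus corrected) using the seismic colormap. Each measure is
written to its own Diff_graph_scatter_<measure>.svg file.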
---
 diff_comparison.py | 123 ++++++++++++++++++++++++---------------------
 1 file changed, 65 insertions(+), 58 deletions(-)

diff --git a/diff_comparison.py b/diff_comparison.py
index f6067c5..d571c1f 100644
--- a/diff_comparison.py
+++ b/diff_comparison.py
@@ -79,7 +79,7 @@ def spatial_graph(adata):
     return g, weight
 
 
-def plot_graph_diff(G, c, fig, ax, name):
+def plot_graph_diff(G, c, fig, ax, name, cmap=plt.cm.plasma):
     pos = G.vp["pos"]
     x = []
     y = []
@@ -89,7 +89,7 @@ def plot_graph_diff(G, c, fig, ax, name):
             x.append(ver[0])
             y.append(ver[1])
 
-    sc = ax.scatter(x, y, s=1, cmap=plt.cm.plasma, c=c) # map closeness values as color mapping on the verticies
+    sc = ax.scatter(x, y, s=1, cmap=cmap, c=c) # map closeness values as color mapping on the verticies
     ax.set_title(name)
     fig.colorbar(sc, ax=ax)
 
@@ -200,7 +200,7 @@ row1, row2 = fig.subplots(2, 2)
 ax1, ax2 = row1
 # TODO select corresponding centrality measure method
 vp_closeness = apply(g, seed, weight, convex_hull, ax1, closeness, "Closeness")
-# vp_betweenness = apply(g, seed, weight, convex_hull, ax2, betweenness, "Betweeness")
+vp_betweenness = apply(g, seed, weight, convex_hull, ax2, betweenness, "Betweeness")
 
 # calculate convex hull
 convex_hull = centrality.convex_hull(g_sub)
@@ -214,75 +214,82 @@ fig_graph.savefig("Diff_subgraph.svg", format='svg')
 
 ax1, ax2 = row2
 vp_closeness_corrected = apply_corrected(g_sub, seed, weight_sub, convex_hull, ax1, closeness, "Closeness")
-# vp_betweeness_corrected = apply_corrected(g_sub, seed, weight_sub, convex_hull, ax2, betweenness, "Betweeness")
+vp_betweeness_corrected = apply_corrected(g_sub, seed, weight_sub, convex_hull, ax2, betweenness, "Betweeness")
 
 fig.savefig(f"Diff_scores.svg", format='svg')
 
-# TODO how can I match the two vp's such that I can actually create a diff?
-#
-print(f"Closeness: {vp_closeness}")
-print(f"Closeness corrected: {vp_closeness_corrected}")
+for type in ['closeness', 'betweenness']:
+    print(type)
 
-sub_keys = iter(g_sub.vertices())
-keys = iter(g.vertices())
+    sub_keys = iter(g_sub.vertices())
+    keys = iter(g.vertices())
 
-scores = []
-sub_scores = []
+    scores = []
+    sub_scores = []
+    diff_scores = []
 
-for sub_key in sub_keys:
-    key = next(keys)
-    position = g.vp["pos"][key]
-    while not (position[0] > 0.33 and position[0] <= 0.66 and position[1] > 0.33 and position[1] <= 0.66):
+    for sub_key in sub_keys:
         key = next(keys)
         position = g.vp["pos"][key]
-    # NOTE print corresponding position (which are identical)
-    # position = g.vp["pos"][key]
-    # sub_position = g_sub.vp["pos"][sub_key]
-    # print(f"position: {position} | sub_position: {sub_position}")
+        while not (position[0] > 0.33 and position[0] <= 0.66 and position[1] > 0.33 and position[1] <= 0.66):
+            key = next(keys)
+            position = g.vp["pos"][key]
+        # NOTE print corresponding position (which are identical)
+        # position = g.vp["pos"][key]
+        # sub_position = g_sub.vp["pos"][sub_key]
+        # print(f"position: {position} | sub_position: {sub_position}")
 
-    value = vp_closeness[key]
-    sub_value = vp_closeness_corrected[sub_key]
-    scores.append(value)
-    sub_scores.append(sub_value)
-    # print(f"value: {value} | sub_value: {sub_value}")
-    # TODO what do I want to know?
-    # - median score comparison?
-    # - max delta's between scores
-    # - improvement compared to with and without correction?
+        value = 0.0
+        sub_value = 0.0
+        if type == 'closeness':
+            value = vp_closeness[key]
+            sub_value = vp_closeness_corrected[sub_key]
+        else:
+            value = vp_betweenness[key]
+            sub_value = vp_betweeness_corrected[sub_key]
 
-# TODO can I create the scatter graph with the points with their corresponding values?
-median_score = np.median(scores)
-median_sub_score = np.median(sub_scores)
-print(f"median score: {median_score}")
-print(f"median sub_score: {median_sub_score}")
-print(f"median delta: {(median_score - median_sub_score)}")
-print("")
+        scores.append(value)
+        sub_scores.append(sub_value)
+        diff_scores.append(value - sub_value)
 
-max_value_score = np.max(scores)
-max_value_sub_score = np.max(sub_scores)
-print(f"max value score: {max_value_score}")
-print(f"max value sub_score: {max_value_sub_score}")
-print(f"max value delta: {(max_value_score - max_value_sub_score)}")
-print("")
+    median_score = np.median(scores)
+    median_sub_score = np.median(sub_scores)
+    print(f"\tmedian score: {median_score}")
+    print(f"\tmedian sub_score: {median_sub_score}")
+    print(f"\tmedian delta: {(median_score - median_sub_score)}")
+    print("")
 
-min_value_score = np.min(scores)
-min_value_sub_score = np.min(sub_scores)
-print(f"min value score: {min_value_score}")
-print(f"min value sub_score: {min_value_sub_score}")
-print(f"min value delta: {(min_value_score - min_value_sub_score)}")
+    max_value_score = np.max(scores)
+    max_value_sub_score = np.max(sub_scores)
+    print(f"\tmax value score: {max_value_score}")
+    print(f"\tmax value sub_score: {max_value_sub_score}")
+    print(f"\tmax value delta: {(max_value_score - max_value_sub_score)}")
+    print("")
 
+    min_value_score = np.min(scores)
+    min_value_sub_score = np.min(sub_scores)
+    print(f"\tmin value score: {min_value_score}")
+    print(f"\tmin value sub_score: {min_value_sub_score}")
+    print(f"\tmin value delta: {(min_value_score - min_value_sub_score)}")
+    print("")
 
-fig = plt.figure(figsize=(35, 10))
-plot_graph_ax, plot_sub_graph_ax, plot_sub_graph_before_ax = fig.subplots(1, 3)
+    fig = plt.figure(figsize=(35, 10))
+    plot_graph_ax, plot_sub_graph_ax, plot_sub_graph_before_ax = fig.subplots(1, 3)
 
-plot_graph_diff(g, scores, fig, plot_graph_ax, "Original Graph (region of sub graph)")
-plot_graph_diff(g, sub_scores, fig, plot_sub_graph_ax, "Sub Graph (extracted region of original graph) with correction")
+    plot_graph_diff(g, scores, fig, plot_graph_ax, "Original Graph (region of sub graph)")
+    plot_graph_diff(g, diff_scores, fig, plot_sub_graph_ax, "Differences after correction of sub graph compared to original graph", plt.cm.seismic)
 
-vp = closeness(g_sub, weight=weight_sub)
-vp.a = np.nan_to_num(vp.a) # correct floating point values
-# normalization
-min_val, max_val = vp.a.min(), vp.a.max()
-vp.a = (vp.a - min_val) / (max_val - min_val)
-plot_graph_diff(g, vp.a, fig, plot_sub_graph_before_ax, "Sub Graph (extracted region of original graph) without correction")
+    vp = None
+    ep = None
+    if type == 'closeness':
+        vp = closeness(g_sub, weight=weight_sub)
+        vp.a = np.nan_to_num(vp.a) # correct floating point values
+    else:
+        vp, ep = betweenness(g_sub, weight=weight_sub)
+        vp.a = np.nan_to_num(vp.a) # correct floating point values
+    # normalization
+    # min_val, max_val = vp.a.min(), vp.a.max()
+    # vp.a = (vp.a - min_val) / (max_val - min_val)
+    plot_graph_diff(g, vp.a, fig, plot_sub_graph_before_ax, "Sub Graph (extracted region of original graph) without correction")
 
-fig.savefig(f"Diff_graph_scatter.svg", format='svg')
+    fig.savefig(f"Diff_graph_scatter_{type}.svg", format='svg')