From 6adc1e46bdd3f390ca6eddded5108519150ec95d Mon Sep 17 00:00:00 2001
From: Yves Biener <yves.biener@gmx.de>
Date: Sun, 29 Mar 2026 19:31:34 +0200
Subject: [PATCH] WIP: compare sub-graph predictions with original graph
 scores

---
 diff_comparison.py | 147 ++++++++++++++++++++++-----------------------
 src/fitting.py     |   5 +-
 2 files changed, 76 insertions(+), 76 deletions(-)

diff --git a/diff_comparison.py b/diff_comparison.py
index 9435d62..2c480e3 100644
--- a/diff_comparison.py
+++ b/diff_comparison.py
@@ -48,17 +48,13 @@ def random_graph(n=5000, seed=None):
     return rng.random((n, 2)), seed
 
 
-def sub_spatial_graph(adata):
-    """
-    Generate the spatial graph using delaunay for the given `adata`.
-    `adata` will contain the calculated spatial graph contents in the keys
-    adata.obsm['spatial']` in case the `adata` is created from a dataset of *squidpy*.
-    @return [Graph] generated networkx graph from adata.obsp['spatial_distances']
-    """
+def sub_spatial_graph(adata, percentage):
     sub_adata = np.array([])
+    distance_of_center = 0.5 * percentage
     for point in adata:
-        if point[0] > 0.33 and point[0] <= 0.66 and point[1] > 0.33 and point[1] <= 0.66:
-            sub_adata = np.append(sub_adata, [point[0], point[1]])
+        if point[0] > 0.5 - distance_of_center and point[0] <= 0.5 + distance_of_center:
+            if point[1] > 0.5 - distance_of_center and point[1] <= 0.5 + distance_of_center:
+                sub_adata = np.append(sub_adata, [point[0], point[1]])
     
     sub_adata = sub_adata.reshape(sub_adata.shape[0] // 2, 2)
     return spatial_graph(sub_adata)
@@ -83,11 +79,13 @@ def plot_graph_diff(G, c, fig, ax, name, cmap=plt.cm.plasma):
     pos = G.vp["pos"]
     x = []
     y = []
+    distance_of_center = 0.5 * percentage
     for v in G.vertices():
         ver = pos[v]
-        if ver[0] > 0.33 and ver[0] <= 0.66 and ver[1] > 0.33 and ver[1] <= 0.66:
-            x.append(ver[0])
-            y.append(ver[1])
+        if ver[0] > 0.5 - distance_of_center and ver[0] <= 0.5 + distance_of_center:
+            if ver[1] > 0.5 - distance_of_center and ver[1] <= 0.5 + distance_of_center:
+                x.append(ver[0])
+                y.append(ver[1])
 
     sc = ax.scatter(x, y, s=1, cmap=cmap, c=c) # map closeness values as color mapping on the verticies
     ax.set_title(name)
@@ -97,7 +95,7 @@ def plot_graph_diff(G, c, fig, ax, name, cmap=plt.cm.plasma):
 def apply(g, seed, weight, convex_hull, ax, method, method_name):
     # calculate centrality values
     vp = None
-    if method_name == "Betweeness":
+    if method_name == "Betweenness":
         vp, ep = method(g, weight=weight)
     elif method_name == "Eigenvector":
         ep, vp = method(g, weight=weight)
@@ -107,10 +105,6 @@ def apply(g, seed, weight, convex_hull, ax, method, method_name):
         vp = method(g, weight=weight)
     vp.a = np.nan_to_num(vp.a) # correct floating point values
 
-    # normalization
-    # min_val, max_val = vp.a.min(), vp.a.max()
-    # vp.a = (vp.a - min_val) / (max_val - min_val)
-
     # generate model based on convex hull and associated centrality values
     quantification = plot.quantification_data(g, vp, convex_hull)
 
@@ -128,7 +122,12 @@ def apply(g, seed, weight, convex_hull, ax, method, method_name):
         [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt]
     )
     # plot model containing modeled piece-wise linear function
-    plot.quantification_plot(ax, quantification, d_curve, C_curve, method_name, aic_opt)
+    if ax is not None:
+        plot.quantification_plot(ax, quantification, d_curve, C_curve, method_name, aic_opt)
+
+    # min-max normalization: rescale the centrality values to [0, 1]
+    min_val, max_val = vp.a.min(), vp.a.max()
+    vp.a = (vp.a - min_val) / (max_val - min_val)
 
     return vp
 
@@ -136,7 +135,8 @@ def apply(g, seed, weight, convex_hull, ax, method, method_name):
 def apply_corrected(g, seed, weight, convex_hull, ax, method, method_name):
     # calculate centrality values
     vp = None
-    if method_name == "Betweeness":
+    ep = None
+    if method_name == "Betweenness":
         vp, ep = method(g, weight=weight)
     elif method_name == "Eigenvector":
         ep, vp = method(g, weight=weight)
@@ -165,9 +165,15 @@ def apply_corrected(g, seed, weight, convex_hull, ax, method, method_name):
         [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt]
     )
     # plot model containing modeled piece-wise linear function
-    plot.quantification_plot(ax, quantification, d_curve, C_curve, method_name, aic_opt)
+    if ax is not None:
+        plot.quantification_plot(ax, quantification, d_curve, C_curve, method_name, aic_opt)
 
-    return centrality.correct(g, vp, m_opt, c0_opt, b_opt)
+    vp = centrality.correct(g, vp, m_opt, c0_opt, b_opt)
+    # min-max normalization: rescale the corrected values to [0, 1]
+    min_val, max_val = vp.a.min(), vp.a.max()
+    vp.a = (vp.a - min_val) / (max_val - min_val)
+
+    return vp
 
 #
 # - Create a random point cloud and calculate a triangulation on it
@@ -177,61 +183,59 @@ def apply_corrected(g, seed, weight, convex_hull, ax, method, method_name):
 #   - apply centrality measure to the next axis
 # - Draw the corresponding resulting models into a grid
 #
-points, seed = random_graph(n=3000)
+points, seed = random_graph(n=5000)
 g, weight = spatial_graph(points)
 g = GraphView(g)
 
-g_sub, weight_sub = sub_spatial_graph(points)
-g_sub = GraphView(g_sub)
-
 # calculate convex hull
 convex_hull = centrality.convex_hull(g)
 
 # plot graph with convex_hull
 fig_graph, ax_graph = plt.subplots(figsize=(15, 12))
 # draw without any centrality measure `vp`
-vp = g.new_vertex_property("double")
+vp, ep = betweenness(g, weight=weight)
+
 plot.graph_plot(fig_graph, ax_graph, g, vp, convex_hull, f"Pointcloud (seed: {seed})")
-fig_graph.savefig("Diff_graph.svg", format='svg')
+fig_graph.savefig("model_prediction_graph_original_betweenness_5000.svg", format='svg')
 
-fig = plt.figure(figsize=(15, 12))
-row1, row2 = fig.subplots(2, 2)
+# min-max normalization: rescale the original betweenness values to [0, 1]
+min_val, max_val = vp.a.min(), vp.a.max()
+vp.a = (vp.a - min_val) / (max_val - min_val)
+vp_betweenness_original = vp
 
-ax1, ax2 = row1
-# TODO select corresponding centrality measure method
-vp_closeness = apply(g, seed, weight, convex_hull, ax1, closeness, "Closeness")
-vp_betweenness = apply(g, seed, weight, convex_hull, ax2, betweenness, "Betweeness")
+for percentage in np.arange(0.1, 1, 0.1, dtype=float):
+    print(f"Percentage: {percentage:.0%}")
+    g_sub, weight_sub = sub_spatial_graph(points, percentage)
+    g_sub = GraphView(g_sub)
+    convex_hull = centrality.convex_hull(g_sub)
+    # draw subgraph
+    fig_sub = plt.figure(figsize=(25, 12))
+    ax1, ax2 = fig_sub.subplots(1, 2)
+    vp, ep = betweenness(g_sub, weight=weight_sub)
+    plot.graph_plot(fig_sub, ax1, g_sub, vp, convex_hull, f"{percentage:.0%} of Pointcloud (seed: {seed})")
 
-# calculate convex hull
-convex_hull = centrality.convex_hull(g_sub)
+    min_val, max_val = vp.a.min(), vp.a.max()
+    vp.a = (vp.a - min_val) / (max_val - min_val)
 
-# plot graph with convex_hull
-fig_graph, ax_graph = plt.subplots(figsize=(15, 12))
-# draw without any centrality measure `vp`
-vp = g_sub.new_vertex_property("double")
-plot.graph_plot(fig_graph, ax_graph, g_sub, vp, convex_hull, f"Pointcloud (seed: {seed})")
-fig_graph.savefig("Diff_subgraph.svg", format='svg')
+    vp_betweenness_corrected = apply_corrected(g_sub, seed, weight_sub, convex_hull, None, betweenness, "Betweenness")
+    plot.graph_plot(fig_sub, ax2, g_sub, vp_betweenness_corrected, convex_hull, f"{percentage:.0%} of Pointcloud with applied prediction")
+    fig_sub.savefig(f"model_prediction_subgraph_betweenness_5000_{percentage * 100:.0f}_percent.svg", format='svg')
 
-ax1, ax2 = row2
-vp_closeness_corrected = apply_corrected(g_sub, seed, weight_sub, convex_hull, ax1, closeness, "Closeness")
-vp_betweeness_corrected = apply_corrected(g_sub, seed, weight_sub, convex_hull, ax2, betweenness, "Betweeness")
-
-fig.savefig(f"Diff_scores.svg", format='svg')
-
-for type in ['closeness', 'betweenness']:
-    print(type)
+    distance_of_center = 0.5 * percentage
 
     sub_keys = iter(g_sub.vertices())
     keys = iter(g.vertices())
 
     scores = []
+    raw_sub_scores = []
     sub_scores = []
+    raw_diff_scores = []
     diff_scores = []
 
     for sub_key in sub_keys:
         key = next(keys)
         position = g.vp["pos"][key]
-        while not (position[0] > 0.33 and position[0] <= 0.66 and position[1] > 0.33 and position[1] <= 0.66):
+        while not (position[0] > 0.5 - distance_of_center and position[0] <= 0.5 + distance_of_center and position[1] > 0.5 - distance_of_center and position[1] <= 0.5 + distance_of_center):
             key = next(keys)
             position = g.vp["pos"][key]
         # NOTE print corresponding position (which are identical)
@@ -239,38 +243,45 @@ for type in ['closeness', 'betweenness']:
         # sub_position = g_sub.vp["pos"][sub_key]
         # print(f"position: {position} | sub_position: {sub_position}")
 
-        value = 0.0
-        sub_value = 0.0
-        if type == 'closeness':
-            value = vp_closeness[key]
-            sub_value = vp_closeness_corrected[sub_key]
-        else:
-            value = vp_betweenness[key]
-            sub_value = vp_betweeness_corrected[sub_key]
+        # look up betweenness: original graph, raw sub graph, corrected sub graph
+        value = vp_betweenness_original[key]
+        pre_prediction = vp[sub_key]
+        sub_value = vp_betweenness_corrected[sub_key]
 
         scores.append(value)
+        raw_sub_scores.append(pre_prediction)
         sub_scores.append(sub_value)
+        raw_diff_scores.append(value - pre_prediction)
         diff_scores.append(value - sub_value)
 
     median_score = np.median(scores)
+    median_raw_sub_score = np.median(raw_sub_scores)
     median_sub_score = np.median(sub_scores)
     print(f"\tmedian score: {median_score}")
+    print(f"\tmedian raw_sub_score: {median_raw_sub_score}")
     print(f"\tmedian sub_score: {median_sub_score}")
-    print(f"\tmedian delta: {(median_score - median_sub_score)}")
+    print(f"\tmedian delta (score - raw_sub_score): {(median_score - median_raw_sub_score)}")
+    print(f"\tmedian delta (score - sub_score): {(median_score - median_sub_score)}")
     print("")
 
     max_value_score = np.max(scores)
+    max_value_raw_sub_score = np.max(raw_sub_scores)
     max_value_sub_score = np.max(sub_scores)
     print(f"\tmax value score: {max_value_score}")
+    print(f"\tmax value raw_sub_score: {max_value_raw_sub_score}")
     print(f"\tmax value sub_score: {max_value_sub_score}")
-    print(f"\tmax value delta: {(max_value_score - max_value_sub_score)}")
+    print(f"\tmax value delta (score - raw_sub_score): {(max_value_score - max_value_raw_sub_score)}")
+    print(f"\tmax value delta (score - sub_score): {(max_value_score - max_value_sub_score)}")
     print("")
 
     min_value_score = np.min(scores)
+    min_value_raw_sub_score = np.min(raw_sub_scores)
     min_value_sub_score = np.min(sub_scores)
     print(f"\tmin value score: {min_value_score}")
+    print(f"\tmin value raw_sub_score: {min_value_raw_sub_score}")
     print(f"\tmin value sub_score: {min_value_sub_score}")
-    print(f"\tmin value delta: {(min_value_score - min_value_sub_score)}")
+    print(f"\tmin value delta (score - raw_sub_score): {(min_value_score - min_value_raw_sub_score)}")
+    print(f"\tmin value delta (score - sub_score): {(min_value_score - min_value_sub_score)}")
     print("")
 
     fig = plt.figure(figsize=(35, 10))
@@ -278,18 +289,6 @@ for type in ['closeness', 'betweenness']:
 
     plot_graph_diff(g, scores, fig, plot_graph_ax, "Original Graph (region of sub graph)")
     plot_graph_diff(g, diff_scores, fig, plot_sub_graph_ax, "Differences after correction of sub graph compared to original graph", plt.cm.seismic)
-
-    vp = None
-    ep = None
-    if type == 'closeness':
-        vp = closeness(g_sub, weight=weight_sub)
-        vp.a = np.nan_to_num(vp.a) # correct floating point values
-    else:
-        vp, ep = betweenness(g_sub, weight=weight_sub)
-        vp.a = np.nan_to_num(vp.a) # correct floating point values
-    # normalization
-    # min_val, max_val = vp.a.min(), vp.a.max()
-    # vp.a = (vp.a - min_val) / (max_val - min_val)
     plot_graph_diff(g, vp.a, fig, plot_sub_graph_before_ax, "Sub Graph (extracted region of original graph) without correction")
 
-    fig.savefig(f"Diff_graph_scatter_{type}.svg", format='svg')
+    fig.savefig(f"model_prediction_subgraph_betweenness_5000_{percentage * 100:.0f}_percentage_diff.svg", format='svg')
diff --git a/src/fitting.py b/src/fitting.py
index 436488b..1cabd10 100644
--- a/src/fitting.py
+++ b/src/fitting.py
@@ -35,8 +35,9 @@ def fit_piece_wise_linear(d, C, M=1000):
     model.setObjective(gp.quicksum(epsilon[i] * epsilon[i] for i in range(n)), GRB.MINIMIZE)
 
     # Setting solver parameters for precision
-    model.setParam('OptimalityTol', 1e-4) 
-    model.setParam('MIPGap', 0.01)  
+    model.setParam('OptimalityTol', 1e-4)
+    model.setParam('MIPGap', 0.01) 
+    model.setParam('OutputFlag', 0)
 
     for i in range(n):
         # Constraints enforcing piecewise linear fit