From 44a93dc160554b814fe6e5481daac3ee4df5c0a4 Mon Sep 17 00:00:00 2001
From: Yves Biener <yves.biener@gmx.de>
Date: Fri, 9 Jan 2026 15:14:22 +0100
Subject: [PATCH] add: AIC score for each model; add score into label of
 corresponding function in plots

---
 example.py  | 162 +++++++++++++++++++++++++++++++++++++++++++++++-----
 src/plot.py |   6 +-
 2 files changed, 150 insertions(+), 18 deletions(-)

diff --git a/example.py b/example.py
index 18bdaf3..1a632c3 100644
--- a/example.py
+++ b/example.py
@@ -2,7 +2,7 @@ import math
 
 import matplotlib.pyplot as plt
 import numpy as np
-# import squidpy as sq
+import squidpy as sq
 from graph_tool.all import *
 
 from src import centrality
@@ -43,8 +43,8 @@ def spatial_graph(adata):
     """
     Generate the spatial graph using delaunay for the given `adata`.
     `adata` will contain the calculated spatial graph contents in the keys
-    `adata.obps['spatial_distances']` and `adata.obsm['spatial']` afterwards too.
-    @return [Graph] generated networkx graph from adata['spatial_distances']
+    `adata.obsm['spatial']` in case the `adata` is created from a dataset of *squidpy*.
+    @return [Graph] generated networkx graph from adata.obsp['spatial_distances']
     """
     g, pos = graph_tool.generation.triangulation(adata, type="delaunay")
     g.vp["pos"] = pos
@@ -53,18 +53,94 @@ def spatial_graph(adata):
         weight[e] = math.sqrt(sum(map(abs, pos[e.source()].a - pos[e.target()].a)))**2
     return g, weight
 
-# generate spatial graph from a given dataset
-# g, weight = spatial_graph(merfish().obsm['spatial'])
-for i in range(1, 10):
+def merfish_example():
+    # generate spatial graph from a given dataset
+    g, weight = spatial_graph(merfish().obsm['spatial'])
+    g = GraphView(g)
+
+    x_spatial = []
+    for v in g.vertices():
+        x_spatial.append(g.vp["pos"][v][0])
+
+    # calculate centrality values
+    vp = closeness(g, weight=weight)
+    vp.a = np.nan_to_num(vp.a) # correct floating point values
+
+    # normalization
+    min_val, max_val = vp.a.min(), vp.a.max()
+    vp.a = (vp.a - min_val) / (max_val - min_val)
+
+    # calculate convex hull
+    convex_hull = centrality.convex_hull(g)
+
+    # plot graph with convex_hull
+    fig = plt.figure(figsize=(15, 5))
+    ax0, ax1 = fig.subplots(1, 2)
+    plot.graph_plot(fig, ax0, g, vp, convex_hull, f"Merfish\nCloseness")
+
+    # generate model based on convex hull and associated centrality values
+    quantification = plot.quantification_data(g, vp, convex_hull)
+
+    # optimize model's piece-wise linear function
+    d = quantification[:, 0]
+    C = quantification[:, 1]
+    m_opt, c0_opt, b_opt = fitting.fit_piece_wise_linear(d, C)
+
+    # AIC
+    # AIC = 2 * k - 2 * ln(L^~) with k = 3 (b_opt, m_opt, c0_opt)
+    # with ln(L^~) = sum(ln(f(x_i))) where x_i describes a data point
+    # - f is *not normalized*
+    sum_log = 0.0
+    for x_i in x_spatial:
+        sum_log += math.log(m_opt* b_opt + c0_opt if x_i >= b_opt else m_opt * x_i + c0_opt)
+    aic_model = 6. - 2. * sum_log # three parameters: b_opt, m_opt, c0_opt
+
+    # TODO
+    # should this be part of the plotting function itself, it should not be necessary for me to do this
+    d_curve = np.linspace(min(d), max(d), 500)
+    C_curve = np.piecewise(
+        d_curve,
+        [d_curve <= b_opt, d_curve > b_opt],
+        [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt]
+    )
+    # plot model containing modeled piece-wise linear function
+    plot.quantification_plot(ax1, quantification, d_curve, C_curve, 'Models', aic_model)
+
+    # linear regression model
+    m_reg, c_reg = fitting.fit_linear_regression(d, C)
+
+    # AIC
+    sum_log = 0.0
+    for x_i in x_spatial:
+        sum_log += math.log(m_reg * x_i + c_reg)
+    aic_regression = 4. - 2. * sum_log # two parameter: m_reg, c_reg
+
+    x = np.linspace(min(d), max(d), 500)
+    y = m_reg * x + c_reg
+    ax1.plot(x, y, color='k', linewidth=1, label=f"Simple Linear Regression | AIC: {aic_regression}")
+    ax1.legend()
+
+    fig.savefig(f"Merfish_closeness.svg", format='svg')
+
+
+for i in range(1, 6):
     points, seed = random_graph()
     g, weight = spatial_graph(points)
     g = GraphView(g)
 
+    x_spatial = []
+    for v in g.vertices():
+        x_spatial.append(g.vp["pos"][v][0])
+
     # calculate centrality values
     vp = closeness(g, weight=weight)
     vp.a = np.nan_to_num(vp.a) # correct floating point values
     # ep.a = np.nan_to_num(ep.a) # correct floating point values
 
+    # normalization
+    min_val, max_val = vp.a.min(), vp.a.max()
+    vp.a = (vp.a - min_val) / (max_val - min_val)
+
     # calculate convex hull
     convex_hull = centrality.convex_hull(g)
 
@@ -81,6 +157,15 @@ for i in range(1, 10):
     C = quantification[:, 1]
     m_opt, c0_opt, b_opt = fitting.fit_piece_wise_linear(d, C)
 
+    # AIC
+    # AIC = 2 * k - 2 * ln(L^~) with k = 3 (b_opt, m_opt, c0_opt)
+    # with ln(L^~) = sum(ln(f(x_i))) where x_i describes a data point
+    # - f is *not normalized*
+    sum_log = 0.0
+    for x_i in x_spatial:
+        sum_log += math.log(m_opt* b_opt + c0_opt if x_i >= b_opt else m_opt * x_i + c0_opt)
+    aic_model = 6. - 2. * sum_log # three parameters: b_opt, m_opt, c0_opt
+
     # TODO
     # should this be part of the plotting function itself, it should not be necessary for me to do this
     d_curve = np.linspace(min(d), max(d), 500)
@@ -90,16 +175,23 @@ for i in range(1, 10):
         [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt]
     )
     # plot model containing modeled piece-wise linear function
-    plot.quantification_plot(ax1, quantification, d_curve, C_curve, 'Models')
+    plot.quantification_plot(ax1, quantification, d_curve, C_curve, 'Models', aic_model)
 
     # linear regression model
     m_reg, c_reg = fitting.fit_linear_regression(d, C)
+
+    # AIC
+    sum_log = 0.0
+    for x_i in x_spatial:
+        sum_log += math.log(m_reg * x_i + c_reg)
+    aic_regression = 4. - 2. * sum_log # two parameter: m_reg, c_reg
+
     x = np.linspace(min(d), max(d), 500)
     y = m_reg * x + c_reg
-    ax1.plot(x, y, color='k', linewidth=1, label="Simple Linear Regression")
+    ax1.plot(x, y, color='k', linewidth=1, label=f"Simple Linear Regression | AIC: {aic_regression}")
     ax1.legend()
 
-    fig.savefig(f"random_point_clouds/{i}_closeness.svg", format='svg')
+    fig.savefig(f"uniform_random_point_clouds/{i}_closeness.svg", format='svg')
 
     # ---------------------------------------------------------------------------------------------
 
@@ -108,6 +200,10 @@ for i in range(1, 10):
     vp.a = np.nan_to_num(vp.a) # correct floating point values
     # ep.a = np.nan_to_num(ep.a) # correct floating point values
 
+    # normalization
+    min_val, max_val = vp.a.min(), vp.a.max()
+    vp.a = (vp.a - min_val) / (max_val - min_val)
+
     # calculate convex hull
     convex_hull = centrality.convex_hull(g)
 
@@ -124,6 +220,15 @@ for i in range(1, 10):
     C = quantification[:, 1]
     m_opt, c0_opt, b_opt = fitting.fit_piece_wise_linear(d, C)
 
+    # AIC
+    # AIC = 2 * k - 2 * ln(L^~) with k = 3 (b_opt, m_opt, c0_opt)
+    # with ln(L^~) = sum(ln(f(x_i))) where x_i describes a data point
+    # - f is *not normalized*
+    sum_log = 0.0
+    for x_i in x_spatial:
+        sum_log += math.log(m_opt* b_opt + c0_opt if x_i >= b_opt else m_opt * x_i + c0_opt)
+    aic_model = 6. - 2. * sum_log # three parameters: b_opt, m_opt, c0_opt
+
     # TODO
     # should this be part of the plotting function itself, it should not be necessary for me to do this
     d_curve = np.linspace(min(d), max(d), 500)
@@ -133,16 +238,23 @@ for i in range(1, 10):
         [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt]
     )
     # plot model containing modeled piece-wise linear function
-    plot.quantification_plot(ax1, quantification, d_curve, C_curve, 'Models')
+    plot.quantification_plot(ax1, quantification, d_curve, C_curve, 'Models', aic_model)
 
     # linear regression model
     m_reg, c_reg = fitting.fit_linear_regression(d, C)
+
+    # AIC
+    sum_log = 0.0
+    for x_i in x_spatial:
+        sum_log += math.log(m_reg * x_i + c_reg)
+    aic_regression = 4. - 2. * sum_log # two parameter: m_reg, c_reg
+
     x = np.linspace(min(d), max(d), 500)
     y = m_reg * x + c_reg
-    ax1.plot(x, y, color='k', linewidth=1, label="Simple Linear Regression")
+    ax1.plot(x, y, color='k', linewidth=1, label=f"Simple Linear Regression | AIC: {aic_regression}")
     ax1.legend()
 
-    fig.savefig(f"random_point_clouds/{i}_betweenness.svg", format='svg')
+    fig.savefig(f"uniform_random_point_clouds/{i}_betweenness.svg", format='svg')
 
     # ---------------------------------------------------------------------------------------------
     
@@ -151,6 +263,10 @@ for i in range(1, 10):
     vp.a = np.nan_to_num(vp.a) # correct floating point values
     # ep.a = np.nan_to_num(ep.a) # correct floating point values
 
+    # normalization
+    min_val, max_val = vp.a.min(), vp.a.max()
+    vp.a = (vp.a - min_val) / (max_val - min_val)
+
     # calculate convex hull
     convex_hull = centrality.convex_hull(g)
 
@@ -167,6 +283,15 @@ for i in range(1, 10):
     C = quantification[:, 1]
     m_opt, c0_opt, b_opt = fitting.fit_piece_wise_linear(d, C)
 
+    # AIC
+    # AIC = 2 * k - 2 * ln(L^~) with k = 3 (b_opt, m_opt, c0_opt)
+    # with ln(L^~) = sum(ln(f(x_i))) where x_i describes a data point
+    # - f is *not normalized*
+    sum_log = 0.0
+    for x_i in x_spatial:
+        sum_log += math.log(m_opt* b_opt + c0_opt if x_i >= b_opt else m_opt * x_i + c0_opt)
+    aic_model = 6. - 2. * sum_log # three parameters: b_opt, m_opt, c0_opt
+
     # TODO
     # should this be part of the plotting function itself, it should not be necessary for me to do this
     d_curve = np.linspace(min(d), max(d), 500)
@@ -176,13 +301,20 @@ for i in range(1, 10):
         [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt]
     )
     # plot model containing modeled piece-wise linear function
-    plot.quantification_plot(ax1, quantification, d_curve, C_curve, 'Models')
+    plot.quantification_plot(ax1, quantification, d_curve, C_curve, 'Models', aic_model)
 
     # linear regression model
     m_reg, c_reg = fitting.fit_linear_regression(d, C)
+
+    # AIC
+    sum_log = 0.0
+    for x_i in x_spatial:
+        sum_log += math.log(m_reg * x_i + c_reg)
+    aic_regression = 4. - 2. * sum_log # two parameter: m_reg, c_reg
+
     x = np.linspace(min(d), max(d), 500)
     y = m_reg * x + c_reg
-    ax1.plot(x, y, color='k', linewidth=1, label="Simple Linear Regression")
+    ax1.plot(x, y, color='k', linewidth=1, label=f"Simple Linear Regression | AIC: {aic_regression}")
     ax1.legend()
 
-    fig.savefig(f"random_point_clouds/{i}_pagerank.svg", format='svg')
+    fig.savefig(f"uniform_random_point_clouds/{i}_pagerank.svg", format='svg')
diff --git a/src/plot.py b/src/plot.py
index a8e7aea..264101f 100644
--- a/src/plot.py
+++ b/src/plot.py
@@ -112,21 +112,21 @@ def quantification_data(G, measures, convex_hull):
     return np.array(quantification)
 
 
-def quantification_plot(ax, quantification, d_curve, C_curve, metric_name):
+def quantification_plot(ax, quantification, d_curve, C_curve, metric_name, aic_score):
     """
     Plot relationship data.
     @param data [Array-2d] see `data(pos, metric)`
     @param d_curve linear function of the left side of the intersection point
     @param C_curve constant function of the right side of the intersection point
     @param metric_name [String] Name of the metric to be used as a title for the plot
-    @param path [String] Path to store the generated plot as svg file
+    @param aic_score [Float] Calculated AIC value for the model
     """
     ax.set_title(metric_name)
     ax.set_xlabel('Distance to Bounding-Box')
     ax.set_ylabel('Centrality')
     ax.scatter(quantification[:, 0], quantification[:, 1], c=quantification[:, 1], cmap=plt.cm.plasma, s=0.2)
     if d_curve is not None and C_curve is not None:
-        ax.plot(d_curve, C_curve, color='g', linewidth=1, label='Piecewise Linear Model')
+        ax.plot(d_curve, C_curve, color='g', linewidth=1, label=f"Piecewise Linear Model | AIC: {aic_score}")
 
 
 class Quantification: