From 44a93dc160554b814fe6e5481daac3ee4df5c0a4 Mon Sep 17 00:00:00 2001 From: Yves Biener Date: Fri, 9 Jan 2026 15:14:22 +0100 Subject: [PATCH] add: AIC score for each model; add score into label of corresponding function in plots --- example.py | 162 +++++++++++++++++++++++++++++++++++++++++++++++----- src/plot.py | 6 +- 2 files changed, 150 insertions(+), 18 deletions(-) diff --git a/example.py b/example.py index 18bdaf3..1a632c3 100644 --- a/example.py +++ b/example.py @@ -2,7 +2,7 @@ import math import matplotlib.pyplot as plt import numpy as np -# import squidpy as sq +import squidpy as sq from graph_tool.all import * from src import centrality @@ -43,8 +43,8 @@ def spatial_graph(adata): """ Generate the spatial graph using delaunay for the given `adata`. `adata` will contain the calculated spatial graph contents in the keys - `adata.obps['spatial_distances']` and `adata.obsm['spatial']` afterwards too. - @return [Graph] generated networkx graph from adata['spatial_distances'] + `adata.obsm['spatial']` in case the `adata` is created from a dataset of *squidpy*. 
+ @return [Graph] generated graph-tool graph from adata.obsp['spatial_distances'] """ g, pos = graph_tool.generation.triangulation(adata, type="delaunay") g.vp["pos"] = pos @@ -53,18 +53,94 @@ def spatial_graph(adata): weight[e] = math.sqrt(sum(map(abs, pos[e.source()].a - pos[e.target()].a)))**2 return g, weight -# generate spatial graph from a given dataset -# g, weight = spatial_graph(merfish().obsm['spatial']) -for i in range(1, 10): +def merfish_example(): + # generate spatial graph from a given dataset + g, weight = spatial_graph(merfish().obsm['spatial']) + g = GraphView(g) + + x_spatial = [] + for v in g.vertices(): + x_spatial.append(g.vp["pos"][v][0]) + + # calculate centrality values + vp = closeness(g, weight=weight) + vp.a = np.nan_to_num(vp.a) # correct floating point values + + # normalization + min_val, max_val = vp.a.min(), vp.a.max() + vp.a = (vp.a - min_val) / (max_val - min_val) + + # calculate convex hull + convex_hull = centrality.convex_hull(g) + + # plot graph with convex_hull + fig = plt.figure(figsize=(15, 5)) + ax0, ax1 = fig.subplots(1, 2) + plot.graph_plot(fig, ax0, g, vp, convex_hull, f"Merfish\nCloseness") + + # generate model based on convex hull and associated centrality values + quantification = plot.quantification_data(g, vp, convex_hull) + + # optimize model's piece-wise linear function + d = quantification[:, 0] + C = quantification[:, 1] + m_opt, c0_opt, b_opt = fitting.fit_piece_wise_linear(d, C) + + # AIC + # AIC = 2 * k (= 3) - 2 * ln(L^~) + # with ln(L^~) = sum(ln(f(x_i))) where x_i describes a data point + # - f is *not normalized* + sum_log = 0.0 + for x_i in x_spatial: + sum_log += math.log(m_opt* b_opt + c0_opt if x_i >= b_opt else m_opt * x_i + c0_opt) + aic_model = 6. - 2. 
* sum_log # three parameters: b_opt, m_opt, c0_opt + + # TODO + # should this be part of the plotting function itself, it should not be necessary for me to do this + d_curve = np.linspace(min(d), max(d), 500) + C_curve = np.piecewise( + d_curve, + [d_curve <= b_opt, d_curve > b_opt], + [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt] + ) + # plot model containing modeled piece-wise linear function + plot.quantification_plot(ax1, quantification, d_curve, C_curve, 'Models', aic_model) + + # linear regression model + m_reg, c_reg = fitting.fit_linear_regression(d, C) + + # AIC + sum_log = 0.0 + for x_i in x_spatial: + sum_log += math.log(m_reg * x_i + c_reg) + aic_regression = 4. - 2. * sum_log # two parameters: m_reg, c_reg + + x = np.linspace(min(d), max(d), 500) + y = m_reg * x + c_reg + ax1.plot(x, y, color='k', linewidth=1, label=f"Simple Linear Regression | AIC: {aic_regression}") + ax1.legend() + + fig.savefig(f"Merfish_closeness.svg", format='svg') + + +for i in range(1, 6): points, seed = random_graph() g, weight = spatial_graph(points) g = GraphView(g) + x_spatial = [] + for v in g.vertices(): + x_spatial.append(g.vp["pos"][v][0]) + # calculate centrality values vp = closeness(g, weight=weight) vp.a = np.nan_to_num(vp.a) # correct floating point values # ep.a = np.nan_to_num(ep.a) # correct floating point values + # normalization + min_val, max_val = vp.a.min(), vp.a.max() + vp.a = (vp.a - min_val) / (max_val - min_val) + # calculate convex hull convex_hull = centrality.convex_hull(g) @@ -81,6 +157,15 @@ for i in range(1, 10): C = quantification[:, 1] m_opt, c0_opt, b_opt = fitting.fit_piece_wise_linear(d, C) + # AIC + # AIC = 2 * k (= 3) - 2 * ln(L^~) + # with ln(L^~) = sum(ln(f(x_i))) where x_i describes a data point + # - f is *not normalized* + sum_log = 0.0 + for x_i in x_spatial: + sum_log += math.log(m_opt* b_opt + c0_opt if x_i >= b_opt else m_opt * x_i + c0_opt) + aic_model = 6. - 2. 
* sum_log # three parameters: b_opt, m_opt, c0_opt + # TODO # should this be part of the plotting function itself, it should not be necessary for me to do this d_curve = np.linspace(min(d), max(d), 500) @@ -90,16 +175,23 @@ for i in range(1, 10): [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt] ) # plot model containing modeled piece-wise linear function - plot.quantification_plot(ax1, quantification, d_curve, C_curve, 'Models') + plot.quantification_plot(ax1, quantification, d_curve, C_curve, 'Models', aic_model) # linear regression model m_reg, c_reg = fitting.fit_linear_regression(d, C) + + # AIC + sum_log = 0.0 + for x_i in x_spatial: + sum_log += math.log(m_reg * x_i + c_reg) + aic_regression = 4. - 2. * sum_log # two parameters: m_reg, c_reg + x = np.linspace(min(d), max(d), 500) y = m_reg * x + c_reg - ax1.plot(x, y, color='k', linewidth=1, label="Simple Linear Regression") + ax1.plot(x, y, color='k', linewidth=1, label=f"Simple Linear Regression | AIC: {aic_regression}") ax1.legend() - fig.savefig(f"random_point_clouds/{i}_closeness.svg", format='svg') + fig.savefig(f"uniform_random_point_clouds/{i}_closeness.svg", format='svg') # --------------------------------------------------------------------------------------------- @@ -108,6 +200,10 @@ for i in range(1, 10): vp.a = np.nan_to_num(vp.a) # correct floating point values # ep.a = np.nan_to_num(ep.a) # correct floating point values + # normalization + min_val, max_val = vp.a.min(), vp.a.max() + vp.a = (vp.a - min_val) / (max_val - min_val) + # calculate convex hull convex_hull = centrality.convex_hull(g) @@ -124,6 +220,15 @@ for i in range(1, 10): C = quantification[:, 1] m_opt, c0_opt, b_opt = fitting.fit_piece_wise_linear(d, C) + # AIC + # AIC = 2 * k (= 3) - 2 * ln(L^~) + # with ln(L^~) = sum(ln(f(x_i))) where x_i describes a data point + # - f is *not normalized* + sum_log = 0.0 + for x_i in x_spatial: + sum_log += math.log(m_opt* b_opt + c0_opt if x_i >= b_opt else m_opt * x_i + c0_opt) + 
aic_model = 6. - 2. * sum_log # three parameters: b_opt, m_opt, c0_opt + # TODO # should this be part of the plotting function itself, it should not be necessary for me to do this d_curve = np.linspace(min(d), max(d), 500) @@ -133,16 +238,23 @@ for i in range(1, 10): [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt] ) # plot model containing modeled piece-wise linear function - plot.quantification_plot(ax1, quantification, d_curve, C_curve, 'Models') + plot.quantification_plot(ax1, quantification, d_curve, C_curve, 'Models', aic_model) # linear regression model m_reg, c_reg = fitting.fit_linear_regression(d, C) + + # AIC + sum_log = 0.0 + for x_i in x_spatial: + sum_log += math.log(m_reg * x_i + c_reg) + aic_regression = 4. - 2. * sum_log # two parameters: m_reg, c_reg + x = np.linspace(min(d), max(d), 500) y = m_reg * x + c_reg - ax1.plot(x, y, color='k', linewidth=1, label="Simple Linear Regression") + ax1.plot(x, y, color='k', linewidth=1, label=f"Simple Linear Regression | AIC: {aic_regression}") ax1.legend() - fig.savefig(f"random_point_clouds/{i}_betweenness.svg", format='svg') + fig.savefig(f"uniform_random_point_clouds/{i}_betweenness.svg", format='svg') # --------------------------------------------------------------------------------------------- @@ -151,6 +263,10 @@ for i in range(1, 10): vp.a = np.nan_to_num(vp.a) # correct floating point values # ep.a = np.nan_to_num(ep.a) # correct floating point values + # normalization + min_val, max_val = vp.a.min(), vp.a.max() + vp.a = (vp.a - min_val) / (max_val - min_val) + # calculate convex hull convex_hull = centrality.convex_hull(g) @@ -167,6 +283,15 @@ for i in range(1, 10): C = quantification[:, 1] m_opt, c0_opt, b_opt = fitting.fit_piece_wise_linear(d, C) + # AIC + # AIC = 2 * k (= 3) - 2 * ln(L^~) + # with ln(L^~) = sum(ln(f(x_i))) where x_i describes a data point + # - f is *not normalized* + sum_log = 0.0 + for x_i in x_spatial: + sum_log += math.log(m_opt* b_opt + c0_opt if x_i >= b_opt else 
m_opt * x_i + c0_opt) + aic_model = 6. - 2. * sum_log # three parameters: b_opt, m_opt, c0_opt + # TODO # should this be part of the plotting function itself, it should not be necessary for me to do this d_curve = np.linspace(min(d), max(d), 500) @@ -176,13 +301,20 @@ for i in range(1, 10): [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt] ) # plot model containing modeled piece-wise linear function - plot.quantification_plot(ax1, quantification, d_curve, C_curve, 'Models') + plot.quantification_plot(ax1, quantification, d_curve, C_curve, 'Models', aic_model) # linear regression model m_reg, c_reg = fitting.fit_linear_regression(d, C) + + # AIC + sum_log = 0.0 + for x_i in x_spatial: + sum_log += math.log(m_reg * x_i + c_reg) + aic_regression = 4. - 2. * sum_log # two parameters: m_reg, c_reg + x = np.linspace(min(d), max(d), 500) y = m_reg * x + c_reg - ax1.plot(x, y, color='k', linewidth=1, label="Simple Linear Regression") + ax1.plot(x, y, color='k', linewidth=1, label=f"Simple Linear Regression | AIC: {aic_regression}") ax1.legend() - fig.savefig(f"random_point_clouds/{i}_pagerank.svg", format='svg') + fig.savefig(f"uniform_random_point_clouds/{i}_pagerank.svg", format='svg') diff --git a/src/plot.py b/src/plot.py index a8e7aea..264101f 100644 --- a/src/plot.py +++ b/src/plot.py @@ -112,21 +112,21 @@ def quantification_data(G, measures, convex_hull): return np.array(quantification) -def quantification_plot(ax, quantification, d_curve, C_curve, metric_name): +def quantification_plot(ax, quantification, d_curve, C_curve, metric_name, aic_score): """ Plot relationship data. 
@param data [Array-2d] see `data(pos, metric)` @param d_curve linear function of the left side of the intersection point @param C_curve constant function of the right side of the intersection point @param metric_name [String] Name of the metric to be used as a title for the plot - @param path [String] Path to store the generated plot as svg file + @param aic_score [Float] Calculated AIC value for the model """ ax.set_title(metric_name) ax.set_xlabel('Distance to Bounding-Box') ax.set_ylabel('Centrality') ax.scatter(quantification[:, 0], quantification[:, 1], c=quantification[:, 1], cmap=plt.cm.plasma, s=0.2) if d_curve is not None and C_curve is not None: - ax.plot(d_curve, C_curve, color='g', linewidth=1, label='Piecewise Linear Model') + ax.plot(d_curve, C_curve, color='g', linewidth=1, label=f"Piecewise Linear Model | AIC: {aic_score}") class Quantification: