From 3414b6c1456fe93425566b211bfc085e2a4d6a3c Mon Sep 17 00:00:00 2001
From: Yves Biener
Date: Tue, 31 Mar 2026 22:23:38 +0200
Subject: [PATCH] add: model approximation visualization

The approximation fits a simple linear regression and uses the estimated
slope `beta` to judge whether the _location information_ is significant at
all. This provides a faster and cheaper check of whether computing the full
piece-wise model is necessary and helpful in the first place.
---
 model_approximation.py | 105 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)
 create mode 100644 model_approximation.py

diff --git a/model_approximation.py b/model_approximation.py
new file mode 100644
index 0000000..d8e2d43
--- /dev/null
+++ b/model_approximation.py
@@ -0,0 +1,105 @@
+import math
+
+import matplotlib.pyplot as plt
+import numpy as np
+from graph_tool.all import *
+
+from src import centrality
+from src import plot
+from src import fitting
+
+
+def leverage(g, weight):
+    """Leverage centrality: l_i = (1/k_i) * sum_j (k_i - k_j) / (k_i + k_j) over the neighbours j of vertex i."""
+    vp = g.new_vertex_property("double")  # VertexPropertyMap
+    for v in g.vertices():
+        li = 0.0
+        neighbours = g.get_all_neighbours(v)
+        ki = len(neighbours)
+        # sum over the degree differences to all neighbours
+        for nv in neighbours:
+            other_neighbours = g.get_all_neighbours(nv)
+            kj = len(other_neighbours)
+            li += (ki - kj) / (ki + kj)
+        li /= ki
+        vp[v] = li
+    return vp
+
+
+def random_graph(n=5000, seed=None):
+    """
+    Generate a uniformly random point cloud.
+    `n` [int] Number of points to generate. The default of 5000 is a good compromise between point density and the runtime of the subsequent calculations.
+    `seed` [int|None] Seed for the random number generator; a fresh random seed is drawn when None.
+    @return [(numpy.ndarray, int)] Array of shape (n, 2) with the coordinates of the generated points, and the seed that was used.
+    """
+    if seed is None:
+        import secrets
+        seed = secrets.randbits(128)
+    rng = np.random.default_rng(seed=seed)
+    return rng.random((n, 2)), seed
+
+
+def spatial_graph(adata):
+    """Delaunay triangulation of the point cloud, with each edge weighted by its Euclidean length."""
+    g, pos = triangulation(adata, type="delaunay")
+    g.vp["pos"] = pos
+    weight = g.new_edge_property("double")
+    for e in g.edges():
+        weight[e] = math.sqrt(sum((pos[e.source()].a - pos[e.target()].a) ** 2))
+    return g, weight
+
+
+points, seed = random_graph()
+g, weight = spatial_graph(points)
+g = GraphView(g)
+
+# calculate centrality values
+vp, ep = betweenness(g, weight=weight)
+vp.a = np.nan_to_num(vp.a)  # replace possible NaNs with 0
+ep.a = np.nan_to_num(ep.a)  # replace possible NaNs with 0
+
+# min-max normalization of the vertex centrality
+min_val, max_val = vp.a.min(), vp.a.max()
+vp.a = (vp.a - min_val) / (max_val - min_val)
+
+# calculate convex hull
+convex_hull = centrality.convex_hull(g)
+
+# plot graph with convex_hull
+fig = plt.figure(figsize=(15, 5))
+ax0, ax1 = fig.subplots(1, 2)
+plot.graph_plot(fig, ax0, g, vp, convex_hull, f"Random Graph (seed: {seed})")
+
+# generate model based on convex hull and associated centrality values
+quantification = plot.quantification_data(g, vp, convex_hull)
+
+# optimize the model's piece-wise linear function
+d = quantification[:, 0]
+C = quantification[:, 1]
+m_opt, c0_opt, b_opt, aic_opt = fitting.fit_piece_wise_linear(d, C)
+
+# TODO
+# should this be part of the plotting function itself? It should not be necessary to do this here.
+d_curve = np.linspace(min(d), max(d), 500)
+C_curve = np.piecewise(
+    d_curve,
+    [d_curve <= b_opt, d_curve > b_opt],
+    [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt]
+)
+# plot model containing the fitted piece-wise linear function
+plot.quantification_plot(ax1, quantification, d_curve, C_curve, 'Betweenness', aic_opt)
+
+# simple linear regression model (cheap baseline for the piece-wise fit)
+m_reg, c0_reg, aic_reg = fitting.fit_linear_regression(d, C)
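+
+# Illustrative sketch only: the idea from the commit message is that the regression
+# slope `beta` (m_reg here) already indicates whether the location information
+# matters, so a cheap slope check could gate the expensive piece-wise fit. The
+# threshold below is an arbitrary placeholder, not a value derived from the data.
+beta_threshold = 0.1  # hypothetical cut-off for a "significant" slope
+if abs(m_reg) < beta_threshold:
+    print("regression slope is negligible; fitting the piece-wise model is probably unnecessary")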
+
+# TODO
+# should this be part of the plotting function itself? It should not be necessary to do this here.
+d_curve = np.linspace(min(d), max(d), 500)
+C_curve = m_reg * d_curve + c0_reg
+# overlay the simple linear regression on the quantification plot
+ax1.plot(d_curve, C_curve, color='k', linewidth=1, label=f"Simple Linear Regression | AIC: {aic_reg:.2f}")
+ax1.legend()
+
+fig.savefig("model_approximation_betweenness_5000.svg", format='svg')
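
leverage() above is defined but never called from the script. As a rough sketch,
assuming plot.quantification_data and fitting.fit_linear_regression accept any
vertex property the same way they accept the betweenness values (all variable
names below are illustrative), it could be reused in the same pipeline:

    lp = leverage(g, weight)                                  # leverage centrality per vertex
    lp.a = np.nan_to_num(lp.a)
    lp.a = (lp.a - lp.a.min()) / (lp.a.max() - lp.a.min())    # same min-max normalization
    quant_lev = plot.quantification_data(g, lp, convex_hull)
    m_lev, c0_lev, aic_lev = fitting.fit_linear_regression(quant_lev[:, 0], quant_lev[:, 1])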