commit 35ad81484ce0c11bf361f8c78924c815597bca86
Author: Yves Biener
Date:   Tue Jan 6 18:26:38 2026 +0100

    Initial commit based on previous repositories' contents

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..78a5bf3
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
*/__pycache__/**
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..41f059a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,38 @@
# Boundary-Aware Centrality for Spatial Graphs

University project on boundary-aware node centralities for spatial graphs. For the project details and results see the [typst paper](./doc/paper.typ). For building the document and using the Python library developed for this project, see the corresponding sections below.

## Code usage

> [!IMPORTANT]
> The implementation uses [gurobi](https://www.gurobi.com/solutions/gurobi-optimizer/) to solve the linear program for the function fitting, which requires a *license*. Please refer to the gurobi license documentation for details.

Install the requirements into a virtual environment:

```sh
# create virtual environment
python -m venv venv
# activate virtual environment
source venv/bin/activate
# install required dependencies
pip install -r requirements.txt
```

The `src` directory contains all the Python source files used to create the diagrams and images found in the `doc/figures` directory. For the corresponding usage for the two datasets used in this project, see [merfish.py](./merfish.py) and [mibitof.py](./mibitof.py).

To generate the diagrams and images, run one of the two Python scripts (inside the virtual environment):

```sh
python merfish.py
```

## Document generation

The document is written in [typst](https://typst.app) and can be compiled into a PDF using:

```sh
typst compile doc/paper.typ
```

The resulting `paper.pdf` is written to the `doc` directory.
diff --git a/example.py b/example.py
new file mode 100644
index 0000000..18bdaf3
--- /dev/null
+++ b/example.py
@@ -0,0 +1,188 @@
import math
import os

import matplotlib.pyplot as plt
import numpy as np
from graph_tool.all import *

from src import centrality
from src import plot
from src import fitting


def merfish():
    """
    Merfish dataset from `squidpy`.
    """
    import squidpy as sq  # imported lazily: only needed for this dataset
    adata = sq.datasets.merfish()
    adata = adata[adata.obs.Bregma == -9].copy()
    return adata


def mibitof():
    """
    Mibitof dataset from `squidpy`.
    """
    import squidpy as sq  # imported lazily: only needed for this dataset
    adata = sq.datasets.mibitof()
    return adata


def random_graph(n=5000, seed=None):
    """
    Uniformly random point cloud generation.
    `n` [int] Number of points to generate. The default of 5000 is a good
    compromise between point density and the runtime of the subsequent
    calculations.
    `seed` [int] Optional seed for the random number generator.
    @return [(numpy.ndarray, int)] Array of shape (n, 2) containing the
    coordinates of each point of the generated point cloud, together with the
    seed used to generate it.
    """
    if seed is None:
        import secrets
        seed = secrets.randbits(128)
    rng = np.random.default_rng(seed=seed)
    return rng.random((n, 2)), seed
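
# Note on random_graph: since the generator above is seeded explicitly, a run
# can be reproduced by passing the returned seed back in (usage sketch):
#
#     points, seed = random_graph()
#     same_points, _ = random_graph(seed=seed)  # identical point cloud
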
def spatial_graph(points):
    """
    Generate the spatial graph for the given point cloud using a Delaunay
    triangulation.
    `points` [numpy.ndarray] Array of shape (n, 2) with the point coordinates.
    @return [(Graph, EdgePropertyMap)] generated graph-tool graph carrying a
    "pos" vertex property, together with an edge property containing the
    Euclidean edge lengths.
    """
    # `triangulation` comes from the `graph_tool.all` star import
    g, pos = triangulation(points, type="delaunay")
    g.vp["pos"] = pos
    weight = g.new_edge_property("double")
    for e in g.edges():
        # Euclidean length of the edge
        weight[e] = math.sqrt(((pos[e.source()].a - pos[e.target()].a) ** 2).sum())
    return g, weight


def run_analysis(g, vp, seed, title, out_path):
    """
    Run the boundary analysis for one centrality and save the resulting plots.
    `g` [GraphView] Spatial graph with a "pos" vertex property.
    `vp` [VertexPropertyMap] Centrality values of the vertices of `g`.
    `seed` [int] Seed of the underlying point cloud (shown in the plot title).
    `title` [String] Name of the centrality, used in the plot title.
    `out_path` [String] Path of the generated svg file.
    """
    vp.a = np.nan_to_num(vp.a)  # correct floating point values

    # calculate convex hull
    convex_hull = centrality.convex_hull(g)

    # plot graph with convex_hull
    fig = plt.figure(figsize=(15, 5))
    ax0, ax1 = fig.subplots(1, 2)
    plot.graph_plot(fig, ax0, g, vp, convex_hull, f"Random Graph (seed: {seed})\n{title}")

    # generate model based on convex hull and associated centrality values
    quantification = plot.quantification_data(g, vp, convex_hull)

    # optimize model's piece-wise linear function
    d = quantification[:, 0]
    C = quantification[:, 1]
    m_opt, c0_opt, b_opt = fitting.fit_piece_wise_linear(d, C)

    # TODO should this be part of the plotting function itself? Constructing
    # the curve manually here should not be necessary.
    d_curve = np.linspace(min(d), max(d), 500)
    C_curve = np.piecewise(
        d_curve,
        [d_curve <= b_opt, d_curve > b_opt],
        [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt]
    )
    # plot model containing the fitted piece-wise linear function
    plot.quantification_plot(ax1, quantification, d_curve, C_curve, 'Models')

    # simple linear regression model for comparison
    m_reg, c_reg = fitting.fit_linear_regression(d, C)
    x = np.linspace(min(d), max(d), 500)
    y = m_reg * x + c_reg
    ax1.plot(x, y, color='k', linewidth=1, label="Simple Linear Regression")
    ax1.legend()

    fig.savefig(out_path, format='svg')
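
# The model fitted in run_analysis is a continuous piece-wise linear function
# with a single breakpoint b:
#
#     C(d) = m * d + c0   for d <= b
#     C(d) = m * b + c0   for d >  b
#
# i.e. centrality changes linearly with the distance d to the boundary and
# stays constant beyond the breakpoint.
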
# generate spatial graph from a given dataset
# g, weight = spatial_graph(merfish().obsm['spatial'])
os.makedirs("random_point_clouds", exist_ok=True)  # ensure the output directory exists
for i in range(1, 10):
    points, seed = random_graph()
    g, weight = spatial_graph(points)
    g = GraphView(g)

    vp = closeness(g, weight=weight)
    run_analysis(g, vp, seed, "Closeness", f"random_point_clouds/{i}_closeness.svg")

    vp, ep = betweenness(g, weight=weight)
    run_analysis(g, vp, seed, "Betweenness", f"random_point_clouds/{i}_betweenness.svg")

    vp = pagerank(g, weight=weight)
    run_analysis(g, vp, seed, "PageRank", f"random_point_clouds/{i}_pagerank.svg")
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..72b2ed4
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
gurobipy
graph-tool  # note: usually installed via conda or system packages, not pip
numpy
matplotlib
# only needed for src/dataset.py
squidpy
networkx
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/centrality.py b/src/centrality.py
new file mode 100644
index 0000000..510f63b
--- /dev/null
+++ b/src/centrality.py
@@ -0,0 +1,80 @@
import math

import numpy as np

from src import plot


def convex_hull(G):
    """
    Compute the convex hull of the vertex positions of `G` using the
    gift-wrapping (Jarvis march) algorithm.
    @param G [Graph] graph-tool graph with a "pos" vertex property
    @return [List] hull points as a closed polygon (the last point repeats the first)
    """
    pos = G.vp["pos"]
    x = []
    y = []
    for v in G.vertices():
        ver = pos[v]
        x.append(ver[0])
        y.append(ver[1])

    min_x = min(x)
    pos = np.stack((np.array(x), np.array(y)), axis=-1)

    convex_hull = []
    # start at the left-most point, which is guaranteed to lie on the hull
    hull_point = np.array([min_x, pos[np.argmin(x)][1]])
    convex_hull.append(hull_point)
    endpoint = None
    i = 0
    while not plot.Vector.eql(endpoint, convex_hull[0]):
        endpoint = pos[0]
        for point in pos:
            # advance the endpoint whenever `point` lies to the right of the
            # line from the current hull point to the candidate endpoint
            if plot.Vector.eql(endpoint, hull_point) or plot.Vector.cross_z(plot.Vector.vec(convex_hull[i], endpoint), plot.Vector.vec(convex_hull[i], point)) < 0:
                endpoint = point
        i += 1
        hull_point = endpoint
        convex_hull.append(hull_point)
    return convex_hull
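
# Usage sketch for convex_hull (assumes `g` is a graph-tool Graph carrying a
# "pos" vertex property, e.g. as produced by spatial_graph in example.py):
#
#     hull = convex_hull(g)
#     # `hull` is a closed polygon (its last point repeats the first), which
#     # is why plot.graph_plot can draw it directly as a LineCollection.
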
def correct(G, centrality, m_opt, c0_opt, b_opt):
    """
    Correct a given centrality through the calculated model.
    @param G [Graph] graph-tool graph with a "pos" vertex property
    @param centrality [VertexPropertyMap] Result of a centrality calculation of `graph-tool` on graph G
    @param m_opt [Float] Model m value (slope of linear function)
    @param c0_opt [Float] Model c0 value (offset of linear function)
    @param b_opt [Float] Model b value (intersection point)
    @return [Dict] Corrected centrality value for each vertex of G
    """
    corrected_metric = {}
    pos = G.vp["pos"]
    hull = convex_hull(G)

    for v in G.vertices():
        point = pos[v].a
        # distance to the boundary, approximated by the nearest hull vertex
        min_distance = math.inf
        for hull_point in hull:
            distance = plot.Vector.vec_len(plot.Vector.vec(point, hull_point))
            if distance < min_distance:
                min_distance = distance
        if min_distance <= b_opt:
            # requires correction: lift the value onto the model's plateau
            delta = (m_opt * b_opt) - (m_opt * min_distance)
            corrected_metric[v] = centrality[v] + delta
        else:
            # unaffected by boundary
            corrected_metric[v] = centrality[v]

    return corrected_metric
diff --git a/src/dataset.py b/src/dataset.py
new file mode 100644
index 0000000..5ebd987
--- /dev/null
+++ b/src/dataset.py
@@ -0,0 +1,30 @@
import networkx as nx
import squidpy as sq

# NOTE these shall be provided from the outside and not be part of the library!
def merfish():
    """
    Merfish dataset from `squidpy`.
    """
    adata = sq.datasets.merfish()
    adata = adata[adata.obs.Bregma == -9].copy()
    return adata


def mibitof():
    """
    Mibitof dataset from `squidpy`.
    """
    adata = sq.datasets.mibitof()
    return adata


def spatial_graph(adata):
    """
    Generate the spatial graph using a Delaunay triangulation for the given
    `adata`. Afterwards `adata` additionally contains the calculated spatial
    graph in the keys `adata.obsp['spatial_connectivities']` and
    `adata.obsp['spatial_distances']`.
    @return [Graph] networkx graph generated from adata.obsp['spatial_distances']
    """
    sq.gr.spatial_neighbors(adata, delaunay=True, coord_type="generic")
    return nx.from_scipy_sparse_array(adata.obsp['spatial_distances'])
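
# Usage sketch for this module (assumes the squidpy datasets are available;
# the distance-weighted closeness below uses networkx's public API, since
# from_scipy_sparse_array stores the distances in the "weight" edge attribute):
#
#     adata = merfish()
#     G = spatial_graph(adata)
#     closeness = nx.closeness_centrality(G, distance="weight")
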
+ """ + n = len(d) + + model = gp.Model() + m = model.addVar(vtype=GRB.CONTINUOUS, name="m") # Slope before breakpoint + + c0 = model.addVar(vtype=GRB.CONTINUOUS, name="c0") # y-intercept + b = model.addVar(vtype=GRB.CONTINUOUS, lb=min(d), ub=max(d), name="b") # Breakpoint + + z = model.addVars(n, vtype=GRB.BINARY, name="z") + epsilon = model.addVars(n, vtype=GRB.CONTINUOUS, name="epsilon") + + model.setObjective(gp.quicksum(epsilon[i] * epsilon[i] for i in range(n)), GRB.MINIMIZE) + + # Setting solver parameters for precision + model.setParam('OptimalityTol', 1e-4) + model.setParam('MIPGap', 0.01) + + for i in range(n): + # Constraints enforcing piecewise linear fit + model.addConstr(epsilon[i] >= (m * d[i] + c0 - C[i]) - (1 - z[i]) * M) + model.addConstr(epsilon[i] >= -(m * d[i] + c0 - C[i]) - (1 - z[i]) * M) + + model.addConstr(epsilon[i] >= (m * b + c0 - C[i]) - z[i] * M) + model.addConstr(epsilon[i] >= -(m * b + c0 - C[i]) - z[i] * M) + + # Binary switch for piecewise behavior + model.addConstr(z[i] * M >= b - d[i]) + model.addConstr((1 - z[i]) * M >= d[i] - b) + + model.optimize() + return m.X, c0.X, b.X + + +def fit_linear_regression(d, C): + """ + Fits a simple linear regression model to the given data using optimization. + + Parameters: + d (array-like): Distance values. + C (array-like): Corresponding centrality values. + + Returns: + tuple: Optimized slope (beta) and intercept (alpha). + """ + n = len(d) + model = gp.Model() + + alpha = model.addVar(vtype=GRB.CONTINUOUS, name="alpha") + beta = model.addVar(vtype=GRB.CONTINUOUS, name="beta") + + model.setObjective(gp.quicksum((C[i] - alpha - beta * d[i])**2 for i in range(n)), GRB.MINIMIZE) + model.optimize() + return beta.X, alpha.X + + +def plot_piece_wise_linear(d, C, m_opt, c0_opt, b_opt, measure, n, t, path): + """ + Plots the original data and optimized piecewise linear fit. + """ + d_curve = np.linspace(min(d), max(d), 500) + C_curve = np.piecewise( + d_curve, + [d_curve <= b_opt, d_curve > b_opt], + [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt] + ) + + plt.scatter(d, C, color="blue", label="Original", alpha=0.5) + plt.plot(d_curve, C_curve, color="red", label="Optimized", linewidth=2) + plt.xlabel("Distance to border") + plt.ylabel(f"{measure.capitalize()} centrality") + plt.title(f"Optimized piece-wise linear fit for {t} graph and {measure} centrality") + plt.legend() + plt.savefig(path, format='svg') + + fig, ax = plt.subplots() + ax.set_title(metric_name) + ax.set_xlabel('Distance to Bounding-Box') + ax.set_ylabel('Centrality') + ax.scatter(d, C, color='b', s=0.2) + fig.savefig(path, format='svg') diff --git a/src/plot.py b/src/plot.py new file mode 100644 index 0000000..a8e7aea --- /dev/null +++ b/src/plot.py @@ -0,0 +1,160 @@ +import math + +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.colors as mcolors + +from matplotlib.collections import LineCollection +from src import centrality + +class Vector: + """ + Small Vector class used for calculations of distances + """ + def cross_z(a, b): + return a[0] * b[1] - a[1] * b[0] + + def vec(start, end): + """ + Create a new vector from `start` to `end` + @param start [Array-2d] + @param end [Array-2d] + @return [Array-2d] + """ + return [end[0]-start[0],end[1]-start[1]] + + def eql(point, other): + """ + Check if `point` is equivalent to `other`. 
+ """ + if point is None or other is None: + return False + return point[0] == other[0] and point[1] == other[1] + + def vec_len(vector): + """ + Calculate the length of a `vector` + """ + return math.sqrt(vector[0]**2 + vector[1]**2) + + +def graph_plot(fig, ax, G, measures, convex_hull, name, show_edges=False): + """ + Plot a given graph. + @param G [Graph] networkx Graph + @param pos [Array-2d] position values of points + @param measures [Dict] Result of a centrality calculation of networkx on graph G that should be visualized + @param name [String] Name of the measurement used, which will be integrated into the generated plot + @param path [String] Path to store the generated plot as svg file + """ + pos = G.vp["pos"] + x = [] + y = [] + for v in G.vertices(): + # print(pos[v]) + ver = pos[v] + x.append(ver[0]) + y.append(ver[1]) + + c = measures.get_array() + # convex hull -> Bounding-Box + ch = LineCollection([convex_hull], colors=['g'], linewidths=1) + ax.add_collection(ch) + if show_edges: + for e in G.edges(): + ex = [pos[e.source()][0], pos[e.target()][0]] + ey = [pos[e.source()][1], pos[e.target()][1]] + ax.add_collection(LineCollection([np.column_stack([ex, ey])], colors=['k'], linewidths=0.1)) + + sc = ax.scatter(x, y, s=1, cmap=plt.cm.plasma, c=c) # map closeness values as color mapping on the verticies + ax.set_title(name) + fig.colorbar(sc, ax=ax) + + +def normalize_dict(d): + max = np.max(list(d.values())) + return {k: (v / max) for k, v in d.items()} + + +def quantification_data(G, measures, convex_hull): + """ + Create distance to metric relationship. + @param pos [Array-2d] Positions of points + @param metric [Dict] Result of a centrality calculation of networkx on graph G + @return [Array-2d] relationship ordered (acending) by distance + """ + quantification = [] + pos = G.vp["pos"] + x = [] + y = [] + for v in G.vertices(): + # print(pos[v]) + ver = pos[v] + x.append(ver[0]) + y.append(ver[1]) + + measures = measures.a + keys = iter(measures) + + points = np.stack((np.array(x), np.array(y)), axis=-1) + for point in points: + min_distance = math.inf + key = next(keys) + for edge in convex_hull: + vector = Vector.vec(point, edge) + distance = Vector.vec_len(vector) + if distance < min_distance: + min_distance = distance + quantification.append([min_distance, key]) + + # sort by distance + quantification.sort(key=lambda entry: entry[0]) + return np.array(quantification) + + +def quantification_plot(ax, quantification, d_curve, C_curve, metric_name): + """ + Plot relationship data. + @param data [Array-2d] see `data(pos, metric)` + @param d_curve linear function of the left side of the intersection point + @param C_curve constant function of the right side of the intersection point + @param metric_name [String] Name of the metric to be used as a title for the plot + @param path [String] Path to store the generated plot as svg file + """ + ax.set_title(metric_name) + ax.set_xlabel('Distance to Bounding-Box') + ax.set_ylabel('Centrality') + ax.scatter(quantification[:, 0], quantification[:, 1], c=quantification[:, 1], cmap=plt.cm.plasma, s=0.2) + if d_curve is not None and C_curve is not None: + ax.plot(d_curve, C_curve, color='g', linewidth=1, label='Piecewise Linear Model') + + +class Quantification: + def opt_data(pos, metric, b_opt): + """ + Optimal data relationship. Describe Points which are effected by the boundary. 
class Quantification:
    @staticmethod
    def opt_data(G, metric, b_opt):
        """
        Optimal data relationship: describe which points are affected by the boundary.
        @param G [Graph] graph-tool graph with a "pos" vertex property
        @param metric [Dict] Result of a centrality calculation on graph G
        @param b_opt [Float] Breakpoint distance of the fitted model.
        @return [Dict] `metric`-like dict (same keys) where True marks a node that is affected by the boundary, otherwise False.
        """
        keys = iter(metric.keys())
        boundary_affected = {}
        hull = centrality.convex_hull(G)

        pos = G.vp["pos"]
        for v in G.vertices():
            point = pos[v].a
            # distance to the boundary, approximated by the nearest hull vertex
            min_distance = math.inf
            key = next(keys)
            for hull_point in hull:
                vector = Vector.vec(point, hull_point)
                distance = Vector.vec_len(vector)
                if distance < min_distance:
                    min_distance = distance
            if min_distance <= b_opt:
                boundary_affected[key] = True
            else:
                boundary_affected[key] = False

        return boundary_affected
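
# Usage sketch for opt_data (`metric` is a {vertex: value} dict and `b_opt`
# the breakpoint obtained from fitting.fit_piece_wise_linear):
#
#     affected = Quantification.opt_data(g, metric, b_opt)
#     # affected[v] is True for vertices within b_opt of the hull, i.e. the
#     # nodes whose centrality the model considers boundary-affected.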