WIP

2026-04-26 16:59:15 +02:00
parent c0d0e25ca2
commit 111ce77513
1 changed files with 141 additions and 0 deletions
@@ -0,0 +1,141 @@
+
+import math
+
+import matplotlib.pyplot as plt
+from matplotlib.collections import LineCollection
+import matplotlib as mpl
+import numpy as np
+import squidpy as sq
+import scipy
+import spatialdata as sd
+from spatialdata_io.experimental import to_legacy_anndata
+from graph_tool.all import *
+
+from src import centrality
+from src import plot
+from src import fitting
+
+
+def merfish():
+    """
+    Merfish dataset from `squidpy`.
+    """
+    adata = sq.datasets.merfish()
+    adata = adata[adata.obs.Bregma == -9].copy()
+    return adata
+
+
+def random_graph(n=5000, seed=None):
+    """
+    Uniformly random point cloud generation.
+    `n` [int] Number of points to generate. Default 5000 seems like a good starting point in point density and corresponding runtime for the subsequent calculations.
+    @return [numpy.ndarray] Array of shape(n, 2) containing the coordinates for each point of the generated point cloud.
+    """
+    if seed is None:
+        import secrets
+        seed = secrets.randbits(128)
+    rng = np.random.default_rng(seed=seed)
+    return rng.random((n, 2)), seed
+
+
+def spatial_graph(adata):
+    """
+    Generate the spatial graph using delaunay for the given `adata`.
+    `adata` will contain the calculated spatial graph contents in the keys
+    adata.obsm['spatial']` in case the `adata` is created from a dataset of *squidpy*.
+    @return [Graph] generated networkx graph from adata.obsp['spatial_distances']
+    """
+    g, pos = graph_tool.generation.triangulation(adata, type="delaunay")
+    g.vp["pos"] = pos
+    weight = g.new_edge_property("double")
+    for e in g.edges():
+        weight[e] = math.sqrt(sum(map(abs, pos[e.source()].a - pos[e.target()].a)))**2
+    g.ep["weight"] = weight
+    return g, weight
+
+
+def plot_graph_nodes(g, centralities, fig, ax, name):
+    pos = g.vp["pos"]
+    x = []
+    y = []
+    for v in g.vertices():
+        ver = pos[v]
+        x.append(ver[0])
+        y.append(ver[1])
+
+    sc = ax.scatter(x, y, s=1, c=centralities, cmap=plt.cm.plasma)
+    ax.set_title(name)
+    fig.colorbar(sc, ax=ax)
+
+
+def plot_relationship_nodes(g, vp, convex_hull, fig, ax, name):
+    quantification = plot.quantification_data(g, vp, convex_hull)
+
+    # optimize model's piece-wise linear function
+    d = quantification[:, 0]
+    C = quantification[:, 1]
+    m_opt, c0_opt, b_opt, aic_opt = fitting.fit_piece_wise_linear(d, C, M=100)
+
+    # TODO
+    # should this be part of the plotting function itself, it should not be necessary for me to do this
+    d_curve = np.linspace(min(d), max(d), 500)
+    C_curve = np.piecewise(
+        d_curve,
+        [d_curve <= b_opt, d_curve > b_opt],
+        [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt]
+    )
+
+    ax.set_title(name)
+    ax.set_xlabel('Distance to Bounding-Box')
+    ax.set_ylabel('Centrality')
+    ax.scatter(quantification[:, 0], quantification[:, 1], c=quantification[:, 1], cmap=plt.cm.plasma, s=0.2)
+    ax.plot(d_curve, C_curve, color='g', linewidth=1, label=f"m = {m_opt} | b = {b_opt} | c = {c0_opt}")
+    ax.legend()
+
+
+def plot_degree_distribution(g, fig, ax, name):
+    ax.set_title(name)
+    ax.set_ylabel('Degree')
+    ax.set_xlabel('# Occurances')
+    degrees = g.get_total_degrees(list(g.vertices()))
+    bins = degrees.max() - degrees.min()
+    counts, bins, patches = ax.hist(degrees, bins=bins, orientation='horizontal')
+
+    # Label the percentages below the x-axis...
+    # bin_centers = 0.5 * np.diff(bins) + bins[:-1]
+    # for count, x in zip(counts, bin_centers):
+    #     # Label the percentages
+    #     percent = '%0.000f%%' % (100 * float(count) / counts.sum())
+    #     ax.annotate(percent, xy=(x, 0), xycoords=('data', 'axes fraction'),
+    #         xytext=(0, -18), textcoords='offset points', va='top', ha='center')
+
+
+points, seed = random_graph(n=2000, seed=231533135843957409942915332448253409428)
+g, weight = spatial_graph(points)
+# adata = merfish()
+# g, weight = spatial_graph(adata.obsm['spatial'])
+g = GraphView(g)
+
+# relationship with betweenness scoring for both node and edges
+vp = pagerank(g, weight=weight)
+vp.a = np.nan_to_num(vp.a) # correct floating point values
+
+# plot graph
+fig = plt.figure(figsize=(16, 9), layout='constrained')
+fig.suptitle(f"Artical (n = 2000 | seed {seed})", fontsize=16)
+ax1, ax2 = fig.subplots(1, 2)
+
+# relationship with betweenness scoring for both node and edges
+vp = pagerank(g, weight=weight)
+vp.a = np.nan_to_num(vp.a) # correct floating point values
+
+# normalize centrality values
+min_val, max_val = vp.a.min(), vp.a.max()
+vp.a = (vp.a - min_val) / (max_val - min_val)
+
+# compare relationships
+convex_hull = centrality.convex_hull(g)
+plot_degree_distribution(g, fig, ax2, "Degree Distribution")
+plot_relationship_nodes(g, vp, convex_hull, fig, ax1, "Pagerank Centrality with fitted model")
+
+fig.savefig(f"degree_distribution_vs_pagerank_artifical.pdf", format='pdf')