diff --git a/degree_distribution_pagerank.py b/degree_distribution_pagerank.py new file mode 100644 index 0000000..4a17b9d --- /dev/null +++ b/degree_distribution_pagerank.py @@ -0,0 +1,141 @@ + +import math + +import matplotlib.pyplot as plt +from matplotlib.collections import LineCollection +import matplotlib as mpl +import numpy as np +import squidpy as sq +import scipy +import spatialdata as sd +from spatialdata_io.experimental import to_legacy_anndata +from graph_tool.all import * + +from src import centrality +from src import plot +from src import fitting + + +def merfish(): + """ + Merfish dataset from `squidpy`. + """ + adata = sq.datasets.merfish() + adata = adata[adata.obs.Bregma == -9].copy() + return adata + + +def random_graph(n=5000, seed=None): + """ + Uniformly random point cloud generation. + `n` [int] Number of points to generate. Default 5000 seems like a good starting point in point density and corresponding runtime for the subsequent calculations. + @return [numpy.ndarray] Array of shape(n, 2) containing the coordinates for each point of the generated point cloud. + """ + if seed is None: + import secrets + seed = secrets.randbits(128) + rng = np.random.default_rng(seed=seed) + return rng.random((n, 2)), seed + + +def spatial_graph(adata): + """ + Generate the spatial graph using delaunay for the given `adata`. + `adata` will contain the calculated spatial graph contents in the keys + adata.obsm['spatial']` in case the `adata` is created from a dataset of *squidpy*. + @return [Graph] generated networkx graph from adata.obsp['spatial_distances'] + """ + g, pos = graph_tool.generation.triangulation(adata, type="delaunay") + g.vp["pos"] = pos + weight = g.new_edge_property("double") + for e in g.edges(): + weight[e] = math.sqrt(sum(map(abs, pos[e.source()].a - pos[e.target()].a)))**2 + g.ep["weight"] = weight + return g, weight + + +def plot_graph_nodes(g, centralities, fig, ax, name): + pos = g.vp["pos"] + x = [] + y = [] + for v in g.vertices(): + ver = pos[v] + x.append(ver[0]) + y.append(ver[1]) + + sc = ax.scatter(x, y, s=1, c=centralities, cmap=plt.cm.plasma) + ax.set_title(name) + fig.colorbar(sc, ax=ax) + + +def plot_relationship_nodes(g, vp, convex_hull, fig, ax, name): + quantification = plot.quantification_data(g, vp, convex_hull) + + # optimize model's piece-wise linear function + d = quantification[:, 0] + C = quantification[:, 1] + m_opt, c0_opt, b_opt, aic_opt = fitting.fit_piece_wise_linear(d, C, M=100) + + # TODO + # should this be part of the plotting function itself, it should not be necessary for me to do this + d_curve = np.linspace(min(d), max(d), 500) + C_curve = np.piecewise( + d_curve, + [d_curve <= b_opt, d_curve > b_opt], + [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt] + ) + + ax.set_title(name) + ax.set_xlabel('Distance to Bounding-Box') + ax.set_ylabel('Centrality') + ax.scatter(quantification[:, 0], quantification[:, 1], c=quantification[:, 1], cmap=plt.cm.plasma, s=0.2) + ax.plot(d_curve, C_curve, color='g', linewidth=1, label=f"m = {m_opt} | b = {b_opt} | c = {c0_opt}") + ax.legend() + + +def plot_degree_distribution(g, fig, ax, name): + ax.set_title(name) + ax.set_ylabel('Degree') + ax.set_xlabel('# Occurances') + degrees = g.get_total_degrees(list(g.vertices())) + bins = degrees.max() - degrees.min() + counts, bins, patches = ax.hist(degrees, bins=bins, orientation='horizontal') + + # Label the percentages below the x-axis... + # bin_centers = 0.5 * np.diff(bins) + bins[:-1] + # for count, x in zip(counts, bin_centers): + # # Label the percentages + # percent = '%0.000f%%' % (100 * float(count) / counts.sum()) + # ax.annotate(percent, xy=(x, 0), xycoords=('data', 'axes fraction'), + # xytext=(0, -18), textcoords='offset points', va='top', ha='center') + + +points, seed = random_graph(n=2000, seed=231533135843957409942915332448253409428) +g, weight = spatial_graph(points) +# adata = merfish() +# g, weight = spatial_graph(adata.obsm['spatial']) +g = GraphView(g) + +# relationship with betweenness scoring for both node and edges +vp = pagerank(g, weight=weight) +vp.a = np.nan_to_num(vp.a) # correct floating point values + +# plot graph +fig = plt.figure(figsize=(16, 9), layout='constrained') +fig.suptitle(f"Artical (n = 2000 | seed {seed})", fontsize=16) +ax1, ax2 = fig.subplots(1, 2) + +# relationship with betweenness scoring for both node and edges +vp = pagerank(g, weight=weight) +vp.a = np.nan_to_num(vp.a) # correct floating point values + +# normalize centrality values +min_val, max_val = vp.a.min(), vp.a.max() +vp.a = (vp.a - min_val) / (max_val - min_val) + +# compare relationships +convex_hull = centrality.convex_hull(g) +plot_degree_distribution(g, fig, ax2, "Degree Distribution") +plot_relationship_nodes(g, vp, convex_hull, fig, ax1, "Pagerank Centrality with fitted model") + +fig.savefig(f"degree_distribution_vs_pagerank_artifical.pdf", format='pdf')