boundary-aware-centrality/degree_distribution_pagerank.py


import math

import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
import matplotlib as mpl
import numpy as np
import squidpy as sq
import scipy
import spatialdata as sd
from spatialdata_io.experimental import to_legacy_anndata
from graph_tool.all import *

from src import centrality
from src import plot
from src import fitting


def merfish():
    """
    Merfish dataset from `squidpy`.
    """
    adata = sq.datasets.merfish()
    adata = adata[adata.obs.Bregma == -9].copy()
    return adata


def random_graph(n=5000, seed=None):
    """
    Uniformly random point cloud generation.
    `n` [int] Number of points to generate. Default 5000 seems like a good starting point in point density and corresponding runtime for the subsequent calculations.
    @return [numpy.ndarray] Array of shape(n, 2) containing the coordinates for each point of the generated point cloud.
    """
    if seed is None:
        import secrets
        seed = secrets.randbits(128)
    rng = np.random.default_rng(seed=seed)
    return rng.random((n, 2)), seed


def spatial_graph(adata):
    """
    Generate the spatial graph using delaunay for the given `adata`.
    `adata` will contain the calculated spatial graph contents in the keys
    adata.obsm['spatial']` in case the `adata` is created from a dataset of *squidpy*.
    @return [Graph] generated networkx graph from adata.obsp['spatial_distances']
    """
    g, pos = graph_tool.generation.triangulation(adata, type="delaunay")
    g.vp["pos"] = pos
    weight = g.new_edge_property("double")
    for e in g.edges():
        weight[e] = math.sqrt(sum(map(abs, pos[e.source()].a - pos[e.target()].a)))**2
    g.ep["weight"] = weight
    return g, weight


def plot_graph_nodes(g, centralities, fig, ax, name):
    pos = g.vp["pos"]
    x = []
    y = []
    for v in g.vertices():
        ver = pos[v]
        x.append(ver[0])
        y.append(ver[1])

    sc = ax.scatter(x, y, s=1, c=centralities, cmap=plt.cm.plasma)
    ax.set_title(name)
    fig.colorbar(sc, ax=ax)


def plot_relationship_nodes(g, vp, convex_hull, fig, ax, name):
    quantification = plot.quantification_data(g, vp, convex_hull)

    # optimize model's piece-wise linear function
    d = quantification[:, 0]
    C = quantification[:, 1]
    m_opt, c0_opt, b_opt, aic_opt = fitting.fit_piece_wise_linear(d, C, M=100)

    # TODO
    # should this be part of the plotting function itself, it should not be necessary for me to do this
    d_curve = np.linspace(min(d), max(d), 500)
    C_curve = np.piecewise(
        d_curve,
        [d_curve <= b_opt, d_curve > b_opt],
        [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt]
    )

    ax.set_title(name)
    ax.set_xlabel('Distance to Bounding-Box')
    ax.set_ylabel('Centrality')
    ax.scatter(quantification[:, 0], quantification[:, 1], c=quantification[:, 1], cmap=plt.cm.plasma, s=0.2)
    ax.plot(d_curve, C_curve, color='g', linewidth=1, label=f"m = {m_opt} | b = {b_opt} | c = {c0_opt}")
    ax.legend()


def plot_degree_distribution(g, fig, ax, name):
    ax.set_title(name)
    ax.set_ylabel('Degree')
    ax.set_xlabel('# Occurances')
    degrees = g.get_total_degrees(list(g.vertices()))
    bins = degrees.max() - degrees.min()
    counts, bins, patches = ax.hist(degrees, bins=bins, orientation='horizontal')

    # Label the percentages below the x-axis...
    # bin_centers = 0.5 * np.diff(bins) + bins[:-1]
    # for count, x in zip(counts, bin_centers):
    #     # Label the percentages
    #     percent = '%0.000f%%' % (100 * float(count) / counts.sum())
    #     ax.annotate(percent, xy=(x, 0), xycoords=('data', 'axes fraction'),
    #         xytext=(0, -18), textcoords='offset points', va='top', ha='center')


points, seed = random_graph(n=2000, seed=231533135843957409942915332448253409428)
g, weight = spatial_graph(points)
# adata = merfish()
# g, weight = spatial_graph(adata.obsm['spatial'])
g = GraphView(g)

# relationship with betweenness scoring for both node and edges
vp = pagerank(g, weight=weight)
vp.a = np.nan_to_num(vp.a) # correct floating point values

# plot graph
fig = plt.figure(figsize=(16, 9), layout='constrained')
fig.suptitle(f"Artical (n = 2000 | seed {seed})", fontsize=16)
ax1, ax2 = fig.subplots(1, 2)

# relationship with betweenness scoring for both node and edges
vp = pagerank(g, weight=weight)
vp.a = np.nan_to_num(vp.a) # correct floating point values

# normalize centrality values
min_val, max_val = vp.a.min(), vp.a.max()
vp.a = (vp.a - min_val) / (max_val - min_val)

# compare relationships
convex_hull = centrality.convex_hull(g)
plot_degree_distribution(g, fig, ax2, "Degree Distribution")
plot_relationship_nodes(g, vp, convex_hull, fig, ax1, "Pagerank Centrality with fitted model")

fig.savefig(f"degree_distribution_vs_pagerank_artifical.pdf", format='pdf')