boundary-aware-centrality/diff_comparison.py

import math

import matplotlib.pyplot as plt
import numpy as np
from graph_tool.all import *

from src import centrality
from src import plot
from src import fitting


def degree(g, weight):
    # VertexPropertyMap
    vp = g.new_vertex_property("double")
    for v in g.vertices():
        neighbours = g.get_all_neighbours(v)
        vp[v] = len(neighbours)
    return vp


def leverage(g, weight):
    # VertexPropertyMap
    vp = g.new_vertex_property("double")
    for v in g.vertices():
        li = 0.0
        neighbours = g.get_all_neighbours(v)
        ki = len(neighbours)
        # sum
        for nv in neighbours:
            other_neighbours = g.get_all_neighbours(nv)
            kj = len(other_neighbours)
            li += (ki - kj) / (ki + kj)
        li /= ki
        vp[v] = li
    return vp


def random_graph(n=5000, seed=None):
    """
    Uniformly random point cloud generation.
    `n` [int] Number of points to generate. Default 5000 seems like a good starting point in point density and corresponding runtime for the subsequent calculations.
    @return [numpy.ndarray] Array of shape(n, 2) containing the coordinates for each point of the generated point cloud.
    """
    if seed is None:
        import secrets
        seed = secrets.randbits(128)
    rng = np.random.default_rng(seed=seed)
    return rng.random((n, 2)), seed


def sub_spatial_graph(adata):
    """
    Generate the spatial graph using delaunay for the given `adata`.
    `adata` will contain the calculated spatial graph contents in the keys
    adata.obsm['spatial']` in case the `adata` is created from a dataset of *squidpy*.
    @return [Graph] generated networkx graph from adata.obsp['spatial_distances']
    """
    sub_adata = np.array([])
    for point in adata:
        if point[0] > 0.33 and point[0] <= 0.66 and point[1] > 0.33 and point[1] <= 0.66:
            sub_adata = np.append(sub_adata, [point[0], point[1]])

    sub_adata = sub_adata.reshape(sub_adata.shape[0] // 2, 2)
    return spatial_graph(sub_adata)


def spatial_graph(adata):
    """
    Generate the spatial graph using delaunay for the given `adata`.
    `adata` will contain the calculated spatial graph contents in the keys
    adata.obsm['spatial']` in case the `adata` is created from a dataset of *squidpy*.
    @return [Graph] generated networkx graph from adata.obsp['spatial_distances']
    """
    g, pos = graph_tool.generation.triangulation(adata, type="delaunay")
    g.vp["pos"] = pos
    weight = g.new_edge_property("double")
    for e in g.edges():
        weight[e] = math.sqrt(sum(map(abs, pos[e.source()].a - pos[e.target()].a)))**2
    return g, weight


def plot_graph_diff(G, c, fig, ax, name, cmap=plt.cm.plasma):
    pos = G.vp["pos"]
    x = []
    y = []
    for v in G.vertices():
        ver = pos[v]
        if ver[0] > 0.33 and ver[0] <= 0.66 and ver[1] > 0.33 and ver[1] <= 0.66:
            x.append(ver[0])
            y.append(ver[1])

    sc = ax.scatter(x, y, s=1, cmap=cmap, c=c) # map closeness values as color mapping on the verticies
    ax.set_title(name)
    fig.colorbar(sc, ax=ax)


def apply(g, seed, weight, convex_hull, ax, method, method_name):
    # calculate centrality values
    vp = None
    if method_name == "Betweeness":
        vp, ep = method(g, weight=weight)
    elif method_name == "Eigenvector":
        ep, vp = method(g, weight=weight)
    elif method_name == "Hits":
        ep, vp, hub_centrality = method(g, weight=weight)
    else:
        vp = method(g, weight=weight)
    vp.a = np.nan_to_num(vp.a) # correct floating point values

    # normalization
    # min_val, max_val = vp.a.min(), vp.a.max()
    # vp.a = (vp.a - min_val) / (max_val - min_val)

    # generate model based on convex hull and associated centrality values
    quantification = plot.quantification_data(g, vp, convex_hull)

    # optimize model's piece-wise linear function
    d = quantification[:, 0]
    C = quantification[:, 1]
    m_opt, c0_opt, b_opt, aic_opt = fitting.fit_piece_wise_linear(d, C)

    # TODO
    # should this be part of the plotting function itself, it should not be necessary for me to do this
    d_curve = np.linspace(min(d), max(d), 500)
    C_curve = np.piecewise(
        d_curve,
        [d_curve <= b_opt, d_curve > b_opt],
        [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt]
    )
    # plot model containing modeled piece-wise linear function
    plot.quantification_plot(ax, quantification, d_curve, C_curve, method_name, aic_opt)

    return vp


def apply_corrected(g, seed, weight, convex_hull, ax, method, method_name):
    # calculate centrality values
    vp = None
    if method_name == "Betweeness":
        vp, ep = method(g, weight=weight)
    elif method_name == "Eigenvector":
        ep, vp = method(g, weight=weight)
    elif method_name == "Hits":
        ep, vp, hub_centrality = method(g, weight=weight)
    else:
        vp = method(g, weight=weight)
    vp.a = np.nan_to_num(vp.a) # correct floating point values

    # normalization
    # min_val, max_val = vp.a.min(), vp.a.max()
    # vp.a = (vp.a - min_val) / (max_val - min_val)

    # generate model based on convex hull and associated centrality values
    quantification = plot.quantification_data(g, vp, convex_hull)

    # optimize model's piece-wise linear function
    d = quantification[:, 0]
    C = quantification[:, 1]
    m_opt, c0_opt, b_opt, aic_opt = fitting.fit_piece_wise_linear(d, C)

    d_curve = np.linspace(min(d), max(d), 500)
    C_curve = np.piecewise(
        d_curve,
        [d_curve <= b_opt, d_curve > b_opt],
        [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt]
    )
    # plot model containing modeled piece-wise linear function
    plot.quantification_plot(ax, quantification, d_curve, C_curve, method_name, aic_opt)

    return centrality.correct(g, vp, m_opt, c0_opt, b_opt)

#
# - Create a random point cloud and calculate a triangulation on it
# - For that graph calculate the convex hull
# - Draw the graph with the convex hull
# - For each centrality measure
#   - apply centrality measure to the next axis
# - Draw the corresponding resulting models into a grid
#
points, seed = random_graph(n=3000)
g, weight = spatial_graph(points)
g = GraphView(g)

g_sub, weight_sub = sub_spatial_graph(points)
g_sub = GraphView(g_sub)

# calculate convex hull
convex_hull = centrality.convex_hull(g)

# plot graph with convex_hull
fig_graph, ax_graph = plt.subplots(figsize=(15, 12))
# draw without any centrality measure `vp`
vp = g.new_vertex_property("double")
plot.graph_plot(fig_graph, ax_graph, g, vp, convex_hull, f"Pointcloud (seed: {seed})")
fig_graph.savefig("Diff_graph.svg", format='svg')

fig = plt.figure(figsize=(15, 12))
row1, row2 = fig.subplots(2, 2)

ax1, ax2 = row1
# TODO select corresponding centrality measure method
vp_closeness = apply(g, seed, weight, convex_hull, ax1, closeness, "Closeness")
vp_betweenness = apply(g, seed, weight, convex_hull, ax2, betweenness, "Betweeness")

# calculate convex hull
convex_hull = centrality.convex_hull(g_sub)

# plot graph with convex_hull
fig_graph, ax_graph = plt.subplots(figsize=(15, 12))
# draw without any centrality measure `vp`
vp = g_sub.new_vertex_property("double")
plot.graph_plot(fig_graph, ax_graph, g_sub, vp, convex_hull, f"Pointcloud (seed: {seed})")
fig_graph.savefig("Diff_subgraph.svg", format='svg')

ax1, ax2 = row2
vp_closeness_corrected = apply_corrected(g_sub, seed, weight_sub, convex_hull, ax1, closeness, "Closeness")
vp_betweeness_corrected = apply_corrected(g_sub, seed, weight_sub, convex_hull, ax2, betweenness, "Betweeness")

fig.savefig(f"Diff_scores.svg", format='svg')

for type in ['closeness', 'betweenness']:
    print(type)

    sub_keys = iter(g_sub.vertices())
    keys = iter(g.vertices())

    scores = []
    sub_scores = []
    diff_scores = []

    for sub_key in sub_keys:
        key = next(keys)
        position = g.vp["pos"][key]
        while not (position[0] > 0.33 and position[0] <= 0.66 and position[1] > 0.33 and position[1] <= 0.66):
            key = next(keys)
            position = g.vp["pos"][key]
        # NOTE print corresponding position (which are identical)
        # position = g.vp["pos"][key]
        # sub_position = g_sub.vp["pos"][sub_key]
        # print(f"position: {position} | sub_position: {sub_position}")

        value = 0.0
        sub_value = 0.0
        if type == 'closeness':
            value = vp_closeness[key]
            sub_value = vp_closeness_corrected[sub_key]
        else:
            value = vp_betweenness[key]
            sub_value = vp_betweeness_corrected[sub_key]

        scores.append(value)
        sub_scores.append(sub_value)
        diff_scores.append(value - sub_value)

    median_score = np.median(scores)
    median_sub_score = np.median(sub_scores)
    print(f"\tmedian score: {median_score}")
    print(f"\tmedian sub_score: {median_sub_score}")
    print(f"\tmedian delta: {(median_score - median_sub_score)}")
    print("")

    max_value_score = np.max(scores)
    max_value_sub_score = np.max(sub_scores)
    print(f"\tmax value score: {max_value_score}")
    print(f"\tmax value sub_score: {max_value_sub_score}")
    print(f"\tmax value delta: {(max_value_score - max_value_sub_score)}")
    print("")

    min_value_score = np.min(scores)
    min_value_sub_score = np.min(sub_scores)
    print(f"\tmin value score: {min_value_score}")
    print(f"\tmin value sub_score: {min_value_sub_score}")
    print(f"\tmin value delta: {(min_value_score - min_value_sub_score)}")
    print("")

    fig = plt.figure(figsize=(35, 10))
    plot_graph_ax, plot_sub_graph_ax, plot_sub_graph_before_ax = fig.subplots(1, 3)

    plot_graph_diff(g, scores, fig, plot_graph_ax, "Original Graph (region of sub graph)")
    plot_graph_diff(g, diff_scores, fig, plot_sub_graph_ax, "Differences after correction of sub graph compared to original graph", plt.cm.seismic)

    vp = None
    ep = None
    if type == 'closeness':
        vp = closeness(g_sub, weight=weight_sub)
        vp.a = np.nan_to_num(vp.a) # correct floating point values
    else:
        vp, ep = betweenness(g_sub, weight=weight_sub)
        vp.a = np.nan_to_num(vp.a) # correct floating point values
    # normalization
    # min_val, max_val = vp.a.min(), vp.a.max()
    # vp.a = (vp.a - min_val) / (max_val - min_val)
    plot_graph_diff(g, vp.a, fig, plot_sub_graph_before_ax, "Sub Graph (extracted region of original graph) without correction")

    fig.savefig(f"Diff_graph_scatter_{type}.svg", format='svg')