Approximation uses simple linear regression to determine whether the _location information_ is significant enough (through the calculated steepness `beta`) which can be used to determine in a faster and more efficient way whether the calculation of the model is necessary and helpful in the first place.
106 lines
3.3 KiB
Python
106 lines
3.3 KiB
Python
import math
|
|
|
|
import matplotlib.pyplot as plt
|
|
import numpy as np
|
|
from graph_tool.all import *
|
|
|
|
from src import centrality
|
|
from src import plot
|
|
from src import fitting
|
|
|
|
|
|
def leverage(g, weight):
|
|
# VertexPropertyMap
|
|
vp = g.new_vertex_property("double")
|
|
for v in g.vertices():
|
|
li = 0.0
|
|
neighbours = g.get_all_neighbours(v)
|
|
ki = len(neighbours)
|
|
# sum
|
|
for nv in neighbours:
|
|
other_neighbours = g.get_all_neighbours(nv)
|
|
kj = len(other_neighbours)
|
|
li += (ki - kj) / (ki + kj)
|
|
li /= ki
|
|
vp[v] = li
|
|
return vp
|
|
|
|
|
|
def random_graph(n=5000, seed=None):
|
|
"""
|
|
Uniformly random point cloud generation.
|
|
`n` [int] Number of points to generate. Default 5000 seems like a good starting point in point density and corresponding runtime for the subsequent calculations.
|
|
@return [numpy.ndarray] Array of shape(n, 2) containing the coordinates for each point of the generated point cloud.
|
|
"""
|
|
if seed is None:
|
|
import secrets
|
|
seed = secrets.randbits(128)
|
|
rng = np.random.default_rng(seed=seed)
|
|
return rng.random((n, 2)), seed
|
|
|
|
|
|
def spatial_graph(adata):
|
|
g, pos = graph_tool.generation.triangulation(adata, type="delaunay")
|
|
g.vp["pos"] = pos
|
|
weight = g.new_edge_property("double")
|
|
for e in g.edges():
|
|
weight[e] = math.sqrt(sum(map(abs, pos[e.source()].a - pos[e.target()].a)))**2
|
|
return g, weight
|
|
|
|
|
|
points, seed = random_graph()
|
|
g, weight = spatial_graph(points)
|
|
g = GraphView(g)
|
|
|
|
# calculate centrality values
|
|
vp, ep = betweenness(g, weight=weight)
|
|
vp.a = np.nan_to_num(vp.a) # correct floating point values
|
|
ep.a = np.nan_to_num(ep.a) # correct floating point values
|
|
|
|
# normalization
|
|
min_val, max_val = vp.a.min(), vp.a.max()
|
|
vp.a = (vp.a - min_val) / (max_val - min_val)
|
|
|
|
# calculate convex hull
|
|
convex_hull = centrality.convex_hull(g)
|
|
|
|
# plot graph with convex_hull
|
|
fig = plt.figure(figsize=(15, 5))
|
|
ax0, ax1 = fig.subplots(1, 2)
|
|
plot.graph_plot(fig, ax0, g, vp, convex_hull, f"Random Graph (seed: {seed})")
|
|
|
|
# generate model based on convex hull and associated centrality values
|
|
quantification = plot.quantification_data(g, vp, convex_hull)
|
|
|
|
# optimize model's piece-wise linear function
|
|
d = quantification[:, 0]
|
|
C = quantification[:, 1]
|
|
m_opt, c0_opt, b_opt, aic_opt = fitting.fit_piece_wise_linear(d, C)
|
|
|
|
# TODO
|
|
# should this be part of the plotting function itself, it should not be necessary for me to do this
|
|
d_curve = np.linspace(min(d), max(d), 500)
|
|
C_curve = np.piecewise(
|
|
d_curve,
|
|
[d_curve <= b_opt, d_curve > b_opt],
|
|
[lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt]
|
|
)
|
|
# plot model containing modeled piece-wise linear function
|
|
plot.quantification_plot(ax1, quantification, d_curve, C_curve, 'Betweenness', aic_opt)
|
|
|
|
# linear regression model
|
|
m_reg, c0_reg, aic_reg = fitting.fit_linear_regression(d, C)
|
|
|
|
# TODO
|
|
# should this be part of the plotting function itself, it should not be necessary for me to do this
|
|
d_curve = np.linspace(min(d), max(d), 500)
|
|
C_curve = np.piecewise(
|
|
d_curve,
|
|
[d_curve >= 0],
|
|
[lambda x: m_reg * x + c0_reg]
|
|
)
|
|
ax1.plot(d_curve, C_curve, color='k', linewidth=1, label=f"Simple Linear Regression | AIC: {aic_reg}")
|
|
ax1.legend()
|
|
|
|
fig.savefig(f"model_approximation_betweenness_5000.svg", format='svg')
|