The final API will be derived from these scripts into a different repository, which then only holds the corresponding functions that provide the corresponding functionalities described in the associated master thesis.
296 lines
10 KiB
Python
296 lines
10 KiB
Python
import math
|
|
|
|
import matplotlib.pyplot as plt
|
|
import numpy as np
|
|
from graph_tool.all import *
|
|
|
|
from src import centrality
|
|
from src import plot
|
|
from src import fitting
|
|
|
|
|
|
def degree(g, weight):
|
|
# VertexPropertyMap
|
|
vp = g.new_vertex_property("double")
|
|
for v in g.vertices():
|
|
neighbours = g.get_all_neighbours(v)
|
|
vp[v] = len(neighbours)
|
|
return vp
|
|
|
|
|
|
def leverage(g, weight):
|
|
# VertexPropertyMap
|
|
vp = g.new_vertex_property("double")
|
|
for v in g.vertices():
|
|
li = 0.0
|
|
neighbours = g.get_all_neighbours(v)
|
|
ki = len(neighbours)
|
|
# sum
|
|
for nv in neighbours:
|
|
other_neighbours = g.get_all_neighbours(nv)
|
|
kj = len(other_neighbours)
|
|
li += (ki - kj) / (ki + kj)
|
|
li /= ki
|
|
vp[v] = li
|
|
return vp
|
|
|
|
|
|
def random_graph(n=5000, seed=None):
|
|
"""
|
|
Uniformly random point cloud generation.
|
|
`n` [int] Number of points to generate. Default 5000 seems like a good starting point in point density and corresponding runtime for the subsequent calculations.
|
|
@return [numpy.ndarray] Array of shape(n, 2) containing the coordinates for each point of the generated point cloud.
|
|
"""
|
|
if seed is None:
|
|
import secrets
|
|
seed = secrets.randbits(128)
|
|
rng = np.random.default_rng(seed=seed)
|
|
return rng.random((n, 2)), seed
|
|
|
|
|
|
def sub_spatial_graph(adata):
|
|
"""
|
|
Generate the spatial graph using delaunay for the given `adata`.
|
|
`adata` will contain the calculated spatial graph contents in the keys
|
|
adata.obsm['spatial']` in case the `adata` is created from a dataset of *squidpy*.
|
|
@return [Graph] generated networkx graph from adata.obsp['spatial_distances']
|
|
"""
|
|
sub_adata = np.array([])
|
|
for point in adata:
|
|
if point[0] > 0.33 and point[0] <= 0.66 and point[1] > 0.33 and point[1] <= 0.66:
|
|
sub_adata = np.append(sub_adata, [point[0], point[1]])
|
|
|
|
sub_adata = sub_adata.reshape(sub_adata.shape[0] // 2, 2)
|
|
return spatial_graph(sub_adata)
|
|
|
|
|
|
def spatial_graph(adata):
|
|
"""
|
|
Generate the spatial graph using delaunay for the given `adata`.
|
|
`adata` will contain the calculated spatial graph contents in the keys
|
|
adata.obsm['spatial']` in case the `adata` is created from a dataset of *squidpy*.
|
|
@return [Graph] generated networkx graph from adata.obsp['spatial_distances']
|
|
"""
|
|
g, pos = graph_tool.generation.triangulation(adata, type="delaunay")
|
|
g.vp["pos"] = pos
|
|
weight = g.new_edge_property("double")
|
|
for e in g.edges():
|
|
weight[e] = math.sqrt(sum(map(abs, pos[e.source()].a - pos[e.target()].a)))**2
|
|
return g, weight
|
|
|
|
|
|
def plot_graph_diff(G, c, fig, ax, name, cmap=plt.cm.plasma):
|
|
pos = G.vp["pos"]
|
|
x = []
|
|
y = []
|
|
for v in G.vertices():
|
|
ver = pos[v]
|
|
if ver[0] > 0.33 and ver[0] <= 0.66 and ver[1] > 0.33 and ver[1] <= 0.66:
|
|
x.append(ver[0])
|
|
y.append(ver[1])
|
|
|
|
sc = ax.scatter(x, y, s=1, cmap=cmap, c=c) # map closeness values as color mapping on the verticies
|
|
ax.set_title(name)
|
|
fig.colorbar(sc, ax=ax)
|
|
|
|
|
|
def apply(g, seed, weight, convex_hull, ax, method, method_name):
|
|
# calculate centrality values
|
|
vp = None
|
|
if method_name == "Betweeness":
|
|
vp, ep = method(g, weight=weight)
|
|
elif method_name == "Eigenvector":
|
|
ep, vp = method(g, weight=weight)
|
|
elif method_name == "Hits":
|
|
ep, vp, hub_centrality = method(g, weight=weight)
|
|
else:
|
|
vp = method(g, weight=weight)
|
|
vp.a = np.nan_to_num(vp.a) # correct floating point values
|
|
|
|
# normalization
|
|
# min_val, max_val = vp.a.min(), vp.a.max()
|
|
# vp.a = (vp.a - min_val) / (max_val - min_val)
|
|
|
|
# generate model based on convex hull and associated centrality values
|
|
quantification = plot.quantification_data(g, vp, convex_hull)
|
|
|
|
# optimize model's piece-wise linear function
|
|
d = quantification[:, 0]
|
|
C = quantification[:, 1]
|
|
m_opt, c0_opt, b_opt, aic_opt = fitting.fit_piece_wise_linear(d, C)
|
|
|
|
# TODO
|
|
# should this be part of the plotting function itself, it should not be necessary for me to do this
|
|
d_curve = np.linspace(min(d), max(d), 500)
|
|
C_curve = np.piecewise(
|
|
d_curve,
|
|
[d_curve <= b_opt, d_curve > b_opt],
|
|
[lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt]
|
|
)
|
|
# plot model containing modeled piece-wise linear function
|
|
plot.quantification_plot(ax, quantification, d_curve, C_curve, method_name, aic_opt)
|
|
|
|
return vp
|
|
|
|
|
|
def apply_corrected(g, seed, weight, convex_hull, ax, method, method_name):
|
|
# calculate centrality values
|
|
vp = None
|
|
if method_name == "Betweeness":
|
|
vp, ep = method(g, weight=weight)
|
|
elif method_name == "Eigenvector":
|
|
ep, vp = method(g, weight=weight)
|
|
elif method_name == "Hits":
|
|
ep, vp, hub_centrality = method(g, weight=weight)
|
|
else:
|
|
vp = method(g, weight=weight)
|
|
vp.a = np.nan_to_num(vp.a) # correct floating point values
|
|
|
|
# normalization
|
|
# min_val, max_val = vp.a.min(), vp.a.max()
|
|
# vp.a = (vp.a - min_val) / (max_val - min_val)
|
|
|
|
# generate model based on convex hull and associated centrality values
|
|
quantification = plot.quantification_data(g, vp, convex_hull)
|
|
|
|
# optimize model's piece-wise linear function
|
|
d = quantification[:, 0]
|
|
C = quantification[:, 1]
|
|
m_opt, c0_opt, b_opt, aic_opt = fitting.fit_piece_wise_linear(d, C)
|
|
|
|
d_curve = np.linspace(min(d), max(d), 500)
|
|
C_curve = np.piecewise(
|
|
d_curve,
|
|
[d_curve <= b_opt, d_curve > b_opt],
|
|
[lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt]
|
|
)
|
|
# plot model containing modeled piece-wise linear function
|
|
plot.quantification_plot(ax, quantification, d_curve, C_curve, method_name, aic_opt)
|
|
|
|
return centrality.correct(g, vp, m_opt, c0_opt, b_opt)
|
|
|
|
#
|
|
# - Create a random point cloud and calculate a triangulation on it
|
|
# - For that graph calculate the convex hull
|
|
# - Draw the graph with the convex hull
|
|
# - For each centrality measure
|
|
# - apply centrality measure to the next axis
|
|
# - Draw the corresponding resulting models into a grid
|
|
#
|
|
points, seed = random_graph(n=3000)
|
|
g, weight = spatial_graph(points)
|
|
g = GraphView(g)
|
|
|
|
g_sub, weight_sub = sub_spatial_graph(points)
|
|
g_sub = GraphView(g_sub)
|
|
|
|
# calculate convex hull
|
|
convex_hull = centrality.convex_hull(g)
|
|
|
|
# plot graph with convex_hull
|
|
fig_graph, ax_graph = plt.subplots(figsize=(15, 12))
|
|
# draw without any centrality measure `vp`
|
|
vp = g.new_vertex_property("double")
|
|
plot.graph_plot(fig_graph, ax_graph, g, vp, convex_hull, f"Pointcloud (seed: {seed})")
|
|
fig_graph.savefig("Diff_graph.svg", format='svg')
|
|
|
|
fig = plt.figure(figsize=(15, 12))
|
|
row1, row2 = fig.subplots(2, 2)
|
|
|
|
ax1, ax2 = row1
|
|
# TODO select corresponding centrality measure method
|
|
vp_closeness = apply(g, seed, weight, convex_hull, ax1, closeness, "Closeness")
|
|
vp_betweenness = apply(g, seed, weight, convex_hull, ax2, betweenness, "Betweeness")
|
|
|
|
# calculate convex hull
|
|
convex_hull = centrality.convex_hull(g_sub)
|
|
|
|
# plot graph with convex_hull
|
|
fig_graph, ax_graph = plt.subplots(figsize=(15, 12))
|
|
# draw without any centrality measure `vp`
|
|
vp = g_sub.new_vertex_property("double")
|
|
plot.graph_plot(fig_graph, ax_graph, g_sub, vp, convex_hull, f"Pointcloud (seed: {seed})")
|
|
fig_graph.savefig("Diff_subgraph.svg", format='svg')
|
|
|
|
ax1, ax2 = row2
|
|
vp_closeness_corrected = apply_corrected(g_sub, seed, weight_sub, convex_hull, ax1, closeness, "Closeness")
|
|
vp_betweeness_corrected = apply_corrected(g_sub, seed, weight_sub, convex_hull, ax2, betweenness, "Betweeness")
|
|
|
|
fig.savefig(f"Diff_scores.svg", format='svg')
|
|
|
|
for type in ['closeness', 'betweenness']:
|
|
print(type)
|
|
|
|
sub_keys = iter(g_sub.vertices())
|
|
keys = iter(g.vertices())
|
|
|
|
scores = []
|
|
sub_scores = []
|
|
diff_scores = []
|
|
|
|
for sub_key in sub_keys:
|
|
key = next(keys)
|
|
position = g.vp["pos"][key]
|
|
while not (position[0] > 0.33 and position[0] <= 0.66 and position[1] > 0.33 and position[1] <= 0.66):
|
|
key = next(keys)
|
|
position = g.vp["pos"][key]
|
|
# NOTE print corresponding position (which are identical)
|
|
# position = g.vp["pos"][key]
|
|
# sub_position = g_sub.vp["pos"][sub_key]
|
|
# print(f"position: {position} | sub_position: {sub_position}")
|
|
|
|
value = 0.0
|
|
sub_value = 0.0
|
|
if type == 'closeness':
|
|
value = vp_closeness[key]
|
|
sub_value = vp_closeness_corrected[sub_key]
|
|
else:
|
|
value = vp_betweenness[key]
|
|
sub_value = vp_betweeness_corrected[sub_key]
|
|
|
|
scores.append(value)
|
|
sub_scores.append(sub_value)
|
|
diff_scores.append(value - sub_value)
|
|
|
|
median_score = np.median(scores)
|
|
median_sub_score = np.median(sub_scores)
|
|
print(f"\tmedian score: {median_score}")
|
|
print(f"\tmedian sub_score: {median_sub_score}")
|
|
print(f"\tmedian delta: {(median_score - median_sub_score)}")
|
|
print("")
|
|
|
|
max_value_score = np.max(scores)
|
|
max_value_sub_score = np.max(sub_scores)
|
|
print(f"\tmax value score: {max_value_score}")
|
|
print(f"\tmax value sub_score: {max_value_sub_score}")
|
|
print(f"\tmax value delta: {(max_value_score - max_value_sub_score)}")
|
|
print("")
|
|
|
|
min_value_score = np.min(scores)
|
|
min_value_sub_score = np.min(sub_scores)
|
|
print(f"\tmin value score: {min_value_score}")
|
|
print(f"\tmin value sub_score: {min_value_sub_score}")
|
|
print(f"\tmin value delta: {(min_value_score - min_value_sub_score)}")
|
|
print("")
|
|
|
|
fig = plt.figure(figsize=(35, 10))
|
|
plot_graph_ax, plot_sub_graph_ax, plot_sub_graph_before_ax = fig.subplots(1, 3)
|
|
|
|
plot_graph_diff(g, scores, fig, plot_graph_ax, "Original Graph (region of sub graph)")
|
|
plot_graph_diff(g, diff_scores, fig, plot_sub_graph_ax, "Differences after correction of sub graph compared to original graph", plt.cm.seismic)
|
|
|
|
vp = None
|
|
ep = None
|
|
if type == 'closeness':
|
|
vp = closeness(g_sub, weight=weight_sub)
|
|
vp.a = np.nan_to_num(vp.a) # correct floating point values
|
|
else:
|
|
vp, ep = betweenness(g_sub, weight=weight_sub)
|
|
vp.a = np.nan_to_num(vp.a) # correct floating point values
|
|
# normalization
|
|
# min_val, max_val = vp.a.min(), vp.a.max()
|
|
# vp.a = (vp.a - min_val) / (max_val - min_val)
|
|
plot_graph_diff(g, vp.a, fig, plot_sub_graph_before_ax, "Sub Graph (extracted region of original graph) without correction")
|
|
|
|
fig.savefig(f"Diff_graph_scatter_{type}.svg", format='svg')
|