import math import matplotlib.pyplot as plt import numpy as np from graph_tool.all import * from src import centrality from src import plot from src import fitting def degree(g, weight): # VertexPropertyMap vp = g.new_vertex_property("double") for v in g.vertices(): neighbours = g.get_all_neighbours(v) vp[v] = len(neighbours) return vp def leverage(g, weight): # VertexPropertyMap vp = g.new_vertex_property("double") for v in g.vertices(): li = 0.0 neighbours = g.get_all_neighbours(v) ki = len(neighbours) # sum for nv in neighbours: other_neighbours = g.get_all_neighbours(nv) kj = len(other_neighbours) li += (ki - kj) / (ki + kj) li /= ki vp[v] = li return vp def random_graph(n=5000, seed=None): """ Uniformly random point cloud generation. `n` [int] Number of points to generate. Default 5000 seems like a good starting point in point density and corresponding runtime for the subsequent calculations. @return [numpy.ndarray] Array of shape(n, 2) containing the coordinates for each point of the generated point cloud. """ if seed is None: import secrets seed = secrets.randbits(128) rng = np.random.default_rng(seed=seed) return rng.random((n, 2)), seed def sub_spatial_graph(adata): """ Generate the spatial graph using delaunay for the given `adata`. `adata` will contain the calculated spatial graph contents in the keys adata.obsm['spatial']` in case the `adata` is created from a dataset of *squidpy*. @return [Graph] generated networkx graph from adata.obsp['spatial_distances'] """ sub_adata = np.array([]) for point in adata: if point[0] > 0.33 and point[0] <= 0.66 and point[1] > 0.33 and point[1] <= 0.66: sub_adata = np.append(sub_adata, [point[0], point[1]]) sub_adata = sub_adata.reshape(sub_adata.shape[0] // 2, 2) return spatial_graph(sub_adata) def spatial_graph(adata): """ Generate the spatial graph using delaunay for the given `adata`. `adata` will contain the calculated spatial graph contents in the keys adata.obsm['spatial']` in case the `adata` is created from a dataset of *squidpy*. @return [Graph] generated networkx graph from adata.obsp['spatial_distances'] """ g, pos = graph_tool.generation.triangulation(adata, type="delaunay") g.vp["pos"] = pos weight = g.new_edge_property("double") for e in g.edges(): weight[e] = math.sqrt(sum(map(abs, pos[e.source()].a - pos[e.target()].a)))**2 return g, weight def plot_graph_diff(G, c, fig, ax, name): pos = G.vp["pos"] x = [] y = [] for v in G.vertices(): ver = pos[v] if ver[0] > 0.33 and ver[0] <= 0.66 and ver[1] > 0.33 and ver[1] <= 0.66: x.append(ver[0]) y.append(ver[1]) sc = ax.scatter(x, y, s=1, cmap=plt.cm.plasma, c=c) # map closeness values as color mapping on the verticies ax.set_title(name) fig.colorbar(sc, ax=ax) def apply(g, seed, weight, convex_hull, ax, method, method_name): # calculate centrality values vp = None if method_name == "Betweeness": vp, ep = method(g, weight=weight) elif method_name == "Eigenvector": ep, vp = method(g, weight=weight) elif method_name == "Hits": ep, vp, hub_centrality = method(g, weight=weight) else: vp = method(g, weight=weight) vp.a = np.nan_to_num(vp.a) # correct floating point values # normalization min_val, max_val = vp.a.min(), vp.a.max() vp.a = (vp.a - min_val) / (max_val - min_val) # generate model based on convex hull and associated centrality values quantification = plot.quantification_data(g, vp, convex_hull) # optimize model's piece-wise linear function d = quantification[:, 0] C = quantification[:, 1] m_opt, c0_opt, b_opt, aic_opt = fitting.fit_piece_wise_linear(d, C) # TODO # should this be part of the plotting function itself, it should not be necessary for me to do this d_curve = np.linspace(min(d), max(d), 500) C_curve = np.piecewise( d_curve, [d_curve <= b_opt, d_curve > b_opt], [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt] ) # plot model containing modeled piece-wise linear function plot.quantification_plot(ax, quantification, d_curve, C_curve, method_name, aic_opt) return vp def apply_corrected(g, seed, weight, convex_hull, ax, method, method_name): # calculate centrality values vp = None if method_name == "Betweeness": vp, ep = method(g, weight=weight) elif method_name == "Eigenvector": ep, vp = method(g, weight=weight) elif method_name == "Hits": ep, vp, hub_centrality = method(g, weight=weight) else: vp = method(g, weight=weight) vp.a = np.nan_to_num(vp.a) # correct floating point values # normalization min_val, max_val = vp.a.min(), vp.a.max() vp.a = (vp.a - min_val) / (max_val - min_val) # generate model based on convex hull and associated centrality values quantification = plot.quantification_data(g, vp, convex_hull) # optimize model's piece-wise linear function d = quantification[:, 0] C = quantification[:, 1] m_opt, c0_opt, b_opt, aic_opt = fitting.fit_piece_wise_linear(d, C) d_curve = np.linspace(min(d), max(d), 500) C_curve = np.piecewise( d_curve, [d_curve <= b_opt, d_curve > b_opt], [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt] ) # plot model containing modeled piece-wise linear function plot.quantification_plot(ax, quantification, d_curve, C_curve, method_name, aic_opt) return centrality.correct(g, vp, m_opt, c0_opt, b_opt) # # - Create a random point cloud and calculate a triangulation on it # - For that graph calculate the convex hull # - Draw the graph with the convex hull # - For each centrality measure # - apply centrality measure to the next axis # - Draw the corresponding resulting models into a grid # points, seed = random_graph(n=3000) g, weight = spatial_graph(points) g = GraphView(g) g_sub, weight_sub = sub_spatial_graph(points) g_sub = GraphView(g_sub) # calculate convex hull convex_hull = centrality.convex_hull(g) # plot graph with convex_hull fig_graph, ax_graph = plt.subplots(figsize=(15, 12)) # draw without any centrality measure `vp` vp = g.new_vertex_property("double") plot.graph_plot(fig_graph, ax_graph, g, vp, convex_hull, f"Pointcloud (seed: {seed})") fig_graph.savefig("Diff_graph.svg", format='svg') fig = plt.figure(figsize=(15, 12)) row1, row2 = fig.subplots(2, 2) ax1, ax2 = row1 # TODO select corresponding centrality measure method vp_closeness = apply(g, seed, weight, convex_hull, ax1, closeness, "Closeness") # vp_betweenness = apply(g, seed, weight, convex_hull, ax2, betweenness, "Betweeness") # calculate convex hull convex_hull = centrality.convex_hull(g_sub) # plot graph with convex_hull fig_graph, ax_graph = plt.subplots(figsize=(15, 12)) # draw without any centrality measure `vp` vp = g_sub.new_vertex_property("double") plot.graph_plot(fig_graph, ax_graph, g_sub, vp, convex_hull, f"Pointcloud (seed: {seed})") fig_graph.savefig("Diff_subgraph.svg", format='svg') ax1, ax2 = row2 vp_closeness_corrected = apply_corrected(g_sub, seed, weight_sub, convex_hull, ax1, closeness, "Closeness") # vp_betweeness_corrected = apply_corrected(g_sub, seed, weight_sub, convex_hull, ax2, betweenness, "Betweeness") fig.savefig(f"Diff_scores.svg", format='svg') # TODO how can I match the two vp's such that I can actually create a diff? # print(f"Closeness: {vp_closeness}") print(f"Closeness corrected: {vp_closeness_corrected}") sub_keys = iter(g_sub.vertices()) keys = iter(g.vertices()) scores = [] sub_scores = [] for sub_key in sub_keys: key = next(keys) position = g.vp["pos"][key] while not (position[0] > 0.33 and position[0] <= 0.66 and position[1] > 0.33 and position[1] <= 0.66): key = next(keys) position = g.vp["pos"][key] # NOTE print corresponding position (which are identical) # position = g.vp["pos"][key] # sub_position = g_sub.vp["pos"][sub_key] # print(f"position: {position} | sub_position: {sub_position}") value = vp_closeness[key] sub_value = vp_closeness_corrected[sub_key] scores.append(value) sub_scores.append(sub_value) # print(f"value: {value} | sub_value: {sub_value}") # TODO what do I want to know? # - median score comparison? # - max delta's between scores # - improvement compared to with and without correction? # TODO can I create the scatter graph with the points with their corresponding values? median_score = np.median(scores) median_sub_score = np.median(sub_scores) print(f"median score: {median_score}") print(f"median sub_score: {median_sub_score}") print(f"median delta: {(median_score - median_sub_score)}") print("") max_value_score = np.max(scores) max_value_sub_score = np.max(sub_scores) print(f"max value score: {max_value_score}") print(f"max value sub_score: {max_value_sub_score}") print(f"max value delta: {(max_value_score - max_value_sub_score)}") print("") min_value_score = np.min(scores) min_value_sub_score = np.min(sub_scores) print(f"min value score: {min_value_score}") print(f"min value sub_score: {min_value_sub_score}") print(f"min value delta: {(min_value_score - min_value_sub_score)}") fig = plt.figure(figsize=(35, 10)) plot_graph_ax, plot_sub_graph_ax, plot_sub_graph_before_ax = fig.subplots(1, 3) plot_graph_diff(g, scores, fig, plot_graph_ax, "Original Graph (region of sub graph)") plot_graph_diff(g, sub_scores, fig, plot_sub_graph_ax, "Sub Graph (extracted region of original graph) with correction") vp = closeness(g_sub, weight=weight_sub) vp.a = np.nan_to_num(vp.a) # correct floating point values # normalization min_val, max_val = vp.a.min(), vp.a.max() vp.a = (vp.a - min_val) / (max_val - min_val) plot_graph_diff(g, vp.a, fig, plot_sub_graph_before_ax, "Sub Graph (extracted region of original graph) without correction") fig.savefig(f"Diff_graph_scatter.svg", format='svg')