import math
import pathlib
import secrets

import matplotlib.pyplot as plt
import numpy as np
import squidpy as sq
from graph_tool.all import *

from src import centrality
from src import plot
from src import fitting

# TODO: implement this AIC:
# https://www.statlect.com/fundamentals-of-statistics/linear-regression-model-selection-criteria


def merfish():
    """
    Merfish dataset from `squidpy`, restricted to the slice with
    `Bregma == -9`.

    @return the filtered (copied) dataset object returned by
        `sq.datasets.merfish()`.
    """
    adata = sq.datasets.merfish()
    adata = adata[adata.obs.Bregma == -9].copy()
    return adata


def mibitof():
    """
    Mibitof dataset from `squidpy`.

    @return the dataset object returned by `sq.datasets.mibitof()`.
    """
    return sq.datasets.mibitof()


def random_graph(n=5000, seed=None):
    """
    Uniformly random point cloud generation.

    `n` [int] Number of points to generate. Default 5000 seems like a good
        starting point in point density and corresponding runtime for the
        subsequent calculations.
    `seed` [int | None] Seed for the random number generator. If `None`, a
        fresh 128 bit seed is drawn so that the run can be reproduced later
        from the returned seed.

    @return [tuple] `(points, seed)` where `points` is a `numpy.ndarray` of
        shape (n, 2) with coordinates in [0, 1) and `seed` is the seed that
        was actually used.
    """
    if seed is None:
        seed = secrets.randbits(128)
    rng = np.random.default_rng(seed=seed)
    return rng.random((n, 2)), seed


def spatial_graph(adata):
    """
    Generate the spatial graph using a delaunay triangulation of the given
    points.

    `adata` Point coordinates handed to graph-tool's `triangulation`; the
        callers in this file pass either `adata.obsm['spatial']` of a
        *squidpy* dataset or the point array from `random_graph`.

    @return [tuple] `(g, weight)` where `g` is the graph-tool graph (vertex
        positions stored in `g.vp["pos"]`) and `weight` is an edge property
        map of type double holding the Euclidean length of each edge.
    """
    g, pos = graph_tool.generation.triangulation(adata, type="delaunay")
    g.vp["pos"] = pos
    weight = g.new_edge_property("double")
    for e in g.edges():
        delta = pos[e.source()].a - pos[e.target()].a
        # Euclidean edge length. The previous expression,
        # sqrt(sum(|delta|)) ** 2, cancelled itself out and produced the
        # Manhattan distance instead.
        weight[e] = float(np.linalg.norm(delta))
    return g, weight


def _aic(densities, n_params):
    """
    Akaike information criterion: AIC = 2 * k - 2 * ln(L), with
    L = prod(f(x_i)), i.e. ln(L) = sum(ln(f(x_i))).

    `densities` iterable of the model values f(x_i); f is *not normalized*.
        `math.log` raises `ValueError` for non-positive values.
    `n_params` [int] number of fitted model parameters k.
    """
    log_likelihood = math.fsum(math.log(value) for value in densities)
    return 2.0 * n_params - 2.0 * log_likelihood


def _vertex_x_coordinates(g):
    """Return the first position component of every vertex of `g`."""
    pos = g.vp["pos"]
    return [pos[v][0] for v in g.vertices()]


def _analyse_centrality(g, vp, x_spatial, title, out_path):
    """
    Normalize the centrality values `vp`, fit the piece-wise linear and the
    simple linear model against the convex-hull quantification, plot both
    and save the figure as SVG to `out_path`.

    `g` graph (view) the centrality was computed on.
    `vp` vertex property map with the raw centrality values; modified in
        place (NaN replaced, then min-max normalized).
    `x_spatial` values at which the fitted models are evaluated for the AIC.
    `title` title passed to `plot.graph_plot`.
    `out_path` [str] destination of the SVG; missing parent directories are
        created.
    """
    vp.a = np.nan_to_num(vp.a)  # correct floating point values

    # normalization
    min_val, max_val = vp.a.min(), vp.a.max()
    vp.a = (vp.a - min_val) / (max_val - min_val)

    # calculate convex hull
    convex_hull = centrality.convex_hull(g)

    # plot graph with convex_hull
    fig = plt.figure(figsize=(15, 5))
    ax0, ax1 = fig.subplots(1, 2)
    plot.graph_plot(fig, ax0, g, vp, convex_hull, title)

    # generate model based on convex hull and associated centrality values
    quantification = plot.quantification_data(g, vp, convex_hull)

    # optimize model's piece-wise linear function
    d = quantification[:, 0]
    C = quantification[:, 1]
    m_opt, c0_opt, b_opt = fitting.fit_piece_wise_linear(d, C)

    # NOTE(review): both AIC values evaluate the fitted models at the vertex
    # x-coordinates, while the models were fitted on the hull distances `d`.
    # Kept as in the original; confirm whether `d` was intended here.
    plateau = m_opt * b_opt + c0_opt
    aic_model = _aic(
        (plateau if x_i >= b_opt else m_opt * x_i + c0_opt
         for x_i in x_spatial),
        3,  # three parameters: b_opt, m_opt, c0_opt
    )

    # TODO
    # should this be part of the plotting function itself, it should not be
    # necessary for me to do this
    d_curve = np.linspace(min(d), max(d), 500)
    C_curve = np.piecewise(
        d_curve,
        [d_curve <= b_opt, d_curve > b_opt],
        [lambda x: m_opt * x + c0_opt, lambda x: m_opt * b_opt + c0_opt],
    )

    # plot model containing modeled piece-wise linear function
    plot.quantification_plot(
        ax1, quantification, d_curve, C_curve, 'Models', aic_model
    )

    # linear regression model
    m_reg, c_reg = fitting.fit_linear_regression(d, C)
    aic_regression = _aic(
        (m_reg * x_i + c_reg for x_i in x_spatial),
        2,  # two parameters: m_reg, c_reg
    )

    x = np.linspace(min(d), max(d), 500)
    y = m_reg * x + c_reg
    ax1.plot(x, y, color='k', linewidth=1,
             label=f"Simple Linear Regression | AIC: {aic_regression}")
    ax1.legend()

    # make sure the target directory exists before writing
    pathlib.Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(out_path, format='svg')
    # release the figure; otherwise every call keeps one more figure alive
    plt.close(fig)


def merfish_example():
    """
    Run the centrality / convex-hull analysis on the Merfish dataset
    (closeness) and on five uniformly random point clouds (closeness,
    betweenness and PageRank each), writing one SVG per analysis.
    """
    # generate spatial graph from a given dataset
    g, weight = spatial_graph(merfish().obsm['spatial'])
    g = GraphView(g)
    _analyse_centrality(
        g,
        closeness(g, weight=weight),
        _vertex_x_coordinates(g),
        "Merfish\nCloseness",
        "Merfish_closeness.svg",
    )

    # (title label, centrality function returning a vertex property map)
    measures = (
        ("Closeness", lambda graph, w: closeness(graph, weight=w)),
        # betweenness also returns an edge property map, which is not used
        ("Betweenness", lambda graph, w: betweenness(graph, weight=w)[0]),
        ("PageRank", lambda graph, w: pagerank(graph, weight=w)),
    )

    for i in range(1, 6):
        points, seed = random_graph()
        g, weight = spatial_graph(points)
        g = GraphView(g)
        x_spatial = _vertex_x_coordinates(g)

        for label, compute in measures:
            _analyse_centrality(
                g,
                compute(g, weight),
                x_spatial,
                f"Random Graph (seed: {seed})\n{label}",
                f"uniform_random_point_clouds/{i}_{label.lower()}.svg",
            )