From e981b6eaed87420fd2442686b62a61543800c3e9 Mon Sep 17 00:00:00 2001 From: Yves Biener Date: Mon, 26 Jan 2026 15:22:59 +0100 Subject: [PATCH] fix: correct implementation of top cut model The model assumes that the top 10% of centrality values are not affected by the boundary. This means that they form the basis for the constant part of the two-part function: - linear function determined through simple linear regression for the remaining points that are below the calculated threshold - constant consisting of the threshold The threshold describes the median of the centrality values of the top 10% of the values. --- src/fitting.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/fitting.py b/src/fitting.py index 3b86bda..436488b 100644 --- a/src/fitting.py +++ b/src/fitting.py @@ -69,23 +69,30 @@ def fit_cut(d, C): C (array-like): Corresponding centrality values. Returns: - tuple: Optimize slope (m), intercept (t) and breaking point (b) + tuple: Optimize slope (m), intercept (t) and breaking point (b) as well as AIC score """ n = len(d) - cut = math.floor(n * 0.1) - b = sum(C[n - cut..n]) / cut + cut = math.ceil(n * 0.1) + b_c = sum(sorted(C[n - cut:n])) / cut model = gp.Model("Top Cut") m = model.addVar(vtype=GRB.CONTINUOUS, name="m") t = model.addVar(vtype=GRB.CONTINUOUS, name="t") - model.setObjective(gp.quicksum((C[i] - t - m * d[i])**2 for i in range(n - cut, n)), GRB.MINIMIZE) - for i in range(n - cut, n): - model.addConstr(b >= m * d[i] + t) + model.setObjective(gp.quicksum((C[i] - t - m * d[i])**2 for i in range(n) if C[i] <= b_c), GRB.MINIMIZE) + for i in range(n - cut): + model.addConstr(b_c >= m * d[i] + t) model.optimize() - return m.X, t.X, b + b = (b_c - t.X) / m.X + print(f"b_c: {b_c} | b: {b}") + + # AIC + k = 2 + aic = 2. * k + n * math.log(model.ObjVal) + + return m.X, t.X, b, aic def fit_linear_regression(d, C):