fix: correct implementation of top cut model

The model assumes that the top 10% of centrality values are not affected by the boundary.
This means that they form the basis for the constant part of the two-part function:
  - linear function determined through simple linear regression for the remaining
    points that are below the calculated threshold
  - constant consisting of the threshold

The threshold is the arithmetic mean (sum divided by count) of the centrality
values of the top 10% of the values.
This commit is contained in:
2026-01-26 15:22:59 +01:00
parent 2b2247a758
commit e981b6eaed

View File

@@ -69,23 +69,30 @@ def fit_cut(d, C):
C (array-like): Corresponding centrality values.
Returns:
tuple: Optimize slope (m), intercept (t) and breaking point (b)
tuple: Optimize slope (m), intercept (t) and breaking point (b) as well as AIC score
"""
n = len(d)
cut = math.floor(n * 0.1)
b = sum(C[n - cut..n]) / cut
cut = math.ceil(n * 0.1)
b_c = sum(sorted(C[n - cut:n])) / cut
model = gp.Model("Top Cut")
m = model.addVar(vtype=GRB.CONTINUOUS, name="m")
t = model.addVar(vtype=GRB.CONTINUOUS, name="t")
model.setObjective(gp.quicksum((C[i] - t - m * d[i])**2 for i in range(n - cut, n)), GRB.MINIMIZE)
for i in range(n - cut, n):
model.addConstr(b >= m * d[i] + t)
model.setObjective(gp.quicksum((C[i] - t - m * d[i])**2 for i in range(n) if C[i] <= b_c), GRB.MINIMIZE)
for i in range(n - cut):
model.addConstr(b_c >= m * d[i] + t)
model.optimize()
return m.X, t.X, b
b = (b_c - t.X) / m.X
print(f"b_c: {b_c} | b: {b}")
# AIC
k = 2
aic = 2. * k + n * math.log(model.ObjVal)
return m.X, t.X, b, aic
def fit_linear_regression(d, C):