Coverage for src/scicom/knowledgespread/utils.py: 0%
128 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-15 13:26 +0200
1"""Utility functions for initial condition generation."""
2import numpy as np
3import pandas as pd
4from itertools import combinations
5import networkx as nx
6from tqdm import tqdm
def epistemicRange(baseRange, age):
    """Return the age-dependent search radius for epistemic discovery.

    The radius shrinks inversely with age; for non-positive ages the
    base range is returned unchanged.
    """
    return baseRange / age if age > 0 else baseRange
def ageFunction(agent, a, b, c, radius):
    """Return an age-dependent radius (generalized bell curve).

    Bell-shaped function centered at c; a and b control the slope and
    the width of the plateau. Can be used to model age-dependent
    activity of agents. Agents of age zero get a radius of 0.
    """
    age = agent.age
    if age == 0:
        return 0
    return radius / (1 + abs((age - c) / a) ** (2 * b))
class GenerateInitalPopulation():
    """Generate sets of initial conditions for an agent population.

    Agents are sampled from a 2D epistemic landscape (see
    ``epistemicFunc``) and receive an activation time according to a
    population growth function (see ``timeFunc``).
    """

    def __init__(
        self,
        numScientists,
        timesteps
    ):
        """Store population size and the number of simulation steps.

        Args:
            numScientists: Number of initially active agents (t == 0).
            timesteps: Number of time steps over which agents activate.
        """
        self.N = numScientists
        self.Tstep = timesteps

    def epistemicFunc(self, fc, x, y, beta=8):
        """Epistemic space initial sampling.

        Args:
            fc: Landscape type: "complex", "central", or "polarized".
            x, y: Coordinates (scalars or numpy arrays).
            beta: Frequency parameter for the "complex" landscape.

        Returns:
            Landscape value z at (x, y).

        Raises:
            ValueError: For an unknown landscape type ``fc``.
        """
        if fc == "complex":
            return np.sin(beta * x) + np.cos(beta * y)
        elif fc == "central":
            return np.exp(-(x**2 + y**2))
        elif fc == "polarized":
            return x * np.exp(- 3 * (x ** 2 + y ** 2))
        # FIX: the original silently returned None for unknown modes,
        # which only crashed later when z was used. Fail loudly here.
        raise ValueError(f"Unknown epistemic function: {fc}")

    def timeFunc(self, dataframe, step, fcT="saturate", slope=5, base=5):
        """Population growth function.

        Samples the number of agents activating at ``step`` from the
        full distribution, weighted by |z|.

        Args:
            dataframe: Full distribution with a ``z`` column.
            step: Current time step (1-based).
            fcT: Growth type: "saturate", "linear", or "exponential".
            slope: Agents added per step for "linear" growth.
            base: Base of the power for "exponential" growth.

        Returns:
            Sampled rows for this time step.

        Raises:
            ValueError: If the requested sample exceeds the data size,
                or for an unknown growth type ``fcT``.
        """
        if fcT == "saturate":
            n2step = round(step * (1 - step / self.Tstep))
        elif fcT == "linear":
            n2step = slope
        elif fcT == "exponential":
            n2step = base ** step
        else:
            # FIX: previously an unknown fcT fell through to an
            # opaque NameError on n2step.
            raise ValueError(f"Unknown growth function: {fcT}")
        try:
            return dataframe.sample(
                n2step,
                weights=abs(dataframe.z)
            )
        except ValueError as err:
            # BUG FIX: the original used `except ValueError("..."):`,
            # which matches against an exception *instance* and raises
            # TypeError at match time instead of re-raising a
            # ValueError with the intended message.
            raise ValueError(
                "Your sample size is larger then the data. Adjust exponential time."
            ) from err

    def sample(self, fcE="complex", fcT="saturate", beta=8, slope=5, base=5):
        """Generate the sample population and add activation time.

        Draws the initial population (t == 0), then for each later step
        draws the agents activating at that step, and assigns unique
        1-based ids.

        Returns:
            DataFrame with columns x, y, z, t, id.
        """
        dta = self._fullDist(fcE=fcE, beta=beta)
        initial_population = dta.sample(self.N, weights=abs(dta.z))
        initial_population['t'] = 0
        stepDF = []
        for step in range(1, self.Tstep + 1):
            temp = self.timeFunc(dta, step, fcT=fcT, slope=slope, base=base)
            temp['t'] = step
            stepDF.append(temp)
        joined = pd.concat(stepDF)
        initial_population = pd.concat(
            [initial_population, joined]
        )
        initial_population = initial_population.reset_index(drop=True)
        # ids are 1-based and unique across all activation times.
        initial_population['id'] = initial_population.index + 1
        return initial_population

    def _fullDist(self, fcE, beta):
        """Full distribution to sample from.

        NOTE: builds a 10000x10000 grid (1e8 points) over [-1, 1]^2,
        which is memory-intensive.
        """
        x = np.linspace(-1, 1, 10000)
        y = np.linspace(-1, 1, 10000)
        X, Y = np.meshgrid(x, y)
        Z = self.epistemicFunc(fcE, X, Y, beta)
        dta = pd.DataFrame(
            {'x': X.flatten(), 'y': Y.flatten(), 'z': Z.flatten()}
        )
        return dta
class GenerateSocNet():
    """Grow a social network over a population of agents.

    Starting from a random sample of edges among the first generation
    (t == 0), new agents attach by degree- and distance-weighted
    preferential attachment, and extra edges are added to keep the
    network density at a target level.
    """

    def __init__(
        self,
        dataframe: pd.DataFrame,
        minDist: float = 0.0001
    ):
        """Store the population and initialize state placeholders.

        Args:
            dataframe: Population with columns id, x, y, z, t.
            minDist: Distance substituted for coinciding agents so that
                inverse-distance weights stay finite.
        """
        self.population = dataframe
        # Placeholders; filled by run() / initSocNet().
        self.density = ''
        self.allEdges = ''
        self.socialNet = ''
        self.minDist = minDist

    def _getWeighted(self, row, degree):
        """Return degree-weighted distance for one edge row.

        Returns None when the edge's source node is not yet part of the
        social network (no degree entry); such edges are dropped later.
        """
        try:
            return degree[int(row["from_id"])] * row["dist"]
        except KeyError:
            return None

    def initSocNet(self):
        """Generate the initial social network sample from the population.

        The dataframe input should contain the column names: id, x, y, z, t.
        Computes all pairwise distances once (stored in self.allEdges),
        then samples edges among first-generation agents, weighted by
        inverse distance, to reach the target density.

        Returns:
            Social network sample with columns time, from_id, to_id, dist.
        """
        first_gen = self.population.query("t == 0").id.unique()
        initPopN = len(first_gen)
        coordinateDict = {
            row["id"]: np.array([row["x"], row["y"], row["z"]]) for ix, row in self.population.iterrows()
        }
        # All undirected pairs over the full population, reused by the
        # growth steps later.
        idCombinations = list(combinations(self.population.id.unique(), 2))
        edges = []
        for combi in idCombinations:
            dist = np.linalg.norm(
                coordinateDict[combi[0]] - coordinateDict[combi[1]]
            )
            if dist <= 0.0:
                # Coinciding agents: clamp to minDist so inverse-distance
                # weights stay finite.
                dist = self.minDist
            edges.append(
                (combi[0], combi[1], dist)
            )
        self.allEdges = pd.DataFrame(edges, columns=["from_id", "to_id", "dist"])
        social_net = self.allEdges.query("from_id.isin(@first_gen) and to_id.isin(@first_gen)")
        # Closer pairs get larger weights (inverse distance).
        social_net_sample = social_net.sample(
            round(self.density * (initPopN * (initPopN - 1) / 2)),
            weights=sum(social_net.dist) / social_net.dist
        )
        social_net_sample.insert(0, "time", 0)
        self.socialNet = social_net_sample
        return social_net_sample

    def growNodes(self, time, nEdges):
        """Add nodes with weighted preferential attachment.

        For a time step select all new agents. For each agent query all
        potential edges to previously active agents. Weight these edges
        with the degree of the previous social network and the
        distances. From this select N edges for each new agent.

        Returns:
            The concatenated new edges with a time column.
        """
        addedEdges = []
        oldIDs = self.population.query("t < @time").id.unique()
        # The social graph does not change within this call, so build it
        # and its degree lookup once instead of once per new agent.
        socialGraph = nx.from_pandas_edgelist(
            self.socialNet,
            source="from_id",
            target="to_id"
        )
        degreeDict = dict(nx.degree(socialGraph))
        for newID in self.population.query("t == @time").id.unique():
            # Candidate edges: touch the new agent on one end and an
            # already-active agent on the other.
            potEdges = self.allEdges.query(
                "from_id == @newID or to_id == @newID"
            ).query(
                "from_id.isin(@oldIDs) or to_id.isin(@oldIDs)"
            ).copy()  # copy so insert() does not warn on a query view
            weightedDist = potEdges.apply(
                lambda x: self._getWeighted(x, degreeDict), axis=1
            )
            potEdges.insert(
                0, "weighted", weightedDist
            )
            # Drop edges whose counterpart is not in the network yet.
            potEdges = potEdges.dropna()
            sample = potEdges.sample(
                nEdges,
                weights=sum(potEdges.weighted) / potEdges.weighted
            )
            sample.insert(0, "time", time)
            addedEdges.append(sample)
        return pd.concat(addedEdges)

    def growEdges(self, time, density, densityGrowth):
        """Add edges with weighted preferential attachment.

        For a given time, select the current social network, including
        newly added nodes. Add weights by current degree and distances.
        Sample a sufficient number of edges to keep density at a given
        level.
        """
        # BUG FIX: the original queried "time <= time", which compares
        # the column to itself (always True) instead of the local
        # variable; use @time for the intended restriction.
        curSocEdges = self.socialNet.query("time <= @time")
        curSocNet = nx.from_pandas_edgelist(
            curSocEdges, source='from_id', target='to_id'
        )
        # Edges needed to reach the (possibly growing) target density.
        edges2add = (
            (
                curSocNet.number_of_nodes() * (curSocNet.number_of_nodes() - 1) / 2
            ) * (density + densityGrowth * time)
        ) - curSocNet.number_of_edges()
        from_degree = pd.DataFrame(
            curSocNet.degree, columns=['from_id', 'from_degree']
        )
        to_degree = pd.DataFrame(
            curSocNet.degree, columns=['to_id', 'to_degree']
        )
        # Inner merges keep only edges between nodes already in the net.
        potEdges = self.allEdges.merge(
            from_degree, how='inner'
        ).merge(
            to_degree, how='inner'
        )
        weights = potEdges.from_degree * potEdges.to_degree * potEdges.dist
        potEdges.insert(0, "weighted", weights)
        try:
            sample = potEdges.sample(
                round(edges2add),
                weights=sum(potEdges.weighted) / potEdges.weighted
            )
            sample = sample[["from_id", "to_id", "dist"]]
            sample.insert(0, "time", time)
            return sample
        except ValueError:
            # Best-effort: sampling fails e.g. when edges2add is
            # negative or exceeds the candidates; keep the original
            # fallback of returning the candidate table unchanged.
            print("Failed")
            return potEdges

    def run(self, nEdges=4, density=0.2, densityGrowth=0):
        """Build the full social network over all time steps.

        Args:
            nEdges: Edges attached per newly activated agent.
            density: Target network density.
            densityGrowth: Per-step increase of the target density.

        Returns:
            The accumulated social network edge list.
        """
        self.density = density
        maxT = self.population.t.max()
        _ = self.initSocNet()
        for time in range(1, maxT + 1, 1):
            newNodeEdges = self.growNodes(time, nEdges)
            self.socialNet = pd.concat([self.socialNet, newNodeEdges])
            newPrefEdges = self.growEdges(time, density, densityGrowth)
            self.socialNet = pd.concat([self.socialNet, newPrefEdges])
        return self.socialNet