Coverage for src/scicom/knowledgespread/utils.py: 0%

128 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-15 13:26 +0200

1"""Utility functions for initial condition generation.""" 

2import numpy as np 

3import pandas as pd 

4from itertools import combinations 

5import networkx as nx 

6from tqdm import tqdm 

7 

8 

def epistemicRange(baseRange, age):
    """Return the age dependent search radius for epistemic discovery.

    For a positive ``age`` the base range is divided by the age. For an
    age of zero or below the base range is handed back unchanged.
    """
    # Guard clause: non-positive ages leave the radius untouched.
    if not age > 0:
        return baseRange
    # Possible refinement: follow the shape of ageFunction instead.
    return baseRange / age

15 

16 

def ageFunction(agent, a, b, c, radius):
    """Return an age dependent radius for ``agent``.

    The curve is bell-shaped with its center at ``c``; the slope and the
    width of the plateau are set by ``a`` and ``b``. An agent whose age
    is exactly zero always gets a radius of 0.

    Can be used to show age-dependent activity of agents.
    """
    age = agent.age
    if age == 0:
        return 0
    # Scaled distance of the agent's age from the center of the bell.
    offset = abs((age - c) / a)
    return radius / (1 + offset ** (2 * b))

29 

30 

class GenerateInitalPopulation():
    """Generate sets of initial conditions.

    Agents are drawn from a landscape ``z = f(x, y)`` evaluated on a regular
    grid over [-1, 1] x [-1, 1]; ``abs(z)`` is used as the sampling weight.
    ``numScientists`` agents start at ``t = 0`` and further agents are added
    for every step from 1 to ``timesteps``.
    """

    def __init__(
        self,
        numScientists,
        timesteps
    ):
        self.N = numScientists
        self.Tstep = timesteps

    def epistemicFunc(self, fc, x, y, beta=8):
        """Epistemic space initial sampling.

        Evaluates the landscape named by ``fc`` ("complex", "central" or
        "polarized") at ``x`` and ``y``. ``beta`` is only used by "complex".

        Raises:
            ValueError: if ``fc`` is not one of the known landscape names.
        """
        if fc == "complex":
            return np.sin(beta * x) + np.cos(beta * y)
        if fc == "central":
            return np.exp(-(x ** 2 + y ** 2))
        if fc == "polarized":
            return x * np.exp(-3 * (x ** 2 + y ** 2))
        # Previously an unknown name fell through and returned None.
        raise ValueError(f"Unknown epistemic function: {fc!r}")

    def timeFunc(self, dataframe, step, fcT="saturate", slope=5, base=5):
        """Population growth function.

        Draws the agents that enter at ``step`` from ``dataframe``, weighted
        by ``abs(dataframe.z)``. The number of agents depends on ``fcT``:

        * "saturate": ``round(step * (1 - step / self.Tstep))``
        * "linear": ``slope``
        * "exponential": ``base ** step``

        Raises:
            ValueError: if ``fcT`` is unknown, or if pandas refuses to draw
                the sample (e.g. more rows requested than available).
        """
        if fcT == "saturate":
            n2step = round(step * (1 - step / self.Tstep))
        elif fcT == "linear":
            n2step = slope
        elif fcT == "exponential":
            n2step = base ** step
        else:
            raise ValueError(f"Unknown population growth function: {fcT!r}")
        try:
            return dataframe.sample(
                n2step,
                weights=abs(dataframe.z)
            )
        except ValueError as err:
            # An except clause must name an exception class, not an instance;
            # re-raise with the hint and keep the pandas error as the cause.
            raise ValueError(
                "Your sample size is larger then the data. Adjust exponential time."
            ) from err

    def sample(self, fcE="complex", fcT="saturate", beta=8, slope=5, base=5,
               resolution=10000):
        """Generate the sample population and add activation time.

        Returns a dataframe with the columns x, y, z, t and id, where ``t``
        is 0 for the ``self.N`` initial agents and the step number for
        agents added later, and ``id`` counts rows starting at 1.

        ``resolution`` is the number of grid points per axis of the
        landscape that is sampled from (``resolution ** 2`` candidates).
        """
        dta = self._fullDist(fcE=fcE, beta=beta, resolution=resolution)
        founders = dta.sample(self.N, weights=abs(dta.z)).assign(t=0)
        cohorts = [
            self.timeFunc(
                dta, step, fcT=fcT, slope=slope, base=base
            ).assign(t=step)
            for step in range(1, self.Tstep + 1)
        ]
        # A single concat also works when there are no later steps.
        initial_population = pd.concat(
            [founders, *cohorts]
        ).reset_index(drop=True)
        initial_population['id'] = initial_population.index + 1
        return initial_population

    def _fullDist(self, fcE, beta, resolution=10000):
        """Full distribution to sample from.

        Evaluates the landscape ``fcE`` on a ``resolution`` x ``resolution``
        grid over [-1, 1] x [-1, 1] and returns it as a flat dataframe with
        the columns x, y and z.
        """
        axis = np.linspace(-1, 1, resolution)
        X, Y = np.meshgrid(axis, axis)
        Z = self.epistemicFunc(fcE, X, Y, beta)
        return pd.DataFrame(
            {'x': X.flatten(), 'y': Y.flatten(), 'z': Z.flatten()}
        )

96 

97 

class GenerateSocNet():
    """Grow a social network on top of a population of agents.

    The population dataframe should contain the columns id, x, y, z and t.
    Edges between agents carry the Euclidean distance of their (x, y, z)
    coordinates; sampling of edges is weighted by these distances and by
    node degrees of the network built so far.
    """

    def __init__(
        self,
        dataframe: pd.DataFrame,
        minDist: float = 0.0001
    ):
        self.population = dataframe
        # Placeholders: density is set by run(), allEdges and socialNet
        # are filled by initSocNet().
        self.density = ''
        self.allEdges = ''
        self.socialNet = ''
        # Distance used for pairs of agents whose coordinates coincide.
        self.minDist = minDist

    def _getWeighted(self, row, degree):
        """Return degree of the edge's ``from_id`` node times its distance.

        Returns None if ``from_id`` is not a key of ``degree``.
        """
        try:
            return degree[int(row["from_id"])] * row["dist"]
        except KeyError:
            return None

    def initSocNet(self):
        """Generates initial social network sample from population.

        The dataframe input should contain the column names: id, x, y, z, t
        Returns social network sample with from_id, to_id, dist, time

        As side effects, ``self.allEdges`` is set to the table of all
        pairwise edges of the whole population and ``self.socialNet`` to
        the returned sample. ``self.density`` has to be numeric when this
        is called (``run`` sets it).
        """
        population = self.population
        first_gen = population.loc[population["t"] == 0, "id"].unique()
        initPopN = len(first_gen)
        coordinateDict = {
            agentID: np.array([x, y, z])
            for agentID, x, y, z in zip(
                population["id"], population["x"],
                population["y"], population["z"]
            )
        }
        edges = []
        for fromID, toID in combinations(population["id"].unique(), 2):
            dist = np.linalg.norm(
                coordinateDict[fromID] - coordinateDict[toID]
            )
            # Coinciding agents get a small positive distance, since the
            # distance is used as a divisor in the sampling weights.
            if dist <= 0.0:
                dist = self.minDist
            edges.append((fromID, toID, dist))
        self.allEdges = pd.DataFrame(
            edges, columns=["from_id", "to_id", "dist"]
        )
        bothFirstGen = (
            self.allEdges["from_id"].isin(first_gen)
            & self.allEdges["to_id"].isin(first_gen)
        )
        social_net = self.allEdges[bothFirstGen]
        # Number of edges = density * number of possible first-gen pairs;
        # the weights favour short distances.
        social_net_sample = social_net.sample(
            round(self.density * (initPopN * (initPopN - 1) / 2)),
            weights=sum(social_net.dist) / social_net.dist
        )
        social_net_sample.insert(0, "time", 0)
        self.socialNet = social_net_sample
        return social_net_sample

    def growNodes(self, time, nEdges):
        """Add nodes with weighted preferential attachment.

        For a time step select all new agents. For each
        agent query all potential edges to previously active
        agents. Weight these edges with the degree of the
        previous social network and the distances. From this
        select N edges for each new agent.
        Return the concatenated new edges

        The returned frame has the columns time, weighted, from_id, to_id
        and dist. ``pd.concat`` raises ValueError if no agent has
        ``t == time``.
        """
        population = self.population
        newIDs = population.loc[population["t"] == time, "id"].unique()
        oldIDs = population.loc[population["t"] < time, "id"].unique()

        # self.socialNet is not modified in this method, so the degree
        # table is the same for every new agent and is built only once.
        socialGraph = nx.from_pandas_edgelist(
            self.socialNet,
            source="from_id",
            target="to_id"
        )
        degreeDict = dict(nx.degree(socialGraph))

        touchesOld = (
            self.allEdges["from_id"].isin(oldIDs)
            | self.allEdges["to_id"].isin(oldIDs)
        )
        addedEdges = []
        for newID in newIDs:
            touchesNew = (
                (self.allEdges["from_id"] == newID)
                | (self.allEdges["to_id"] == newID)
            )
            potEdges = self.allEdges[touchesNew & touchesOld].copy()
            weightedDist = potEdges.apply(
                lambda row: self._getWeighted(row, degreeDict), axis=1
            )
            potEdges.insert(0, "weighted", weightedDist)
            # Edges whose from_id has no degree yet got None above.
            potEdges = potEdges.dropna()
            sample = potEdges.sample(
                nEdges,
                weights=sum(potEdges.weighted) / potEdges.weighted
            )
            sample.insert(0, "time", time)
            addedEdges.append(sample)
        return pd.concat(addedEdges)

    def growEdges(self, time, density, densityGrowth):
        """Add edges with weighted preferential attachment.

        For a given time, select the current social network,
        including newly added nodes. Add weights by current
        degree and distances.
        Sample a sufficient number of edges to keep density
        at a given level.

        The target density is ``density + densityGrowth * time``. Returns
        the sampled edges with the columns time, from_id, to_id and dist.
        """
        # Filter against the ``time`` argument; the former query string
        # "time <= time" compared the column with itself.
        curSocEdges = self.socialNet[self.socialNet["time"] <= time]
        curSocNet = nx.from_pandas_edgelist(
            curSocEdges, source='from_id', target='to_id'
        )
        nNodes = curSocNet.number_of_nodes()
        targetEdges = (nNodes * (nNodes - 1) / 2) * (
            density + densityGrowth * time
        )
        edges2add = targetEdges - curSocNet.number_of_edges()
        from_degree = pd.DataFrame(
            curSocNet.degree, columns=['from_id', 'from_degree']
        )
        to_degree = pd.DataFrame(
            curSocNet.degree, columns=['to_id', 'to_degree']
        )
        # Keep only edges whose two endpoints are already in the network.
        potEdges = self.allEdges.merge(
            from_degree, how='inner', on='from_id'
        ).merge(
            to_degree, how='inner', on='to_id'
        )
        weights = potEdges.from_degree * potEdges.to_degree * potEdges.dist
        potEdges.insert(0, "weighted", weights)
        try:
            sample = potEdges.sample(
                round(edges2add),
                weights=sum(potEdges.weighted) / potEdges.weighted
            )
            sample = sample[["from_id", "to_id", "dist"]]
            sample.insert(0, "time", time)
            return sample
        except ValueError:
            # NOTE(review): on a failed draw the whole candidate table is
            # returned (extra columns, no "time") and run() appends it to
            # the network. Kept as is; confirm this fallback is intended.
            print("Failed")
            return potEdges

    def run(self, nEdges=4, density=0.2, densityGrowth=0):
        """Build the social network for all time steps of the population.

        Creates the initial network for ``t == 0`` and then, for every
        step from 1 to the largest ``t`` in the population, first attaches
        the new agents (``growNodes``) and then adds further edges
        (``growEdges``). Returns the accumulated edge table, which is also
        stored in ``self.socialNet``.
        """
        self.density = density
        maxT = self.population.t.max()
        _ = self.initSocNet()
        for time in range(1, maxT + 1, 1):
            newNodeEdges = self.growNodes(time, nEdges)
            self.socialNet = pd.concat([self.socialNet, newNodeEdges])
            newPrefEdges = self.growEdges(time, density, densityGrowth)
            self.socialNet = pd.concat([self.socialNet, newPrefEdges])
        return self.socialNet

246