Coverage for src/scicom/historicalletters/utils.py: 0%
35 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-15 13:26 +0200
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-15 13:26 +0200
1import random
2import pandas as pd
3import geopandas as gpd
4from shapely import contains, LineString
def createData(population: int, populationDistribution: str) -> "gpd.GeoDataFrame":
    """Create random agent coordinates from historical settlement data.

    Reads a CSV of settlements, derives one sampling weight per
    settlement and draws ``population`` coordinates (with replacement)
    from the settlement locations according to those weights.

    The original CSV dataset is retrieved from
    https://doi.org/10.1371/journal.pone.0162678.s003

    Args:
        population: Number of agents, i.e. rows of the returned frame.
        populationDistribution: Location of the settlement CSV, passed
            straight to ``pandas.read_csv``. The file must provide the
            fields "Settlement", "Area", "Pop", "longitude" and
            "latitude".

    Returns:
        A GeoDataFrame in EPSG:3857 with the columns ``unique_id``
        ("P0", "P1", ...), ``longitude``, ``latitude`` and a point
        ``geometry`` built from longitude/latitude.
    """
    initial_population_choices = pd.read_csv(
        populationDistribution,
        encoding="latin1",
        index_col=0,
    )

    # Per-settlement sampling weight, used as the relative probability
    # of drawing an agent from that settlement's coordinates.
    # NOTE(review): the weight is Area / Pop (area per inhabitant), while
    # the description speaks of population density (Pop / Area) --
    # confirm which one is intended before changing it.
    relPop = (
        initial_population_choices["Area"] / initial_population_choices["Pop"]
    ).to_numpy()
    initial_population_choices.insert(0, "relPop", relPop)

    # Four coastal cities can not be considered, since the modern NUTS
    # regions give zero overlap to their coordinates, leading to potential
    # errors when agents move.
    # The name ``excludeCoastal`` is referenced by the query string below.
    excludeCoastal = ["Great Yarmouth", "Kingston-upon-Hull", "Calais", "Toulon"]
    initial_population_choices = initial_population_choices.query(
        "~Settlement.isin(@excludeCoastal)"
    )

    # Column-wise extraction instead of row-wise iteration.
    loc_values = list(
        zip(
            initial_population_choices["longitude"],
            initial_population_choices["latitude"],
        )
    )
    loc_probabilities = initial_population_choices["relPop"].tolist()

    # Weighted draw with replacement; weights need not be normalized.
    coordinates = random.choices(
        loc_values,
        loc_probabilities,
        k=population,
    )

    data = pd.DataFrame(
        coordinates,
        columns=["longitude", "latitude"],
    )
    data.insert(
        0,
        "unique_id",
        [f"P{x}" for x in range(population)],
    )

    # Build the GeoDataFrame with EPSG:4326 projection (lon/lat degrees).
    geodf = gpd.GeoDataFrame(
        data,
        geometry=gpd.points_from_xy(data.longitude, data.latitude),
        crs="EPSG:4326",
    )

    # Transform to EPSG:3857, since the NUTS shape files are in
    # that projection.
    geodf = geodf.to_crs("EPSG:3857")

    return geodf
def getRegion(geometry, model):
    """Get the ID of the first region containing the input geometry.

    Iterates ``model.regions`` in order and returns the ``unique_id`` of
    the first region whose ``geometry`` contains ``geometry``. The search
    stops at the first match.

    Might e.g. fail if line of connection crosses international
    waters, since there is no NUTS region assigned then.

    Args:
        geometry: Geometry to locate (passed to ``shapely.contains``).
        model: Object exposing an iterable ``regions``; each region
            provides ``unique_id`` and ``geometry`` attributes.

    Returns:
        The ``unique_id`` of the first containing region.

    Raises:
        IndexError: If no region contains the geometry.
    """
    for region in model.regions:
        if contains(region.geometry, geometry):
            return region.unique_id
    raise IndexError(f"Can not find overlaping region to geometry {geometry}")
def getPositionOnLine(start, target, returnType="point"):
    """Interpolate movement along the line between two given points.

    The amount of moving from start to target is random: a fraction is
    drawn uniformly from [0, 1] and used as the normalized distance
    along the segment start -> target.

    Args:
        start: Start of the segment (anything ``LineString`` accepts as
            a coordinate).
        target: End of the segment (same form as ``start``).
        returnType: "point" to return the interpolated shapely geometry,
            "coords" to return its first coordinate tuple.

    Returns:
        The interpolated position, in the form selected by ``returnType``.

    Raises:
        ValueError: If ``returnType`` is neither "point" nor "coords".
    """
    # Reject unsupported modes up front instead of silently returning None.
    if returnType not in ("point", "coords"):
        raise ValueError(
            f"returnType must be 'point' or 'coords', got {returnType!r}"
        )
    segment = LineString([start, target])
    newPos = segment.interpolate(random.uniform(0.0, 1.0), normalized=True)
    if returnType == "point":
        return newPos
    return newPos.coords[0]