Coverage for src/scicom/historicalletters/utils.py: 0%

35 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-15 13:26 +0200

1import random 

2import pandas as pd 

3import geopandas as gpd 

4from shapely import contains, LineString 

5 

6 

def createData(population: int, populationDistribution: str):
    """Draw weighted random settlement coordinates for a number of agents.

    The settlement table is read from ``populationDistribution`` (CSV,
    latin1 encoded, first column used as index). Every settlement gets
    the weight ``Area / Pop`` and ``population`` coordinate pairs are
    drawn with replacement according to these weights.

    The original CSV dataset is retrieved from
    https://doi.org/10.1371/journal.pone.0162678.s003

    Returns a GeoDataFrame in EPSG:3857 with the columns ``unique_id``
    ("P0", "P1", ...), ``longitude`` and ``latitude`` plus a point
    geometry built from the sampled coordinates.
    """
    settlements = pd.read_csv(
        populationDistribution, encoding="latin1", index_col=0
    )

    # Sampling weight of each settlement: settlement area relative to its
    # population. These values are the weights used to draw an agent from
    # the corresponding coordinates.
    # NOTE(review): this is Area / Pop, i.e. the inverse of a population
    # density; confirm that this is the intended weighting.
    area_per_pop = [
        entry["Area"] / entry["Pop"] for _, entry in settlements.iterrows()
    ]
    settlements.insert(0, 'relPop', area_per_pop)

    # Four coastal cities can not be considered, since the modern NUTS
    # regions give zero overlap to their coordinates, leading to potential
    # errors when agents move.
    # The name `excludeCoastal` is referenced from inside the query string.
    excludeCoastal = ['Great Yarmouth', 'Kingston-upon-Hull', 'Calais', 'Toulon']
    settlements = settlements.query("~Settlement.isin(@excludeCoastal)")

    candidates = [entry for _, entry in settlements.iterrows()]
    weights = [entry["relPop"] for entry in candidates]
    locations = [
        (entry["longitude"], entry["latitude"]) for entry in candidates
    ]

    sampled = random.choices(locations, weights, k=population)

    agents = pd.DataFrame(sampled, columns=["longitude", "latitude"])
    agents.insert(
        0, 'unique_id', [f"P{number}" for number in range(population)]
    )

    # Interpret the sampled coordinates as EPSG:4326 points.
    points = gpd.points_from_xy(agents.longitude, agents.latitude)
    located = gpd.GeoDataFrame(agents, geometry=points, crs="EPSG:4326")

    # Transform to EPSG:3857, since the NUTS shape files are in
    # that projection.
    return located.to_crs("EPSG:3857")

79 

80 

def getRegion(geometry, model):
    """Return the ID of the first model region containing the geometry.

    Iterates over ``model.regions`` in order and returns the ``unique_id``
    of the first region whose ``geometry`` contains the input ``geometry``.

    Might e.g. fail if line of connection crosses international
    waters, since there is no NUTS region assigned then.

    Raises:
        IndexError: If no region of the model contains the geometry.
    """
    # Only the first match is needed, so stop scanning as soon as it is
    # found instead of testing every remaining region.
    for region in model.regions:
        if contains(region.geometry, geometry):
            return region.unique_id
    raise IndexError(f"Can not find overlaping region to geometry {geometry}")

94 

95 

def getPositionOnLine(start, target, returnType="point"):
    """Interpolate movement along line between two given points.

    The amount of moving from start to target is random: a fraction drawn
    uniformly from [0.0, 1.0] of the segment length is travelled.

    Args:
        start: Start of the segment, passed as first vertex to LineString.
        target: End of the segment, passed as second vertex to LineString.
        returnType: "point" returns the interpolated geometry itself,
            "coords" returns its first coordinate entry.

    Raises:
        ValueError: If ``returnType`` is neither "point" nor "coords".
    """
    # Reject unsupported return types up front; previously such calls
    # silently returned None.
    if returnType not in ("point", "coords"):
        raise ValueError(
            f"Unsupported returnType {returnType!r}, use 'point' or 'coords'."
        )
    segment = LineString([start, target])
    newPos = segment.interpolate(random.uniform(0.0, 1.0), normalized=True)
    if returnType == "point":
        return newPos
    return newPos.coords[0]