lyimo committed on
Commit
bf26f29
·
verified ·
1 Parent(s): 5c02e21

Update part1_data.py

Browse files
Files changed (1) hide show
  1. part1_data.py +156 -238
part1_data.py CHANGED
@@ -8,8 +8,6 @@ import requests
8
  from geopy.geocoders import Nominatim
9
  from geopy.exc import GeocoderTimedOut
10
  from scipy import stats
11
- import ee
12
- import geemap
13
 
14
  # Get API key from environment variable
15
  OPENWEATHER_API_KEY = os.getenv('OPENWEATHER_API_KEY', 'default_key')
@@ -21,7 +19,7 @@ class TobaccoAnalyzer:
21
  'temperature': {'min': 20, 'max': 30},
22
  'humidity': {'min': 60, 'max': 80},
23
  'rainfall': {'min': 500/365, 'max': 1200/365},
24
- 'ndvi': {'min': 0.3, 'max': 0.8} # Optimal NDVI range for tobacco
25
  }
26
  self.geolocator = Nominatim(user_agent="tobacco_analyzer")
27
  self.seasons = {
@@ -30,13 +28,13 @@ class TobaccoAnalyzer:
30
  7: 'Summer', 8: 'Summer', 9: 'Fall',
31
  10: 'Fall', 11: 'Fall', 12: 'Winter'
32
  }
33
- # Initialize Earth Engine
34
- try:
35
- ee.Initialize()
36
- self.ee_initialized = True
37
- except Exception as e:
38
- print(f"Error initializing Earth Engine: {e}")
39
- self.ee_initialized = False
40
 
41
  def geocode_location(self, location_name):
42
  """Convert location name to coordinates"""
@@ -46,12 +44,23 @@ class TobaccoAnalyzer:
46
  return {
47
  'lat': location.latitude,
48
  'lon': location.longitude,
49
- 'address': location.address
 
50
  }
51
  return None
52
  except GeocoderTimedOut:
53
  return None
54
 
 
 
 
 
 
 
 
 
 
 
55
  def get_weather_data(self, lat, lon, historical_days=90, forecast_days=90):
56
  """Get historical and forecast weather data"""
57
  historical_data = []
@@ -70,265 +79,174 @@ class TobaccoAnalyzer:
70
  'humidity': data['main']['humidity'],
71
  'rainfall': data.get('rain', {}).get('1h', 0) * 24,
72
  'type': 'historical',
73
- 'description': data['weather'][0]['description']
 
74
  }
75
  historical_data.append(weather_data)
76
  except Exception as e:
77
  print(f"Error fetching historical data: {e}")
78
 
79
  # Get forecast data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  forecast_data = []
81
  try:
 
82
  forecast_url = f"https://api.openweathermap.org/data/2.5/forecast?lat={lat}&lon={lon}&appid={self.api_key}&units=metric"
83
  response = requests.get(forecast_url)
84
  if response.status_code == 200:
85
  data = response.json()
86
  for item in data['list']:
 
87
  forecast = {
88
- 'date': datetime.fromtimestamp(item['dt']),
89
  'temperature': item['main']['temp'],
90
  'humidity': item['main']['humidity'],
91
  'rainfall': item.get('rain', {}).get('3h', 0) * 8,
92
  'type': 'forecast_5day',
93
- 'description': item['weather'][0]['description']
 
94
  }
95
  forecast_data.append(forecast)
96
 
97
- # Generate extended forecast
98
  last_date = max(d['date'] for d in forecast_data)
99
- historical_df = pd.DataFrame(historical_data)
 
 
100
 
101
- for day in range(1, forecast_days - 5):
102
- date = last_date + timedelta(days=day)
103
-
104
- if not historical_df.empty:
105
- temp_trend = stats.linregress(range(len(historical_df)), historical_df['temperature'])[0]
106
- humidity_trend = stats.linregress(range(len(historical_df)), historical_df['humidity'])[0]
107
- rainfall_trend = stats.linregress(range(len(historical_df)), historical_df['rainfall'])[0]
108
- else:
109
- temp_trend = humidity_trend = rainfall_trend = 0
110
-
111
- recent_temps = [d['temperature'] for d in forecast_data[-5:]]
112
- recent_humidity = [d['humidity'] for d in forecast_data[-5:]]
113
- recent_rainfall = [d['rainfall'] for d in forecast_data[-5:]]
114
-
115
- extended_forecast = {
116
- 'date': date,
117
- 'temperature': np.mean(recent_temps) + temp_trend * day,
118
- 'humidity': np.mean(recent_humidity) + humidity_trend * day,
119
- 'rainfall': np.mean(recent_rainfall) + rainfall_trend * day,
120
- 'type': 'forecast_extended',
121
- 'description': 'Extended Forecast'
122
- }
123
- forecast_data.append(extended_forecast)
124
-
125
  except Exception as e:
126
  print(f"Error fetching forecast data: {e}")
127
-
128
- # Combine and process all data
129
- all_data = pd.DataFrame(historical_data + forecast_data)
130
- all_data = all_data.sort_values('date')
131
-
132
- # Add analysis columns
133
- all_data['month'] = all_data['date'].dt.month
134
- all_data['season'] = all_data['month'].map(self.seasons)
135
 
136
- # Calculate rolling averages
137
- all_data['temp_7day_avg'] = all_data['temperature'].rolling(window=7, min_periods=1).mean()
138
- all_data['humidity_7day_avg'] = all_data['humidity'].rolling(window=7, min_periods=1).mean()
139
- all_data['rainfall_7day_avg'] = all_data['rainfall'].rolling(window=7, min_periods=1).mean()
140
-
141
- return all_data
142
-
143
def get_ndvi_data(self, lat, lon, radius=2000):
    """Build a mean NDVI image around a point and summarize it.

    A buffer of ``radius`` is drawn around the point (lon, lat). Images
    from the 'COPERNICUS/S2_SR' collection covering the last 90 days, with
    CLOUDY_PIXEL_PERCENTAGE below 20, each get an 'NDVI' band (normalized
    difference of bands B8 and B4); the NDVI bands are averaged and then
    reduced over the buffer.

    Args:
        lat: Latitude of the point of interest.
        lon: Longitude of the point of interest.
        radius: Buffer distance passed to ``ee.Geometry.buffer``.

    Returns:
        dict with 'image' (the mean NDVI image), 'stats' (the fetched
        mean / stdDev / min-max reduction) and 'area' (the buffer), or
        None when any step raises.
    """
    try:
        area = ee.Geometry.Point([lon, lat]).buffer(radius)

        window_end = datetime.now()
        window_start = window_end - timedelta(days=90)
        day_format = '%Y-%m-%d'

        collection = ee.ImageCollection('COPERNICUS/S2_SR')
        collection = collection.filterDate(
            window_start.strftime(day_format),
            window_end.strftime(day_format),
        )
        collection = collection.filterBounds(area)
        collection = collection.filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))

        def with_ndvi_band(image):
            # Attach the normalized difference of B8 and B4 as an 'NDVI' band.
            return image.addBands(image.normalizedDifference(['B8', 'B4']).rename('NDVI'))

        ndvi_image = collection.map(with_ndvi_band).select('NDVI').mean()

        # One combined reducer yields mean, standard deviation and min/max together.
        reducer = ee.Reducer.mean().combine(
            reducer2=ee.Reducer.stdDev(),
            sharedInputs=True,
        )
        reducer = reducer.combine(
            reducer2=ee.Reducer.minMax(),
            sharedInputs=True,
        )
        summary = ndvi_image.reduceRegion(
            reducer=reducer,
            geometry=area,
            scale=10,
            maxPixels=1e9,
        ).getInfo()

        return {'image': ndvi_image, 'stats': summary, 'area': area}

    except Exception as e:
        print(f"Error fetching NDVI data: {e}")
        return None
186
-
187
def analyze_location(self, location_name, historical_days=90, forecast_days=90):
    """Run the full weather (and optional NDVI) analysis for a named place.

    Args:
        location_name: Free-text place name handed to ``geocode_location``.
        historical_days: Forwarded to ``get_weather_data``.
        forecast_days: Forwarded to ``get_weather_data``.

    Returns:
        dict with 'location', 'weather_data', 'weather_analysis',
        'weather_score', 'ndvi_data', 'ndvi_score' and 'combined_score',
        or None when the place cannot be geocoded or any step raises.
        The NDVI entries stay None when ``self.ee_initialized`` is false
        or the NDVI step fails.
    """
    try:
        place = self.geocode_location(location_name)
        if not place:
            raise ValueError(f"Could not find coordinates for location: {location_name}")

        lat, lon = place['lat'], place['lon']

        weather_data = self.get_weather_data(lat, lon, historical_days, forecast_days)
        weather_analysis = self.analyze_trends(weather_data)
        weather_score = self.calculate_weather_score(weather_analysis)

        # NDVI is best-effort: a failure here must not discard the weather results.
        ndvi_data = ndvi_score = None
        if self.ee_initialized:
            try:
                ndvi_data = self.get_ndvi_data(lat, lon)
                ndvi_score = self.calculate_ndvi_score(ndvi_data)
            except Exception as e:
                print(f"Error getting NDVI data: {e}")

        report = {
            'location': place,
            'weather_data': weather_data,
            'weather_analysis': weather_analysis,
            'weather_score': weather_score,
            'ndvi_data': ndvi_data,
            'ndvi_score': ndvi_score,
        }
        report['combined_score'] = self.calculate_combined_score(weather_score, ndvi_score)
        return report

    except Exception as e:
        print(f"Error in location analysis: {e}")
        return None
223
-
224
def analyze_trends(self, df):
    """Summarize historical and forecast weather statistics.

    Args:
        df: DataFrame with a 'type' column ('historical', 'forecast_5day'
            or 'forecast_extended') and numeric 'temperature', 'humidity'
            and 'rainfall' columns.

    Returns:
        dict with 'historical' -> {metric: {'mean', 'std', 'trend'}} and
        'forecast' -> {metric: {'mean', 'std'}}, where 'trend' is the
        linear-regression slope against the row index. Returns None when
        there are no historical rows.
    """
    metrics = ('temperature', 'humidity', 'rainfall')
    historical = df[df['type'] == 'historical']
    forecast = df[df['type'].isin(['forecast_5day', 'forecast_extended'])]

    # Without historical rows the regression has no input and every mean is
    # NaN; report "no analysis" instead (calculate_weather_score already
    # treats a falsy analysis as such).
    if historical.empty:
        return None

    positions = range(len(historical))
    analysis = {'historical': {}, 'forecast': {}}
    for metric in metrics:
        past = historical[metric]
        analysis['historical'][metric] = {
            'mean': past.mean(),
            'std': past.std(),
            'trend': stats.linregress(positions, past)[0],
        }
        ahead = forecast[metric]
        analysis['forecast'][metric] = {
            'mean': ahead.mean(),
            'std': ahead.std(),
        }

    return analysis
264
-
265
def calculate_ndvi_score(self, ndvi_data):
    """Turn mean NDVI into a 0..1 suitability score.

    The mean NDVI is mapped from the -1..1 scale onto 0..1, boosted by 20%
    when it lies inside ``self.optimal_conditions['ndvi']`` and halved when
    it is negative, then clamped to 0..1.

    Args:
        ndvi_data: dict with a 'stats' mapping that may hold 'NDVI_mean'
            (a missing key is treated as 0).

    Returns:
        float in [0.0, 1.0], or None when ``ndvi_data`` is empty, has no
        'stats', or carries no usable mean.
    """
    if not ndvi_data or 'stats' not in ndvi_data:
        return None

    # Named region_stats so the module-level scipy `stats` import is not shadowed.
    region_stats = ndvi_data['stats']
    if region_stats is None:
        return None

    mean_ndvi = region_stats.get('NDVI_mean', 0)
    # NOTE(review): a region reduction can presumably report a null mean
    # (e.g. no usable pixels); arithmetic on None would raise, so treat it
    # as "no score" - confirm against the Earth Engine response.
    if mean_ndvi is None:
        return None

    # Convert NDVI from -1:1 scale to 0:1 scale
    score = (mean_ndvi + 1) / 2

    optimal = self.optimal_conditions['ndvi']
    if optimal['min'] <= mean_ndvi <= optimal['max']:
        score *= 1.2  # Bonus for optimal range
    elif mean_ndvi < 0:
        score *= 0.5  # Penalty for very low vegetation

    return min(1.0, max(0.0, score))
283
-
284
def calculate_weather_score(self, weather_analysis):
    """Blend the per-metric range scores into one weather suitability score.

    Historical mean temperature, humidity and rainfall are each scored
    against their range in ``self.optimal_conditions`` using
    ``calculate_range_score`` and combined with weights 0.4 / 0.3 / 0.3.

    Args:
        weather_analysis: dict whose 'historical' entry maps each metric to
            a dict with a 'mean' value.

    Returns:
        The weighted score, or None when ``weather_analysis`` is falsy.
    """
    if not weather_analysis:
        return None

    past = weather_analysis['historical']
    metrics = ('temperature', 'humidity', 'rainfall')

    # Read every mean before scoring anything.
    means = {metric: past[metric]['mean'] for metric in metrics}

    scores = {}
    for metric in metrics:
        limits = self.optimal_conditions[metric]
        scores[metric] = self.calculate_range_score(
            means[metric], limits['min'], limits['max']
        )

    return (scores['temperature'] * 0.4 + scores['humidity'] * 0.3 + scores['rainfall'] * 0.3)
314
-
315
def calculate_range_score(self, value, min_val, max_val):
    """Score how close ``value`` is to the range [min_val, max_val].

    Inside the range the score is 1.0. Outside it, the score drops
    linearly with the distance to the nearest bound, measured relative to
    the magnitude of that bound, and never goes below 0.0.

    Args:
        value: Observed value.
        min_val: Lower bound of the optimal range.
        max_val: Upper bound of the optimal range.

    Returns:
        float in [0.0, 1.0].
    """
    if min_val <= value <= max_val:
        return 1.0

    if value < min_val:
        bound, distance = min_val, min_val - value
    else:
        bound, distance = max_val, value - max_val

    # Use the bound's magnitude: dividing by a negative bound would flip
    # the penalty into a bonus and push the score above 1.
    scale = abs(bound)
    if scale == 0:
        # Any deviation from a zero bound is unbounded in relative terms.
        return 0.0

    return max(0.0, 1 - distance / scale)
323
 
324
def calculate_combined_score(self, weather_score, ndvi_score):
    """Merge the weather and NDVI scores into one suitability value.

    Returns None without a weather score, the weather score alone when no
    NDVI score is available, and otherwise the 0.6 / 0.4 weighted sum of
    weather and NDVI.
    """
    if weather_score is None:
        return None

    weights = {'weather': 0.6, 'ndvi': 0.4}
    return (
        weather_score
        if ndvi_score is None
        else (weather_score * weights['weather']) + (ndvi_score * weights['ndvi'])
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  from geopy.geocoders import Nominatim
9
  from geopy.exc import GeocoderTimedOut
10
  from scipy import stats
 
 
11
 
12
  # Get API key from environment variable
13
  OPENWEATHER_API_KEY = os.getenv('OPENWEATHER_API_KEY', 'default_key')
 
19
  'temperature': {'min': 20, 'max': 30},
20
  'humidity': {'min': 60, 'max': 80},
21
  'rainfall': {'min': 500/365, 'max': 1200/365},
22
+ 'ndvi': {'min': 0.3, 'max': 0.8} # Added NDVI optimal range for tobacco
23
  }
24
  self.geolocator = Nominatim(user_agent="tobacco_analyzer")
25
  self.seasons = {
 
28
  7: 'Summer', 8: 'Summer', 9: 'Fall',
29
  10: 'Fall', 11: 'Fall', 12: 'Winter'
30
  }
31
+ # Tanzania-specific growing seasons
32
+ self.tanzania_seasons = {
33
+ 1: 'Main', 2: 'Main', 3: 'Main', # Main growing season
34
+ 4: 'Late', 5: 'Late', 6: 'Dry',
35
+ 7: 'Dry', 8: 'Dry', 9: 'Early',
36
+ 10: 'Early', 11: 'Early', 12: 'Main'
37
+ }
38
 
39
  def geocode_location(self, location_name):
40
  """Convert location name to coordinates"""
 
44
  return {
45
  'lat': location.latitude,
46
  'lon': location.longitude,
47
+ 'address': location.address,
48
+ 'region': self.get_tanzania_region(location.address)
49
  }
50
  return None
51
  except GeocoderTimedOut:
52
  return None
53
 
54
def get_tanzania_region(self, address):
    """Pick out the address component that names a known area.

    The address is lower-cased and split on commas; the first component
    containing 'tabora', 'urambo', 'sikonge' or 'nzega' is returned,
    stripped of surrounding whitespace.

    Args:
        address: Full address string, or a falsy value.

    Returns:
        The matching lower-cased component, or None when the address is
        falsy or nothing matches.
    """
    if not address:
        return None

    known_names = ('tabora', 'urambo', 'sikonge', 'nzega')
    for chunk in address.lower().split(','):
        candidate = chunk.strip()
        for name in known_names:
            if name in candidate:
                return candidate
    return None
63
+
64
  def get_weather_data(self, lat, lon, historical_days=90, forecast_days=90):
65
  """Get historical and forecast weather data"""
66
  historical_data = []
 
79
  'humidity': data['main']['humidity'],
80
  'rainfall': data.get('rain', {}).get('1h', 0) * 24,
81
  'type': 'historical',
82
+ 'description': data['weather'][0]['description'],
83
+ 'season': self.tanzania_seasons[date.month] # Use Tanzania-specific seasons
84
  }
85
  historical_data.append(weather_data)
86
  except Exception as e:
87
  print(f"Error fetching historical data: {e}")
88
 
89
  # Get forecast data
90
+ forecast_data = self.get_forecast_data(lat, lon, forecast_days)
91
+
92
+ # Combine and process all data
93
+ all_data = pd.DataFrame(historical_data + forecast_data)
94
+ if not all_data.empty:
95
+ all_data = all_data.sort_values('date')
96
+ all_data['month'] = all_data['date'].dt.month
97
+ all_data['season'] = all_data['month'].map(self.tanzania_seasons)
98
+
99
+ # Calculate rolling averages
100
+ all_data['temp_7day_avg'] = all_data['temperature'].rolling(window=7, min_periods=1).mean()
101
+ all_data['humidity_7day_avg'] = all_data['humidity'].rolling(window=7, min_periods=1).mean()
102
+ all_data['rainfall_7day_avg'] = all_data['rainfall'].rolling(window=7, min_periods=1).mean()
103
+
104
+ # Add vegetation index estimates based on weather conditions
105
+ all_data['estimated_ndvi'] = self.estimate_ndvi(all_data)
106
+
107
+ return all_data
108
+
109
def get_forecast_data(self, lat, lon, forecast_days):
    """Fetch the OpenWeatherMap forecast and append an extrapolated tail.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        forecast_days: Horizon forwarded to ``generate_extended_forecast``.

    Returns:
        list of dicts with 'date', 'temperature', 'humidity', 'rainfall',
        'type', 'description' and 'season'. The list is empty when the
        request fails or returns no entries; errors are printed, never
        raised.
    """
    forecast_data = []
    try:
        # Get 5-day forecast
        forecast_url = f"https://api.openweathermap.org/data/2.5/forecast?lat={lat}&lon={lon}&appid={self.api_key}&units=metric"
        # Without a timeout a stalled connection would block indefinitely;
        # a timeout error is reported by the handler below.
        response = requests.get(forecast_url, timeout=10)
        if response.status_code == 200:
            data = response.json()
            for item in data['list']:
                date = datetime.fromtimestamp(item['dt'])
                forecast_data.append({
                    'date': date,
                    'temperature': item['main']['temp'],
                    'humidity': item['main']['humidity'],
                    # The '3h' amount is multiplied by 8 - presumably to scale
                    # a 3-hour total to a daily figure.
                    'rainfall': item.get('rain', {}).get('3h', 0) * 8,
                    'type': 'forecast_5day',
                    'description': item['weather'][0]['description'],
                    'season': self.tanzania_seasons[date.month],
                })
        else:
            # Surface rejected requests (bad key, rate limit, ...) instead of
            # returning an empty forecast without explanation.
            print(f"Error fetching forecast data: HTTP {response.status_code}")

        # Extend only when real entries exist; max() on an empty list would
        # raise and print a misleading error.
        if forecast_data:
            last_date = max(entry['date'] for entry in forecast_data)
            forecast_data.extend(
                self.generate_extended_forecast(forecast_data, last_date, forecast_days)
            )

    except Exception as e:
        print(f"Error fetching forecast data: {e}")

    return forecast_data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
def generate_extended_forecast(self, forecast_data, last_date, forecast_days):
    """Extrapolate one forecast entry per day beyond the real forecast.

    A linear trend is fitted to the last five entries of
    ``forecast_data`` (entries, not days) and projected from their mean.
    Humidity is kept within 0..100 and rainfall at or above 0, so the
    extrapolation cannot produce physically impossible values.

    Args:
        forecast_data: list of dicts holding 'temperature', 'humidity'
            and 'rainfall'.
        last_date: datetime of the latest real forecast entry.
        forecast_days: Total horizon; entries are produced for day offsets
            1 .. forecast_days - 6 after ``last_date``.

    Returns:
        list of dicts with 'date', 'temperature', 'humidity', 'rainfall',
        'type' ('forecast_extended'), 'description' and 'season'. The list
        is empty when ``forecast_data`` is empty.
    """
    recent_data = pd.DataFrame(forecast_data[-5:])
    if recent_data.empty:
        return []

    def baseline_and_slope(column):
        # Mean and per-step slope of one column, computed once up front.
        series = recent_data[column]
        if len(series) < 2:
            # A single sample has no defined slope; extrapolate flat.
            return series.mean(), 0.0
        return series.mean(), stats.linregress(range(len(series)), series)[0]

    temp_base, temp_trend = baseline_and_slope('temperature')
    humidity_base, humidity_trend = baseline_and_slope('humidity')
    rainfall_base, rainfall_trend = baseline_and_slope('rainfall')

    extended_data = []
    for day in range(1, forecast_days - 5):
        date = last_date + timedelta(days=day)
        extended_data.append({
            'date': date,
            'temperature': temp_base + temp_trend * day,
            # Humidity is a percentage; np.clip keeps NaN visible.
            'humidity': np.clip(humidity_base + humidity_trend * day, 0.0, 100.0),
            # Rainfall cannot be negative.
            'rainfall': np.maximum(rainfall_base + rainfall_trend * day, 0.0),
            'type': 'forecast_extended',
            'description': 'Extended Forecast',
            'season': self.tanzania_seasons[date.month],
        })

    return extended_data
166
+
167
def estimate_ndvi(self, weather_data):
    """Derive a rough NDVI proxy from weather columns.

    Temperature is rescaled over 15..30, humidity over 50..80 and rainfall
    by a divisor of 5. The three terms are blended 0.4 / 0.3 / 0.3, scaled
    by a per-season factor looked up from the 'season' column, and clipped
    to -1..1.

    Args:
        weather_data: DataFrame with 'temperature', 'humidity', 'rainfall'
            and 'season' columns.

    Returns:
        The clipped estimate, aligned with ``weather_data``; rows whose
        season is not 'Main', 'Early', 'Late' or 'Dry' come out as NaN.
    """
    # Season adjustment factors for Tanzania
    seasonal_weight = {'Dry': 0.5, 'Late': 0.7, 'Early': 0.8, 'Main': 1.0}

    temp_term = (weather_data['temperature'] - 15) / (30 - 15)
    humidity_term = (weather_data['humidity'] - 50) / (80 - 50)
    rain_term = weather_data['rainfall'] / 5

    blended = 0.4 * temp_term + 0.3 * humidity_term + 0.3 * rain_term
    scaled = blended * weather_data['season'].map(seasonal_weight)

    # Keep the estimate inside the valid NDVI range.
    return np.clip(scaled, -1, 1)
 
 
 
 
 
 
 
 
194
 
195
def analyze_trends(self, df):
    """Summarize historical and forecast statistics per metric.

    Args:
        df: DataFrame with a 'type' column ('historical', 'forecast_5day'
            or 'forecast_extended') and 'temperature', 'humidity',
            'rainfall' and 'estimated_ndvi' columns.

    Returns:
        dict with 'historical' -> {metric: {'mean', 'std', 'trend'}} for
        temperature, humidity, rainfall and ndvi, where 'trend' is the
        linear-regression slope against the row index. When forecast rows
        exist it also has 'forecast' -> {metric: {'mean', 'std'}}.
        Returns None when there are no historical rows or any step raises.
    """
    try:
        past = df[df['type'] == 'historical']
        future = df[df['type'].isin(['forecast_5day', 'forecast_extended'])]

        if past.empty:
            return None

        # Report key -> DataFrame column it is computed from.
        source_columns = {
            'temperature': 'temperature',
            'humidity': 'humidity',
            'rainfall': 'rainfall',
            'ndvi': 'estimated_ndvi',
        }

        positions = range(len(past))
        analysis = {'historical': {}}
        for label, column in source_columns.items():
            series = past[column]
            analysis['historical'][label] = {
                'mean': series.mean(),
                'std': series.std(),
                'trend': stats.linregress(positions, series)[0],
            }

        if not future.empty:
            analysis['forecast'] = {
                label: {
                    'mean': future[column].mean(),
                    'std': future[column].std(),
                }
                for label, column in source_columns.items()
            }

        return analysis
    except Exception as e:
        print(f"Error in trend analysis: {e}")
        return None