lyimo committed on
Commit
9275c29
·
verified ·
1 Parent(s): 4f47d5b

Update part1_data.py

Browse files
Files changed (1) hide show
  1. part1_data.py +88 -148
part1_data.py CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  import os
2
  import numpy as np
3
  import pandas as pd
@@ -85,72 +88,20 @@ class TobaccoAnalyzer:
85
  response = requests.get(forecast_url)
86
  if response.status_code == 200:
87
  data = response.json()
88
- daily_forecasts = {}
89
-
90
  for item in data['list']:
91
  date = datetime.fromtimestamp(item['dt'])
92
- day_key = date.date()
93
-
94
- if day_key not in daily_forecasts:
95
- daily_forecasts[day_key] = {
96
- 'temps': [],
97
- 'humidity': [],
98
- 'rainfall': 0,
99
- 'descriptions': [],
100
- 'temp_mins': [],
101
- 'temp_maxs': []
102
- }
103
-
104
- daily_forecasts[day_key]['temps'].append(float(item['main']['temp']))
105
- daily_forecasts[day_key]['humidity'].append(float(item['main']['humidity']))
106
- daily_forecasts[day_key]['rainfall'] += float(item.get('rain', {}).get('3h', 0))
107
- daily_forecasts[day_key]['descriptions'].append(item['weather'][0]['description'])
108
- daily_forecasts[day_key]['temp_mins'].append(float(item['main']['temp_min']))
109
- daily_forecasts[day_key]['temp_maxs'].append(float(item['main']['temp_max']))
110
-
111
- # Create daily forecast entries
112
- for day_key, day_data in daily_forecasts.items():
113
  forecast = {
114
- 'date': datetime.combine(day_key, datetime.min.time()),
115
- 'temperature': np.mean(day_data['temps']),
116
- 'temp_min': min(day_data['temp_mins']),
117
- 'temp_max': max(day_data['temp_maxs']),
118
- 'humidity': np.mean(day_data['humidity']),
119
- 'rainfall': day_data['rainfall'],
120
  'type': 'forecast',
121
- 'description': max(day_data['descriptions'], key=day_data['descriptions'].count)
 
 
122
  }
123
  forecast_data.append(forecast)
124
 
125
- # Generate extended forecast using trends
126
- if forecast_data:
127
- last_date = max(d['date'] for d in forecast_data)
128
- temp_trend = 0
129
- humidity_trend = 0
130
- rainfall_trend = 0
131
-
132
- if len(historical_data) > 1:
133
- historical_df = pd.DataFrame(historical_data)
134
- temp_trend = stats.linregress(range(len(historical_df)), historical_df['temperature'])[0]
135
- humidity_trend = stats.linregress(range(len(historical_df)), historical_df['humidity'])[0]
136
- rainfall_trend = stats.linregress(range(len(historical_df)), historical_df['rainfall'])[0]
137
-
138
- for day in range(1, forecast_days - len(forecast_data)):
139
- base_forecast = forecast_data[-1]
140
- date = last_date + timedelta(days=day)
141
-
142
- extended_forecast = {
143
- 'date': date,
144
- 'temperature': base_forecast['temperature'] + temp_trend * day,
145
- 'temp_min': base_forecast['temp_min'] + temp_trend * day,
146
- 'temp_max': base_forecast['temp_max'] + temp_trend * day,
147
- 'humidity': base_forecast['humidity'] + humidity_trend * day,
148
- 'rainfall': max(0, base_forecast['rainfall'] + rainfall_trend * day),
149
- 'type': 'forecast_extended',
150
- 'description': 'Extended Forecast'
151
- }
152
- forecast_data.append(extended_forecast)
153
-
154
  except Exception as e:
155
  print(f"Error fetching forecast data: {e}")
156
 
@@ -158,27 +109,22 @@ class TobaccoAnalyzer:
158
  all_data = pd.DataFrame(historical_data + forecast_data)
159
 
160
  if not all_data.empty:
161
- # Ensure numeric types
162
- numeric_columns = ['temperature', 'humidity', 'rainfall', 'temp_min', 'temp_max']
163
- for col in numeric_columns:
164
- all_data[col] = pd.to_numeric(all_data[col], errors='coerce')
165
-
166
  # Sort by date
167
  all_data = all_data.sort_values('date')
168
 
169
- # Calculate temperature range
170
- all_data['temp_range'] = all_data['temp_max'] - all_data['temp_min']
171
-
172
  # Add analysis columns
173
  all_data['month'] = all_data['date'].dt.month
174
  all_data['season'] = all_data['month'].map(self.tanzania_seasons)
175
 
 
 
 
176
  # Calculate rolling averages
177
  all_data['temp_7day_avg'] = all_data['temperature'].rolling(window=7, min_periods=1).mean()
178
  all_data['humidity_7day_avg'] = all_data['humidity'].rolling(window=7, min_periods=1).mean()
179
  all_data['rainfall_7day_avg'] = all_data['rainfall'].rolling(window=7, min_periods=1).mean()
180
 
181
- # Calculate daily suitability and NDVI
182
  all_data['daily_suitability'] = self.calculate_daily_suitability(all_data)
183
  all_data['estimated_ndvi'] = self.estimate_ndvi(all_data)
184
 
@@ -186,6 +132,79 @@ class TobaccoAnalyzer:
186
 
187
  return pd.DataFrame()
188
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  def calculate_daily_suitability(self, df):
190
  """Calculate daily growing suitability"""
191
  try:
@@ -249,83 +268,4 @@ class TobaccoAnalyzer:
249
  except Exception as e:
250
  print(f"Error estimating NDVI: {e}")
251
  return pd.Series(0, index=weather_data.index)
252
-
253
- def analyze_trends(self, df):
254
- """Analyze weather trends and patterns with improved calculations"""
255
- try:
256
- historical = df[df['type'] == 'historical']
257
- forecast = df[df['type'].isin(['forecast', 'forecast_extended'])]
258
-
259
- if len(historical) < 2: # Need at least 2 points for trend
260
- return None
261
-
262
- # Create time index for proper trend calculation
263
- historical['days'] = (historical['date'] - historical['date'].min()).dt.total_seconds() / (24*60*60)
264
-
265
- # Calculate trends using proper time series analysis
266
- temp_trend = stats.linregress(historical['days'], historical['temperature'])
267
- humidity_trend = stats.linregress(historical['days'], historical['humidity'])
268
- rainfall_trend = stats.linregress(historical['days'], historical['rainfall'])
269
- ndvi_trend = stats.linregress(historical['days'], historical['estimated_ndvi'])
270
-
271
- analysis = {
272
- 'historical': {
273
- 'temperature': {
274
- 'mean': historical['temperature'].mean(),
275
- 'std': historical['temperature'].std(),
276
- 'trend': temp_trend.slope, # Change per day
277
- 'trend_r2': temp_trend.rvalue**2,
278
- 'recent_change': historical['temperature'].iloc[-1] - historical['temperature'].iloc[0]
279
- },
280
- 'humidity': {
281
- 'mean': historical['humidity'].mean(),
282
- 'std': historical['humidity'].std(),
283
- 'trend': humidity_trend.slope,
284
- 'trend_r2': humidity_trend.rvalue**2,
285
- 'recent_change': historical['humidity'].iloc[-1] - historical['humidity'].iloc[0]
286
- },
287
- 'rainfall': {
288
- 'mean': historical['rainfall'].mean(),
289
- 'std': historical['rainfall'].std(),
290
- 'trend': rainfall_trend.slope,
291
- 'trend_r2': rainfall_trend.rvalue**2,
292
- 'recent_change': historical['rainfall'].iloc[-1] - historical['rainfall'].iloc[0],
293
- 'rainy_days': (historical['rainfall'] > 0.1).sum() # Count days with significant rain
294
- },
295
- 'ndvi': {
296
- 'mean': historical['estimated_ndvi'].mean(),
297
- 'std': historical['estimated_ndvi'].std(),
298
- 'trend': ndvi_trend.slope,
299
- 'trend_r2': ndvi_trend.rvalue**2,
300
- 'recent_change': historical['estimated_ndvi'].iloc[-1] - historical['estimated_ndvi'].iloc[0]
301
- }
302
- }
303
- }
304
-
305
- if not forecast.empty:
306
- analysis['forecast'] = {
307
- 'temperature': {
308
- 'mean': forecast['temperature'].mean(),
309
- 'std': forecast['temperature'].std(),
310
- 'range': forecast['temp_max'].mean() - forecast['temp_min'].mean()
311
- },
312
- 'humidity': {
313
- 'mean': forecast['humidity'].mean(),
314
- 'std': forecast['humidity'].std()
315
- },
316
- 'rainfall': {
317
- 'mean': forecast['rainfall'].mean(),
318
- 'std': forecast['rainfall'].std(),
319
- 'rainy_days_expected': (forecast['rainfall'] > 0.1).sum()
320
- },
321
- 'ndvi': {
322
- 'mean': forecast['estimated_ndvi'].mean(),
323
- 'std': forecast['estimated_ndvi'].std()
324
- }
325
- }
326
-
327
- return analysis
328
-
329
- except Exception as e:
330
- print(f"Error in trend analysis: {e}")
331
- return None
 
1
+
2
+ # part1_data.py
3
+
4
  import os
5
  import numpy as np
6
  import pandas as pd
 
88
  response = requests.get(forecast_url)
89
  if response.status_code == 200:
90
  data = response.json()
 
 
91
  for item in data['list']:
92
  date = datetime.fromtimestamp(item['dt'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  forecast = {
94
+ 'date': date,
95
+ 'temperature': float(item['main']['temp']),
96
+ 'humidity': float(item['main']['humidity']),
97
+ 'rainfall': float(item.get('rain', {}).get('3h', 0)) * 8,
 
 
98
  'type': 'forecast',
99
+ 'description': item['weather'][0]['description'],
100
+ 'temp_min': float(item['main']['temp_min']),
101
+ 'temp_max': float(item['main']['temp_max'])
102
  }
103
  forecast_data.append(forecast)
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  except Exception as e:
106
  print(f"Error fetching forecast data: {e}")
107
 
 
109
  all_data = pd.DataFrame(historical_data + forecast_data)
110
 
111
  if not all_data.empty:
 
 
 
 
 
112
  # Sort by date
113
  all_data = all_data.sort_values('date')
114
 
 
 
 
115
  # Add analysis columns
116
  all_data['month'] = all_data['date'].dt.month
117
  all_data['season'] = all_data['month'].map(self.tanzania_seasons)
118
 
119
+ # Calculate temperature range
120
+ all_data['temp_range'] = all_data['temp_max'] - all_data['temp_min']
121
+
122
  # Calculate rolling averages
123
  all_data['temp_7day_avg'] = all_data['temperature'].rolling(window=7, min_periods=1).mean()
124
  all_data['humidity_7day_avg'] = all_data['humidity'].rolling(window=7, min_periods=1).mean()
125
  all_data['rainfall_7day_avg'] = all_data['rainfall'].rolling(window=7, min_periods=1).mean()
126
 
127
+ # Calculate suitability and NDVI
128
  all_data['daily_suitability'] = self.calculate_daily_suitability(all_data)
129
  all_data['estimated_ndvi'] = self.estimate_ndvi(all_data)
130
 
 
132
 
133
  return pd.DataFrame()
134
 
135
def analyze_trends(self, df):
    """Summarise historical trends and forecast statistics per weather metric.

    Args:
        df: DataFrame with a datetime-like ``date`` column, a ``type``
            column (rows tagged ``'historical'``, ``'forecast'`` or
            ``'forecast_extended'``) and numeric ``temperature``,
            ``humidity``, ``rainfall`` and ``estimated_ndvi`` columns.

    Returns:
        A dict with a ``'historical'`` entry mapping each of
        ``temperature``/``humidity``/``rainfall``/``ndvi`` to its ``mean``,
        ``std``, ``trend`` (regression slope per day) and ``trend_r2``.
        A ``'forecast'`` entry with ``mean`` and ``std`` per metric is added
        when forecast rows exist. Returns ``None`` when there are fewer than
        two historical rows or when any step raises (the error is printed).
    """
    # Result key -> DataFrame column it is computed from.
    metric_columns = {
        'temperature': 'temperature',
        'humidity': 'humidity',
        'rainfall': 'rainfall',
        'ndvi': 'estimated_ndvi',
    }

    try:
        historical = df[df['type'] == 'historical']
        forecast = df[df['type'].isin(['forecast', 'forecast_extended'])]

        # A regression line needs at least two points.
        if len(historical) < 2:
            return None

        # Elapsed days since the first historical record. Kept as a local
        # Series instead of being written back as a 'days' column: assigning
        # into a filtered slice of ``df`` triggers SettingWithCopyWarning and
        # depends on the filter having returned a copy.
        seconds_per_day = 24 * 60 * 60
        elapsed = historical['date'] - historical['date'].min()
        days = elapsed.dt.total_seconds() / seconds_per_day

        historical_stats = {}
        for key, column in metric_columns.items():
            values = historical[column]
            fit = stats.linregress(days, values)
            historical_stats[key] = {
                'mean': values.mean(),
                'std': values.std(),
                'trend': fit.slope,
                'trend_r2': fit.rvalue**2,
            }

        analysis = {'historical': historical_stats}

        if not forecast.empty:
            analysis['forecast'] = {
                key: {
                    'mean': forecast[column].mean(),
                    'std': forecast[column].std(),
                }
                for key, column in metric_columns.items()
            }

        return analysis

    except Exception as e:
        # Best-effort by design: callers receive None instead of an exception.
        print(f"Error in trend analysis: {e}")
        return None
207
+
208
  def calculate_daily_suitability(self, df):
209
  """Calculate daily growing suitability"""
210
  try:
 
268
  except Exception as e:
269
  print(f"Error estimating NDVI: {e}")
270
  return pd.Series(0, index=weather_data.index)
271
+