lyimo committed on
Commit
f83de07
·
verified ·
1 Parent(s): 772a49a

Update part1_data.py

Browse files
Files changed (1) hide show
  1. part1_data.py +94 -130
part1_data.py CHANGED
@@ -53,173 +53,137 @@ class TobaccoAnalyzer:
53
  return None
54
 
55
  def get_weather_data(self, lat, lon, historical_days=90, forecast_days=90):
56
- """Get historical and forecast weather data with pattern variations"""
57
  historical_data = []
58
 
59
- # Get current weather and recent history
60
- current_url = f"https://api.openweathermap.org/data/2.5/weather?lat={lat}&lon={lon}&appid={self.api_key}&units=metric"
61
- try:
62
- response = requests.get(current_url)
63
- if response.status_code == 200:
64
- current = response.json()
65
- base_temp = current['main']['temp']
66
- base_humidity = current['main']['humidity']
67
- base_rainfall = current.get('rain', {}).get('1h', 0) * 24
68
- else:
69
- base_temp = 25
70
- base_humidity = 70
71
- base_rainfall = 0
72
- except Exception as e:
73
- print(f"Error fetching current weather: {e}")
74
- base_temp = 25
75
- base_humidity = 70
76
- base_rainfall = 0
77
-
78
- # Generate historical data with patterns
79
  for day in range(historical_days):
80
  date = datetime.now() - timedelta(days=day)
81
-
82
- # Add daily patterns
83
- for hour in range(24):
84
- # Temperature pattern: Daily cycle with random variations
85
- hour_temp = base_temp + \
86
- 3 * np.sin((hour - 6) * np.pi / 12) + \
87
- np.random.normal(0, 1)
88
-
89
- # Humidity pattern: Inverse to temperature
90
- hour_humidity = base_humidity - \
91
- 10 * np.sin((hour - 6) * np.pi / 12) + \
92
- np.random.normal(0, 5)
93
-
94
- # Rainfall pattern: More likely in afternoon
95
- rain_chance = 0.1 + 0.2 * np.sin((hour - 12) * np.pi / 12)
96
- hour_rainfall = np.random.exponential(base_rainfall) if np.random.random() < rain_chance else 0
97
-
98
- weather_data = {
99
- 'date': date + timedelta(hours=hour),
100
- 'temperature': hour_temp,
101
- 'humidity': np.clip(hour_humidity, 0, 100),
102
- 'rainfall': hour_rainfall,
103
- 'type': 'historical',
104
- 'description': self.get_weather_description(hour_temp, hour_humidity, hour_rainfall),
105
- 'temp_min': hour_temp - np.random.uniform(0, 2),
106
- 'temp_max': hour_temp + np.random.uniform(0, 2),
107
- 'wind_speed': np.random.normal(5, 2),
108
- 'clouds': np.random.normal(50, 20)
109
- }
110
- historical_data.append(weather_data)
111
 
112
- # Get 5-day forecast
113
  forecast_data = []
114
- forecast_url = f"https://api.openweathermap.org/data/2.5/forecast?lat={lat}&lon={lon}&appid={self.api_key}&units=metric"
115
  try:
 
116
  response = requests.get(forecast_url)
117
  if response.status_code == 200:
118
  data = response.json()
 
 
 
119
  for item in data['list']:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  forecast = {
121
- 'date': datetime.fromtimestamp(item['dt']),
122
- 'temperature': item['main']['temp'],
123
- 'humidity': item['main']['humidity'],
124
- 'rainfall': item.get('rain', {}).get('3h', 0) * 8,
 
 
125
  'type': 'forecast',
126
- 'description': item['weather'][0]['description'],
127
- 'temp_min': item['main']['temp_min'],
128
- 'temp_max': item['main']['temp_max'],
129
- 'wind_speed': item['wind']['speed'],
130
- 'clouds': item['clouds']['all']
131
  }
132
  forecast_data.append(forecast)
133
 
134
- # Generate extended forecast with patterns
135
- last_date = max(d['date'] for d in forecast_data)
136
- for day in range(1, forecast_days - 5):
137
- base_forecast = forecast_data[-1]
138
-
139
- for hour in range(24):
140
- date = last_date + timedelta(days=day, hours=hour)
141
-
142
- # Add seasonal trend
143
- seasonal_factor = np.sin(2 * np.pi * (date.timetuple().tm_yday / 365))
144
-
145
- # Temperature with daily and seasonal patterns
146
- temp = base_forecast['temperature'] + \
147
- 3 * np.sin((hour - 6) * np.pi / 12) + \
148
- 2 * seasonal_factor + \
149
- np.random.normal(0, 1)
150
-
151
- # Humidity with inverse pattern
152
- humidity = base_forecast['humidity'] - \
153
- 10 * np.sin((hour - 6) * np.pi / 12) - \
154
- 5 * seasonal_factor + \
155
- np.random.normal(0, 5)
156
-
157
- # Rainfall with seasonal influence
158
- rain_chance = 0.1 + 0.2 * np.sin((hour - 12) * np.pi / 12) + 0.1 * seasonal_factor
159
- rainfall = np.random.exponential(base_rainfall) if np.random.random() < rain_chance else 0
160
-
161
- extended_forecast = {
162
- 'date': date,
163
- 'temperature': temp,
164
- 'humidity': np.clip(humidity, 0, 100),
165
- 'rainfall': rainfall,
166
- 'type': 'forecast_extended',
167
- 'description': self.get_weather_description(temp, humidity, rainfall),
168
- 'temp_min': temp - np.random.uniform(0, 2),
169
- 'temp_max': temp + np.random.uniform(0, 2),
170
- 'wind_speed': base_forecast['wind_speed'] + np.random.normal(0, 1),
171
- 'clouds': np.clip(base_forecast['clouds'] + np.random.normal(0, 10), 0, 100)
172
- }
173
- forecast_data.append(extended_forecast)
174
-
175
  except Exception as e:
176
  print(f"Error fetching forecast data: {e}")
177
 
178
- # Combine and process data
179
  all_data = pd.DataFrame(historical_data + forecast_data)
180
 
181
  if not all_data.empty:
182
- # Sort and clean data
 
 
 
 
 
183
  all_data = all_data.sort_values('date')
184
 
185
- # Resample to hourly data while preserving patterns
186
- all_data = all_data.set_index('date').resample('1H').mean().reset_index()
187
-
188
- # Add analysis columns
189
  all_data['month'] = all_data['date'].dt.month
190
  all_data['season'] = all_data['month'].map(self.tanzania_seasons)
191
 
192
  # Calculate rolling averages
193
- all_data['temp_7day_avg'] = all_data['temperature'].rolling(window=168, min_periods=1).mean() # 7 days * 24 hours
194
- all_data['humidity_7day_avg'] = all_data['humidity'].rolling(window=168, min_periods=1).mean()
195
- all_data['rainfall_7day_avg'] = all_data['rainfall'].rolling(window=168, min_periods=1).mean()
 
 
 
196
 
197
- # Fill missing values
198
- all_data = all_data.fillna(method='ffill').fillna(method='bfill')
 
 
 
 
 
199
 
200
- # Calculate daily aggregates
201
- daily_data = all_data.groupby(all_data['date'].dt.date).agg({
202
- 'temperature': ['mean', 'min', 'max'],
203
  'humidity': 'mean',
204
  'rainfall': 'sum',
 
 
 
 
 
 
 
 
 
 
 
205
  'type': 'first',
206
  'description': 'first',
207
- 'wind_speed': 'mean',
208
- 'clouds': 'mean',
209
  'season': 'first'
210
- }).reset_index()
211
-
212
- # Flatten column names
213
- daily_data.columns = ['date', 'temperature', 'temp_min', 'temp_max', 'humidity',
214
- 'rainfall', 'type', 'description', 'wind_speed', 'clouds', 'season']
215
-
216
- # Convert date back to datetime
217
- daily_data['date'] = pd.to_datetime(daily_data['date'])
218
 
219
- # Add suitability and NDVI calculations
220
- daily_data['daily_suitability'] = self.calculate_daily_suitability(daily_data)
221
- daily_data['estimated_ndvi'] = self.estimate_ndvi(daily_data)
222
 
 
 
 
 
 
223
  return daily_data
224
 
225
  return pd.DataFrame()
 
53
  return None
54
 
55
  def get_weather_data(self, lat, lon, historical_days=90, forecast_days=90):
56
+ """Get historical and forecast weather data"""
57
  historical_data = []
58
 
59
+ # Get historical data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  for day in range(historical_days):
61
  date = datetime.now() - timedelta(days=day)
62
+ url = f"https://api.openweathermap.org/data/2.5/weather?lat={lat}&lon={lon}&appid={self.api_key}&units=metric&dt={int(date.timestamp())}"
63
+ try:
64
+ response = requests.get(url)
65
+ if response.status_code == 200:
66
+ data = response.json()
67
+ weather_data = {
68
+ 'date': date,
69
+ 'temperature': float(data['main']['temp']), # Ensure numeric
70
+ 'humidity': float(data['main']['humidity']),
71
+ 'rainfall': float(data.get('rain', {}).get('1h', 0)) * 24,
72
+ 'type': 'historical',
73
+ 'description': data['weather'][0]['description'],
74
+ 'temp_min': float(data['main']['temp_min']),
75
+ 'temp_max': float(data['main']['temp_max'])
76
+ }
77
+ historical_data.append(weather_data)
78
+ except Exception as e:
79
+ print(f"Error fetching historical data: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
+ # Get forecast data
82
  forecast_data = []
 
83
  try:
84
+ forecast_url = f"https://api.openweathermap.org/data/2.5/forecast?lat={lat}&lon={lon}&appid={self.api_key}&units=metric"
85
  response = requests.get(forecast_url)
86
  if response.status_code == 200:
87
  data = response.json()
88
+ # Group forecast data by day
89
+ daily_forecasts = {}
90
+
91
  for item in data['list']:
92
+ date = datetime.fromtimestamp(item['dt'])
93
+ day_key = date.date()
94
+
95
+ if day_key not in daily_forecasts:
96
+ daily_forecasts[day_key] = {
97
+ 'temps': [],
98
+ 'humidity': [],
99
+ 'rainfall': 0,
100
+ 'descriptions': []
101
+ }
102
+
103
+ daily_forecasts[day_key]['temps'].append(float(item['main']['temp']))
104
+ daily_forecasts[day_key]['humidity'].append(float(item['main']['humidity']))
105
+ daily_forecasts[day_key]['rainfall'] += float(item.get('rain', {}).get('3h', 0))
106
+ daily_forecasts[day_key]['descriptions'].append(item['weather'][0]['description'])
107
+
108
+ # Create daily forecast entries
109
+ for day_key, day_data in daily_forecasts.items():
110
  forecast = {
111
+ 'date': datetime.combine(day_key, datetime.min.time()),
112
+ 'temperature': np.mean(day_data['temps']),
113
+ 'temp_min': min(day_data['temps']),
114
+ 'temp_max': max(day_data['temps']),
115
+ 'humidity': np.mean(day_data['humidity']),
116
+ 'rainfall': day_data['rainfall'],
117
  'type': 'forecast',
118
+ 'description': max(day_data['descriptions'], key=day_data['descriptions'].count)
 
 
 
 
119
  }
120
  forecast_data.append(forecast)
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  except Exception as e:
123
  print(f"Error fetching forecast data: {e}")
124
 
125
+ # Combine all data
126
  all_data = pd.DataFrame(historical_data + forecast_data)
127
 
128
  if not all_data.empty:
129
+ # Ensure numeric types
130
+ numeric_columns = ['temperature', 'humidity', 'rainfall', 'temp_min', 'temp_max']
131
+ for col in numeric_columns:
132
+ all_data[col] = pd.to_numeric(all_data[col], errors='coerce')
133
+
134
+ # Sort by date
135
  all_data = all_data.sort_values('date')
136
 
137
+ # Add additional columns
 
 
 
138
  all_data['month'] = all_data['date'].dt.month
139
  all_data['season'] = all_data['month'].map(self.tanzania_seasons)
140
 
141
  # Calculate rolling averages
142
+ all_data['temp_7day_avg'] = all_data['temperature'].rolling(window=7, min_periods=1).mean()
143
+ all_data['humidity_7day_avg'] = all_data['humidity'].rolling(window=7, min_periods=1).mean()
144
+ all_data['rainfall_7day_avg'] = all_data['rainfall'].rolling(window=7, min_periods=1).mean()
145
+
146
+ # Calculate daily suitability
147
+ all_data['daily_suitability'] = self.calculate_daily_suitability(all_data)
148
 
149
+ # Calculate NDVI
150
+ all_data['estimated_ndvi'] = self.estimate_ndvi(all_data)
151
+
152
+ # Group by date to get daily values while preserving types
153
+ daily_data = pd.DataFrame()
154
+ daily_data['date'] = all_data['date'].dt.date.unique()
155
+ daily_data = daily_data.set_index('date')
156
 
157
+ # Aggregate numeric columns
158
+ numeric_aggs = {
159
+ 'temperature': 'mean',
160
  'humidity': 'mean',
161
  'rainfall': 'sum',
162
+ 'temp_min': 'min',
163
+ 'temp_max': 'max',
164
+ 'temp_7day_avg': 'last',
165
+ 'humidity_7day_avg': 'last',
166
+ 'rainfall_7day_avg': 'last',
167
+ 'daily_suitability': 'mean',
168
+ 'estimated_ndvi': 'mean'
169
+ }
170
+
171
+ # Aggregate categoric columns
172
+ categoric_aggs = {
173
  'type': 'first',
174
  'description': 'first',
 
 
175
  'season': 'first'
176
+ }
 
 
 
 
 
 
 
177
 
178
+ # Perform aggregations separately
179
+ numeric_data = all_data.groupby(all_data['date'].dt.date).agg(numeric_aggs)
180
+ categoric_data = all_data.groupby(all_data['date'].dt.date).agg(categoric_aggs)
181
 
182
+ # Combine the results
183
+ daily_data = pd.concat([numeric_data, categoric_data], axis=1)
184
+ daily_data = daily_data.reset_index()
185
+ daily_data['date'] = pd.to_datetime(daily_data['date'])
186
+
187
  return daily_data
188
 
189
  return pd.DataFrame()