Spaces:

lyimo
/

tobacco

Sleeping

App Files Files Community

lyimo committed on Nov 24, 2024

Commit

bf26f29

verified ·

1 Parent(s): 5c02e21

Update part1_data.py

Browse files

Files changed (1) hide show

part1_data.py +156 -238

part1_data.py CHANGED Viewed

@@ -8,8 +8,6 @@ import requests
 from geopy.geocoders import Nominatim
 from geopy.exc import GeocoderTimedOut
 from scipy import stats
-import ee
-import geemap
 # Get API key from environment variable
 OPENWEATHER_API_KEY = os.getenv('OPENWEATHER_API_KEY', 'default_key')
@@ -21,7 +19,7 @@ class TobaccoAnalyzer:
             'temperature': {'min': 20, 'max': 30},
             'humidity': {'min': 60, 'max': 80},
             'rainfall': {'min': 500/365, 'max': 1200/365},
-            'ndvi': {'min': 0.3, 'max': 0.8}  # Optimal NDVI range for tobacco
         }
         self.geolocator = Nominatim(user_agent="tobacco_analyzer")
         self.seasons = {
@@ -30,13 +28,13 @@ class TobaccoAnalyzer:
             7: 'Summer', 8: 'Summer', 9: 'Fall',
             10: 'Fall', 11: 'Fall', 12: 'Winter'
         }
-        # Initialize Earth Engine
-        try:
-            ee.Initialize()
-            self.ee_initialized = True
-        except Exception as e:
-            print(f"Error initializing Earth Engine: {e}")
-            self.ee_initialized = False
     def geocode_location(self, location_name):
         """Convert location name to coordinates"""
@@ -46,12 +44,23 @@ class TobaccoAnalyzer:
                 return {
                     'lat': location.latitude,
                     'lon': location.longitude,
-                    'address': location.address
                 }
             return None
         except GeocoderTimedOut:
             return None
     def get_weather_data(self, lat, lon, historical_days=90, forecast_days=90):
         """Get historical and forecast weather data"""
         historical_data = []
@@ -70,265 +79,174 @@ class TobaccoAnalyzer:
                         'humidity': data['main']['humidity'],
                         'rainfall': data.get('rain', {}).get('1h', 0) * 24,
                         'type': 'historical',
-                        'description': data['weather'][0]['description']
                     }
                     historical_data.append(weather_data)
             except Exception as e:
                 print(f"Error fetching historical data: {e}")
         # Get forecast data
         forecast_data = []
         try:
             forecast_url = f"https://api.openweathermap.org/data/2.5/forecast?lat={lat}&lon={lon}&appid={self.api_key}&units=metric"
             response = requests.get(forecast_url)
             if response.status_code == 200:
                 data = response.json()
                 for item in data['list']:
                     forecast = {
-                        'date': datetime.fromtimestamp(item['dt']),
                         'temperature': item['main']['temp'],
                         'humidity': item['main']['humidity'],
                         'rainfall': item.get('rain', {}).get('3h', 0) * 8,
                         'type': 'forecast_5day',
-                        'description': item['weather'][0]['description']
                     }
                     forecast_data.append(forecast)
-                # Generate extended forecast
                 last_date = max(d['date'] for d in forecast_data)
-                historical_df = pd.DataFrame(historical_data)
-                for day in range(1, forecast_days - 5):
-                    date = last_date + timedelta(days=day)
-                    if not historical_df.empty:
-                        temp_trend = stats.linregress(range(len(historical_df)), historical_df['temperature'])[0]
-                        humidity_trend = stats.linregress(range(len(historical_df)), historical_df['humidity'])[0]
-                        rainfall_trend = stats.linregress(range(len(historical_df)), historical_df['rainfall'])[0]
-                    else:
-                        temp_trend = humidity_trend = rainfall_trend = 0
-                    recent_temps = [d['temperature'] for d in forecast_data[-5:]]
-                    recent_humidity = [d['humidity'] for d in forecast_data[-5:]]
-                    recent_rainfall = [d['rainfall'] for d in forecast_data[-5:]]
-                    extended_forecast = {
-                        'date': date,
-                        'temperature': np.mean(recent_temps) + temp_trend * day,
-                        'humidity': np.mean(recent_humidity) + humidity_trend * day,
-                        'rainfall': np.mean(recent_rainfall) + rainfall_trend * day,
-                        'type': 'forecast_extended',
-                        'description': 'Extended Forecast'
-                    }
-                    forecast_data.append(extended_forecast)
         except Exception as e:
             print(f"Error fetching forecast data: {e}")
-        # Combine and process all data
-        all_data = pd.DataFrame(historical_data + forecast_data)
-        all_data = all_data.sort_values('date')
-        # Add analysis columns
-        all_data['month'] = all_data['date'].dt.month
-        all_data['season'] = all_data['month'].map(self.seasons)
-        # Calculate rolling averages
-        all_data['temp_7day_avg'] = all_data['temperature'].rolling(window=7, min_periods=1).mean()
-        all_data['humidity_7day_avg'] = all_data['humidity'].rolling(window=7, min_periods=1).mean()
-        all_data['rainfall_7day_avg'] = all_data['rainfall'].rolling(window=7, min_periods=1).mean()
-        return all_data
-    def get_ndvi_data(self, lat, lon, radius=2000):
-        """Get NDVI data for location"""
-        try:
-            point = ee.Geometry.Point([lon, lat])
-            area = point.buffer(radius)
-            end_date = datetime.now()
-            start_date = end_date - timedelta(days=90)
-            s2 = ee.ImageCollection('COPERNICUS/S2_SR') \
-                .filterDate(start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d')) \
-                .filterBounds(area) \
-                .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))
-            def addNDVI(image):
-                ndvi = image.normalizedDifference(['B8', 'B4']).rename('NDVI')
-                return image.addBands(ndvi)
-            s2_ndvi = s2.map(addNDVI)
-            ndvi_image = s2_ndvi.select('NDVI').mean()
-            stats = ndvi_image.reduceRegion(
-                reducer=ee.Reducer.mean().combine(
-                    reducer2=ee.Reducer.stdDev(),
-                    sharedInputs=True
-                ).combine(
-                    reducer2=ee.Reducer.minMax(),
-                    sharedInputs=True
-                ),
-                geometry=area,
-                scale=10,
-                maxPixels=1e9
-            ).getInfo()
-            return {
-                'image': ndvi_image,
-                'stats': stats,
-                'area': area
-            }
-        except Exception as e:
-            print(f"Error fetching NDVI data: {e}")
-            return None
-    def analyze_location(self, location_name, historical_days=90, forecast_days=90):
-        """Comprehensive location analysis including weather and NDVI"""
-        try:
-            location_info = self.geocode_location(location_name)
-            if not location_info:
-                raise ValueError(f"Could not find coordinates for location: {location_name}")
-            lat = location_info['lat']
-            lon = location_info['lon']
-            weather_data = self.get_weather_data(lat, lon, historical_days, forecast_days)
-            weather_analysis = self.analyze_trends(weather_data)
-            weather_score = self.calculate_weather_score(weather_analysis)
-            ndvi_data = None
-            ndvi_score = None
-            if self.ee_initialized:
-                try:
-                    ndvi_data = self.get_ndvi_data(lat, lon)
-                    ndvi_score = self.calculate_ndvi_score(ndvi_data)
-                except Exception as e:
-                    print(f"Error getting NDVI data: {e}")
-            return {
-                'location': location_info,
-                'weather_data': weather_data,
-                'weather_analysis': weather_analysis,
-                'weather_score': weather_score,
-                'ndvi_data': ndvi_data,
-                'ndvi_score': ndvi_score,
-                'combined_score': self.calculate_combined_score(weather_score, ndvi_score)
-            }
-        except Exception as e:
-            print(f"Error in location analysis: {e}")
-            return None
-    def analyze_trends(self, df):
-        """Analyze weather trends and patterns"""
-        historical = df[df['type'] == 'historical']
-        forecast = df[df['type'].isin(['forecast_5day', 'forecast_extended'])]
-        analysis = {
-            'historical': {
-                'temperature': {
-                    'mean': historical['temperature'].mean(),
-                    'std': historical['temperature'].std(),
-                    'trend': stats.linregress(range(len(historical)), historical['temperature'])[0]
-                },
-                'humidity': {
-                    'mean': historical['humidity'].mean(),
-                    'std': historical['humidity'].std(),
-                    'trend': stats.linregress(range(len(historical)), historical['humidity'])[0]
-                },
-                'rainfall': {
-                    'mean': historical['rainfall'].mean(),
-                    'std': historical['rainfall'].std(),
-                    'trend': stats.linregress(range(len(historical)), historical['rainfall'])[0]
-                }
-            },
-            'forecast': {
-                'temperature': {
-                    'mean': forecast['temperature'].mean(),
-                    'std': forecast['temperature'].std(),
-                },
-                'humidity': {
-                    'mean': forecast['humidity'].mean(),
-                    'std': forecast['humidity'].std(),
-                },
-                'rainfall': {
-                    'mean': forecast['rainfall'].mean(),
-                    'std': forecast['rainfall'].std(),
                 }
-            }
-        }
-        return analysis
-    def calculate_ndvi_score(self, ndvi_data):
-        """Calculate a score based on NDVI data"""
-        if not ndvi_data or 'stats' not in ndvi_data:
-            return None
-        stats = ndvi_data['stats']
-        mean_ndvi = stats.get('NDVI_mean', 0)
-        # Convert NDVI from -1:1 scale to 0:1 scale
-        score = (mean_ndvi + 1) / 2
-        # Adjust score based on optimal NDVI ranges
-        if self.optimal_conditions['ndvi']['min'] <= mean_ndvi <= self.optimal_conditions['ndvi']['max']:
-            score *= 1.2  # Bonus for optimal range
-        elif mean_ndvi < 0:
-            score *= 0.5  # Penalty for very low vegetation
-        return min(1.0, max(0.0, score))
-    def calculate_weather_score(self, weather_analysis):
-        """Calculate weather suitability score"""
-        if not weather_analysis:
-            return None
-        historical = weather_analysis['historical']
-        temp_mean = historical['temperature']['mean']
-        humidity_mean = historical['humidity']['mean']
-        rainfall_mean = historical['rainfall']['mean']
-        temp_score = self.calculate_range_score(
-            temp_mean,
-            self.optimal_conditions['temperature']['min'],
-            self.optimal_conditions['temperature']['max']
-        )
-        humidity_score = self.calculate_range_score(
-            humidity_mean,
-            self.optimal_conditions['humidity']['min'],
-            self.optimal_conditions['humidity']['max']
-        )
-        rainfall_score = self.calculate_range_score(
-            rainfall_mean,
-            self.optimal_conditions['rainfall']['min'],
-            self.optimal_conditions['rainfall']['max']
-        )
-        return (temp_score * 0.4 + humidity_score * 0.3 + rainfall_score * 0.3)
-    def calculate_range_score(self, value, min_val, max_val):
-        """Calculate score based on optimal range"""
-        if min_val <= value <= max_val:
-            return 1.0
-        elif value < min_val:
-            return max(0, 1 - (min_val - value) / min_val)
-        else:
-            return max(0, 1 - (value - max_val) / max_val)
-    def calculate_combined_score(self, weather_score, ndvi_score):
-        """Calculate combined suitability score"""
-        if weather_score is None:
-            return None
-        if ndvi_score is None:
-            return weather_score
-        weather_weight = 0.6
-        ndvi_weight = 0.4
-        return (weather_score * weather_weight) + (ndvi_score * ndvi_weight)

 from geopy.geocoders import Nominatim
 from geopy.exc import GeocoderTimedOut
 from scipy import stats
 # Get API key from environment variable
 OPENWEATHER_API_KEY = os.getenv('OPENWEATHER_API_KEY', 'default_key')
             'temperature': {'min': 20, 'max': 30},
             'humidity': {'min': 60, 'max': 80},
             'rainfall': {'min': 500/365, 'max': 1200/365},
+            'ndvi': {'min': 0.3, 'max': 0.8}  # Added NDVI optimal range for tobacco
         }
         self.geolocator = Nominatim(user_agent="tobacco_analyzer")
         self.seasons = {
             7: 'Summer', 8: 'Summer', 9: 'Fall',
             10: 'Fall', 11: 'Fall', 12: 'Winter'
         }
+        # Tanzania-specific growing seasons
+        self.tanzania_seasons = {
+            1: 'Main', 2: 'Main', 3: 'Main',  # Main growing season
+            4: 'Late', 5: 'Late', 6: 'Dry',
+            7: 'Dry', 8: 'Dry', 9: 'Early',
+            10: 'Early', 11: 'Early', 12: 'Main'
+        }
     def geocode_location(self, location_name):
         """Convert location name to coordinates"""
                 return {
                     'lat': location.latitude,
                     'lon': location.longitude,
+                    'address': location.address,
+                    'region': self.get_tanzania_region(location.address)
                 }
             return None
         except GeocoderTimedOut:
             return None
+    def get_tanzania_region(self, address):
+        """Return the first lowercased address component containing a known area name (tabora, urambo, sikonge, nzega), or None"""
+        if address:
+            address_parts = address.lower().split(',')
+            tanzania_regions = ['tabora', 'urambo', 'sikonge', 'nzega']
+            for part in address_parts:
+                if any(region in part.strip() for region in tanzania_regions):
+                    return part.strip()
+        return None
     def get_weather_data(self, lat, lon, historical_days=90, forecast_days=90):
         """Get historical and forecast weather data"""
         historical_data = []
                         'humidity': data['main']['humidity'],
                         'rainfall': data.get('rain', {}).get('1h', 0) * 24,
                         'type': 'historical',
+                        'description': data['weather'][0]['description'],
+                        'season': self.tanzania_seasons[date.month]  # Use Tanzania-specific seasons
                     }
                     historical_data.append(weather_data)
             except Exception as e:
                 print(f"Error fetching historical data: {e}")
         # Get forecast data
+        forecast_data = self.get_forecast_data(lat, lon, forecast_days)
+        # Combine and process all data
+        all_data = pd.DataFrame(historical_data + forecast_data)
+        if not all_data.empty:
+            all_data = all_data.sort_values('date')
+            all_data['month'] = all_data['date'].dt.month
+            all_data['season'] = all_data['month'].map(self.tanzania_seasons)
+            # Calculate rolling averages
+            all_data['temp_7day_avg'] = all_data['temperature'].rolling(window=7, min_periods=1).mean()
+            all_data['humidity_7day_avg'] = all_data['humidity'].rolling(window=7, min_periods=1).mean()
+            all_data['rainfall_7day_avg'] = all_data['rainfall'].rolling(window=7, min_periods=1).mean()
+            # Add vegetation index estimates based on weather conditions
+            all_data['estimated_ndvi'] = self.estimate_ndvi(all_data)
+        return all_data
+    def get_forecast_data(self, lat, lon, forecast_days):
+        """Get and process forecast data"""
         forecast_data = []
         try:
+            # Get 5-day forecast
             forecast_url = f"https://api.openweathermap.org/data/2.5/forecast?lat={lat}&lon={lon}&appid={self.api_key}&units=metric"
             response = requests.get(forecast_url)
             if response.status_code == 200:
                 data = response.json()
                 for item in data['list']:
+                    date = datetime.fromtimestamp(item['dt'])
                     forecast = {
+                        'date': date,
                         'temperature': item['main']['temp'],
                         'humidity': item['main']['humidity'],
                         'rainfall': item.get('rain', {}).get('3h', 0) * 8,
                         'type': 'forecast_5day',
+                        'description': item['weather'][0]['description'],
+                        'season': self.tanzania_seasons[date.month]
                     }
                     forecast_data.append(forecast)
+                # Generate extended forecast by extrapolating the trend of the most recent forecast entries
                 last_date = max(d['date'] for d in forecast_data)
+                forecast_data.extend(
+                    self.generate_extended_forecast(forecast_data, last_date, forecast_days)
+                )
         except Exception as e:
             print(f"Error fetching forecast data: {e}")
+        return forecast_data
+    def generate_extended_forecast(self, forecast_data, last_date, forecast_days):
+        """Extrapolate daily forecasts past last_date from the linear trend of the latest forecast entries"""
+        extended_data = []
+        recent_data = pd.DataFrame(forecast_data[-5:])  # Use the last 5 forecast entries for trends (entries are 3-hourly steps, not days)
+        if not recent_data.empty:
+            temp_trend = stats.linregress(range(len(recent_data)), recent_data['temperature'])[0]
+            humidity_trend = stats.linregress(range(len(recent_data)), recent_data['humidity'])[0]
+            rainfall_trend = stats.linregress(range(len(recent_data)), recent_data['rainfall'])[0]
+            for day in range(1, forecast_days - 5):
+                date = last_date + timedelta(days=day)
+                extended_forecast = {
+                    'date': date,
+                    'temperature': recent_data['temperature'].mean() + temp_trend * day,
+                    'humidity': recent_data['humidity'].mean() + humidity_trend * day,
+                    'rainfall': recent_data['rainfall'].mean() + rainfall_trend * day,
+                    'type': 'forecast_extended',
+                    'description': 'Extended Forecast',
+                    'season': self.tanzania_seasons[date.month]
                 }
+                extended_data.append(extended_forecast)
+        return extended_data
+    def estimate_ndvi(self, weather_data):
+        """Estimate a proxy NDVI from temperature, humidity, rainfall and season (weather heuristic, not a satellite measurement)"""
+        # Create a baseline NDVI estimate using weather parameters
+        normalized_temp = (weather_data['temperature'] - 15) / (30 - 15)
+        normalized_humidity = (weather_data['humidity'] - 50) / (80 - 50)
+        normalized_rainfall = weather_data['rainfall'] / 5
+        # Season adjustment factors for Tanzania
+        season_factors = {
+            'Main': 1.0,    # Best growing season
+            'Early': 0.8,   # Early growing season
+            'Late': 0.7,    # Late growing season
+            'Dry': 0.5      # Dry season
+        }
+        # Apply season adjustments
+        season_multiplier = weather_data['season'].map(season_factors)
+        # Combine factors to estimate NDVI
+        estimated_ndvi = (
+            0.4 * normalized_temp +
+            0.3 * normalized_humidity +
+            0.3 * normalized_rainfall
+        ) * season_multiplier
+        # Clip values to realistic NDVI range (-1 to 1)
+        return np.clip(estimated_ndvi, -1, 1)
+    def analyze_trends(self, df):
+        """Analyze weather trends and patterns"""
+        try:
+            historical = df[df['type'] == 'historical']
+            forecast = df[df['type'].isin(['forecast_5day', 'forecast_extended'])]
+            if historical.empty:
+                return None
+            analysis = {
+                'historical': {
+                    'temperature': {
+                        'mean': historical['temperature'].mean(),
+                        'std': historical['temperature'].std(),
+                        'trend': stats.linregress(range(len(historical)), historical['temperature'])[0]
+                    },
+                    'humidity': {
+                        'mean': historical['humidity'].mean(),
+                        'std': historical['humidity'].std(),
+                        'trend': stats.linregress(range(len(historical)), historical['humidity'])[0]
+                    },
+                    'rainfall': {
+                        'mean': historical['rainfall'].mean(),
+                        'std': historical['rainfall'].std(),
+                        'trend': stats.linregress(range(len(historical)), historical['rainfall'])[0]
+                    },
+                    'ndvi': {
+                        'mean': historical['estimated_ndvi'].mean(),
+                        'std': historical['estimated_ndvi'].std(),
+                        'trend': stats.linregress(range(len(historical)), historical['estimated_ndvi'])[0]
+                    }
+                }
+            }
+            if not forecast.empty:
+                analysis['forecast'] = {
+                    'temperature': {
+                        'mean': forecast['temperature'].mean(),
+                        'std': forecast['temperature'].std(),
+                    },
+                    'humidity': {
+                        'mean': forecast['humidity'].mean(),
+                        'std': forecast['humidity'].std(),
+                    },
+                    'rainfall': {
+                        'mean': forecast['rainfall'].mean(),
+                        'std': forecast['rainfall'].std(),
+                    },
+                    'ndvi': {
+                        'mean': forecast['estimated_ndvi'].mean(),
+                        'std': forecast['estimated_ndvi'].std(),
+                    }
+                }
+            return analysis
+        except Exception as e:
+            print(f"Error in trend analysis: {e}")
+            return None