Spaces:

alidenewade
/

nyc-urban-analytics

Sleeping

App Files Files Community

alidenewade committed 22 days ago

Commit

5ac174d

verified ·

1 Parent(s): 622eec2

Update app.py

Browse files

Files changed (1) hide show

app.py +109 -32

app.py CHANGED Viewed

@@ -236,46 +236,101 @@ def predict_crime_level(crime_felony, crime_misd, crime_viol, sr311_total, dob_p
         else:
             return "Predicted Crime Level: High (basic fallback)", {"Low": 0.1, "Medium": 0.3, "High": 0.6}
-def forecast_time_series(geoid):
     """Forecasts crime for a specific GEOID."""
     if panel_df is None or 'DUMMY' in panel_df['GEOID'].tolist():
         fig, ax = plt.subplots()
         ax.text(0.5, 0.5, "Data not loaded", ha='center', va='center')
         return fig, "Data not loaded."
     if geoid not in panel_df['GEOID'].unique():
-        return None, f"GEOID {geoid} not found in the dataset."
     tract_data = panel_df[panel_df['GEOID'] == geoid].set_index('month')['crime_total'].asfreq('MS')
     if len(tract_data) < 24: # Need enough data to forecast
-        return None, f"Not enough historical data for GEOID {geoid} to create a forecast."
-    # Simple SARIMAX model for demonstration
-    model_ts = SARIMAX(tract_data, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
-    results = model_ts.fit(disp=False)
-    forecast = results.get_forecast(steps=12)
-    forecast_mean = forecast.predicted_mean
-    forecast_ci = forecast.conf_int()
-    fig, ax = plt.subplots(figsize=(12, 6))
-    tract_data.plot(ax=ax, label='Historical')
-    forecast_mean.plot(ax=ax, label='Forecast')
-    ax.fill_between(forecast_ci.index,
-                    forecast_ci.iloc[:, 0],
-                    forecast_ci.iloc[:, 1], color='k', alpha=.25)
-    ax.set_title(f'Crime Forecast for Census Tract {geoid}')
-    ax.set_xlabel('Date')
-    ax.set_ylabel('Crime Total')
-    ax.legend()
-    ax.grid(True)
-    plt.tight_layout()
-    metrics_text = f"Forecast for GEOID: {geoid}\n"
-    metrics_text += "Mean Absolute Error (on test set) would be calculated here in a full implementation."
-    return fig, metrics_text
 # --- Gradio App Layout ---
 with gr.Blocks() as demo:
@@ -372,20 +427,42 @@ with gr.Blocks() as demo:
             with gr.TabItem("Time Series Forecasting"):
                 gr.Markdown("## Forecast Future Crime Counts")
-                gr.Markdown("Enter a Census Tract GEOID to forecast the total crime count for the next 12 months.")
                 with gr.Row():
                     with gr.Column():
-                        geoid_input = gr.Textbox(label="Enter GEOID", placeholder="e.g., 36005000100")
                         forecast_button = gr.Button("Generate Forecast")
                     with gr.Column():
-                        forecast_metrics = gr.Textbox(label="Forecast Metrics", interactive=False)
                 forecast_plot = gr.Plot()
                 forecast_button.click(
                     fn=forecast_time_series,
-                    inputs=[geoid_input],
-                    outputs=[forecast_plot, forecast_metrics]
                 )
 if __name__ == "__main__":

         else:
             return "Predicted Crime Level: High (basic fallback)", {"Low": 0.1, "Medium": 0.3, "High": 0.6}
def _message_figure(message, title=None, figsize=None):
    """Build a placeholder figure showing ``message`` centred on empty axes.

    The figure is closed in pyplot before being returned so that pyplot's
    global figure registry does not grow with every request; the returned
    ``Figure`` object itself can still be rendered by the caller.
    """
    fig, ax = plt.subplots(figsize=figsize)
    ax.text(0.5, 0.5, message, ha='center', va='center')
    if title is not None:
        ax.set_title(title)
    plt.close(fig)
    return fig


def _metric_report(selected_metric, results, actual):
    """Return the text section for ``selected_metric``.

    ``results`` is the fitted model result (its ``aic``, ``bic`` and
    ``fittedvalues`` attributes are read) and ``actual`` is the series the
    model was fitted on. Error metrics are in-sample and ignore months where
    either the observation or the fitted value is missing.
    """
    if selected_metric == "Akaike Information Criterion (AIC)":
        return (f"AIC: {results.aic:.2f}\n"
                "Lower AIC indicates better model quality.\n")
    if selected_metric == "Bayesian Information Criterion (BIC)":
        return (f"BIC: {results.bic:.2f}\n"
                "Lower BIC indicates better model quality.\n")

    residuals = (actual - results.fittedvalues).dropna()
    if selected_metric == "Mean Absolute Error (MAE)":
        mae = residuals.abs().mean()
        return (f"In-Sample MAE: {mae:.2f}\n"
                "Lower MAE indicates better model fit.\n")
    if selected_metric == "Root Mean Square Error (RMSE)":
        rmse = np.sqrt((residuals ** 2).mean())
        return (f"In-Sample RMSE: {rmse:.2f}\n"
                "Lower RMSE indicates better model fit.\n")
    if selected_metric == "Mean Absolute Percentage Error (MAPE)":
        observed = actual.loc[residuals.index]
        nonzero = observed != 0
        if not nonzero.any():
            return "In-Sample MAPE: undefined (all observed values are zero)\n"
        # A percentage error is undefined when the actual value is zero, so
        # those months are left out instead of turning the mean into inf.
        mape = (residuals[nonzero] / observed[nonzero]).abs().mean() * 100
        text = f"In-Sample MAPE: {mape:.2f}%\n"
        skipped = int((~nonzero).sum())
        if skipped:
            text += f"({skipped} month(s) with zero crime excluded)\n"
        return text + "Lower MAPE indicates better model fit.\n"

    # Make an unexpected selection visible rather than emitting nothing.
    return f"Unrecognized metric: {selected_metric}\n"


def forecast_time_series(geoid, selected_metric):
    """Forecast 12 months of ``crime_total`` for one GEOID.

    Args:
        geoid: GEOID value to look up in ``panel_df['GEOID']``. Surrounding
            whitespace is stripped from string input because the UI dropdown
            accepts typed values.
        selected_metric: Label of the evaluation metric to report (one of the
            MAE / RMSE / MAPE / AIC / BIC labels offered by the UI).

    Returns:
        A ``(figure, text)`` tuple. On success the figure shows the history,
        the forecast and its confidence interval; otherwise it is a
        placeholder figure carrying the same message as ``text``.
    """
    print(f"DEBUG: forecast_time_series called with GEOID={geoid}, metric={selected_metric}")
    if panel_df is None or 'DUMMY' in panel_df['GEOID'].tolist():
        return _message_figure("Data not loaded"), "Data not loaded."

    if isinstance(geoid, str):
        geoid = geoid.strip()

    tract_rows = panel_df[panel_df['GEOID'] == geoid]
    if tract_rows.empty:
        message = f"GEOID {geoid} not found in the dataset."
        return _message_figure(message, "GEOID Not Found", (12, 6)), message

    tract_data = tract_rows.set_index('month')['crime_total'].asfreq('MS')
    # asfreq inserts NaN for months missing from the panel, so count real
    # observations rather than the length of the reindexed series.
    observed_months = int(tract_data.notna().sum())
    if observed_months < 24:  # Need enough data to forecast
        fig = _message_figure(
            f"Not enough historical data for GEOID {geoid}\n(need at least 24 months)",
            "Insufficient Data",
            (12, 6),
        )
        return fig, f"Not enough historical data for GEOID {geoid} to create a forecast."

    fig = None
    try:
        # Simple SARIMAX model for demonstration
        model_ts = SARIMAX(tract_data, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
        results = model_ts.fit(disp=False)
        forecast = results.get_forecast(steps=12)
        forecast_mean = forecast.predicted_mean
        forecast_ci = forecast.conf_int()

        fig, ax = plt.subplots(figsize=(12, 6))
        tract_data.plot(ax=ax, label='Historical', color='blue')
        forecast_mean.plot(ax=ax, label='Forecast', color='red')
        ax.fill_between(forecast_ci.index,
                        forecast_ci.iloc[:, 0],
                        forecast_ci.iloc[:, 1], color='red', alpha=.25, label='Confidence Interval')
        ax.set_title(f'Crime Forecast for Census Tract {geoid}')
        ax.set_xlabel('Date')
        ax.set_ylabel('Crime Total')
        ax.legend()
        ax.grid(True)
        # Lay out this figure explicitly instead of pyplot's "current" one.
        fig.tight_layout()

        # For demonstration, metrics are in-sample fit statistics.
        sections = [
            f"Forecast Results for GEOID: {geoid}\n",
            f"Selected Metric: {selected_metric}\n",
            "=" * 50 + "\n\n",
            _metric_report(selected_metric, results, tract_data),
            "\nForecast Summary:\n",
            f"• Historical data points: {observed_months}\n",
            "• Forecast horizon: 12 months\n",
            f"• Average historical crime: {tract_data.mean():.2f}\n",
            f"• Average forecast: {forecast_mean.mean():.2f}\n",
        ]
        # Deregister from pyplot so figures do not pile up across requests.
        plt.close(fig)
        return fig, "".join(sections)
    except Exception as e:
        # UI boundary: report the failure to the user instead of raising.
        print(f"DEBUG: Error in forecasting: {e}")
        if fig is not None:
            plt.close(fig)
        error_fig = _message_figure(f"Error in forecasting:\n{str(e)}", "Forecasting Error", (12, 6))
        return error_fig, f"Error in forecasting for GEOID {geoid}: {str(e)}"
 # --- Gradio App Layout ---
 with gr.Blocks() as demo:
             with gr.TabItem("Time Series Forecasting"):
                 gr.Markdown("## Forecast Future Crime Counts")
+                gr.Markdown("Select a Census Tract GEOID to forecast the total crime count for the next 12 months.")
                 with gr.Row():
                     with gr.Column():
+                        # Create list of available GEOIDs for dropdown
+                        available_geoids = sorted(panel_df['GEOID'].unique().tolist()) if 'DUMMY' not in panel_df['GEOID'].tolist() else ['36005000100', '36005000200']
+                        geoid_dropdown = gr.Dropdown(
+                            label="Select GEOID",
+                            choices=available_geoids,
+                            value=available_geoids[0] if available_geoids else None,
+                            allow_custom_value=True,
+                            filterable=True,
+                            info="Type to search or select from list"
+                        )
+                        forecast_metrics_dropdown = gr.Dropdown(
+                            label="Forecast Evaluation Metric",
+                            choices=["Mean Absolute Error (MAE)",
+                                   "Root Mean Square Error (RMSE)",
+                                   "Mean Absolute Percentage Error (MAPE)",
+                                   "Akaike Information Criterion (AIC)",
+                                   "Bayesian Information Criterion (BIC)"],
+                            value="Mean Absolute Error (MAE)",
+                            info="Select metric to display in forecast evaluation"
+                        )
                         forecast_button = gr.Button("Generate Forecast")
                     with gr.Column():
+                        forecast_metrics_output = gr.Textbox(label="Forecast Metrics", interactive=False, lines=5)
                 forecast_plot = gr.Plot()
                 forecast_button.click(
                     fn=forecast_time_series,
+                    inputs=[geoid_dropdown, forecast_metrics_dropdown],
+                    outputs=[forecast_plot, forecast_metrics_output]
                 )
 if __name__ == "__main__":