Spaces:

Vishwas1
/

Moirai_Stock_Price_Predictor

Running

App Files Files Community

Vishwas1 committed about 1 month ago

Commit

b60a104

verified ·

1 Parent(s): 0be8fa0

Update app.py

Browse files

Files changed (1) hide show

app.py +195 -76

app.py CHANGED Viewed

@@ -10,15 +10,22 @@ import matplotlib.pyplot as plt
 import torch
 from gluonts.dataset.common import ListDataset
-# Moirai 2.0 via Uni2TS (per Salesforce's example)
-# https://www.salesforce.com/blog/moirai-2-0/
-from uni2ts.model.moirai2 import Moirai2Forecast, Moirai2Module  # type: ignore
 MODEL_ID = "Salesforce/moirai-2.0-R-small"
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-# Load the Moirai 2.0 module once at startup
 _MODULE = None
 def load_module():
     global _MODULE
@@ -26,6 +33,59 @@ def load_module():
         _MODULE = Moirai2Module.from_pretrained(MODEL_ID)
     return _MODULE
 def fetch_series(ticker: str, years: int) -> pd.Series:
     """Fetch daily close price and align to business-day frequency."""
     data = yf.download(
@@ -39,17 +99,14 @@ def fetch_series(ticker: str, years: int) -> pd.Series:
     if data is None or data.empty:
         raise gr.Error(f"No price data found for '{ticker}'.")
-    # Choose a price column
     col = "Close" if "Close" in data.columns else ("Adj Close" if "Adj Close" in data.columns else None)
     if col is None:
         raise gr.Error(f"Unexpected columns from yfinance: {list(data.columns)}")
-    # yfinance can sometimes return a MultiIndex (e.g., if a list of tickers slips through)
     if isinstance(data.columns, pd.MultiIndex):
         if ticker in data[col].columns:
             s = data[col][ticker]
         else:
-            # fall back to the first column
             s = data[col].iloc[:, 0]
     else:
         s = data[col]
@@ -66,98 +123,160 @@ def fetch_series(ticker: str, years: int) -> pd.Series:
         raise gr.Error(f"Only missing values for '{ticker}'.")
     return y
-def forecast_ticker(ticker: str,
-                    horizon: int,
-                    lookback_years: int,
-                    context_hint: int):
     ticker = (ticker or "").strip().upper()
     if not ticker:
         raise gr.Error("Please enter a ticker symbol (e.g., AAPL).")
     if horizon < 1:
         raise gr.Error("Forecast horizon must be at least 1.")
-    # 1) Get history
     y = fetch_series(ticker, lookback_years)
-    if len(y) < 50:
-        raise gr.Error("Not enough history to forecast (need at least 50 points).")
-    # 2) Build dataset for GluonTS-style predictor
-    #    Use business-day freq ('B'); pick a context <= history length.
-    default_ctx = 1680  # from Moirai 2.0 examples
-    ctx = int(np.clip(context_hint or default_ctx, 32, len(y)))
-    target = y.values[-ctx:]
-    start_idx = y.index[-ctx]
-    ds = ListDataset([{"start": start_idx, "target": target}], freq="B")
-    # 3) Create forecast wrapper and predictor
-    module = load_module()
-    model = Moirai2Forecast(
-        module=module,
-        prediction_length=int(horizon),
-        context_length=ctx,
-        target_dim=1,
-        feat_dynamic_real_dim=0,
-        past_feat_dynamic_real_dim=0,
-    )
-    predictor = model.create_predictor(batch_size=32)  # remove device=...
-    # 4) Predict
-    forecast = next(iter(predictor.predict(ds)))
-    # 5) Extract a reasonable central estimate
-    if hasattr(forecast, "mean"):
-        yhat = np.asarray(forecast.mean)
-    elif hasattr(forecast, "quantile"):
-        # 50th percentile as point
-        yhat = np.asarray(forecast.quantile(0.5))
-    elif hasattr(forecast, "samples"):
-        yhat = np.asarray(forecast.samples).mean(axis=0)
     else:
-        # very defensive fallback
-        yhat = np.asarray(forecast)
-    # Guard length (some forecast objects can be slightly longer)
-    yhat = np.asarray(yhat).ravel()[:horizon]
-    # 6) Assemble dates & outputs
-    # Next business days after the last historical date
-    future_idx = pd.bdate_range(y.index[-1] + pd.tseries.offsets.BDay(), periods=horizon)
-    pred = pd.Series(yhat, index=future_idx, name="predicted_close")
-    # 7) Plot
-    fig = plt.figure(figsize=(10, 5))
-    plt.plot(y.index, y.values, label="history")
-    plt.plot(pred.index, pred.values, label="forecast")
-    plt.title(f"{ticker} close price forecast (Moirai 2.0 R-small)")
-    plt.xlabel("Date"); plt.ylabel("Price"); plt.legend(); plt.tight_layout()
-    # 8) Table
-    out_df = pd.DataFrame({"date": pred.index, "predicted_close": pred.values})
-    return fig, out_df
-with gr.Blocks(title="Moirai 2.0 — Stock Price Forecast (Research)") as demo:
     gr.Markdown(
         """
-# Moirai 2.0 — Stock Price Forecast (Research)
-Enter a ticker to fetch recent daily prices and generate a short-term forecast using **Salesforce/moirai-2.0-R-small**.
-> **Important**: For **research/educational** use only. Not investment advice. Model license is **CC-BY-NC-4.0 (non-commercial)**.
         """
     )
-    with gr.Row():
-        ticker = gr.Textbox(label="Ticker", value="AAPL", placeholder="e.g., AAPL, MSFT, TSLA")
-        horizon = gr.Slider(5, 120, value=30, step=1, label="Forecast horizon (business days)")
-    with gr.Row():
-        lookback = gr.Slider(1, 10, value=5, step=1, label="Lookback window (years of history)")
-        ctx = gr.Slider(64, 2000, value=1680, step=16, label="Context length (points)")
-    run = gr.Button("Run forecast", variant="primary")
-    plot = gr.Plot(label="History + Forecast")
-    table = gr.Dataframe(label="Forecast table", interactive=False)
-    run.click(forecast_ticker, inputs=[ticker, horizon, lookback, ctx], outputs=[plot, table])
 if __name__ == "__main__":
     demo.launch()

 import torch
 from gluonts.dataset.common import ListDataset
+from pandas.tseries.frequencies import to_offset
+# Moirai 2.0 via Uni2TS
+try:
+    from uni2ts.model.moirai2 import Moirai2Forecast, Moirai2Module
+except Exception as e:
+    raise ImportError(
+        "Moirai 2.0 not found in your Uni2TS install. "
+        "Make sure requirements.txt installs Uni2TS from GitHub: "
+        "git+https://github.com/SalesforceAIResearch/uni2ts.git\n"
+        f"Original error: {e}"
+    )
 MODEL_ID = "Salesforce/moirai-2.0-R-small"
+# ---- Model loader (one-time) ----
 _MODULE = None
 def load_module():
     global _MODULE
         _MODULE = Moirai2Module.from_pretrained(MODEL_ID)
     return _MODULE
+# ---- Utilities ----
def _future_index(last_idx: pd.Timestamp, freq: str, horizon: int) -> pd.DatetimeIndex:
    """Build the timestamps of the forecast window.

    The first timestamp lies one ``freq`` step after ``last_idx``; the result
    holds ``horizon`` timestamps spaced at ``freq``.
    """
    first_step = last_idx + to_offset(freq)
    return pd.date_range(first_step, periods=horizon, freq=freq)
def _run_forecast_on_series(y: pd.Series, freq: str, horizon: int, context_hint: int, title: str):
    """Forecast the next ``horizon`` steps of ``y`` with Moirai 2.0 and plot them.

    Args:
        y: Univariate series with a datetime index. Only its last ``ctx``
            observations are handed to the model.
        freq: pandas frequency string, used both for the GluonTS dataset and
            for the timestamps of the forecast.
        horizon: Number of future steps to predict. It is coerced to ``int``
            once up front so that a float passed by a caller cannot break the
            slice bound or the ``periods`` argument further down.
        context_hint: Requested context length. A falsy value means 1680; the
            value is clamped to ``[32, len(y)]``.
        title: Title drawn on the plot.

    Returns:
        ``(fig, out_df)``: a matplotlib figure showing history and forecast,
        and a DataFrame with the columns ``date`` and ``prediction``.

    Raises:
        gr.Error: If ``horizon`` is below 1 or ``y`` has fewer than 50 points.
    """
    horizon = int(horizon)
    if horizon < 1:
        raise gr.Error("Forecast horizon must be at least 1.")
    if len(y) < 50:
        raise gr.Error("Need at least 50 points to forecast.")

    ctx = int(np.clip(context_hint or 1680, 32, len(y)))
    target = y.values[-ctx:].astype(np.float32)
    start_idx = y.index[-ctx]
    ds = ListDataset([{"start": start_idx, "target": target}], freq=freq)

    model = Moirai2Forecast(
        module=load_module(),
        prediction_length=horizon,
        context_length=ctx,
        target_dim=1,
        feat_dynamic_real_dim=0,
        past_feat_dynamic_real_dim=0,
    )
    # No device argument is passed; placement is left to the predictor.
    predictor = model.create_predictor(batch_size=32)
    forecast = next(iter(predictor.predict(ds)))

    # Pick a point estimate from whichever accessor the forecast object offers.
    if hasattr(forecast, "mean"):
        yhat = np.asarray(forecast.mean)
    elif hasattr(forecast, "quantile"):
        yhat = np.asarray(forecast.quantile(0.5))
    elif hasattr(forecast, "samples"):
        yhat = np.asarray(forecast.samples).mean(axis=0)
    else:
        yhat = np.asarray(forecast)
    # Flatten and trim in case the forecast carries more than `horizon` values.
    yhat = np.asarray(yhat).ravel()[:horizon]

    future_idx = _future_index(y.index[-1], freq, horizon)
    pred = pd.Series(yhat, index=future_idx, name="predicted")

    # Build the figure without registering it in pyplot's global figure
    # manager: figures created with plt.figure() stay alive until explicitly
    # closed, which would accumulate one figure per request in a server.
    fig = plt.Figure(figsize=(10, 5))
    ax = fig.subplots()
    ax.plot(y.index, y.values, label="history")
    ax.plot(pred.index, pred.values, label="forecast")
    ax.set_title(title)
    ax.set_xlabel("Time")
    ax.set_ylabel("Value")
    ax.legend()
    fig.tight_layout()

    out_df = pd.DataFrame({"date": pred.index, "prediction": pred.values})
    return fig, out_df
+# ---- Ticker path ----
 def fetch_series(ticker: str, years: int) -> pd.Series:
     """Fetch daily close price and align to business-day frequency."""
     data = yf.download(
     if data is None or data.empty:
         raise gr.Error(f"No price data found for '{ticker}'.")
     col = "Close" if "Close" in data.columns else ("Adj Close" if "Adj Close" in data.columns else None)
     if col is None:
         raise gr.Error(f"Unexpected columns from yfinance: {list(data.columns)}")
     if isinstance(data.columns, pd.MultiIndex):
         if ticker in data[col].columns:
             s = data[col][ticker]
         else:
             s = data[col].iloc[:, 0]
     else:
         s = data[col]
         raise gr.Error(f"Only missing values for '{ticker}'.")
     return y
def forecast_ticker(ticker: str, horizon: int, lookback_years: int, context_hint: int):
    """Forecast the price series of a stock ticker at business-day frequency.

    The ticker is trimmed and upper-cased before use. Returns whatever
    ``_run_forecast_on_series`` returns for the fetched series.

    Raises:
        gr.Error: If the ticker is empty or ``horizon`` is below 1.
    """
    symbol = (ticker if ticker else "").strip().upper()
    if symbol == "":
        raise gr.Error("Please enter a ticker symbol (e.g., AAPL).")
    if horizon < 1:
        raise gr.Error("Forecast horizon must be at least 1.")

    history = fetch_series(symbol, lookback_years)
    plot_title = f"{symbol} — forecast (Moirai 2.0 R-small)"
    return _run_forecast_on_series(history, "B", horizon, context_hint, plot_title)
+# ---- CSV path ----
def _read_csv_columns(file_path: str) -> pd.DataFrame:
    """Load a delimited text file into a DataFrame.

    The default ``pd.read_csv`` parse is attempted first. If it raises, the
    file is read again with the python engine and ``sep=None`` so that pandas
    detects the delimiter itself (e.g. for tab-separated files).
    """
    try:
        return pd.read_csv(file_path)
    except Exception:
        # Second attempt: let the python engine sniff the delimiter.
        return pd.read_csv(file_path, sep=None, engine="python")
def _coerce_numeric_series(s: pd.Series) -> pd.Series:
    """Return the numeric entries of ``s`` as ``float32``.

    Entries that cannot be parsed as numbers are dropped; the surviving rows
    keep their original index labels.
    """
    return pd.to_numeric(s, errors="coerce").dropna().astype(np.float32)
def build_series_from_csv(file, value_col: str, date_col: str, freq_choice: str) -> tuple[pd.Series, str]:
    """Turn an uploaded CSV into a regularly spaced series.

    Args:
        file: The upload: an object exposing a ``name`` or ``path`` attribute,
            or a plain path string.
        value_col: Name of the value column. When empty, the first column with
            a numeric dtype is used; if there is none, the first column is
            coerced to numbers.
        date_col: Name of the date/time column. When empty, synthetic
            timestamps starting at 2000-01-01 are generated and
            ``freq_choice`` must name a frequency.
        freq_choice: pandas frequency string, or ``"auto"``/empty to infer the
            frequency from the dates (falling back to ``"B"`` when more than
            95% of the dates are weekdays, else ``"D"``).

    Returns:
        ``(y, freq)``: the series on a DatetimeIndex and the frequency string
        that was used.

    Raises:
        gr.Error: If no file was given, its path cannot be determined, the file
            is empty, a named column is missing, fewer than 10 usable rows
            remain, no frequency is available, or the series is all-NaN.
    """
    if file is None:
        raise gr.Error("Please upload a CSV file.")
    # Accept file objects exposing `name` or `path`, or a plain path string.
    path = getattr(file, "name", None) or getattr(file, "path", None) or (file if isinstance(file, str) else None)
    if path is None:
        raise gr.Error("Could not read the uploaded file path.")

    df = _read_csv_columns(path)
    if df.empty:
        raise gr.Error("Uploaded file is empty.")

    # Pick the value column.
    if value_col:
        if value_col not in df.columns:
            raise gr.Error(f"Value column '{value_col}' not found. Available: {list(df.columns)}")
        raw_values = df[value_col]
    else:
        numeric_cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
        raw_values = df[numeric_cols[0]] if numeric_cols else df.iloc[:, 0]
    vals = _coerce_numeric_series(raw_values)
    if len(vals) < 10:
        raise gr.Error("Not enough numeric values after parsing (need at least 10).")

    if date_col:
        if date_col not in df.columns:
            raise gr.Error(f"Date column '{date_col}' not found. Available: {list(df.columns)}")
        # `vals` only holds the rows whose value parsed as a number, so select
        # the dates of exactly those rows before filtering unparseable dates.
        dt = pd.to_datetime(df[date_col], errors="coerce").loc[vals.index]
        keep = dt.notna().to_numpy()
        stamps = pd.DatetimeIndex(dt[keep])
        y = pd.Series(vals[keep].to_numpy(), index=stamps, name=value_col or "value")
        y.index = y.index.tz_localize(None)

        # Stable sort keeps file order among equal timestamps, so "last" below
        # means the last row in the file.
        y = y.sort_index(kind="stable")
        # asfreq() reindexes and raises on duplicate labels; keep one row per
        # timestamp instead of failing with an opaque ValueError.
        y = y[~y.index.duplicated(keep="last")]
        if len(y) < 10:
            raise gr.Error("Too few valid rows after parsing date/value columns.")

        # Determine the frequency.
        if freq_choice and freq_choice != "auto":
            freq = freq_choice
        else:
            freq = pd.infer_freq(y.index)
            if freq is None:
                # Mostly-weekday data is treated as business-day data.
                weekday_ratio = (y.index.dayofweek < 5).mean()
                freq = "B" if weekday_ratio > 0.95 else "D"
        # Put the series on a regular grid, forward-filling the gaps.
        y = y.asfreq(freq, method="ffill")
    else:
        # Without dates there is nothing to infer a frequency from.
        if not freq_choice or freq_choice == "auto":
            raise gr.Error("No date column given. Please choose a frequency (e.g., D, B, H).")
        freq = freq_choice
        idx = pd.date_range(start="2000-01-01", periods=len(vals), freq=freq)
        y = pd.Series(vals.values, index=idx, name=value_col or "value").copy()

    # Final sanity check.
    if y.isna().all():
        raise gr.Error("Series is all-NaN after processing.")
    return y, freq
def forecast_csv(file, value_col: str, date_col: str, freq_choice: str, horizon: int, context_hint: int):
    """Forecast a series read from an uploaded CSV file.

    The three text inputs are trimmed before use; ``None`` is treated like an
    empty string so a cleared widget does not raise ``AttributeError``. Returns
    whatever ``_run_forecast_on_series`` returns for the parsed series.

    Raises:
        gr.Error: If ``horizon`` is below 1, or the CSV cannot be turned into a
            series (raised by ``build_series_from_csv``).
    """
    # Same validation as the ticker path, done before any file parsing.
    if horizon < 1:
        raise gr.Error("Forecast horizon must be at least 1.")
    y, freq = build_series_from_csv(
        file,
        (value_col or "").strip(),
        (date_col or "").strip(),
        (freq_choice or "").strip(),
    )
    return _run_forecast_on_series(y, freq, horizon, context_hint, f"Uploaded series — forecast (freq={freq})")
+# ---- UI ----
# Build the two-tab Gradio interface; `demo` is the app object launched at the
# bottom of the file.
with gr.Blocks(title="Moirai 2.0 — Time Series Forecast (Research)") as demo:
    # Header and usage/licence notice, created before the tabs.
    gr.Markdown(
        """
# Moirai 2.0 — Time Series Forecast (Research)
Use **Salesforce/moirai-2.0-R-small** (via Uni2TS) to forecast either a stock ticker *or* a generic CSV time series.
> **Important**: Research/educational use only. Not investment advice. Model license: **CC-BY-NC-4.0 (non-commercial)**.
        """
    )
    # Tab 1: forecast from a ticker symbol.
    with gr.Tab("By Ticker"):
        with gr.Row():
            ticker = gr.Textbox(label="Ticker", value="AAPL", placeholder="e.g., AAPL, MSFT, TSLA")
            horizon_t = gr.Slider(5, 120, value=30, step=1, label="Forecast horizon (steps)")
        with gr.Row():
            lookback = gr.Slider(1, 10, value=5, step=1, label="Lookback window (years of history)")
            ctx_t = gr.Slider(64, 2000, value=1680, step=16, label="Context length")
        run_t = gr.Button("Run forecast", variant="primary")
        plot_t = gr.Plot(label="History + Forecast")
        table_t = gr.Dataframe(label="Forecast table", interactive=False)
        # Input order follows forecast_ticker(ticker, horizon, lookback_years, context_hint).
        run_t.click(forecast_ticker, inputs=[ticker, horizon_t, lookback, ctx_t], outputs=[plot_t, table_t])
    # Tab 2: forecast from an uploaded CSV file.
    with gr.Tab("Upload CSV"):
        gr.Markdown(
            "Upload a CSV with either (1) a **date/time column** and a **value column**, "
            "or (2) just a numeric value column (then choose a frequency)."
        )
        with gr.Row():
            file = gr.File(label="CSV file", file_types=[".csv"])
        with gr.Row():
            date_col = gr.Textbox(label="Date/time column (optional)", placeholder="e.g., date, timestamp")
            value_col = gr.Textbox(label="Value column (optional — auto-detects first numeric)", placeholder="e.g., value, close")
        with gr.Row():
            # "auto" is the default; the other choices are passed on as pandas frequency strings.
            freq_choice = gr.Dropdown(
                label="Frequency",
                value="auto",
                choices=["auto", "B", "D", "H", "W", "M", "MS"],
                info="If no date column, pick a freq (e.g., D)."
            )
        with gr.Row():
            horizon_u = gr.Slider(1, 500, value=60, step=1, label="Forecast horizon (steps)")
            ctx_u = gr.Slider(32, 5000, value=512, step=16, label="Context length")
        run_u = gr.Button("Run forecast on CSV", variant="primary")
        plot_u = gr.Plot(label="History + Forecast (CSV)")
        table_u = gr.Dataframe(label="Forecast table (CSV)", interactive=False)
        # Input order follows forecast_csv(file, value_col, date_col, freq_choice, horizon, context_hint);
        # note value_col comes before date_col here although the date box is created first in the row.
        run_u.click(forecast_csv, inputs=[file, value_col, date_col, freq_choice, horizon_u, ctx_u], outputs=[plot_u, table_u])
# Launch the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()