Spaces:

EDS-lab
/

DAM-price-forecast

Running

App Files Community

mmmapms committed on Jan 13

Commit

7fe8c1e

verified ·

1 Parent(s): 0f35109

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -46

app.py CHANGED Viewed

@@ -6,24 +6,57 @@ import requests
 from io import StringIO
 import base64
-#@st.cache_data(ttl=86400)  # TTL is set for 86400 seconds (24 hours)
 def load_data_predictions(github_token):
-    url = 'https://api.github.com/repos/mmmapms/Forecast_DAM_V2/contents/Predictions.csv'
-    headers = {'Authorization': f'token {github_token}'}
-    response = requests.get(url, headers=headers)
-    st.write("Status code: ", response.status_code)
-    st.write("Response JSON:", response.json())
-    if response.status_code == 200:
-        file_content = response.json()['content']
-        decoded_content = base64.b64decode(file_content).decode('utf-8')
-        csv_content = StringIO(decoded_content)
-        df = pd.read_csv(csv_content, encoding='utf-8')
-        df = df.rename(columns={
         'Price': 'Real Price',
         'DNN1': 'Neural Network 1',
         'DNN2': 'Neural Network 2',
@@ -36,48 +69,23 @@ def load_data_predictions(github_token):
         'Persis': 'Persistence Model',
         'Hybrid_Ensemble': 'Hybrid Ensemble',
         'Weighted_Ensemble': 'Weighted Ensemble'
-        })
-        df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
-        df_filtered = df.dropna(subset=['Real Price'])
-        return df, df_filtered
-    else:
-        st.error("Failed to download data. Please check your GitHub token and repository details.")
-        return pd.DataFrame(), pd.DataFrame()
 github_token = st.secrets["GitHub_Token_Margarida"]
 if github_token:
     df, df_filtered = load_data_predictions(github_token)
 else:
     st.warning("Please enter your GitHub Personal Access Token to proceed.")
-#@st.cache_data
-#def load_data_predictions():
-#    df = pd.read_csv('Predictions.csv')
-#    df = df.rename(columns={
-#    'Price': 'Real Price',
-#    'DNN1': 'Neural Network 1',
-#    'DNN2': 'Neural Network 2',
-#    'DNN3': 'Neural Network 3',
-#    'DNN4': 'Neural Network 4',
-#    'DNN_Ensemble': 'Neural Network Ensemble',
-#    'LEAR56': 'Regularized Linear Model 1',
-#    'LEAR84': 'Regularized Linear Model 2',
-#    'LEAR112': 'Regularized Linear Model 3',
-#    'LEAR730': 'Regularized Linear Model 4',
-#    'LEAR_Ensemble': 'Regularized Linear Model Ensemble',
-#    'Persis': 'Persistence Model',
-#    'Hybrid_Ensemble': 'Hybrid Ensemble'
-#})
-#    df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
-#    df_filtered = df.dropna(subset=['Real Price'])
-#   return df, df_filtered
-#df, df_filtered = load_data_predictions()
 min_date_allowed_pred = df_filtered['Date'].min().date()
 max_date_allowed_pred = df_filtered['Date'].max().date()

 from io import StringIO
 import base64
 def load_data_predictions(github_token):
+    """
+    Fetch Predictions.csv from the GitHub 'Forecast_DAM_V2' repository
+    via the blob SHA. This works for files larger than 1 MB.
+    """
+    owner = "mmmapms"
+    repo = "Forecast_DAM_V2"
+    file_path = "Predictions.csv"
+    # 1. Get file metadata (including SHA) from the “contents” endpoint
+    url_contents = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}"
+    headers_contents = {
+        "Authorization": f"token {github_token}",
+    }
+    response_contents = requests.get(url_contents, headers=headers_contents)
+    st.write("Status code (contents):", response_contents.status_code)
+    st.write("Response JSON (contents):", response_contents.json())
+    if response_contents.status_code != 200:
+        st.error("Failed to download file metadata. Check token and file path.")
+        return pd.DataFrame(), pd.DataFrame()
+    json_data = response_contents.json()
+    # We expect "sha" to be present for the file
+    if "sha" not in json_data:
+        st.error("No 'sha' field found in JSON response. File might be missing.")
+        return pd.DataFrame(), pd.DataFrame()
+    sha = json_data["sha"]
+    # 2. Use the “blobs” endpoint to fetch the raw file content
+    url_blob = f"https://api.github.com/repos/{owner}/{repo}/git/blobs/{sha}"
+    headers_blob = {
+        "Authorization": f"token {github_token}",
+        "Accept": "application/vnd.github.v3.raw",  # crucial for large files
+    }
+    response_blob = requests.get(url_blob, headers=headers_blob)
+    if response_blob.status_code != 200:
+        st.error(f"Failed to fetch raw blob. Status code: {response_blob.status_code}")
+        return pd.DataFrame(), pd.DataFrame()
+    # The response body is the raw CSV text
+    csv_text = response_blob.text
+    csv_content = StringIO(csv_text)
+    # 3. Read the CSV into a Pandas DataFrame
+    df = pd.read_csv(csv_content, encoding='utf-8')
+    # 4. Rename columns as needed
+    df = df.rename(columns={
         'Price': 'Real Price',
         'DNN1': 'Neural Network 1',
         'DNN2': 'Neural Network 2',
         'Persis': 'Persistence Model',
         'Hybrid_Ensemble': 'Hybrid Ensemble',
         'Weighted_Ensemble': 'Weighted Ensemble'
+    })
+    # 5. Parse dates and filter
+    df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
+    df_filtered = df.dropna(subset=['Real Price'])
+    return df, df_filtered
 github_token = st.secrets["GitHub_Token_Margarida"]
 if github_token:
     df, df_filtered = load_data_predictions(github_token)
 else:
     st.warning("Please enter your GitHub Personal Access Token to proceed.")
 min_date_allowed_pred = df_filtered['Date'].min().date()
 max_date_allowed_pred = df_filtered['Date'].max().date()