Spaces:
Sleeping
Sleeping
add trend data visualization
Browse files- pages/HF_Dataset.py +95 -3
pages/HF_Dataset.py
CHANGED
@@ -2,6 +2,11 @@ import streamlit as st
|
|
2 |
import pandas as pd
|
3 |
from huggingface_hub import hf_hub_download
|
4 |
import os
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
# Set your Hugging Face token from environment variable
|
7 |
hf_token = os.getenv("pegelonline_dataset_read_only")
|
@@ -10,19 +15,106 @@ if hf_token is None:
|
|
10 |
st.error("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
|
11 |
else:
|
12 |
# Download the dataset file
|
13 |
-
|
14 |
repo_id="DSSG-Wasserwacht/pegelonline-dataset",
|
15 |
filename="processed/stations.parquet",
|
16 |
repo_type="dataset",
|
17 |
use_auth_token=hf_token
|
18 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
# Load the dataset
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
# Streamlit app
|
24 |
st.title("Pegelonline Dataset Viewer")
|
25 |
st.write("This app displays data from the Pegelonline dataset.")
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
# Display the dataframe
|
28 |
-
st.dataframe(df)
|
|
|
2 |
import pandas as pd
|
3 |
from huggingface_hub import hf_hub_download
|
4 |
import os
|
5 |
+
import pydeck as pdk
|
6 |
+
from datetime import datetime
|
7 |
+
|
8 |
+
# Set the Streamlit layout to wide.
# NOTE(review): Streamlit has historically required set_page_config to be the
# first Streamlit call of a page — keep this above every other st.* call.
st.set_page_config(layout="wide")
|
10 |
|
11 |
# Read the Hugging Face access token from the
# "pegelonline_dataset_read_only" environment variable; os.getenv yields None
# when the variable is not set.
hf_token = os.getenv("pegelonline_dataset_read_only")
|
|
|
15 |
# Name the variable that is actually read via os.getenv so the hint is actionable.
st.error("Hugging Face token not found. Please set the pegelonline_dataset_read_only environment variable.")
|
16 |
else:
|
17 |
# Download the three processed parquet files (stations, current water level,
# time series) from the dataset repository. hf_hub_download returns the local
# path of each downloaded file. The files are fetched in the order listed, and
# the token is passed via `token`, the documented replacement for the
# deprecated `use_auth_token` argument.
dataset_path_stations, dataset_path_water_level, dataset_path_timeseries = (
    hf_hub_download(
        repo_id="DSSG-Wasserwacht/pegelonline-dataset",
        filename=f"processed/{table_name}.parquet",
        repo_type="dataset",
        token=hf_token,
    )
    for table_name in ("stations", "current_water_level", "timeseries")
)
|
36 |
|
37 |
# Load the dataset
df_stations = pd.read_parquet(dataset_path_stations)
df_water_level = pd.read_parquet(dataset_path_water_level)

# Per-station (uuid) mean of the time-series "value" column, rounded to one
# decimal and exposed as "mean_value". The column is selected explicitly:
# groupby(...).mean("value") would hand "value" to the `numeric_only` flag
# instead of choosing a column.
df_timeseries = (
    pd.read_parquet(dataset_path_timeseries)
    .groupby("uuid")["value"]
    .mean()
    .round(1)
    .rename("mean_value")
    .reset_index()
)

# Left joins keep every station row even when it has no water-level reading
# or no time-series mean.
df = df_stations.merge(df_water_level, how="left", on="uuid")
df = df.merge(df_timeseries, how="left", on="uuid")

# Format the timestamp nicely; missing timestamps stay None.
# NOTE(review): datetime.fromisoformat needs ISO-8601 strings — confirm the
# "timestamp" column is stored as text rather than as a datetime dtype.
df["formatted_timestamp"] = df["timestamp"].apply(
    lambda x: datetime.fromisoformat(x).strftime("%Y-%m-%d %H:%M:%S") if pd.notna(x) else None
)
|
48 |
+
|
49 |
+
# Add arrows based on the result of mean_value - value
|
50 |
+
def add_arrow(row):
    """Return an arrow emoji comparing a row's value with its mean value.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing
            the ``"value"`` and ``"mean_value"`` entries.

    Returns:
        ``"➡️"`` when ``value`` lies within 1 % of ``mean_value``, ``"⬇️"``
        when it is below the mean, ``"⬆️"`` when it is above the mean, or
        ``None`` when either number is missing.
    """
    value = row["value"]
    mean_value = row["mean_value"]
    # Every comparison against NaN is False, so a missing mean would
    # otherwise fall through to the "up" arrow.
    if pd.isna(value) or pd.isna(mean_value):
        return None
    difference = mean_value - value
    # abs() keeps the tolerance non-negative when the mean itself is negative.
    if abs(difference) <= 0.01 * abs(mean_value):
        return "➡️"
    if difference > 0:
        return "⬇️"
    return "⬆️"
|
60 |
+
|
61 |
+
# Evaluate add_arrow once per row (axis=1) and store the results in the "arrow" column.
df["arrow"] = df.apply(add_arrow, axis=1)
|
62 |
+
|
63 |
+
# Add traffic light column
|
64 |
+
def add_traffic_light(row):
    """Return a traffic-light emoji for how far a row's value is from its mean.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing
            the ``"value"`` and ``"mean_value"`` entries.

    Returns:
        ``"🔴"`` when the absolute deviation exceeds 15 % of the mean,
        ``"🟡"`` when it exceeds 10 %, ``"🟢"`` otherwise, or ``None`` when
        either number is missing.
    """
    value = row["value"]
    mean_value = row["mean_value"]
    # Every comparison against NaN is False, so a missing mean would
    # otherwise be reported as green.
    if pd.isna(value) or pd.isna(mean_value):
        return None
    deviation = abs(mean_value - value)
    # Thresholds scale with the magnitude of the mean; abs() keeps them
    # non-negative when the mean itself is negative.
    reference = abs(mean_value)
    if deviation > 0.15 * reference:
        return "🔴"
    if deviation > 0.10 * reference:
        return "🟡"
    return "🟢"
|
74 |
+
|
75 |
+
# Evaluate add_traffic_light once per row (axis=1) and store the results in the "traffic_light" column.
df["traffic_light"] = df.apply(add_traffic_light, axis=1)
|
76 |
+
|
77 |
+
# Define a color mapping for the traffic lights: each traffic-light value
# (including None for rows without one) maps to a four-number list.
# NOTE(review): presumably [R, G, B, alpha] as consumed by the map layer's
# get_color — confirm against the pydeck layer documentation.
color_mapping = {
    "🟢": [0, 255, 0, 140], # Green
    "🟡": [255, 255, 0, 140], # Yellow
    "🔴": [255, 0, 0, 140], # Red
    None: [128, 128, 128, 140] # Grey for None
}

# Map the traffic_light column to colors.
# NOTE(review): verify that Series.map matches the None key for rows whose
# traffic light is missing; unmatched values would become NaN instead of grey.
df["color"] = df["traffic_light"].map(color_mapping)
|
87 |
|
88 |
# Streamlit app: page heading and short description.
st.title("Pegelonline Dataset Viewer")
st.write("This app displays data from the Pegelonline dataset.")

# PyDeck layer for the map display: a scatterplot over df, positioned by the
# longitude/latitude columns and colored by the "color" column.
scatter_options = {
    "data": df,
    "get_position": ["longitude", "latitude"],
    "get_radius": 2000,
    "get_color": "color",
    "pickable": True,
}
layer = pdk.Layer("ScatterplotLayer", **scatter_options)

# Deck.gl map: the initial view is centered on the mean coordinates in df.
center_latitude = df["latitude"].mean()
center_longitude = df["longitude"].mean()
view_state = pdk.ViewState(
    latitude=center_latitude,
    longitude=center_longitude,
    zoom=6,
    pitch=0,
)

# Tooltip template; the {placeholders} name columns of df.
tooltip_text = "{shortname}, {value} cm, {mean_value} cm,\n {arrow} {traffic_light} {formatted_timestamp}"
r = pdk.Deck(
    layers=[layer],
    initial_view_state=view_state,
    tooltip={"text": tooltip_text},
)
st.pydeck_chart(r)

# Show the raw data below the map.
st.write("### Rohdaten der Pegelstationen")

# Display the dataframe, restricted to the columns of interest.
table_columns = [
    "shortname",
    "km",
    "value",
    "mean_value",
    "arrow",
    "traffic_light",
    "formatted_timestamp",
]
st.dataframe(df[table_columns])
|