bsenst committed on
Commit
0dff473
·
1 Parent(s): 6fa34d1

add trend data visualization

Browse files
Files changed (1) hide show
  1. pages/HF_Dataset.py +95 -3
pages/HF_Dataset.py CHANGED
@@ -2,6 +2,11 @@ import streamlit as st
2
  import pandas as pd
3
  from huggingface_hub import hf_hub_download
4
  import os
 
 
 
 
 
5
 
6
  # Set your Hugging Face token from environment variable
7
  hf_token = os.getenv("pegelonline_dataset_read_only")
@@ -10,19 +15,106 @@ if hf_token is None:
10
  st.error("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
11
  else:
12
  # Download the dataset file
13
- dataset_path = hf_hub_download(
14
  repo_id="DSSG-Wasserwacht/pegelonline-dataset",
15
  filename="processed/stations.parquet",
16
  repo_type="dataset",
17
  use_auth_token=hf_token
18
  )
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  # Load the dataset
21
- df = pd.read_parquet(dataset_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  # Streamlit app
24
  st.title("Pegelonline Dataset Viewer")
25
  st.write("This app displays data from the Pegelonline dataset.")
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  # Display the dataframe
28
- st.dataframe(df)
 
2
  import pandas as pd
3
  from huggingface_hub import hf_hub_download
4
  import os
5
+ import pydeck as pdk
6
+ from datetime import datetime
7
+
8
+ # Set the Streamlit layout to wide
9
+ st.set_page_config(layout="wide")
10
 
11
  # Set your Hugging Face token from environment variable
12
  hf_token = os.getenv("pegelonline_dataset_read_only")
 
15
  st.error("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
16
  else:
17
  # Download the dataset file
18
+ dataset_path_stations = hf_hub_download(
19
  repo_id="DSSG-Wasserwacht/pegelonline-dataset",
20
  filename="processed/stations.parquet",
21
  repo_type="dataset",
22
  use_auth_token=hf_token
23
  )
24
+ dataset_path_water_level = hf_hub_download(
25
+ repo_id="DSSG-Wasserwacht/pegelonline-dataset",
26
+ filename="processed/current_water_level.parquet",
27
+ repo_type="dataset",
28
+ use_auth_token=hf_token
29
+ )
30
+ dataset_path_timeseries = hf_hub_download(
31
+ repo_id="DSSG-Wasserwacht/pegelonline-dataset",
32
+ filename="processed/timeseries.parquet",
33
+ repo_type="dataset",
34
+ use_auth_token=hf_token
35
+ )
36
 
37
  # Load the dataset
38
+ df_stations = pd.read_parquet(dataset_path_stations)
39
+ df_water_level = pd.read_parquet(dataset_path_water_level)
40
+ df_timeseries = pd.read_parquet(dataset_path_timeseries).groupby("uuid").mean("value").round(1).reset_index().rename(columns={"value": "mean_value"})
41
+ df = df_stations.merge(df_water_level, how="left", on="uuid")
42
+ df = df.merge(df_timeseries, how="left", on="uuid")
43
+
44
+ # Format the timestamp nicely
45
+ df["formatted_timestamp"] = df["timestamp"].apply(
46
+ lambda x: datetime.fromisoformat(x).strftime("%Y-%m-%d %H:%M:%S") if pd.notna(x) else None
47
+ )
48
+
49
+ # Add arrows based on the result of mean_value - value
50
def add_arrow(row):
    """Return a trend arrow comparing the current value with the mean value.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            ``"value"`` and ``"mean_value"``.

    Returns:
        ``"➡️"`` when the value lies within 1 % of the mean, ``"⬇️"`` when the
        value is below the mean, ``"⬆️"`` when it is above the mean, or
        ``None`` when either number is missing.
    """
    value = row["value"]
    mean_value = row["mean_value"]

    # A missing mean would make every comparison below evaluate to False and
    # fall through to the "up" arrow, so treat it like a missing value.
    if pd.isna(value) or pd.isna(mean_value):
        return None

    difference = mean_value - value

    # abs() keeps the tolerance band non-negative when the mean is below zero;
    # otherwise the "steady" branch could never be reached for such rows.
    if abs(difference) <= 0.01 * abs(mean_value):
        return "➡️"
    if difference > 0:
        return "⬇️"
    return "⬆️"
60
+
61
+ df["arrow"] = df.apply(add_arrow, axis=1)
62
+
63
+ # Add traffic light column
64
def add_traffic_light(row):
    """Classify how far the current value deviates from the mean value.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            ``"value"`` and ``"mean_value"``.

    Returns:
        ``"🔴"`` when the absolute deviation exceeds 15 % of the mean,
        ``"🟡"`` when it exceeds 10 %, ``"🟢"`` otherwise, or ``None`` when
        either number is missing.
    """
    value = row["value"]
    mean_value = row["mean_value"]

    # Without a mean there is nothing to compare against; returning None
    # avoids reporting such rows as green.
    if pd.isna(value) or pd.isna(mean_value):
        return None

    deviation = abs(mean_value - value)
    # Use the magnitude of the mean so the thresholds stay non-negative when
    # the mean is below zero (otherwise every such row would be red).
    reference = abs(mean_value)

    if deviation > 0.15 * reference:
        return "🔴"
    if deviation > 0.10 * reference:
        return "🟡"
    return "🟢"
74
+
75
+ df["traffic_light"] = df.apply(add_traffic_light, axis=1)
76
+
77
+ # Define a color mapping for the traffic lights
78
+ color_mapping = {
79
+ "🟢": [0, 255, 0, 140], # Green
80
+ "🟡": [255, 255, 0, 140], # Yellow
81
+ "🔴": [255, 0, 0, 140], # Red
82
+ None: [128, 128, 128, 140] # Grey for None
83
+ }
84
+
85
+ # Map the traffic_light column to colors
86
+ df["color"] = df["traffic_light"].map(color_mapping)
87
 
88
  # Streamlit app
89
  st.title("Pegelonline Dataset Viewer")
90
  st.write("This app displays data from the Pegelonline dataset.")
91
 
92
+ # PyDeck layer for the map display
93
+ layer = pdk.Layer(
94
+ "ScatterplotLayer",
95
+ data=df,
96
+ get_position=["longitude", "latitude"],
97
+ get_radius=2000,
98
+ get_color="color",
99
+ pickable=True,
100
+ )
101
+
102
+ # Deck.gl Map
103
+ view_state = pdk.ViewState(
104
+ latitude=df["latitude"].mean(),
105
+ longitude=df["longitude"].mean(),
106
+ zoom=6,
107
+ pitch=0,
108
+ )
109
+ r = pdk.Deck(
110
+ layers=[layer],
111
+ initial_view_state=view_state,
112
+ tooltip={"text": "{shortname}, {value} cm, {mean_value} cm,\n {arrow} {traffic_light} {formatted_timestamp}"}
113
+ )
114
+ st.pydeck_chart(r)
115
+
116
+ # Show the raw data
117
+ st.write("### Rohdaten der Pegelstationen")
118
+
119
  # Display the dataframe
120
+ st.dataframe(df[["shortname", "km", "value", "mean_value", "arrow", "traffic_light", "formatted_timestamp"]])