Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,19 +3,19 @@ import requests
|
|
3 |
from bs4 import BeautifulSoup
|
4 |
import pandas as pd
|
5 |
import plotly.express as px
|
|
|
6 |
|
7 |
# 定義爬取數據的函數
|
8 |
def fetch_data(hospital_url, table_id, hospital_name):
|
9 |
response = requests.get(hospital_url)
|
10 |
soup = BeautifulSoup(response.text, 'html.parser')
|
11 |
table = soup.find('table', {'id': table_id})
|
12 |
-
|
13 |
if not table:
|
14 |
return pd.DataFrame() # 若表格未找到,返回空的DataFrame
|
15 |
|
16 |
rows = table.find_all('tr')
|
17 |
data = []
|
18 |
-
|
19 |
if hospital_name == "成大醫院":
|
20 |
# 成大醫院的表格結構不同
|
21 |
columns = [th.text.strip() for th in rows[0].find_all("th")]
|
@@ -33,19 +33,9 @@ def fetch_data(hospital_url, table_id, hospital_name):
|
|
33 |
row_data = [col.get_text(strip=True) for col in cols]
|
34 |
if all(row_data): # 確保數據不為空
|
35 |
data.append(row_data)
|
36 |
-
|
37 |
return pd.DataFrame(data, columns=columns)
|
38 |
|
39 |
-
# Draw a pie chart of the bed-count share of each bed type.
def plot_pie_chart(df):
    """Render a pie chart of bed counts per bed type in the Streamlit page.

    Args:
        df: DataFrame containing a '病床種類' (bed type) column used for the
            sector names and a '床位別數' (bed count) column used for the
            sector sizes.

    Returns:
        None. The figure is sent to the page through ``st.plotly_chart``.
    """
    # The frames built by fetch_data hold cell text (get_text), so convert the
    # counts to numbers before using them as sector sizes. Work on a copy so
    # the caller's frame is left untouched; unparsable cells become NaN.
    plot_df = df.copy()
    plot_df['床位別數'] = pd.to_numeric(plot_df['床位別數'], errors='coerce')

    fig = px.pie(plot_df, names='病床種類', values='床位別數', title='各類病床床位佔比')
    st.plotly_chart(fig)
|
43 |
-
|
44 |
-
# Draw a grouped bar chart of bed counts per bed type, coloured by hospital.
def plot_bar_chart(df):
    """Render a grouped bar chart of bed counts in the Streamlit page.

    Args:
        df: DataFrame containing '病床種類' (bed type, x axis), '床位別數'
            (bed count, y axis) and '醫院' (hospital, bar colour) columns.

    Returns:
        None. The figure is sent to the page through ``st.plotly_chart``.
    """
    # The frames built by fetch_data hold cell text (get_text), so convert the
    # counts to numbers before using them as bar heights. Work on a copy so
    # the caller's frame is left untouched; unparsable cells become NaN.
    plot_df = df.copy()
    plot_df['床位別數'] = pd.to_numeric(plot_df['床位別數'], errors='coerce')

    fig = px.bar(plot_df, x='病床種類', y='床位別數', color='醫院', barmode='group', title="各醫院病床分佈")
    st.plotly_chart(fig)
|
48 |
-
|
49 |
# Streamlit UI
|
50 |
st.title("醫院床位分配表爬取工具")
|
51 |
|
@@ -71,26 +61,24 @@ selected_hospitals = st.multiselect("選擇醫院", list(hospital_options.keys()
|
|
71 |
if st.button("爬取資料"):
|
72 |
st.write("正在爬取資料...")
|
73 |
progress_bar = st.progress(0)
|
74 |
-
|
75 |
all_data = pd.DataFrame()
|
76 |
|
77 |
for i, hospital_name in enumerate(selected_hospitals):
|
78 |
hospital_data = hospital_options[hospital_name]
|
79 |
df = fetch_data(hospital_data["url"], hospital_data["table_id"], hospital_name)
|
80 |
-
|
81 |
if df.empty:
|
82 |
st.warning(f"{hospital_name} 的數據爬取結果為空,請檢查是否存在問題。")
|
83 |
else:
|
84 |
df['醫院'] = hospital_name
|
85 |
all_data = pd.concat([all_data, df], ignore_index=True)
|
86 |
-
|
87 |
# 更新進度條
|
88 |
progress_bar.progress((i + 1) / len(selected_hospitals))
|
89 |
|
90 |
if not all_data.empty:
|
91 |
st.write("爬取完成,合併的數據如下:")
|
92 |
st.dataframe(all_data)
|
93 |
-
|
94 |
# 下載數據為 CSV
|
95 |
csv = all_data.to_csv(index=False).encode('utf-8-sig')
|
96 |
st.download_button(
|
@@ -99,11 +87,22 @@ if st.button("爬取資料"):
|
|
99 |
file_name='合併的床位分配資料.csv',
|
100 |
mime='text/csv'
|
101 |
)
|
102 |
-
|
103 |
-
#
|
104 |
if st.button("繪製圖表"):
|
105 |
-
|
106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
|
108 |
else:
|
109 |
-
st.error("沒有成功爬取任何數據。")
|
|
|
3 |
from bs4 import BeautifulSoup
|
4 |
import pandas as pd
|
5 |
import plotly.express as px
|
6 |
+
import plotly.graph_objects as go
|
7 |
|
8 |
# 定義爬取數據的函數
|
9 |
def fetch_data(hospital_url, table_id, hospital_name):
|
10 |
response = requests.get(hospital_url)
|
11 |
soup = BeautifulSoup(response.text, 'html.parser')
|
12 |
table = soup.find('table', {'id': table_id})
|
|
|
13 |
if not table:
|
14 |
return pd.DataFrame() # 若表格未找到,返回空的DataFrame
|
15 |
|
16 |
rows = table.find_all('tr')
|
17 |
data = []
|
18 |
+
|
19 |
if hospital_name == "成大醫院":
|
20 |
# 成大醫院的表格結構不同
|
21 |
columns = [th.text.strip() for th in rows[0].find_all("th")]
|
|
|
33 |
row_data = [col.get_text(strip=True) for col in cols]
|
34 |
if all(row_data): # 確保數據不為空
|
35 |
data.append(row_data)
|
36 |
+
|
37 |
return pd.DataFrame(data, columns=columns)
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
# Streamlit UI
|
40 |
st.title("醫院床位分配表爬取工具")
|
41 |
|
|
|
61 |
if st.button("爬取資料"):
|
62 |
st.write("正在爬取資料...")
|
63 |
progress_bar = st.progress(0)
|
|
|
64 |
all_data = pd.DataFrame()
|
65 |
|
66 |
for i, hospital_name in enumerate(selected_hospitals):
|
67 |
hospital_data = hospital_options[hospital_name]
|
68 |
df = fetch_data(hospital_data["url"], hospital_data["table_id"], hospital_name)
|
|
|
69 |
if df.empty:
|
70 |
st.warning(f"{hospital_name} 的數據爬取結果為空,請檢查是否存在問題。")
|
71 |
else:
|
72 |
df['醫院'] = hospital_name
|
73 |
all_data = pd.concat([all_data, df], ignore_index=True)
|
74 |
+
|
75 |
# 更新進度條
|
76 |
progress_bar.progress((i + 1) / len(selected_hospitals))
|
77 |
|
78 |
if not all_data.empty:
|
79 |
st.write("爬取完成,合併的數據如下:")
|
80 |
st.dataframe(all_data)
|
81 |
+
|
82 |
# 下載數據為 CSV
|
83 |
csv = all_data.to_csv(index=False).encode('utf-8-sig')
|
84 |
st.download_button(
|
|
|
87 |
file_name='合併的床位分配資料.csv',
|
88 |
mime='text/csv'
|
89 |
)
|
90 |
+
|
91 |
+
# Button that draws the charts from the merged scrape results.
# NOTE(review): this button appears to sit inside the `if st.button("爬取資料")`
# branch. A Streamlit button is True only on the rerun triggered by its own
# click, so pressing this one reruns the script with the outer button False
# and this branch may never execute. Keeping the scraped frame in
# st.session_state would avoid that -- confirm against the full file.
if st.button("繪製圖表"):
    # Column names come from each hospital's scraped header row, so the
    # columns the charts need are not guaranteed to be present.
    required_columns = ['醫院', '病床種類', '床位別數']
    missing_columns = [col for col in required_columns if col not in all_data.columns]

    if missing_columns:
        st.warning(f"缺少繪圖所需的欄位:{'、'.join(missing_columns)}")
    else:
        # Prepare the data: the scraped counts are text, so strip thousands
        # separators (otherwise "1,234" would silently become NaN) and
        # convert to numbers; anything still unparsable becomes NaN.
        all_data['床位別數'] = pd.to_numeric(
            all_data['床位別數'].astype(str).str.replace(',', '', regex=False),
            errors='coerce',
        )
        # Total beds per (hospital, bed type) pair.
        bed_counts = all_data.groupby(['醫院', '病床種類'])['床位別數'].sum().reset_index()

        # Pie chart: distribution of beds across bed types.
        fig_pie = px.pie(bed_counts, values='床位別數', names='病床種類', title='各類型病床分佈',
                         hover_data=['醫院'], labels={'床位別數':'床位數'})
        st.plotly_chart(fig_pie)

        # Bar chart: bed counts per hospital, grouped by bed type.
        fig_bar = px.bar(bed_counts, x='醫院', y='床位別數', color='病床種類', title='醫院病床分佈',
                         labels={'床位別數':'床位數'}, barmode='group')
        st.plotly_chart(fig_bar)
|
106 |
|
107 |
else:
|
108 |
+
st.error("沒有成功爬取任何數據。")
|