Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,19 +3,19 @@ import requests
|
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
import pandas as pd
|
| 5 |
import plotly.express as px
|
|
|
|
| 6 |
|
| 7 |
# 定義爬取數據的函數
|
| 8 |
def fetch_data(hospital_url, table_id, hospital_name):
|
| 9 |
response = requests.get(hospital_url)
|
| 10 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 11 |
table = soup.find('table', {'id': table_id})
|
| 12 |
-
|
| 13 |
if not table:
|
| 14 |
return pd.DataFrame() # 若表格未找到,返回空的DataFrame
|
| 15 |
|
| 16 |
rows = table.find_all('tr')
|
| 17 |
data = []
|
| 18 |
-
|
| 19 |
if hospital_name == "成大醫院":
|
| 20 |
# 成大醫院的表格結構不同
|
| 21 |
columns = [th.text.strip() for th in rows[0].find_all("th")]
|
|
@@ -33,19 +33,9 @@ def fetch_data(hospital_url, table_id, hospital_name):
|
|
| 33 |
row_data = [col.get_text(strip=True) for col in cols]
|
| 34 |
if all(row_data): # 確保數據不為空
|
| 35 |
data.append(row_data)
|
| 36 |
-
|
| 37 |
return pd.DataFrame(data, columns=columns)
|
| 38 |
|
| 39 |
-
# 定義繪製圓餅圖的函數
|
| 40 |
-
def plot_pie_chart(df):
|
| 41 |
-
fig = px.pie(df, names='病床種類', values='床位別數', title='各類病床床位佔比')
|
| 42 |
-
st.plotly_chart(fig)
|
| 43 |
-
|
| 44 |
-
# 定義繪製柱狀圖的函數
|
| 45 |
-
def plot_bar_chart(df):
|
| 46 |
-
fig = px.bar(df, x='病床種類', y='床位別數', color='醫院', barmode='group', title="各醫院病床分佈")
|
| 47 |
-
st.plotly_chart(fig)
|
| 48 |
-
|
| 49 |
# Streamlit UI
|
| 50 |
st.title("醫院床位分配表爬取工具")
|
| 51 |
|
|
@@ -71,26 +61,24 @@ selected_hospitals = st.multiselect("選擇醫院", list(hospital_options.keys()
|
|
| 71 |
if st.button("爬取資料"):
|
| 72 |
st.write("正在爬取資料...")
|
| 73 |
progress_bar = st.progress(0)
|
| 74 |
-
|
| 75 |
all_data = pd.DataFrame()
|
| 76 |
|
| 77 |
for i, hospital_name in enumerate(selected_hospitals):
|
| 78 |
hospital_data = hospital_options[hospital_name]
|
| 79 |
df = fetch_data(hospital_data["url"], hospital_data["table_id"], hospital_name)
|
| 80 |
-
|
| 81 |
if df.empty:
|
| 82 |
st.warning(f"{hospital_name} 的數據爬取結果為空,請檢查是否存在問題。")
|
| 83 |
else:
|
| 84 |
df['醫院'] = hospital_name
|
| 85 |
all_data = pd.concat([all_data, df], ignore_index=True)
|
| 86 |
-
|
| 87 |
# 更新進度條
|
| 88 |
progress_bar.progress((i + 1) / len(selected_hospitals))
|
| 89 |
|
| 90 |
if not all_data.empty:
|
| 91 |
st.write("爬取完成,合併的數據如下:")
|
| 92 |
st.dataframe(all_data)
|
| 93 |
-
|
| 94 |
# 下載數據為 CSV
|
| 95 |
csv = all_data.to_csv(index=False).encode('utf-8-sig')
|
| 96 |
st.download_button(
|
|
@@ -99,11 +87,22 @@ if st.button("爬取資料"):
|
|
| 99 |
file_name='合併的床位分配資料.csv',
|
| 100 |
mime='text/csv'
|
| 101 |
)
|
| 102 |
-
|
| 103 |
-
#
|
| 104 |
if st.button("繪製圖表"):
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
else:
|
| 109 |
-
st.error("沒有成功爬取任何數據。")
|
|
|
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
import pandas as pd
|
| 5 |
import plotly.express as px
|
| 6 |
+
import plotly.graph_objects as go
|
| 7 |
|
| 8 |
# 定義爬取數據的函數
|
| 9 |
def fetch_data(hospital_url, table_id, hospital_name):
|
| 10 |
response = requests.get(hospital_url)
|
| 11 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 12 |
table = soup.find('table', {'id': table_id})
|
|
|
|
| 13 |
if not table:
|
| 14 |
return pd.DataFrame() # 若表格未找到,返回空的DataFrame
|
| 15 |
|
| 16 |
rows = table.find_all('tr')
|
| 17 |
data = []
|
| 18 |
+
|
| 19 |
if hospital_name == "成大醫院":
|
| 20 |
# 成大醫院的表格結構不同
|
| 21 |
columns = [th.text.strip() for th in rows[0].find_all("th")]
|
|
|
|
| 33 |
row_data = [col.get_text(strip=True) for col in cols]
|
| 34 |
if all(row_data): # 確保數據不為空
|
| 35 |
data.append(row_data)
|
| 36 |
+
|
| 37 |
return pd.DataFrame(data, columns=columns)
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
# Streamlit UI
|
| 40 |
st.title("醫院床位分配表爬取工具")
|
| 41 |
|
|
|
|
| 61 |
if st.button("爬取資料"):
|
| 62 |
st.write("正在爬取資料...")
|
| 63 |
progress_bar = st.progress(0)
|
|
|
|
| 64 |
all_data = pd.DataFrame()
|
| 65 |
|
| 66 |
for i, hospital_name in enumerate(selected_hospitals):
|
| 67 |
hospital_data = hospital_options[hospital_name]
|
| 68 |
df = fetch_data(hospital_data["url"], hospital_data["table_id"], hospital_name)
|
|
|
|
| 69 |
if df.empty:
|
| 70 |
st.warning(f"{hospital_name} 的數據爬取結果為空,請檢查是否存在問題。")
|
| 71 |
else:
|
| 72 |
df['醫院'] = hospital_name
|
| 73 |
all_data = pd.concat([all_data, df], ignore_index=True)
|
| 74 |
+
|
| 75 |
# 更新進度條
|
| 76 |
progress_bar.progress((i + 1) / len(selected_hospitals))
|
| 77 |
|
| 78 |
if not all_data.empty:
|
| 79 |
st.write("爬取完成,合併的數據如下:")
|
| 80 |
st.dataframe(all_data)
|
| 81 |
+
|
| 82 |
# 下載數據為 CSV
|
| 83 |
csv = all_data.to_csv(index=False).encode('utf-8-sig')
|
| 84 |
st.download_button(
|
|
|
|
| 87 |
file_name='合併的床位分配資料.csv',
|
| 88 |
mime='text/csv'
|
| 89 |
)
|
| 90 |
+
|
| 91 |
+
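# Add a button that draws the charts.
# NOTE(review): this button appears to be nested inside the
# `if st.button("爬取資料"):` branch. st.button returns True only on the
# rerun triggered by its own click, so clicking this button likely reruns
# the script with the outer button False and the charts never render.
# Consider keeping the scraped data in st.session_state and moving this
# button out of the outer branch — TODO confirm against the full file.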
|
| 92 |
if st.button("繪製圖表"):
|
| 93 |
+
# 準備數據
|
| 94 |
+
all_data['床位別數'] = pd.to_numeric(all_data['床位別數'], errors='coerce')
|
| 95 |
+
bed_counts = all_data.groupby(['醫院', '病床種類'])['床位別數'].sum().reset_index()
|
| 96 |
+
|
| 97 |
+
# 繪製圓餅圖
|
| 98 |
+
fig_pie = px.pie(bed_counts, values='床位別數', names='病床種類', title='各類型病床分佈',
|
| 99 |
+
hover_data=['醫院'], labels={'床位別數':'床位數'})
|
| 100 |
+
st.plotly_chart(fig_pie)
|
| 101 |
+
|
| 102 |
+
# 繪製柱狀圖
|
| 103 |
+
fig_bar = px.bar(bed_counts, x='醫院', y='床位別數', color='病床種類', title='醫院病床分佈',
|
| 104 |
+
labels={'床位別數':'床位數'}, barmode='group')
|
| 105 |
+
st.plotly_chart(fig_bar)
|
| 106 |
|
| 107 |
else:
|
| 108 |
+
st.error("沒有成功爬取任何數據。")
|