Spaces:
Sleeping
Sleeping
import streamlit as st | |
import requests | |
from bs4 import BeautifulSoup | |
import pandas as pd | |
import plotly.express as px | |
import plotly.graph_objects as go | |
# Scraper for a single hospital's bed-occupancy table.
def fetch_data(
    hospital_url: str,
    table_id: str,
    hospital_name: str,
    timeout: float = 10,
) -> pd.DataFrame:
    """Download a hospital page and return its bed table as a DataFrame.

    Args:
        hospital_url: URL of the page that contains the table.
        table_id: ``id`` attribute of the ``<table>`` element to read.
        hospital_name: Selects the parsing strategy. "成大醫院" takes its
            column names from the ``<th>`` cells of the first row; any other
            value uses the fixed five-column layout.
        timeout: Seconds handed to ``requests.get`` so a stalled server
            cannot block the app forever.

    Returns:
        A DataFrame of string cells containing every table row whose cell
        count matches the column list and whose cells are all non-empty.
        An empty DataFrame is returned when the table is not found.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` on
            connection failure or timeout.
    """
    response = requests.get(hospital_url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'id': table_id})
    if table is None:
        return pd.DataFrame()  # table not found

    rows = table.find_all('tr')
    data = []

    if hospital_name == "成大醫院":
        # This table carries its own header row, so it must exist.
        if not rows:
            return pd.DataFrame()
        columns = [th.text.strip() for th in rows[0].find_all("th")]
        # Normalise the bed-type header to the name used by the other hospitals.
        columns = ['病床種類' if col == '病床類別' else col for col in columns]
        for row in rows[1:]:
            row_data = [td.text.strip() for td in row.find_all("td")]
            # Keep only complete rows: a cell-count mismatch (e.g. a row made
            # of <th> cells only) would break or misalign the DataFrame.
            if len(row_data) == len(columns) and all(row_data):
                data.append(row_data)
    else:
        # The remaining hospitals share one fixed five-column layout.
        columns = ['病床種類', '病床數', '住院人數', '空床數', '佔床率']
        for row in rows[1:]:  # skip the header row
            cols = row.find_all('td')
            if len(cols) != len(columns):
                continue
            row_data = [col.get_text(strip=True) for col in cols]
            if all(row_data):  # drop rows containing an empty cell
                data.append(row_data)

    df = pd.DataFrame(data, columns=columns)

    # Make sure the bed-count column is called '病床數' for every hospital.
    if '病床數' not in df.columns and '開放床數' in df.columns:
        df = df.rename(columns={'開放床數': '病床數'})
    return df
# Streamlit UI: page title.
st.title("醫院床位分配表爬取工具")

# Scrape targets offered in the multi-select, keyed by hospital name.
# Each entry is written as (name, page URL, id of the <table> to read).
hospital_options = {
    name: {"url": url, "table_id": table_id}
    for name, url, table_id in (
        (
            "台南醫院",
            "https://www.tmh.org.tw/tmh2016/ImpBD.aspx?Kind=2",
            "ctl00_ContentPlaceHolder1_GV_Bed",
        ),
        (
            "奇美醫院",
            "https://www.chimei.org.tw/%E4%BD%94%E5%BA%8A%E7%8E%87%E6%9F%A5%E8%A9%A2/%E4%BD%94%E5%BA%8A%E7%8E%87%E6%9F%A5%E8%A9%A2.aspx?ihospital=10&ffloor=",
            "DG1",
        ),
        (
            "成大醫院",
            "https://web.hosp.ncku.edu.tw/nckm/Bedstatus/BedStatus.aspx",
            "GV_EmgInsure",
        ),
    )
}

# Let the user pick any number of the configured hospitals.
selected_hospitals = st.multiselect("選擇醫院", list(hospital_options))
# Start scraping when the user presses the button.
# NOTE(review): st.button is presumably True only on the rerun triggered by
# the click, so pressing the download button below likely reruns the script
# and hides these results — confirm, and consider st.session_state if so.
if st.button("爬取資料"):
    if not selected_hospitals:
        # Nothing selected: say so instead of reporting a scrape failure.
        st.warning("請先選擇至少一間醫院。")
    else:
        st.write("正在爬取資料...")
        progress_bar = st.progress(0)
        total = len(selected_hospitals)
        frames = []  # collected per hospital, concatenated once after the loop

        for done, hospital_name in enumerate(selected_hospitals, start=1):
            hospital_data = hospital_options[hospital_name]
            try:
                df = fetch_data(hospital_data["url"], hospital_data["table_id"], hospital_name)
            except (requests.RequestException, ValueError, IndexError) as exc:
                # One unreachable or malformed page must not abort the others.
                st.error(f"{hospital_name} 的數據爬取失敗:{exc}")
            else:
                if df.empty:
                    st.warning(f"{hospital_name} 的數據爬取結果為空,請檢查是否存在問題。")
                else:
                    df['醫院'] = hospital_name
                    frames.append(df)
            # Advance the progress bar whether or not this hospital succeeded.
            progress_bar.progress(done / total)

        if frames:
            all_data = pd.concat(frames, ignore_index=True)
            st.write("爬取完成,合併的數據如下:")
            st.dataframe(all_data)

            # Offer the merged data as CSV; the BOM lets Excel detect UTF-8.
            csv = all_data.to_csv(index=False).encode('utf-8-sig')
            st.download_button(
                label="下載數據為 CSV",
                data=csv,
                file_name='合併的床位分配資料.csv',
                mime='text/csv'
            )

            # Charts need both the bed-count and bed-type columns; a table
            # with header-derived column names may lack either.
            bed_column = '病床數'
            missing = [col for col in (bed_column, '病床種類') if col not in all_data.columns]
            if missing:
                st.warning(f"缺少欄位 {'、'.join(missing)},無法繪製圖表。")
            else:
                st.write("正在繪製圖表...")

                # Scraped cells are strings; non-numeric values become NaN.
                all_data[bed_column] = pd.to_numeric(all_data[bed_column], errors='coerce')
                bed_counts = all_data.groupby(['醫院', '病床種類'])[bed_column].sum().reset_index()

                # Pie chart: share of each bed type.
                fig_pie = px.pie(bed_counts, values=bed_column, names='病床種類', title='各類型病床分佈',
                                 hover_data=['醫院'], labels={bed_column: '病床數'})
                st.plotly_chart(fig_pie)

                # Grouped bar chart: bed counts per hospital, coloured by bed type.
                fig_bar = px.bar(bed_counts, x='醫院', y=bed_column, color='病床種類', title='醫院病床分佈',
                                 labels={bed_column: '病床數'}, barmode='group')
                st.plotly_chart(fig_bar)
        else:
            st.error("沒有成功爬取任何數據。")