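# Spaces: Sleeping — appears to be status text from the hosting page captured along with the source; not part of the program.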
import streamlit as st | |
import requests | |
from bs4 import BeautifulSoup | |
import pandas as pd | |
# 定義爬取數據的函數 | |
def fetch_data(hospital_url, table_id, hospital_name, timeout=15):
    """Scrape one hospital's bed-status table into a DataFrame.

    Downloads ``hospital_url``, locates the ``<table>`` whose ``id`` equals
    ``table_id`` and converts its data rows into a ``pandas.DataFrame``.

    Args:
        hospital_url: URL of the page that contains the table.
        table_id: HTML ``id`` attribute of the table to extract.
        hospital_name: Display name of the hospital. ``"成大醫院"`` takes its
            column names from the ``<th>`` cells of the first row; every other
            hospital uses a fixed five-column layout.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A DataFrame with one row per fully populated table row. An empty
        DataFrame is returned when the table is missing or has no rows.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    # Without a timeout a stalled server would block the app indefinitely.
    response = requests.get(hospital_url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    table = soup.find('table', {'id': table_id})
    if table is None:
        return pd.DataFrame()  # table not found on the page

    rows = table.find_all('tr')
    if not rows:
        return pd.DataFrame()  # table present but has no rows at all

    # Resolve the column names up front so they are always defined, even when
    # the table has a header row and nothing else.
    is_ncku = hospital_name == "成大醫院"
    if is_ncku:
        # This table carries its own header cells in the first row.
        columns = [th.text.strip() for th in rows[0].find_all("th")]
    else:
        # The other hospitals share this fixed five-column layout.
        columns = ['病床種類', '床位別數', '住院人數', '空床數', '佔床率']

    data = []
    for row in rows[1:]:  # skip the header row
        cells = row.find_all('td')
        if is_ncku:
            values = [cell.text.strip() for cell in cells]
        else:
            values = [cell.get_text(strip=True) for cell in cells]
        # Keep only rows that match the header width and have no blank cell;
        # a width mismatch would make the DataFrame constructor raise.
        if len(values) == len(columns) and all(values):
            data.append(values)

    return pd.DataFrame(data, columns=columns)
# Streamlit UI
st.title("醫院床位分配表爬取工具")

# Scrape targets: for each hospital, the page URL and the HTML id of the
# table that fetch_data() should extract.
hospital_options = {
    "台南醫院": {
        "url": "https://www.tmh.org.tw/tmh2016/ImpBD.aspx?Kind=2",
        "table_id": "ctl00_ContentPlaceHolder1_GV_Bed"
    },
    "奇美醫院": {
        "url": "https://www.chimei.org.tw/%E4%BD%94%E5%BA%8A%E7%8E%87%E6%9F%A5%E8%A9%A2/%E4%BD%94%E5%BA%8A%E7%8E%87%E6%9F%A5%E8%A9%A2.aspx?ihospital=10&ffloor=",
        "table_id": "DG1"
    },
    "成大醫院": {
        "url": "https://web.hosp.ncku.edu.tw/nckm/Bedstatus/BedStatus.aspx",
        "table_id": "GV_EmgInsure"
    }
}

# Multi-select widget for choosing which hospitals to scrape.
selected_hospitals = st.multiselect("選擇醫院", list(hospital_options.keys()))

# Start scraping when the user presses the button.
if st.button("爬取資料"):
    if not selected_hospitals:
        # Nothing was selected, so there is nothing to fetch; say so instead
        # of reporting a scraping failure.
        st.warning("請先選擇至少一間醫院。")
    else:
        st.write("正在爬取資料...")
        progress_bar = st.progress(0)
        frames = []  # non-empty per-hospital results, merged once after the loop

        for i, hospital_name in enumerate(selected_hospitals):
            hospital_data = hospital_options[hospital_name]
            try:
                df = fetch_data(hospital_data["url"], hospital_data["table_id"], hospital_name)
            except requests.RequestException as exc:
                # One unreachable site should not abort the remaining hospitals.
                st.warning(f"{hospital_name} 的數據爬取失敗:{exc}")
            else:
                if df.empty:
                    st.warning(f"{hospital_name} 的數據爬取結果為空,請檢查是否存在問題。")
                else:
                    # Tag every row with its source hospital before merging.
                    df['醫院'] = hospital_name
                    frames.append(df)

            # Update the progress bar after each hospital, successful or not.
            progress_bar.progress((i + 1) / len(selected_hospitals))

        if frames:
            all_data = pd.concat(frames, ignore_index=True)
            st.write("爬取完成,合併的數據如下:")
            st.dataframe(all_data)

            # Offer the merged data as a CSV download; 'utf-8-sig' prepends a BOM.
            csv = all_data.to_csv(index=False).encode('utf-8-sig')
            st.download_button(
                label="下載數據為 CSV",
                data=csv,
                file_name='合併的床位分配資料.csv',
                mime='text/csv'
            )
        else:
            st.error("沒有成功爬取任何數據。")