# TCN_DATA_0814 / app.py
# Roberta2024's picture
# Update app.py
# eec48a9 verified
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# Scraper: download one hospital page and parse its bed table.
def fetch_data(hospital_url, table_id, hospital_name, timeout=10):
    """Download a hospital page and parse its bed table into a DataFrame.

    Args:
        hospital_url: URL of the page that contains the bed table.
        table_id: ``id`` attribute of the ``<table>`` element to parse.
        hospital_name: Display name of the hospital. "成大醫院" selects the
            parser that reads column names from the table's ``<th>`` cells;
            any other name selects the fixed five-column parser.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        A DataFrame with one row per table row whose cell count matches the
        column list and whose cells are all non-empty. An empty DataFrame is
        returned when the server answers with an HTTP error status, when the
        table is not found, or when the table has no rows.

    Raises:
        requests.RequestException: If the request itself fails or times out.
    """
    # Without a timeout a stalled server would block the app indefinitely.
    response = requests.get(hospital_url, timeout=timeout)
    if not response.ok:
        # An error page cannot contain the bed table; report "no data".
        return pd.DataFrame()

    # requests assumes ISO-8859-1 for text responses that declare no charset,
    # which would garble the Chinese headers matched below, so fall back to
    # the encoding detected from the body in that case.
    if response.encoding is None or response.encoding.lower() == 'iso-8859-1':
        response.encoding = response.apparent_encoding

    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'id': table_id})
    if table is None:
        return pd.DataFrame()  # Table not found.

    rows = table.find_all('tr')
    if not rows:
        return pd.DataFrame()  # Table without rows: no header to read.

    if hospital_name == "成大醫院":
        # This table carries its own header row; take the column names from it.
        columns = [th.text.strip() for th in rows[0].find_all("th")]
        # Rename "病床類別" to "病床種類" so it matches the other hospitals.
        columns = ['病床種類' if col == '病床類別' else col for col in columns]
        cell_rows = (
            [td.text.strip() for td in row.find_all("td")]
            for row in rows[1:]
        )
    else:
        # The other hospitals share a fixed five-column layout.
        columns = ['病床種類', '病床數', '住院人數', '空床數', '佔床率']
        cell_rows = (
            [col.get_text(strip=True) for col in row.find_all('td')]
            for row in rows[1:]  # Skip the header row.
        )

    # Keep only complete rows: the explicit length check also rejects rows
    # without any <td>, for which all([]) alone would be True.
    data = [
        cells for cells in cell_rows
        if len(cells) == len(columns) and cells and all(cells)
    ]

    df = pd.DataFrame(data, columns=columns)
    # Expose the bed count under '病床數' even when the page calls it '開放床數'.
    if '病床數' not in df.columns and '開放床數' in df.columns:
        df = df.rename(columns={'開放床數': '病床數'})
    return df
# Streamlit UI: page title.
st.title("醫院床位分配表爬取工具")

# Scrape targets keyed by hospital display name. Each entry holds the page
# URL and the id of the <table> element passed to fetch_data.
hospital_options = {
    "台南醫院": dict(
        url="https://www.tmh.org.tw/tmh2016/ImpBD.aspx?Kind=2",
        table_id="ctl00_ContentPlaceHolder1_GV_Bed",
    ),
    "奇美醫院": dict(
        url="https://www.chimei.org.tw/%E4%BD%94%E5%BA%8A%E7%8E%87%E6%9F%A5%E8%A9%A2/%E4%BD%94%E5%BA%8A%E7%8E%87%E6%9F%A5%E8%A9%A2.aspx?ihospital=10&ffloor=",
        table_id="DG1",
    ),
    "成大醫院": dict(
        url="https://web.hosp.ncku.edu.tw/nckm/Bedstatus/BedStatus.aspx",
        table_id="GV_EmgInsure",
    ),
}

# Multi-select widget offering every configured hospital.
selected_hospitals = st.multiselect("選擇醫院", list(hospital_options))
def _scrape_hospitals(hospital_names):
    """Scrape each named hospital and return the merged bed table.

    A progress bar is advanced after every hospital. A hospital whose request
    fails is reported with ``st.error`` and one whose table is empty with
    ``st.warning``; neither stops the remaining hospitals from being scraped.

    Args:
        hospital_names: Non-empty list of keys of ``hospital_options``.

    Returns:
        The concatenation of all non-empty results, each tagged with a '醫院'
        column, or an empty DataFrame when nothing was scraped.
    """
    progress_bar = st.progress(0)
    frames = []
    for i, hospital_name in enumerate(hospital_names):
        hospital_data = hospital_options[hospital_name]
        try:
            df = fetch_data(hospital_data["url"], hospital_data["table_id"], hospital_name)
        except requests.RequestException as exc:
            # One unreachable site must not discard the other hospitals' data.
            st.error(f"{hospital_name} 的數據爬取失敗:{exc}")
        else:
            if df.empty:
                st.warning(f"{hospital_name} 的數據爬取結果為空,請檢查是否存在問題。")
            else:
                df['醫院'] = hospital_name
                frames.append(df)
        # Update the progress bar.
        progress_bar.progress((i + 1) / len(hospital_names))

    if not frames:
        return pd.DataFrame()
    # Concatenate once instead of re-copying the accumulated frame per hospital.
    return pd.concat(frames, ignore_index=True)


def _render_bed_charts(all_data):
    """Draw the pie and grouped bar charts of bed counts per bed type.

    Nothing is drawn, and a warning is shown instead, when the merged data
    lacks the '病床數' or '病床種類' column. ``all_data`` is not modified.
    """
    bed_column = '病床數'
    missing = [col for col in (bed_column, '病床種類') if col not in all_data.columns]
    if missing:
        st.warning(f"合併的數據缺少欄位 {'、'.join(missing)},無法繪製圖表。")
        return

    st.write("正在繪製圖表...")
    # Strip thousands separators first; pd.to_numeric would otherwise coerce
    # values such as "1,234" to NaN.
    bed_numbers = pd.to_numeric(
        all_data[bed_column].astype(str).str.replace(',', '', regex=False),
        errors='coerce',
    )
    chart_data = all_data.assign(**{bed_column: bed_numbers})
    bed_counts = chart_data.groupby(['醫院', '病床種類'])[bed_column].sum().reset_index()

    # Pie chart of the bed-type distribution.
    fig_pie = px.pie(bed_counts, values=bed_column, names='病床種類', title='各類型病床分佈',
                     hover_data=['醫院'], labels={bed_column: '病床數'})
    st.plotly_chart(fig_pie)

    # Grouped bar chart of bed counts per hospital.
    fig_bar = px.bar(bed_counts, x='醫院', y=bed_column, color='病床種類', title='醫院病床分佈',
                     labels={bed_column: '病床數'}, barmode='group')
    st.plotly_chart(fig_bar)


# Start scraping when the user presses the button.
if st.button("爬取資料"):
    if not selected_hospitals:
        # Nothing selected: ask for a selection instead of reporting a failed scrape.
        st.warning("請先選擇至少一間醫院。")
    else:
        st.write("正在爬取資料...")
        all_data = _scrape_hospitals(selected_hospitals)

        if all_data.empty:
            st.error("沒有成功爬取任何數據。")
        else:
            st.write("爬取完成,合併的數據如下:")
            st.dataframe(all_data)

            # Offer the merged data as a CSV download; the BOM in utf-8-sig
            # lets spreadsheet programs detect the encoding.
            csv = all_data.to_csv(index=False).encode('utf-8-sig')
            st.download_button(
                label="下載數據為 CSV",
                data=csv,
                file_name='合併的床位分配資料.csv',
                mime='text/csv'
            )

            # Draw the charts automatically.
            _render_bed_charts(all_data)