Spaces:
Sleeping
Sleeping
File size: 4,489 Bytes
8e305b9 a70796f 688ffbc 8e305b9 f5a9890 4a00c92 8e305b9 4a00c92 8e305b9 688ffbc 4a00c92 eec48a9 4a00c92 7ce362d 4a00c92 688ffbc eec48a9 8e305b9 f5a9890 8e305b9 f5a9890 8e305b9 4a00c92 8e305b9 f5a9890 8e305b9 f5a9890 4a00c92 f5a9890 4a00c92 f5a9890 4a00c92 f5a9890 688ffbc f5a9890 8e305b9 f5a9890 688ffbc f5a9890 1cfe701 f5a9890 688ffbc 2d992de 7ce362d 98ba0e8 eec48a9 2d992de eec48a9 7ce362d 2d992de 688ffbc 2d992de 98ba0e8 7ce362d 2d992de a70796f f5a9890 688ffbc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
import logging

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
import streamlit as st
from bs4 import BeautifulSoup
# 定義爬取數據的函數
def fetch_data(hospital_url, table_id, hospital_name, timeout=10):
    """Scrape one hospital's bed-status table into a DataFrame.

    Args:
        hospital_url: URL of the page that contains the table.
        table_id: HTML ``id`` attribute of the ``<table>`` element to parse.
        hospital_name: Hospital label. "成大醫院" selects the parser that
            reads column names from the table's header row; any other
            value uses the fixed five-column layout.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A DataFrame with one row per table row whose cells are all
        non-empty and whose cell count matches the column count. An empty
        DataFrame is returned when the request fails, the table is not
        found, or no row qualifies.
    """
    try:
        # Without a timeout, requests can block forever on a stalled server.
        response = requests.get(hospital_url, timeout=timeout)
    except requests.RequestException as exc:
        # Treat a network failure like a missing table so the caller can
        # report this hospital and still process the others.
        logging.getLogger(__name__).warning(
            "Request to %s failed: %s", hospital_url, exc
        )
        return pd.DataFrame()

    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'id': table_id})
    if table is None:
        return pd.DataFrame()  # Table not found.

    rows = table.find_all('tr')
    uses_header_row = hospital_name == "成大醫院"

    if uses_header_row:
        if not rows:
            return pd.DataFrame()  # No header row to read column names from.
        columns = [th.text.strip() for th in rows[0].find_all("th")]
        # Normalise the category column name so all hospitals share it.
        columns = ['病床種類' if col == '病床類別' else col for col in columns]
    else:
        columns = ['病床種類', '病床數', '住院人數', '空床數', '佔床率']

    data = []
    for row in rows[1:]:  # Skip the header row.
        cells = row.find_all('td')
        # Rows with a different cell count (sub-headers, spacer rows, rows
        # without any <td>) cannot be aligned with the columns; skip them
        # instead of producing padded rows or a construction error.
        if not cells or len(cells) != len(columns):
            continue
        if uses_header_row:
            row_data = [td.text.strip() for td in cells]
        else:
            row_data = [td.get_text(strip=True) for td in cells]
        if all(row_data):  # Keep only rows where every cell has text.
            data.append(row_data)

    df = pd.DataFrame(data, columns=columns)

    # Make sure the bed-count column is called '病床數' for every hospital.
    if '病床數' not in df.columns and '開放床數' in df.columns:
        df = df.rename(columns={'開放床數': '病床數'})
    return df
# Streamlit UI
st.title("醫院床位分配表爬取工具")

# Hospitals offered in the selector: display name -> page URL and the HTML id
# of the bed table on that page.
hospital_options = {
    name: {"url": url, "table_id": table_id}
    for name, url, table_id in (
        (
            "台南醫院",
            "https://www.tmh.org.tw/tmh2016/ImpBD.aspx?Kind=2",
            "ctl00_ContentPlaceHolder1_GV_Bed",
        ),
        (
            "奇美醫院",
            "https://www.chimei.org.tw/%E4%BD%94%E5%BA%8A%E7%8E%87%E6%9F%A5%E8%A9%A2/%E4%BD%94%E5%BA%8A%E7%8E%87%E6%9F%A5%E8%A9%A2.aspx?ihospital=10&ffloor=",
            "DG1",
        ),
        (
            "成大醫院",
            "https://web.hosp.ncku.edu.tw/nckm/Bedstatus/BedStatus.aspx",
            "GV_EmgInsure",
        ),
    )
}

# Multi-select widget listing every configured hospital.
selected_hospitals = st.multiselect("選擇醫院", list(hospital_options))
# When the user clicks the button, scrape every selected hospital.
if st.button("爬取資料"):
    st.write("正在爬取資料...")
    progress_bar = st.progress(0)

    # Collect the per-hospital frames and concatenate once after the loop
    # instead of re-concatenating onto an empty seed frame on every pass.
    frames = []
    total = len(selected_hospitals)
    for i, hospital_name in enumerate(selected_hospitals, start=1):
        hospital_data = hospital_options[hospital_name]
        df = fetch_data(hospital_data["url"], hospital_data["table_id"], hospital_name)
        if df.empty:
            st.warning(f"{hospital_name} 的數據爬取結果為空,請檢查是否存在問題。")
        else:
            df['醫院'] = hospital_name
            frames.append(df)
        # Update the progress bar after each hospital.
        progress_bar.progress(i / total)

    all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    if not all_data.empty:
        st.write("爬取完成,合併的數據如下:")
        st.dataframe(all_data)

        # Offer the merged data as a CSV download. The BOM-prefixed UTF-8
        # encoding is kept from the original code.
        csv = all_data.to_csv(index=False).encode('utf-8-sig')
        st.download_button(
            label="下載數據為 CSV",
            data=csv,
            file_name='合併的床位分配資料.csv',
            mime='text/csv'
        )

        # Draw the charts automatically.
        st.write("正在繪製圖表...")

        bed_column = '病床數'
        type_column = '病床種類'
        # A scraped table may not expose the columns the charts rely on;
        # report that instead of failing with a KeyError.
        missing_columns = [
            col for col in (bed_column, type_column) if col not in all_data.columns
        ]
        if missing_columns:
            st.warning(f"缺少欄位 {', '.join(missing_columns)},無法繪製圖表。")
        else:
            # Prepare the data: non-numeric bed counts become NaN.
            all_data[bed_column] = pd.to_numeric(all_data[bed_column], errors='coerce')
            bed_counts = (
                all_data.groupby(['醫院', type_column])[bed_column].sum().reset_index()
            )

            # Pie chart of bed counts by bed type.
            fig_pie = px.pie(bed_counts, values=bed_column, names=type_column, title='各類型病床分佈',
                             hover_data=['醫院'], labels={bed_column: '病床數'})
            st.plotly_chart(fig_pie)

            # Grouped bar chart of bed counts per hospital.
            fig_bar = px.bar(bed_counts, x='醫院', y=bed_column, color=type_column, title='醫院病床分佈',
                             labels={bed_column: '病床數'}, barmode='group')
            st.plotly_chart(fig_bar)
    else:
        st.error("沒有成功爬取任何數據。")