import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go


def fetch_data(hospital_url, table_id, hospital_name, timeout=15):
    """Scrape one hospital's bed-allocation table into a DataFrame.

    Args:
        hospital_url: URL of the page that contains the bed table.
        table_id: HTML ``id`` attribute of the ``<table>`` to parse.
        hospital_name: Display name of the hospital; "成大醫院" selects the
            header-driven parser, any other name selects the fixed
            five-column parser.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A DataFrame with one row per fully populated table row. It is empty
        when the table is not found or has no usable rows.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    # A timeout keeps an unresponsive site from hanging the app forever.
    response = requests.get(hospital_url, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'id': table_id})
    if table is None:
        # Table not found: return an empty DataFrame.
        return pd.DataFrame()

    rows = table.find_all('tr')
    data = []

    if hospital_name == "成大醫院":
        # This hospital's table carries its own header row.
        if not rows:
            return pd.DataFrame()
        columns = [th.text.strip() for th in rows[0].find_all("th")]
        # Normalise the category column name to the one the other
        # hospitals use.
        columns = ['病床種類' if col == '病床類別' else col for col in columns]
        for row in rows[1:]:
            row_data = [td.text.strip() for td in row.find_all("td")]
            # Keep only rows that match the header width and have no blank
            # cell; rows of any other width would either break DataFrame
            # construction or produce all-NaN records.
            if len(row_data) == len(columns) and all(row_data):
                data.append(row_data)
    else:
        # The remaining hospitals share a fixed five-column layout.
        columns = ['病床種類', '病床數', '住院人數', '空床數', '佔床率']
        for row in rows[1:]:  # skip the header row
            cols = row.find_all('td')
            if len(cols) == 5:
                row_data = [col.get_text(strip=True) for col in cols]
                if all(row_data):  # drop rows with any blank cell
                    data.append(row_data)

    df = pd.DataFrame(data, columns=columns)

    # Make sure every hospital exposes the bed count under '病床數'.
    if '病床數' not in df.columns and '開放床數' in df.columns:
        df = df.rename(columns={'開放床數': '病床數'})

    return df


# Streamlit UI
st.title("醫院床位分配表爬取工具")

# Hospitals offered in the multi-select, with their page URL and table id.
hospital_options = {
    "台南醫院": {
        "url": "https://www.tmh.org.tw/tmh2016/ImpBD.aspx?Kind=2",
        "table_id": "ctl00_ContentPlaceHolder1_GV_Bed",
    },
    "奇美醫院": {
        "url": "https://www.chimei.org.tw/%E4%BD%94%E5%BA%8A%E7%8E%87%E6%9F%A5%E8%A9%A2/%E4%BD%94%E5%BA%8A%E7%8E%87%E6%9F%A5%E8%A9%A2.aspx?ihospital=10&ffloor=",
        "table_id": "DG1",
    },
    "成大醫院": {
        "url": "https://web.hosp.ncku.edu.tw/nckm/Bedstatus/BedStatus.aspx",
        "table_id": "GV_EmgInsure",
    },
}

selected_hospitals = st.multiselect("選擇醫院", list(hospital_options.keys()))

# Start scraping when the user presses the button.
if st.button("爬取資料"):
    st.write("正在爬取資料...")
    progress_bar = st.progress(0)

    # Collect per-hospital frames and concatenate once after the loop.
    frames = []

    for i, hospital_name in enumerate(selected_hospitals):
        hospital_data = hospital_options[hospital_name]
        try:
            df = fetch_data(
                hospital_data["url"],
                hospital_data["table_id"],
                hospital_name,
            )
        except requests.RequestException as exc:
            # One unreachable site must not abort the remaining hospitals.
            st.warning(f"{hospital_name} 的資料請求失敗:{exc}")
        else:
            if df.empty:
                st.warning(f"{hospital_name} 的數據爬取結果為空,請檢查是否存在問題。")
            else:
                df['醫院'] = hospital_name
                frames.append(df)

        # Update the progress bar.
        progress_bar.progress((i + 1) / len(selected_hospitals))

    all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    if not all_data.empty:
        st.write("爬取完成,合併的數據如下:")
        st.dataframe(all_data)

        # Offer the merged data as a CSV download (BOM so Excel detects UTF-8).
        csv = all_data.to_csv(index=False).encode('utf-8-sig')
        st.download_button(
            label="下載數據為 CSV",
            data=csv,
            file_name='合併的床位分配資料.csv',
            mime='text/csv',
        )

        # Draw the charts automatically.
        st.write("正在繪製圖表...")

        bed_column = '病床數'
        type_column = '病床種類'

        if bed_column in all_data.columns and type_column in all_data.columns:
            # Strip thousands separators so values such as "1,234" are not
            # coerced to NaN, then convert to numbers.
            all_data[bed_column] = pd.to_numeric(
                all_data[bed_column].astype(str).str.replace(',', '', regex=False),
                errors='coerce',
            )
            bed_counts = (
                all_data.groupby(['醫院', type_column])[bed_column]
                .sum()
                .reset_index()
            )

            # Pie chart of bed-type distribution.
            fig_pie = px.pie(
                bed_counts,
                values=bed_column,
                names=type_column,
                title='各類型病床分佈',
                hover_data=['醫院'],
                labels={bed_column: '病床數'},
            )
            st.plotly_chart(fig_pie)

            # Grouped bar chart of beds per hospital.
            fig_bar = px.bar(
                bed_counts,
                x='醫院',
                y=bed_column,
                color=type_column,
                title='醫院病床分佈',
                labels={bed_column: '病床數'},
                barmode='group',
            )
            st.plotly_chart(fig_bar)
        else:
            # Without both columns the charts cannot be built; the table and
            # CSV download above remain available.
            st.warning("資料缺少「病床數」或「病床種類」欄位,無法繪製圖表。")
    else:
        st.error("沒有成功爬取任何數據。")