Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,36 +4,36 @@ from bs4 import BeautifulSoup
|
|
4 |
import pandas as pd
|
5 |
|
6 |
# 定義爬取數據的函數
|
7 |
-
def fetch_data(hospital_url,
|
8 |
response = requests.get(hospital_url)
|
9 |
soup = BeautifulSoup(response.text, 'html.parser')
|
10 |
table = soup.find('table', {'id': table_id})
|
|
|
|
|
|
|
|
|
11 |
rows = table.find_all('tr')
|
12 |
-
|
13 |
data = []
|
14 |
-
for row in rows[1:]:
|
15 |
-
columns = row.find_all('td')
|
16 |
-
if len(columns) == 5:
|
17 |
-
bed_type = columns[0].get_text(strip=True)
|
18 |
-
bed_count = columns[1].get_text(strip=True)
|
19 |
-
inpatient_count = columns[2].get_text(strip=True)
|
20 |
-
empty_bed_count = columns[3].get_text(strip=True)
|
21 |
-
occupancy_rate = columns[4].get_text(strip=True)
|
22 |
-
|
23 |
-
# 检查所有数据是否为空
|
24 |
-
if not all([bed_type, bed_count, inpatient_count, empty_bed_count, occupancy_rate]):
|
25 |
-
continue
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
# Streamlit UI
|
39 |
st.title("醫院床位分配表爬取工具")
|
@@ -56,24 +56,23 @@ hospital_options = {
|
|
56 |
|
57 |
selected_hospitals = st.multiselect("選擇醫院", list(hospital_options.keys()))
|
58 |
|
59 |
-
#
|
60 |
if st.button("爬取資料"):
|
61 |
st.write("正在爬取資料...")
|
62 |
-
|
63 |
-
# 顯示進度條
|
64 |
progress_bar = st.progress(0)
|
65 |
-
|
66 |
all_data = pd.DataFrame()
|
67 |
-
|
68 |
for i, hospital_name in enumerate(selected_hospitals):
|
69 |
hospital_data = hospital_options[hospital_name]
|
70 |
-
df = fetch_data(hospital_data["url"],
|
71 |
-
|
72 |
if df.empty:
|
73 |
st.warning(f"{hospital_name} 的數據爬取結果為空,請檢查是否存在問題。")
|
74 |
else:
|
|
|
75 |
all_data = pd.concat([all_data, df], ignore_index=True)
|
76 |
-
|
77 |
# 更新進度條
|
78 |
progress_bar.progress((i + 1) / len(selected_hospitals))
|
79 |
|
|
|
4 |
import pandas as pd
|
5 |
|
6 |
# 定義爬取數據的函數
|
7 |
+
def fetch_data(hospital_url, table_id, hospital_name, timeout=10):
    """Scrape one hospital's bed-allocation table into a DataFrame.

    Args:
        hospital_url: URL of the page that contains the table.
        table_id: Value of the HTML ``id`` attribute of the ``<table>`` to read.
        hospital_name: Name of the hospital. "成大醫院" takes its column names
            from the ``<th>`` cells of the first row; every other name uses
            the fixed five-column layout.
        timeout: Seconds to wait for the HTTP response. Without a timeout an
            unresponsive server would block the app indefinitely.

    Returns:
        A DataFrame with one row per table row whose cells are all non-empty
        and whose cell count matches the column count. The DataFrame is empty
        when the table is not found, has no rows, or has no usable data rows.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    response = requests.get(hospital_url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'id': table_id})

    if table is None:
        return pd.DataFrame()  # table not found: report "no data"

    rows = table.find_all('tr')
    if not rows:
        # No header row to read and nothing to parse.
        return pd.DataFrame()

    data = []

    if hospital_name == "成大醫院":
        # This hospital's table has its own layout, so the column names are
        # read from the header row instead of being hard-coded.
        columns = [th.text.strip() for th in rows[0].find_all("th")]
        for row in rows[1:]:
            row_data = [td.text.strip() for td in row.find_all("td")]
            # Skip rows without <td> cells, rows whose width does not match
            # the header, and rows with any blank cell.
            if row_data and len(row_data) == len(columns) and all(row_data):
                data.append(row_data)
    else:
        # The remaining hospitals share the same five-column layout. The
        # names are defined before the loop so they exist even when the
        # table has no data rows.
        columns = ['病床種類', '床位別數', '住院人數', '空床數', '佔床率']
        for row in rows[1:]:  # skip the header row
            cols = row.find_all('td')
            if len(cols) == len(columns):
                row_data = [col.get_text(strip=True) for col in cols]
                if all(row_data):  # keep only fully populated rows
                    data.append(row_data)

    return pd.DataFrame(data, columns=columns)
|
37 |
|
38 |
# Streamlit UI
|
39 |
st.title("醫院床位分配表爬取工具")
|
|
|
56 |
|
57 |
selected_hospitals = st.multiselect("選擇醫院", list(hospital_options.keys()))

# Start scraping once the user presses the button.
if st.button("爬取資料"):
    st.write("正在爬取資料...")
    progress_bar = st.progress(0)

    # Collect the per-hospital frames and concatenate once after the loop.
    # This avoids concatenating against an empty DataFrame and avoids
    # re-copying the accumulated rows on every iteration.
    frames = []

    for i, hospital_name in enumerate(selected_hospitals):
        hospital_data = hospital_options[hospital_name]
        try:
            df = fetch_data(hospital_data["url"], hospital_data["table_id"], hospital_name)
        except requests.RequestException as exc:
            # A network failure for one hospital must not abort the others.
            st.error(f"{hospital_name} 的數據爬取失敗:{exc}")
        else:
            if df.empty:
                st.warning(f"{hospital_name} 的數據爬取結果為空,請檢查是否存在問題。")
            else:
                # Tag each row with the hospital it came from.
                df['醫院'] = hospital_name
                frames.append(df)

        # Update the progress bar.
        progress_bar.progress((i + 1) / len(selected_hospitals))

    all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
|
78 |
|