Roberta2024 committed on
Commit
4a00c92
·
verified ·
1 Parent(s): f5a9890

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -32
app.py CHANGED
@@ -4,36 +4,36 @@ from bs4 import BeautifulSoup
4
  import pandas as pd
5
 
6
  # 定義爬取數據的函數
7
def fetch_data(hospital_url, hospital_name, table_id):
    """Scrape one hospital's bed-allocation table into a DataFrame.

    Args:
        hospital_url: URL of the page that contains the table.
        hospital_name: Name stored in the '醫院' column of every returned row.
        table_id: Value of the ``id`` attribute of the target ``<table>``.

    Returns:
        A DataFrame with one row per table row that has exactly five
        non-empty ``<td>`` cells. It is empty when the table is not found
        or when no row qualifies.

    Raises:
        requests.RequestException: If the page cannot be fetched, including
            when the request times out.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(hospital_url, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'id': table_id})
    if table is None:
        # soup.find returns None for a missing id; calling find_all on it
        # would raise AttributeError.
        return pd.DataFrame()

    field_names = ('病床種類', '床位別數', '住院人數', '空床數', '佔床率')
    data = []
    # The first <tr> is treated as the header row and skipped.
    for row in table.find_all('tr')[1:]:
        cells = [td.get_text(strip=True) for td in row.find_all('td')]
        # Keep only rows with the expected cell count and no blank cell.
        if len(cells) != len(field_names) or not all(cells):
            continue
        record = dict(zip(field_names, cells))
        record['醫院'] = hospital_name
        data.append(record)

    return pd.DataFrame(data)
 
 
 
 
 
 
 
 
 
37
 
38
  # Streamlit UI
39
  st.title("醫院床位分配表爬取工具")
@@ -56,24 +56,23 @@ hospital_options = {
56
 
57
  selected_hospitals = st.multiselect("選擇醫院", list(hospital_options.keys()))
58
 
59
- # 当用户按下按钮时,开始爬取数据
60
  if st.button("爬取資料"):
61
  st.write("正在爬取資料...")
62
-
63
- # 顯示進度條
64
  progress_bar = st.progress(0)
65
-
66
  all_data = pd.DataFrame()
67
-
68
  for i, hospital_name in enumerate(selected_hospitals):
69
  hospital_data = hospital_options[hospital_name]
70
- df = fetch_data(hospital_data["url"], hospital_name, hospital_data["table_id"])
71
-
72
  if df.empty:
73
  st.warning(f"{hospital_name} 的數據爬取結果為空,請檢查是否存在問題。")
74
  else:
 
75
  all_data = pd.concat([all_data, df], ignore_index=True)
76
-
77
  # 更新進度條
78
  progress_bar.progress((i + 1) / len(selected_hospitals))
79
 
 
4
  import pandas as pd
5
 
6
  # 定義爬取數據的函數
7
def fetch_data(hospital_url, table_id, hospital_name):
    """Scrape one hospital's bed-allocation table into a DataFrame.

    Args:
        hospital_url: URL of the page that contains the table.
        table_id: Value of the ``id`` attribute of the target ``<table>``.
        hospital_name: Hospital display name. "成大醫院" selects the layout
            whose column names are read from the ``<th>`` cells of the first
            row; any other name uses the fixed five-column layout.

    Returns:
        A DataFrame with one row per table row whose ``<td>`` cells are all
        non-empty and match the number of columns. It is empty when the
        table is not found, has no rows, or no row qualifies.

    Raises:
        requests.RequestException: If the page cannot be fetched, including
            when the request times out.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(hospital_url, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'id': table_id})
    if table is None:
        return pd.DataFrame()

    rows = table.find_all('tr')
    if not rows:
        # No header row to read; indexing rows[0] would raise IndexError.
        return pd.DataFrame()

    header_row, *data_rows = rows

    if hospital_name == "成大醫院":
        # This hospital's table has a different structure: the column names
        # come from the header cells.
        columns = [th.text.strip() for th in header_row.find_all("th")]
        candidates = (
            [td.text.strip() for td in row.find_all("td")]
            for row in data_rows
        )
    else:
        # Defined before any iteration so it is bound even when the table
        # has no data rows.
        columns = ['病床種類', '床位別數', '住院人數', '空床數', '佔床率']
        candidates = (
            [td.get_text(strip=True) for td in row.find_all('td')]
            for row in data_rows
        )

    # Drop rows without cells, rows whose width does not match the columns
    # (the DataFrame constructor can reject those), and rows with a blank cell.
    data = [
        cells
        for cells in candidates
        if cells and len(cells) == len(columns) and all(cells)
    ]

    return pd.DataFrame(data, columns=columns)
37
 
38
  # Streamlit UI
39
  st.title("醫院床位分配表爬取工具")
 
56
 
57
  selected_hospitals = st.multiselect("選擇醫院", list(hospital_options.keys()))
58
 
59
+ # 當用戶按下按鈕時,開始爬取數據
60
  if st.button("爬取資料"):
61
  st.write("正在爬取資料...")
 
 
62
  progress_bar = st.progress(0)
63
+
64
  all_data = pd.DataFrame()
65
+
66
  for i, hospital_name in enumerate(selected_hospitals):
67
  hospital_data = hospital_options[hospital_name]
68
+ df = fetch_data(hospital_data["url"], hospital_data["table_id"], hospital_name)
69
+
70
  if df.empty:
71
  st.warning(f"{hospital_name} 的數據爬取結果為空,請檢查是否存在問題。")
72
  else:
73
+ df['醫院'] = hospital_name
74
  all_data = pd.concat([all_data, df], ignore_index=True)
75
+
76
  # 更新進度條
77
  progress_bar.progress((i + 1) / len(selected_hospitals))
78