# -*- coding: utf-8 -*-
"""Streamlit app: scrape PChome search results and chart product prices.

The user enters a search keyword and a page count. On button press the app
requests each result page from the PChome search endpoint, gathers the
``prods`` records into one DataFrame, reports basic price statistics and
plots the prices of the first rows together with the mean price.
"""
import json
import time
from io import BytesIO
from pathlib import Path
from urllib.parse import quote

import matplotlib as mpl
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import pandas as pd
import requests
import streamlit as st

# Define base URL
base_url = 'https://ecshweb.pchome.com.tw/search/v3.3/all/results?q='

# Font used so that the Chinese chart title renders correctly.
FONT_URL = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"
FONT_FILENAME = "TaipeiSansTCBeta-Regular.ttf"
FONT_FAMILY = 'Taipei Sans TC Beta'

REQUEST_TIMEOUT_SECONDS = 30  # never let a stalled connection hang the app
REQUEST_DELAY_SECONDS = 10    # pause between page requests
MAX_PLOTTED_ROWS = 70         # only the first rows are drawn on the chart


def _fetch_products(keyword, num_pages, progress):
    """Download up to *num_pages* result pages for *keyword*.

    Args:
        keyword: Search text typed by the user; it is percent-encoded before
            being placed in the query string.
        num_pages: Number of result pages to request, starting at page 1.
        progress: Streamlit progress bar updated after every fetched page.

    Returns:
        A DataFrame with one row per product and a fresh 0..n-1 index, or an
        empty DataFrame when nothing could be fetched.
    """
    frames = []
    # Escape the keyword so characters such as '&', '#' or spaces cannot
    # break or alter the query string.
    query = quote(keyword, safe="")
    for page in range(1, num_pages + 1):
        url = f'{base_url}{query}&page={page}&sort=sale/dc'
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
            response.raise_for_status()
            payload = json.loads(response.content)
        except (requests.RequestException, ValueError) as exc:
            # ValueError covers a body that is not valid JSON.
            st.warning(f"第 {page} 頁爬取失敗,已停止爬取後續頁面: {exc}")
            break

        products = payload.get('prods') if isinstance(payload, dict) else None
        if not products:
            # NOTE(review): a missing/empty 'prods' is assumed to mean there
            # are no further results - confirm against the API.
            break

        frames.append(pd.DataFrame(products))
        progress.progress(page / num_pages)
        if page < num_pages:
            # Wait only between requests; sleeping after the final page
            # would just delay the results.
            time.sleep(REQUEST_DELAY_SECONDS)

    if not frames:
        return pd.DataFrame()
    # One concat at the end avoids re-copying the data on every page, and
    # ignore_index gives unique row labels so plotted points do not overlap.
    return pd.concat(frames, ignore_index=True)


def _ensure_chart_font():
    """Make the custom font available to matplotlib.

    The font file is downloaded only when it is not already on disk.

    Returns:
        True when the font was registered and selected, False when the
        download failed and matplotlib's default font stays in effect.
    """
    font_path = Path(FONT_FILENAME)
    if not font_path.exists():
        try:
            font_response = requests.get(FONT_URL, timeout=REQUEST_TIMEOUT_SECONDS)
            font_response.raise_for_status()
        except requests.RequestException as exc:
            st.warning(f"字型下載失敗,圖表將使用預設字型: {exc}")
            return False
        font_path.write_bytes(font_response.content)

    try:
        fm.fontManager.addfont(str(font_path))
    except Exception:
        # Do not keep an unusable download cached; the next run retries.
        font_path.unlink(missing_ok=True)
        raise
    mpl.rc('font', family=FONT_FAMILY)
    return True


# Set up Streamlit app
st.title("PCHOME 商品價格爬蟲分析")

# Prompt user for keyword input
keyword = st.text_input("請輸入關鍵字:", "平板")
num_pages = st.number_input("請輸入要爬取的頁數:", min_value=1, max_value=100, value=1)

# Data collection
if st.button("開始爬取"):
    start_time = time.time()
    progress = st.progress(0)
    alldata = _fetch_products(keyword, int(num_pages), progress)

    # Load data directly from the collected DataFrame
    data = alldata

    if data.empty or not {"name", "price"}.issubset(data.columns):
        # Nothing usable came back; skip the analysis instead of crashing
        # on the column selection below.
        st.warning("沒有爬取到任何商品資料,請嘗試其他關鍵字。")
    else:
        # Check for null values
        st.write(f'Total null values: {data.isnull().sum().sum()}')

        # Data analysis
        df = data[["name", "price"]]
        mean_price = df["price"].mean()
        st.write(f'Mean price: {mean_price}')
        st.write(f'Max price: {df["price"].max()}')
        st.write(f'Min price: {df["price"].min()}')

        # Display the results
        st.write("爬取結果:", df)

        # Download (once) and set custom font
        _ensure_chart_font()

        # Visualization
        st.subheader("價格分布圖")
        fig, ax = plt.subplots(figsize=(15, 8))
        ax.plot(
            df.index[:MAX_PLOTTED_ROWS],
            df['price'][:MAX_PLOTTED_ROWS],
            'o',
            color='skyblue',
            markersize=8,
        )
        ax.set_title('PCHOME 電商網站上商品售價', fontsize=20, fontweight='bold')
        ax.axhline(
            y=mean_price,
            color='red',
            linestyle='--',
            linewidth=2,
            label=f'Mean Price: {mean_price:.2f}',
        )
        ax.set_xlabel('Index', fontsize=14)
        ax.set_ylabel('Price', fontsize=14)
        ax.tick_params(axis='x', rotation=45, labelsize=12)
        ax.tick_params(axis='y', labelsize=12)
        ax.legend(fontsize=12, loc='upper left')
        ax.grid(axis='y', linestyle='--', alpha=0.7)
        plt.tight_layout()
        st.pyplot(fig)
        # Release the figure; pyplot otherwise keeps every figure created
        # across reruns alive.
        plt.close(fig)

    # Measure execution time
    end_time = time.time()
    execution_time = end_time - start_time
    st.write(f"Execution time: {execution_time:.2f} seconds")