PCHOME_ / app.py
Roberta2024's picture
Update app.py
7f4c8ea verified
# -*- coding: utf-8 -*-
import json
import time
from io import BytesIO
from urllib.parse import quote

import matplotlib as mpl
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import pandas as pd
import requests
import streamlit as st
# Search endpoint; the keyword and paging parameters are appended to it later
base_url = "https://ecshweb.pchome.com.tw/search/v3.3/all/results?q="
# Page heading
st.title("PCHOME 商品價格爬蟲分析")
# Search inputs: the keyword to look up and how many result pages to fetch
keyword = st.text_input(label="請輸入關鍵字:", value="平板")
num_pages = st.number_input(
    label="請輸入要爬取的頁數:",
    min_value=1,
    max_value=100,
    value=1,
)
# Data collection, analysis and visualization
REQUEST_TIMEOUT = 30  # seconds to wait on a single HTTP request before giving up
PAGE_DELAY = 10  # seconds to pause between page requests, to go easy on the server
FONT_URL = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"
FONT_PATH = "TaipeiSansTCBeta-Regular.ttf"
FONT_FAMILY = "Taipei Sans TC Beta"
MAX_PLOT_POINTS = 70  # only the first rows are drawn on the chart


def _fetch_products(search_term, page):
    """Fetch one page of search results and return it as a DataFrame.

    Args:
        search_term: Keyword to search for; it is percent-encoded before
            being placed in the query string.
        page: 1-based result page number.

    Returns:
        A DataFrame built from the ``prods`` list of the JSON response, or
        an empty DataFrame when the response carries no products.

    Raises:
        requests.RequestException: On network errors, timeouts or a
            non-2xx HTTP status.
        ValueError: If the response body is not valid JSON.
    """
    # Encode the keyword so characters such as '&', '#' or '+' cannot
    # break or alter the query string.
    url = f'{base_url}{quote(search_term, safe="")}&page={page}&sort=sale/dc'
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    payload = response.json()
    products = payload.get('prods') if isinstance(payload, dict) else None
    return pd.DataFrame(products or [])


def _ensure_chinese_font():
    """Make the CJK font available to matplotlib and select it.

    The font file is downloaded only when matplotlib has no font of that
    family registered yet, so repeated button clicks in the same process
    do not download it again.

    Returns:
        True when the font was selected, False when it could not be
        downloaded or loaded (matplotlib's current font is left as is).
    """
    registered = any(entry.name == FONT_FAMILY for entry in fm.fontManager.ttflist)
    if not registered:
        try:
            font_response = requests.get(FONT_URL, timeout=REQUEST_TIMEOUT)
            font_response.raise_for_status()
            with open(FONT_PATH, "wb") as font_file:
                font_file.write(font_response.content)
            fm.fontManager.addfont(FONT_PATH)
        except (requests.RequestException, OSError, RuntimeError):
            # NOTE(review): RuntimeError is what matplotlib is expected to
            # raise for an unreadable font file (e.g. an HTML page served in
            # place of the font) - confirm against the installed version.
            return False
    mpl.rc('font', family=FONT_FAMILY)
    return True


if st.button("開始爬取"):
    start_time = time.time()
    total_pages = int(num_pages)
    frames = []
    progress = st.progress(0)
    for i in range(1, total_pages + 1):
        try:
            page_frame = _fetch_products(keyword, i)
        except (requests.RequestException, ValueError) as exc:
            # Keep whatever was collected so far instead of crashing the app.
            st.error(f"第 {i} 頁爬取失敗: {exc}")
            break
        if page_frame.empty:
            # An empty page is treated as the end of the result list.
            break
        frames.append(page_frame)
        progress.progress(i / total_pages)
        if i < total_pages:
            # Rate-limit between requests; no need to wait after the last one.
            time.sleep(PAGE_DELAY)
    # The collection phase is over, whether it completed or was cut short.
    progress.progress(1.0)

    # Concatenate once, with a fresh 0..n-1 index: per-page frames all start
    # at 0, which would otherwise stack points from different pages on the
    # same x positions in the chart below.
    data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    if not {"name", "price"}.issubset(data.columns):
        st.warning("沒有取得任何商品資料,請更換關鍵字後再試一次。")
    else:
        # Check for null values
        st.write(f'Total null values: {data.isnull().sum().sum()}')

        # Data analysis
        df = data[["name", "price"]]
        mean_price = df["price"].mean()
        st.write(f'Mean price: {mean_price}')
        st.write(f'Max price: {df["price"].max()}')
        st.write(f'Min price: {df["price"].min()}')

        # Display the results
        st.write("爬取結果:", df)

        # The chart title contains Chinese text, which needs a CJK font.
        if not _ensure_chinese_font():
            st.warning("無法載入中文字型,圖表中的中文可能無法正確顯示。")

        # Visualization
        st.subheader("價格分布圖")
        plotted = df.iloc[:MAX_PLOT_POINTS]
        fig, ax = plt.subplots(figsize=(15, 8))
        ax.plot(plotted.index, plotted['price'], 'o', color='skyblue', markersize=8)
        ax.set_title('PCHOME 電商網站上商品售價', fontsize=20, fontweight='bold')
        ax.axhline(y=mean_price, color='red', linestyle='--', linewidth=2, label=f'Mean Price: {mean_price:.2f}')
        ax.set_xlabel('Index', fontsize=14)
        ax.set_ylabel('Price', fontsize=14)
        ax.tick_params(axis='x', rotation=45, labelsize=12)
        ax.tick_params(axis='y', labelsize=12)
        ax.legend(fontsize=12, loc='upper left')
        ax.grid(axis='y', linestyle='--', alpha=0.7)
        fig.tight_layout()
        st.pyplot(fig)
        # Release the figure; pyplot otherwise keeps every figure created
        # across reruns alive for the lifetime of the process.
        plt.close(fig)

    # Measure execution time
    end_time = time.time()
    execution_time = end_time - start_time
    st.write(f"Execution time: {execution_time:.2f} seconds")