# Spaces: Sleeping
# -*- coding: utf-8 -*-
import json
import os
import time
from io import BytesIO
from urllib.parse import quote

import matplotlib as mpl
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import pandas as pd
import requests
import streamlit as st

# Set up Streamlit app
st.title("PCHOME 商品價格爬蟲分析")

# Prompt user for the search keyword and how many result pages to crawl
keyword = st.text_input("請輸入關鍵字:", "平板")
num_pages = st.number_input("請輸入要爬取的頁數:", min_value=1, max_value=100, value=1)

# Define base URL
base_url = 'https://ecshweb.pchome.com.tw/search/v3.3/all/results?q='

# Crawl / plot settings
REQUEST_TIMEOUT = 30   # seconds to wait for any single HTTP request
PAGE_DELAY = 10        # seconds to pause between consecutive page requests
MAX_PLOT_POINTS = 70   # number of products shown in the scatter plot
FONT_PATH = "TaipeiSansTCBeta-Regular.ttf"
FONT_URL = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"


def _fetch_products(search_term, page):
    """Fetch one page of search results and return it as a DataFrame.

    The returned frame is empty when the response carries no 'prods' entry.

    Raises:
        requests.RequestException: on a network failure or a non-2xx status.
        ValueError: when the response body is not valid JSON.
    """
    # Percent-encode the keyword so characters such as '&' or '#' cannot
    # alter the query string.
    url = f'{base_url}{quote(search_term, safe="")}&page={page}&sort=sale/dc'
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    payload = json.loads(response.content)
    products = payload.get('prods') if isinstance(payload, dict) else None
    return pd.DataFrame(products or [])


def _load_cjk_font():
    """Register the Chinese font with matplotlib, downloading it if needed.

    Returns True when the font is active and False when the default font is
    kept because the download or the registration failed.
    """
    try:
        # Reuse a previously downloaded copy instead of fetching it each run.
        if not os.path.exists(FONT_PATH):
            font_response = requests.get(FONT_URL, timeout=REQUEST_TIMEOUT)
            font_response.raise_for_status()
            with open(FONT_PATH, "wb") as font_file:
                font_file.write(font_response.content)
        fm.fontManager.addfont(FONT_PATH)
    except (requests.RequestException, OSError, RuntimeError) as exc:
        # Drop a possibly corrupt file so the next run downloads it again.
        try:
            os.remove(FONT_PATH)
        except OSError:
            pass  # nothing was written, or it is already gone
        st.warning(f"Could not load the Chinese font; using the default font: {exc}")
        return False
    mpl.rc('font', family='Taipei Sans TC Beta')
    return True


def _show_analysis(data):
    """Render null count, price statistics, the result table and the plot."""
    # Check for null values
    st.write(f'Total null values: {data.isnull().sum().sum()}')

    # Data analysis
    df = data[["name", "price"]]
    mean_price = df["price"].mean()
    st.write(f'Mean price: {mean_price}')
    st.write(f'Max price: {df["price"].max()}')
    st.write(f'Min price: {df["price"].min()}')

    # Display the results
    st.write("爬取結果:", df)

    # Download (once) and set custom font
    _load_cjk_font()

    # Visualization
    st.subheader("價格分布圖")
    shown = df.head(MAX_PLOT_POINTS)
    fig, ax = plt.subplots(figsize=(15, 8))
    ax.plot(shown.index, shown['price'], 'o', color='skyblue', markersize=8)
    ax.set_title('PCHOME 電商網站上商品售價', fontsize=20, fontweight='bold')
    ax.axhline(y=mean_price, color='red', linestyle='--', linewidth=2, label=f'Mean Price: {mean_price:.2f}')
    ax.set_xlabel('Index', fontsize=14)
    ax.set_ylabel('Price', fontsize=14)
    ax.tick_params(axis='x', rotation=45, labelsize=12)
    ax.tick_params(axis='y', labelsize=12)
    ax.legend(fontsize=12, loc='upper left')
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()
    st.pyplot(fig)
    # Release the figure; otherwise figures pile up across Streamlit reruns.
    plt.close(fig)


# Data collection
if st.button("開始爬取"):
    start_time = time.time()
    total_pages = int(num_pages)
    pages = []
    progress = st.progress(0)
    for i in range(1, total_pages + 1):
        try:
            page_df = _fetch_products(keyword, i)
        except (requests.RequestException, ValueError) as exc:
            # Keep whatever was collected so far instead of crashing the app.
            st.error(f"Failed to fetch page {i}: {exc}")
            break
        if page_df.empty:
            # No products on this page: the result list is exhausted.
            break
        pages.append(page_df)
        progress.progress(i / total_pages)
        # Pause between requests, but not after the final one.
        if i < total_pages:
            time.sleep(PAGE_DELAY)
    progress.progress(1.0)

    if not pages:
        st.warning("No products were collected for this keyword.")
    else:
        # A single concat with a fresh 0..n-1 index keeps row labels unique,
        # so the plot's x-axis does not overlay pages on top of each other.
        data = pd.concat(pages, ignore_index=True)
        missing = {"name", "price"} - set(data.columns)
        if missing:
            st.error(f"Unexpected response format; missing columns: {sorted(missing)}")
        else:
            _show_analysis(data)

    # Measure execution time
    end_time = time.time()
    execution_time = end_time - start_time
    st.write(f"Execution time: {execution_time:.2f} seconds")