File size: 2,788 Bytes
82e75b7
 
 
 
 
 
 
 
 
237f2f6
82e75b7
 
7f4c8ea
82e75b7
 
8e4d40f
223bdd4
82e75b7
 
 
 
 
 
0146dd4
82e75b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0146dd4
 
 
82e75b7
 
223bdd4
 
 
82e75b7
237f2f6
 
 
 
 
82e75b7
 
 
 
 
0146dd4
7f4c8ea
0146dd4
 
82e75b7
0146dd4
 
82e75b7
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# -*- coding: utf-8 -*-
import json
import time
from io import BytesIO
from pathlib import Path
from urllib.parse import quote

import matplotlib as mpl
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import pandas as pd
import requests
import streamlit as st

# Page heading shown at the top of the Streamlit app
st.title(body="PCHOME 商品價格爬蟲分析")

# Search parameters supplied by the user: the keyword to query and how many
# result pages to request (integer widget limited to 1-100, default 1)
keyword = st.text_input(label="請輸入關鍵字:", value="平板")
num_pages = st.number_input(
    label="請輸入要爬取的頁數:",
    min_value=1,
    max_value=100,
    value=1,
)

# Search endpoint prefix; the keyword and paging arguments are appended to it
base_url = "https://ecshweb.pchome.com.tw/search/v3.3/all/results?q="

# Data collection, analysis and visualization
_REQUEST_TIMEOUT = 15  # seconds to wait on any single HTTP request
_PAGE_DELAY = 10  # seconds to pause between consecutive page requests
_MAX_PLOTTED = 70  # number of leading rows drawn on the chart
_FONT_FAMILY = 'Taipei Sans TC Beta'
_FONT_PATH = Path("TaipeiSansTCBeta-Regular.ttf")
_FONT_URL = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"


def _fetch_products(search_term, page_count, progress_bar):
    """Download search result pages and return them as a single DataFrame.

    Args:
        search_term: Raw keyword typed by the user; it is percent-encoded
            before being placed in the URL.
        page_count: Number of result pages to request, starting at page 1.
        progress_bar: Streamlit progress element updated after each page.

    Returns:
        A DataFrame with one row per product and a fresh 0..N-1 index, or an
        empty DataFrame when nothing could be fetched.
    """
    # Encode the keyword so characters such as '&', '#' or spaces cannot
    # break or alter the query string.
    encoded_term = quote(search_term, safe="")
    frames = []

    for page in range(1, page_count + 1):
        url = f'{base_url}{encoded_term}&page={page}&sort=sale/dc'
        try:
            response = requests.get(url, timeout=_REQUEST_TIMEOUT)
            response.raise_for_status()
            payload = response.json()
        except (requests.RequestException, ValueError) as exc:
            # Keep whatever was collected so far instead of crashing the app.
            st.warning(f"第 {page} 頁爬取失敗: {exc}")
            break

        products = payload.get('prods') if isinstance(payload, dict) else None
        if not products:
            # No product list in the response: there is nothing more to read.
            break

        frames.append(pd.DataFrame(products))
        progress_bar.progress(page / page_count)

        # Pause only between requests; waiting after the last page is wasted.
        if page < page_count:
            time.sleep(_PAGE_DELAY)

    # Fill the bar even when the loop stopped early so it does not look hung.
    progress_bar.progress(1.0)

    if not frames:
        return pd.DataFrame()
    # One concat at the end avoids re-copying the data on every page, and
    # ignore_index gives each row a unique label instead of repeating the
    # per-page labels.
    return pd.concat(frames, ignore_index=True)


def _register_chart_font():
    """Make the Traditional Chinese font available to matplotlib.

    The font file is downloaded only when it is not already on disk. On any
    failure a warning is shown and matplotlib keeps its current font, so the
    chart is still rendered.
    """
    already_registered = any(
        entry.name == _FONT_FAMILY for entry in fm.fontManager.ttflist
    )
    if not already_registered:
        try:
            if not _FONT_PATH.is_file() or _FONT_PATH.stat().st_size == 0:
                font_response = requests.get(_FONT_URL, timeout=_REQUEST_TIMEOUT)
                font_response.raise_for_status()
                _FONT_PATH.write_bytes(font_response.content)
            fm.fontManager.addfont(str(_FONT_PATH))
        except (requests.RequestException, OSError, RuntimeError, ValueError) as exc:
            # Remove a possibly corrupt download so the next run retries it
            # rather than reusing the bad file; cleanup itself is best-effort.
            try:
                _FONT_PATH.unlink()
            except OSError:
                pass
            st.warning(f"字型載入失敗,圖表中的中文可能無法正常顯示: {exc}")
            return
    mpl.rc('font', family=_FONT_FAMILY)


def _plot_prices(prices, mean_price):
    """Render a scatter of the first rows' prices with a mean reference line.

    Args:
        prices: Numeric price Series; only the first ``_MAX_PLOTTED`` rows
            are drawn.
        mean_price: Mean price, drawn as a dashed horizontal line.
    """
    shown = prices.iloc[:_MAX_PLOTTED]
    fig, ax = plt.subplots(figsize=(15, 8))
    ax.plot(shown.index, shown, 'o', color='skyblue', markersize=8)
    ax.set_title('PCHOME 電商網站上商品售價', fontsize=20, fontweight='bold')
    ax.axhline(y=mean_price, color='red', linestyle='--', linewidth=2, label=f'Mean Price: {mean_price:.2f}')
    ax.set_xlabel('Index', fontsize=14)
    ax.set_ylabel('Price', fontsize=14)
    ax.tick_params(axis='x', rotation=45, labelsize=12)
    ax.tick_params(axis='y', labelsize=12)
    ax.legend(fontsize=12, loc='upper left')
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()
    st.pyplot(fig)
    # pyplot keeps every figure alive until closed; release it so repeated
    # runs in the same server process do not accumulate figures.
    plt.close(fig)


def _run_analysis(search_term, page_count):
    """Scrape, summarise and chart product prices for one search.

    Args:
        search_term: Keyword entered by the user.
        page_count: Number of result pages to fetch.
    """
    search_term = search_term.strip()
    if not search_term:
        st.warning("請輸入關鍵字後再開始爬取。")
        return

    progress = st.progress(0)
    data = _fetch_products(search_term, page_count, progress)

    # Guard against an empty result set or a response without the columns
    # the analysis relies on.
    if data.empty or not {"name", "price"}.issubset(data.columns):
        st.warning("查無商品資料,請嘗試其他關鍵字。")
        return

    # Check for null values
    st.write(f'Total null values: {data.isnull().sum().sum()}')

    # Data analysis on a copy so the coercion below does not write into a
    # view of the scraped data.
    df = data[["name", "price"]].copy()
    df["price"] = pd.to_numeric(df["price"], errors="coerce")
    if df["price"].dropna().empty:
        st.warning("商品價格資料無法解析,無法進行分析。")
        return

    mean_price = df["price"].mean()
    st.write(f'Mean price: {mean_price}')
    st.write(f'Max price: {df["price"].max()}')
    st.write(f'Min price: {df["price"].min()}')

    # Display the results
    st.write("爬取結果:", df)

    # Visualization
    _register_chart_font()
    st.subheader("價格分布圖")
    _plot_prices(df["price"], mean_price)


if st.button("開始爬取"):
    start_time = time.time()
    _run_analysis(keyword, int(num_pages))

    # Measure execution time
    execution_time = time.time() - start_time
    st.write(f"Execution time: {execution_time:.2f} seconds")