# Spaces: dryade36513 / MooMooChecker (Sleeping)
# File size: 5,261 Bytes
# cff7733

# app.py
import streamlit as st
import jieba
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import difflib
import numpy as np
import time

# Browser-tab metadata and overall page layout. This stays the first
# Streamlit command issued by the script.
_PAGE_SETTINGS = {
    "page_title": "哞哞文章相似度檢測",
    "page_icon": "🐮",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_PAGE_SETTINGS)

# Custom CSS: enlarges the text-area font and defines the two classes
# (`big-font`, `result-font`) used when rendering the analysis result.
_CUSTOM_CSS = """

<style>

    .stTextArea textarea {

        font-size: 16px !important;

    }

    .big-font {

        font-size: 24px !important;

        font-weight: bold !important;

        color: #FF4B4B !important;

    }

    .result-font {

        font-size: 20px !important;

        color: #1E88E5 !important;

    }

</style>

"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Centered page title.
st.markdown("<h1 style='text-align: center; color: #FF4B4B;'>🐮 哞哞文章相似度檢測</h1>", unsafe_allow_html=True)

# Two side-by-side columns, one text area per article.
col1, col2 = st.columns(2)

with col1:
    st.markdown("### 📝 文章1")
    # An empty widget label is discouraged by Streamlit (accessibility), so
    # pass a real label and hide it; the heading above is the visible caption.
    text1 = st.text_area(
        "文章1",
        height=300,
        placeholder="請在這裡輸入第一篇文章...",
        key="text1",
        label_visibility="collapsed",
    )

with col2:
    st.markdown("### 📝 文章2")
    text2 = st.text_area(
        "文章2",
        height=300,
        placeholder="請在這裡輸入第二篇文章...",
        key="text2",
        label_visibility="collapsed",
    )

# Three equal columns; the button sits in the middle one so it is centered.
col_btn1, col_btn2, col_btn3 = st.columns([1,1,1])

with col_btn2:
    start_btn = st.button("🚀 開始計算相似度", type="primary", use_container_width=True)

def calculate_similarity(text1, text2):
    """Score how similar two articles are and classify the result.

    Three measures are blended with fixed weights (0.4 / 0.3 / 0.3):

    1. Jaccard overlap of the jieba word sets of both texts.
    2. ``difflib.SequenceMatcher`` ratio over the "。"-separated sentences.
    3. Cosine similarity of TF-IDF vectors built from the jieba tokens.

    Args:
        text1: First article as a string.
        text2: Second article as a string.

    Returns:
        A ``(score, verdict)`` tuple where ``score`` is a percentage rounded
        to two decimals and ``verdict`` is the user-facing message for the
        band the score falls in. Returns ``(None, None)`` when either text
        is empty or whitespace-only.
    """
    if not text1.strip() or not text2.strip():
        return None, None

    # 1. Word overlap. Whitespace-only tokens (spaces, newlines) are dropped:
    # they carry no meaning and would otherwise count as shared "words".
    words1 = [w for w in jieba.cut(text1) if w.strip()]
    words2 = [w for w in jieba.cut(text2) if w.strip()]
    vocab1, vocab2 = set(words1), set(words2)
    union = vocab1 | vocab2
    word_similarity = len(vocab1 & vocab2) / len(union) if union else 0.0

    # 2. Sentence similarity. Empty pieces are removed because the trailing
    # "" produced by a final "。" would match itself and make two unrelated
    # single-sentence texts look 50% similar.
    sentences1 = [s.strip() for s in text1.split("。") if s.strip()]
    sentences2 = [s.strip() for s in text2.split("。") if s.strip()]
    sentence_matcher = difflib.SequenceMatcher(None, sentences1, sentences2)
    sentence_similarity = sentence_matcher.ratio()

    # 3. TF-IDF cosine similarity. The vectorizer's default token pattern
    # relies on word boundaries and does not segment Chinese, so the jieba
    # tokens are fed in directly through a callable analyzer (lower-cased to
    # keep Latin-script matching case-insensitive).
    vectorizer = TfidfVectorizer(analyzer=lambda tokens: [t.lower() for t in tokens])
    try:
        tfidf_matrix = vectorizer.fit_transform([words1, words2])
        cosine_sim = float(cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0])
    except ValueError:
        # Best effort: scikit-learn raises ValueError for an empty vocabulary;
        # treat that as "no TF-IDF similarity" rather than failing the request.
        cosine_sim = 0.0

    # Weighted blend of the three measures, expressed as a percentage.
    weights = [0.4, 0.3, 0.3]
    total_similarity = (word_similarity * weights[0] +
                        sentence_similarity * weights[1] +
                        cosine_sim * weights[2]) * 100

    similarity_score = round(total_similarity, 2)

    # Map the score to its verdict band.
    if similarity_score <= 30:
        result = "兩篇文章沒有關係"
    elif similarity_score <= 60:
        result = "兩篇文章似乎有那麼一點關係"
    elif similarity_score <= 80:
        result = "兩篇文章很類似"
    else:
        result = "兩篇文章有抄襲犯罪的味道"

    return similarity_score, result

# Run the analysis only when the button was pressed and both articles contain
# something other than whitespace; otherwise fall through to the usage hint.
# (Checking plain truthiness let whitespace-only input through, which left a
# finished progress bar on screen and no result.)
if start_btn and text1.strip() and text2.strip():
    with st.spinner('🔍 分析中，請稍等...'):
        # Cosmetic progress bar: roughly one second of animation before the
        # result is shown.
        progress_text = "計算中..."
        my_bar = st.progress(0, text=progress_text)
        for percent_complete in range(100):
            time.sleep(0.01)
            my_bar.progress(percent_complete + 1, text=progress_text)

        similarity_score, result = calculate_similarity(text1, text2)

        # Always remove the progress bar, whether or not a score came back.
        my_bar.empty()

        if similarity_score is not None:
            st.markdown("---")
            st.markdown("<h3 style='text-align: center;'>✨ 分析結果</h3>", unsafe_allow_html=True)

            # Keep the HTML on contiguous, unindented lines: in Markdown a
            # blank line ends an HTML block and an indented line after it is
            # rendered as a code block instead of markup.
            result_text = (
                "<div style='text-align: center;'>"
                f"<p class='big-font'>相似度：{similarity_score}%</p>"
                f"<p class='result-font'>分析結果：{result}</p>"
                "</div>"
            )
            st.markdown(result_text, unsafe_allow_html=True)

            # Pick the emoji for the score band (same thresholds as the
            # verdict text: <=30, <=60, <=80, above 80).
            for upper_bound, emoji in ((30, "😌"), (60, "🤔"), (80, "😮")):
                if similarity_score <= upper_bound:
                    break
            else:
                emoji = "😱"
            st.markdown(f"<h1 style='text-align: center;'>{emoji}</h1>", unsafe_allow_html=True)
else:
    st.info('👆 請在上方輸入兩篇要比較的文章，然後點擊"開始計算相似度"按鈕')

# Footer: legend explaining which score range maps to which verdict.
st.markdown("---")
# The HTML is written without blank lines or indentation: in Markdown a blank
# line terminates the <div> HTML block, and the indented <p> that followed
# would then be rendered as a code block rather than as markup.
st.markdown(
    "<div style='text-align: center;'>"
    "<p style='color: gray; font-size: 14px;'>"
    "💡 判定標準：<br>"
    "0-30%：文章沒有關係 | 31-60%：稍有關係 | 61-80%：很類似 | 81-100%：疑似抄襲"
    "</p>"
    "</div>",
    unsafe_allow_html=True,
)