Spaces:

dryade36513
/

MooMooChecker

Sleeping

App Files Files Community

MooMooChecker / app.py

dryade36513

Upload 2 files

cff7733 verified 6 months ago

raw

history blame

5.26 kB

	# app.py
	import streamlit as st
	import jieba
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import cosine_similarity
	import difflib
	import numpy as np
	import time

	# Page metadata: browser-tab title and icon, wide layout, sidebar collapsed.
	st.set_page_config(
	    initial_sidebar_state="collapsed",
	    layout="wide",
	    page_icon="🐮",
	    page_title="哞哞文章相似度檢測",
	)

	# Custom CSS: enlarges the text-area font and defines the .big-font and
	# .result-font classes that the result markup refers to.
	_CUSTOM_CSS = """
	<style>
	.stTextArea textarea {
	font-size: 16px !important;
	}
	.big-font {
	font-size: 24px !important;
	font-weight: bold !important;
	color: #FF4B4B !important;
	}
	.result-font {
	font-size: 20px !important;
	color: #1E88E5 !important;
	}
	</style>
	"""
	st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

	# Centered page heading.
	_TITLE_HTML = "<h1 style='text-align: center; color: #FF4B4B;'>🐮 哞哞文章相似度檢測</h1>"
	st.markdown(_TITLE_HTML, unsafe_allow_html=True)

	# Two equal-width columns, one article per column.
	col1, col2 = st.columns(2)

	with col1:
	    st.markdown("### 📝 文章1")
	    # Streamlit asks for a non-empty widget label (for accessibility) and
	    # offers label_visibility to hide it; the markdown heading above is the
	    # visible caption.
	    text1 = st.text_area(
	        "文章1",
	        height=300,
	        placeholder="請在這裡輸入第一篇文章...",
	        key="text1",
	        label_visibility="collapsed",
	    )

	with col2:
	    st.markdown("### 📝 文章2")
	    text2 = st.text_area(
	        "文章2",
	        height=300,
	        placeholder="請在這裡輸入第二篇文章...",
	        key="text2",
	        label_visibility="collapsed",
	    )

	# Three equal columns; only the middle one is populated, which centers the
	# button on the page.
	col_btn1, col_btn2, col_btn3 = st.columns([1, 1, 1])

	with col_btn2:
	    start_btn = st.button("🚀 開始計算相似度", type="primary", use_container_width=True)

	def _word_tokens(text):
	    """Segment ``text`` with jieba, keeping only tokens with a word character.

	    jieba yields whitespace and punctuation as tokens of their own; they are
	    dropped so that they are not counted as shared vocabulary.
	    """
	    return [tok for tok in jieba.cut(text) if any(ch.isalnum() for ch in tok)]


	def _sentences(text):
	    """Split ``text`` on the full stop "。" and drop empty fragments.

	    A text ending in "。" would otherwise produce a trailing empty string that
	    two unrelated texts "share", inflating the sentence similarity.
	    """
	    return [part.strip() for part in text.split("。") if part.strip()]


	def calculate_similarity(text1, text2):
	    """Score how similar two texts are on a 0-100 scale.

	    The score is a weighted blend of three measures: word-set overlap
	    (intersection over union, weight 0.4), sentence-sequence similarity from
	    ``difflib.SequenceMatcher`` (weight 0.3) and TF-IDF cosine similarity
	    (weight 0.3).

	    Args:
	        text1: First text.
	        text2: Second text.

	    Returns:
	        ``(score, verdict)`` where ``score`` is a float rounded to two
	        decimals and ``verdict`` is the user-facing message for that score
	        band, or ``(None, None)`` when either text is empty or
	        whitespace-only.
	    """
	    if not text1.strip() or not text2.strip():
	        return None, None

	    words1 = _word_tokens(text1)
	    words2 = _word_tokens(text2)

	    # 1. Word overlap (intersection over union of the distinct words).
	    vocab1, vocab2 = set(words1), set(words2)
	    combined = vocab1 | vocab2
	    # Punctuation-only input leaves no words at all; avoid dividing by zero.
	    word_similarity = len(vocab1 & vocab2) / len(combined) if combined else 0.0

	    # 2. Sentence-level similarity over the ordered sentence lists.
	    sentence_matcher = difflib.SequenceMatcher(None, _sentences(text1), _sentences(text2))
	    sentence_similarity = sentence_matcher.ratio()

	    # 3. TF-IDF cosine similarity.
	    # The vectorizer's default token pattern only breaks on non-word
	    # characters, so unsegmented Chinese would become clause-long tokens.
	    # Feed it the jieba tokens instead, lower-cased to mirror the
	    # vectorizer's default preprocessing.
	    vectorizer = TfidfVectorizer(analyzer=lambda tokens: [tok.lower() for tok in tokens])
	    try:
	        tfidf_matrix = vectorizer.fit_transform([words1, words2])
	        cosine_sim = float(cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0])
	    except ValueError:
	        # scikit-learn raises ValueError for an empty vocabulary, i.e. when
	        # neither text contains a word token; treat that as no similarity.
	        cosine_sim = 0.0

	    # Weighted total, expressed as a percentage.
	    weights = (0.4, 0.3, 0.3)
	    total_similarity = (
	        word_similarity * weights[0]
	        + sentence_similarity * weights[1]
	        + cosine_sim * weights[2]
	    ) * 100
	    similarity_score = round(float(total_similarity), 2)

	    # Map the score onto its verdict band.
	    if similarity_score <= 30:
	        result = "兩篇文章沒有關係"
	    elif similarity_score <= 60:
	        result = "兩篇文章似乎有那麼一點關係"
	    elif similarity_score <= 80:
	        result = "兩篇文章很類似"
	    else:
	        result = "兩篇文章有抄襲犯罪的味道"

	    return similarity_score, result

	# Run only when the button was pressed and both articles contain something
	# other than whitespace. calculate_similarity returns (None, None) for blank
	# input, which previously left a finished progress bar on screen with no
	# result and no message.
	if start_btn and text1.strip() and text2.strip():
	    with st.spinner('🔍 分析中，請稍等...'):
	        # Cosmetic progress animation (100 steps of 10 ms); the actual
	        # computation happens after it.
	        progress_text = "計算中..."
	        my_bar = st.progress(0, text=progress_text)
	        for percent_complete in range(100):
	            time.sleep(0.01)
	            my_bar.progress(percent_complete + 1, text=progress_text)

	        # Compute the similarity.
	        similarity_score, result = calculate_similarity(text1, text2)

	        # Always clear the bar, whether or not a score was produced.
	        my_bar.empty()

	    if similarity_score is not None:
	        # Show the result.
	        st.markdown("---")
	        st.markdown("<h3 style='text-align: center;'>✨ 分析結果</h3>", unsafe_allow_html=True)

	        result_text = f"""
	<div style='text-align: center;'>
	<p class='big-font'>相似度：{similarity_score}%</p>
	<p class='result-font'>分析結果：{result}</p>
	</div>
	"""
	        st.markdown(result_text, unsafe_allow_html=True)

	        # Pick the emoji for the score band (same thresholds as the verdict).
	        if similarity_score <= 30:
	            emoji = "😌"
	        elif similarity_score <= 60:
	            emoji = "🤔"
	        elif similarity_score <= 80:
	            emoji = "😮"
	        else:
	            emoji = "😱"
	        st.markdown(f"<h1 style='text-align: center;'>{emoji}</h1>", unsafe_allow_html=True)
	else:
	    st.info('👆 請在上方輸入兩篇要比較的文章，然後點擊"開始計算相似度"按鈕')

	# Footer: legend explaining the score bands.
	# The separators are plain "|" characters. A backslash before "|" is not a
	# valid Python string escape (recent Python versions warn about it) and the
	# backslash would be kept in the string sent to the page.
	st.markdown("---")
	st.markdown("""
	<div style='text-align: center;'>
	<p style='color: gray; font-size: 14px;'>
	💡 判定標準：<br>
	0-30%：文章沒有關係 | 31-60%：稍有關係 | 61-80%：很類似 | 81-100%：疑似抄襲
	</p>
	</div>
	""", unsafe_allow_html=True)