dafiqrahman
committed on
Commit
•
a91d85c
1
Parent(s):
a637ca3
update app
Browse files
Adding new features: select which model is used to predict the sentiment, and add a new page to look at each model's information.
- app.py +14 -3
- assets/data.csv +98 -100
- assets/df_model.pkl +3 -0
- assets/valid.csv +0 -0
- indobert/config.json +7 -7
- indobert/pytorch_model.bin +1 -1
- pages/1__model_information.py +36 -0
- requirements.txt +3 -0
- script/__pycache__/functions.cpython-310.pyc +0 -0
- script/__pycache__/text_proc.cpython-310.pyc +0 -0
- script/functions.py +65 -5
- script/text_proc.py +13 -5
app.py
CHANGED
@@ -1,3 +1,5 @@
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import script.functions as fn
|
@@ -7,15 +9,25 @@ import matplotlib.pyplot as plt
|
|
7 |
import script.text_proc as tp
|
8 |
from sentence_transformers import SentenceTransformer
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
# Load data
|
11 |
# add tiwtter logo inside title
|
12 |
st.markdown("<h1 style='text-align: center;'>π Twitter Sentiment Analysis App</h1>", unsafe_allow_html=True)
|
13 |
st.write("Aplikasi sederhana untuk melakukan analisis sentimen terhadap tweet yang diinputkan dan mengekstrak topik dari setiap sentimen.")
|
14 |
# streamlit selectbox simple and advanced
|
15 |
|
16 |
-
sb1,sb2 = st.columns([
|
17 |
with sb1:
|
18 |
option = st.selectbox('Pilih Mode Pencarian',('Simple','Advanced'))
|
|
|
|
|
19 |
|
20 |
if option == 'Simple':
|
21 |
# create col1 and col2
|
@@ -37,11 +49,10 @@ submit = st.button("πCari Tweet")
|
|
37 |
st.caption("semakin banyak tweet yang diambil maka semakin lama proses analisis sentimen")
|
38 |
|
39 |
if submit:
|
40 |
-
# df = pd.read_csv("assets/data.csv")
|
41 |
with st.spinner('Mengambil data dari twitter... (1/2)'):
|
42 |
df = fn.get_tweets(input, length, option)
|
43 |
with st.spinner('Melakukan Prediksi Sentimen... (2/2)'):
|
44 |
-
df = fn.get_sentiment(df)
|
45 |
df.to_csv('assets/data.csv',index=False)
|
46 |
# plot
|
47 |
st.write("<b>Preview Dataset</b>",unsafe_allow_html=True)
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
import streamlit as st
|
4 |
import pandas as pd
|
5 |
import script.functions as fn
|
|
|
9 |
import script.text_proc as tp
|
10 |
from sentence_transformers import SentenceTransformer
|
11 |
|
12 |
+
|
13 |
+
st.set_page_config(
|
14 |
+
page_title="twitter sentiment analysis",
|
15 |
+
page_icon="π",
|
16 |
+
)
|
17 |
+
|
18 |
+
st.sidebar.markdown("π Twitter Sentiment Analysis App")
|
19 |
+
|
20 |
# Load data
|
21 |
# add tiwtter logo inside title
|
22 |
st.markdown("<h1 style='text-align: center;'>π Twitter Sentiment Analysis App</h1>", unsafe_allow_html=True)
|
23 |
st.write("Aplikasi sederhana untuk melakukan analisis sentimen terhadap tweet yang diinputkan dan mengekstrak topik dari setiap sentimen.")
|
24 |
# streamlit selectbox simple and advanced
|
25 |
|
26 |
+
sb1,sb2 = st.columns([2,4])
|
27 |
with sb1:
|
28 |
option = st.selectbox('Pilih Mode Pencarian',('Simple','Advanced'))
|
29 |
+
with sb2:
    # The selected string is passed verbatim to fn.get_sentiment, which
    # compares it with "==". The IndoBERT entry must therefore carry no
    # trailing space; with one, it never matches the
    # "IndoBERT (Accurate,Slow)" branch and falls through to the lookup in
    # the pickled model table instead.
    option_model = st.selectbox(
        'Pilih Model',
        (
            "IndoBERT (Accurate,Slow)",
            'Naive Bayes',
            'Logistic Regression (Less Accurate,Fast)',
            'XGBoost',
            'Catboost',
            'SVM',
            'Random Forest',
        ),
    )
|
31 |
|
32 |
if option == 'Simple':
|
33 |
# create col1 and col2
|
|
|
49 |
st.caption("semakin banyak tweet yang diambil maka semakin lama proses analisis sentimen")
|
50 |
|
51 |
if submit:
|
|
|
52 |
with st.spinner('Mengambil data dari twitter... (1/2)'):
|
53 |
df = fn.get_tweets(input, length, option)
|
54 |
with st.spinner('Melakukan Prediksi Sentimen... (2/2)'):
|
55 |
+
df = fn.get_sentiment(df,option_model)
|
56 |
df.to_csv('assets/data.csv',index=False)
|
57 |
# plot
|
58 |
st.write("<b>Preview Dataset</b>",unsafe_allow_html=True)
|
assets/data.csv
CHANGED
@@ -1,101 +1,99 @@
|
|
1 |
sentiment,content
|
2 |
-
netral,"
|
3 |
-
netral,"
|
4 |
-
|
5 |
-
netral,
|
6 |
-
|
7 |
-
netral,
|
8 |
-
|
9 |
-
netral,"
|
10 |
-
netral,"
|
11 |
-
|
12 |
-
netral,
|
13 |
-
netral,
|
14 |
-
netral
|
15 |
-
netral,"
|
16 |
-
|
17 |
-
|
18 |
-
netral,
|
19 |
-
netral,"
|
20 |
-
netral,
|
21 |
-
netral,
|
22 |
-
|
23 |
-
netral,"
|
24 |
-
netral,"
|
25 |
-
netral,
|
26 |
-
|
27 |
-
netral
|
28 |
-
netral,
|
29 |
-
|
30 |
-
netral,"
|
31 |
-
netral,"
|
32 |
-
netral,"
|
33 |
-
netral,
|
34 |
-
netral,
|
35 |
-
netral,"
|
36 |
-
netral,"
|
37 |
-
netral,
|
38 |
-
netral,"Hai, Kak.
|
39 |
-
|
40 |
-
|
41 |
-
netral,
|
42 |
-
netral,
|
43 |
-
netral,"
|
44 |
-
netral,"
|
45 |
-
|
46 |
-
netral,"
|
47 |
-
netral,"
|
48 |
-
|
49 |
-
netral,"
|
50 |
-
|
51 |
-
netral,
|
52 |
-
netral,
|
53 |
-
netral,"
|
54 |
-
|
55 |
-
netral,"
|
56 |
-
netral,
|
57 |
-
netral,
|
58 |
-
|
59 |
-
netral,"Hai,
|
60 |
-
|
61 |
-
|
62 |
-
netral,Hai Kak.
|
63 |
-
netral,
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
netral,"
|
69 |
-
netral,
|
70 |
-
netral,
|
71 |
-
netral,
|
72 |
-
netral,
|
73 |
-
netral,
|
74 |
-
|
75 |
-
netral,
|
76 |
-
netral,
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
netral,
|
82 |
-
netral,
|
83 |
-
netral,
|
84 |
-
netral,
|
85 |
-
netral,
|
86 |
-
|
87 |
-
netral,"
|
88 |
-
netral,
|
89 |
-
netral,
|
90 |
-
netral,"
|
91 |
-
netral,"
|
92 |
-
netral,"
|
93 |
-
netral,
|
94 |
-
netral,
|
95 |
-
|
96 |
-
|
97 |
-
netral,
|
98 |
-
netral,"
|
99 |
-
netral,
|
100 |
-
netral,"Hai Kak Risnawati, maaf banget ketidaknyamanan kamu. Boleh kamu infokan nama email kamu yang mungkin terdaftar di sebagai akun Traveloka melalui DM Atau kalaupun tidak ada, silakan infokan nama email kamu saja Kak, supaya mimin bisa bantu cek kalau memang ada indik"
|
101 |
-
netral,Oke minn ku dm yah
|
|
|
1 |
sentiment,content
|
2 |
+
netral,"Gabisa DM min ,gaada tempat buat dm kamu hehe"
|
3 |
+
netral,"Halo Haeni, mimin belum menerima DM kamu, yuk infokan mimin kembali melalui DM ya. Terima kasih. -OR"
|
4 |
+
positif,"halo ini saya mau atur ulang jam penerbangan, tulisannya tersedia untuk re schedule, tapi setelah di ajukan kenapa tidak tertolak dan tulisannya tidak tersedia untuk penerbangan ini ya ,padahal ada pilihan untuk jam yang ingin diubah"
|
5 |
+
netral,"Well, the night comes, but my refund money is still no where in sight. 92 days of waiting and still counting. Been told to wait for another 2x24hr time."
|
6 |
+
netral,Mimin meluncur ke DM kamu ya. Ditunggu -TK
|
7 |
+
netral,Hai Riri. DM kamu sudah mimin balas ya. Terima kasih. -DA
|
8 |
+
netral,min cek dm
|
9 |
+
netral,"hallo, min. Cek dm ya. Terima kasih"
|
10 |
+
netral,"Siap, Kak. Udah mimin balas ya. -SM"
|
11 |
+
negatif,Aamin. ehem ehem
|
12 |
+
netral,Tolong dibalas kka
|
13 |
+
netral,Hai Kak. Mohon maaf banget ya atas kendala kamu. Supaya nyaman dan aman ngobrolnya boleh informasikan ke mimin nomor pemesanan kamu via DM Kak Supaya mimin bisa cek lebih lanjut. Mimin tunggu ya Kak. Terima kasih. -DA
|
14 |
+
netral,kalau purchase pending artinya apa ya Cek akun udah ada receipt nya. Mohon dibantu ya min. Thanks.
|
15 |
+
netral,"Sudah bsa,sudah aku DM kak"
|
16 |
+
negatif,Ayam rasa rasa yang dulu pernah ada.
|
17 |
+
positif,"Aku coba DM tapi ga bsa , coba KK dluan ya"
|
18 |
+
netral,"Hai, Kak. Untuk lakukan pengecekan lebih lanjut, yuk infoin nomor pesanan kamu dulu via DM ya. Mimin tunggu -TK"
|
19 |
+
netral,"kak aku mau reschedule tiket balik, biaya yg sebelumnya akan direfund atau bagaimana"
|
20 |
+
netral,min tolong cek DM saya mau reschedule tiket balik
|
21 |
+
netral,admin tolong cek DM
|
22 |
+
netral,Cek DM min
|
23 |
+
netral,"Hai, Kak. Mohon maaf atas kendala yang kamu alami. Untuk lakukan pengecekan lebih lanjut, mohon infokan nomor pesanan Traveloka kamu via DM ya. Mimin tunggu -TK"
|
24 |
+
netral,"Hai Kak Hendri, pada dasarnya untuk asuransi pada produk yang kamu pesan bersifat optional ya Kak, dapat dipesan atau tidak. Apabila kamu memiliki ketidaksesuaian silakan lirkan screenshot dan detail kendalanya via DM, agar mimin dapat coba cek lebih lanjut, terima k"
|
25 |
+
netral,Admin saya mau bertanya² ke CS tentang pembatalan penerbangan maskapai melalui aplikasi traveloka. Saya sudah hubungi CS di apk tapi belum ada tanggapan. Harusnya besok saya sudah flight
|
26 |
+
positif,"kenapa setiap kita booking traveloka kita selalu harus bayar asuransi Chubb, saya sudah ada asuransi sendiri ngapain banyak2"
|
27 |
+
netral,-PA
|
28 |
+
netral,Liburan ke Jawa Barat seru dan murah murah kak :D
|
29 |
+
netral,"Hai, Kak. Mimin udah balas DM kamu ya. Yuk cek DM dari mimin Kak. -SM"
|
30 |
+
netral,"hi min, cek dm dong"
|
31 |
+
netral,"Hai Kak. Mimin sudah balas DM kamu ya, silakan dicek kembali. Terima kasih. -KA"
|
32 |
+
netral,"Hi min, cek DM ya terima kasih"
|
33 |
+
netral,mohon cek dm
|
34 |
+
netral,Gk bisa dm
|
35 |
+
netral,"Halo, Kak Budi. Pindah ke DMyuk, biar mimin bisa jelasin detail. -Mimin tunggu DM-nya. Terima kasih. -OE"
|
36 |
+
netral,"halo min, tolong cek DM yaa"
|
37 |
+
netral,"Kalau kamu terkendala hapus akun dengan cara sebelumnya, silakan email langsung ke tim internal mimin dengan alamat email privacy ya. Infokan aja kalo kamu mau hapus akun Traveloka dengan email xxx atau telepon xxx. Terima kasih. -AM 2/2"
|
38 |
+
netral,"Hai, Kak. kamu bisa nonaktifkan akun kamu dengan cara Self Deactivation. Kamu klik forgot password, nanti akan ada link untuk nonaktifkan akun yang bisa kamu klik ya dengan pilih Self Deactivation 1/2."
|
39 |
+
positif,"hi admin saya mau bayar via uangku kok gaada pilihannya ya Cuma asa debit card sama paylater, mohon bantuannya"
|
40 |
+
positif,Ini link apa coba
|
41 |
+
netral,"Fly Eat Sleep Shop Repeat No Refund on Cancelation please add it to your tagline , -I Dont think do do the same."
|
42 |
+
netral,hahahahah
|
43 |
+
netral,"Sudah kak mohon dicheck dmnya ya, terima kasih"
|
44 |
+
netral,"Hai, Kak. Mohon maaf udah buat kamu gak nyaman ya. Supaya bisa dibantu cek lebih lanjut, bisa infoin dulu email kamu via DM Mimin tunggu ya. -WR"
|
45 |
+
positif,"hi admin, saya udah topup uangku kok gak masuk² udah 30 menit lebih, thanks"
|
46 |
+
netral,"Oh sure, thank you, Ive been waiting for 92 days, surely its a very delightful experience to keep waiting another 5x60 minutes. Im sure the Flight Specialist has been doing a great job to fight for my rights, keep up the good work What would I do without you"
|
47 |
+
netral,"Hi, Kiky. We do apologize for the inconvenience. Regarding your issue, we inform you that currently it is still in the escalation process by the Flight Specialist team. Please wait for further information in an estimated 5x60 minutes via the Traveloka inbox feature o"
|
48 |
+
netral,Good morning world. Oh hi i see its the 92th day I havent been getting my refund back from you. Thank you for the helpful and re-assuring messages from you.
|
49 |
+
netral,"hi min, please cek dm ya"
|
50 |
+
netral,Kak Jajang bisa aja deh -RZ
|
51 |
+
netral,ada kode promo untuk tiket bus gak min
|
52 |
+
netral,tetep gak bisa min
|
53 |
+
netral,"Hai Kak Rhea. Terkait gambar yang kamu berikan, saat ini tidak ada kendala pada sistem pemesanan kami. Mohon pastikan jaringan yang kamu gunakan stabil serta sudah menggunakan aplikasi Traveloka versi terbaru. Mohon untuk relogin akun Traveloka kamu terlebih dahulu da"
|
54 |
+
netral,wkwkwkkw
|
55 |
+
netral,"Lewat agen kak, di ig narendra ada. Monggo dicek"
|
56 |
+
netral,minn tolong cek dm urgent banget ini min
|
57 |
+
netral,oke minn cek dm min
|
58 |
+
netral,beli tiketnya lewat aplikasi atau gmn kak
|
59 |
+
netral,"Hai, Kak. Sebelumnya bisa infokan nomor pemesanan Traveloka kamu via DM Supaya bisa kami bantu cek lebih lanjut. Terima kasih. -FR"
|
60 |
+
positif,minn klo udah mesen tiket esawat tapi nama penumpang ada yang salah gimana ya min bisa diperbaiki ga aku nulis namanya ke double gitu min
|
61 |
+
netral,"Ada Rosalia Indah, Harapan Jaya, Narendra"
|
62 |
+
netral,Hai Kak Setyo. Bergantung tanggal dan jam yang kamu pilih saat melakukan pemesanan ya. Kamu bisa cek ketersediaan jam keberangkatan melalui aplikasi Traveloka kamu. Yuk di cek -FR
|
63 |
+
netral,Dari jakarta jam berapa
|
64 |
+
netral,"Bisa kak, di sesuaiin aja sm kebutuhan"
|
65 |
+
positif,Nice sowbat mintownquh...
|
66 |
+
netral,"Hai, Kak. Untuk perihal tersebut tergantung ketersediaan dari pihak PO Bus ya Kak. Nantinya kamu bisa cek detailnya melalui aplikasi Traveloka pada saat pesan, ataupun bisa konfirmasi kepada pihak PO Bus Kak. Nantinya apabila kamu ada pertanyaan lain, jangan ragu DM mimi"
|
67 |
+
positif,ada gak sih double decker yg jkt-madiunn pliss ingfonyaa
|
68 |
+
netral,"Baik Kak, silakan cek DM-nya kembali ya -RU"
|
69 |
+
netral,"Hai, Kak. Mohon menginformasikan kembali kendala atau pertanyaan yang ingin Kakak saikan terkait Booking ID tersebut via DM agar mimin bisa bantu lebih lanjut informasinya. Terima kasih. -RU"
|
70 |
+
netral,Hi kak tolong cdm yaa urgent
|
71 |
+
netral,Terima kasih ya min
|
72 |
+
netral,"You know what, Im gonna keep this thread long. I will start my Day-91. Please anticipate for more tweets days ahead ok Im counting days on you."
|
73 |
+
netral,"So now tell me, what actions you have been doing for the past 90 days Can you show me your communication history with PAL"
|
74 |
+
netral,We apologize for the inconvenience caused. Our team is currently looking into your issue and we will get back to you shortly via inbox on Traveloka app with the latest update. Thank you for your understanding -RU
|
75 |
+
netral,Sudah DM
|
76 |
+
netral,"Hai, Nita. Makasih udah hubungin mimin ya. Sambil mimin infoin pertanyaan kamu, bisikin ke mimin juga yuk data email kamu yang terdaftar di Traveloka via DM , mimin mau ajak kamu untuk ikutan isi survey layanan nih. Mimin tunggu ya. Terima kasih : -RU"
|
77 |
+
positif,"Klo paspor lama expired, masih bisa beli tiket ga dg paspor tsb, Perpanjangan paspor msh nunggu antrian soalnya."
|
78 |
+
netral,"Hai Kak, makasih udah hubungin mimin ya. Sambil mimin infoin pertanyaan kamu, bisikin ke mimin juga yuk data email kamu yang terdaftar di Traveloka melalui DM, mimin mau ajak kamu untuk ikutan isi survey layanan nih -DZ 1/2"
|
79 |
+
netral,Next trip cobain double decker
|
80 |
+
netral,Min event 2.2 bakal ada promo bus gkkkk Promo kemaren udh gk kebagiann
|
81 |
+
netral,"Baik Kak Hazna, mimin sudah balas DM kamu ya, mohon untuk cek DM Kak -NT"
|
82 |
+
netral,Trip kapan min
|
83 |
+
netral,Is 4.6million too much for you to return Arent you a Unicorn arent you ranked 1st as The Best Place to Work in Indonesia So you take care your employees but you dont take your customers seriously
|
84 |
+
netral,You know what I am out of patience. You guys did not take me seriously for the past 90 days What is this International Flight Specialist have been doing with my inquiry the whole 3 MONTHS Were you aware that this is a SERIOUS issue You guys are keeping my money
|
85 |
+
netral,We are coordinating closely with related airlines regarding to your issue. Our International Flight Specialist team will update and provide you the confirmation through inbox on Traveloka app on Case ID 30xxxx77. We thank you for your patience -RU
|
86 |
+
netral,What is now your offer to solve this How are you gonna escalated this To who Who is the one going to ensure my money will return
|
87 |
+
netral,"Ive been contacting your cs team dozen times. No answers every time. You all keep wanting me to wait and throw your responsibilities to the airlines. Do remember I made my transaction to you, I transferred my money to you. You got your profit already from my transaction. But me"
|
88 |
+
netral,"Iyaa, aku berharapnya yaa bisa dibedakan gituu"
|
89 |
+
netral,Dear mohon bantuannya. Saya ingin melaporkan ketidakpuasan saya terhadap penanganan isu refund dari Mohon dapat diberikan info prosedur pelaporan. Terima kasih.
|
90 |
+
netral,"Hi, Kiky. Were deeply sorry for the inconvenience youve experienced. We understand how frustrating this experience must be. However we will need your help to kindly inform us your booking details via DM so we can address your concern to internal team as well as our"
|
91 |
+
netral,"Hehe, minimal atur jadwal dan ambil cuti kamu aja dulu Kak Ecko, sambil ngumpulin dananya -NT"
|
92 |
+
netral,"Baik Kak, mohon cek DM-nya kembali ya. Terima kasih -RU"
|
93 |
+
netral,"valid no debat, sering disandingin, biar kaya lagi ngobrol, twitter kiri, traveloka kanan"
|
94 |
+
netral,"Hai Kak Hazna, agar mimin dapat bantu cek lebih lanjut mengenai pemesanan kamu, mohon sebutkan nomor pemesanan kamu di Traveloka yang terdiri dari 9 Digit angka via DM ya Kak, mimin tunggu konfirmasinya. -NT"
|
95 |
+
positif,Udh saya inbox
|
96 |
+
positif,Servis makannya ga bedaa ya sama class yg lain:
|
97 |
+
netral,"Halo, Vazeryn. Kami mohon maaf atas kendala yang dialami saat menggunakan aplikasi kami. Untuk investigasi lebih lanjut, kamu bisa kirimkan screenshot error atau penjelasan kendala yang ditemui via DM ya Kak. Terima kasih -RU"
|
98 |
+
netral,"Makasih ya Kak Kadek udah berbagi cerita Pasti seru banget perjalanannya Oh Iya, mimin minta bantuan Kakak buat ngisi survey meningkatkan kualitas pelayanan Traveloka. Boleh infoin alamat email kamu via DM Mimin tunggu ya. Makasih -RU"
|
99 |
+
netral,cek dm min
|
|
|
|
assets/df_model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29ceb6f9e4327d2b9181cf5a651f8e88876920b94c205a52340f401f8c2ae536
|
3 |
+
size 63943096
|
assets/valid.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
indobert/config.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"_name_or_path": "indobenchmark/indobert-lite-base-p1",
|
3 |
-
"_num_labels":
|
4 |
"architectures": [
|
5 |
"AlbertForSequenceClassification"
|
6 |
],
|
@@ -15,17 +15,17 @@
|
|
15 |
"hidden_dropout_prob": 0,
|
16 |
"hidden_size": 768,
|
17 |
"id2label": {
|
18 |
-
"0":
|
19 |
-
"1":
|
20 |
-
"2":
|
21 |
},
|
22 |
"initializer_range": 0.02,
|
23 |
"inner_group_num": 1,
|
24 |
"intermediate_size": 3072,
|
25 |
"label2id": {
|
26 |
-
"
|
27 |
-
"
|
28 |
-
"
|
29 |
},
|
30 |
"layer_norm_eps": 1e-12,
|
31 |
"max_position_embeddings": 512,
|
|
|
1 |
{
|
2 |
"_name_or_path": "indobenchmark/indobert-lite-base-p1",
|
3 |
+
"_num_labels": 5,
|
4 |
"architectures": [
|
5 |
"AlbertForSequenceClassification"
|
6 |
],
|
|
|
15 |
"hidden_dropout_prob": 0,
|
16 |
"hidden_size": 768,
|
17 |
"id2label": {
|
18 |
+
"0": 0,
|
19 |
+
"1": 1,
|
20 |
+
"2": 2
|
21 |
},
|
22 |
"initializer_range": 0.02,
|
23 |
"inner_group_num": 1,
|
24 |
"intermediate_size": 3072,
|
25 |
"label2id": {
|
26 |
+
"0": 0,
|
27 |
+
"1": 1,
|
28 |
+
"2": 2
|
29 |
},
|
30 |
"layer_norm_eps": 1e-12,
|
31 |
"max_position_embeddings": 512,
|
indobert/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 46756497
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e809df40150362591b772ddb66962ab7765d118dc0b7b104d5f725da628ecdc9
|
3 |
size 46756497
|
pages/1__model_information.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import time
|
3 |
+
import numpy as np
|
4 |
+
import joblib
|
5 |
+
import plotly.express as px
|
6 |
+
import script.functions as fn
|
7 |
+
import pandas as pd
|
8 |
+
|
9 |
+
# ---------------------------------------------------------------------------
# Page chrome: browser tab settings, sidebar label and the page heading.
# ---------------------------------------------------------------------------
st.set_page_config(page_title="Model Information", page_icon="π")

st.sidebar.markdown("π Model Information")

st.markdown(
    "<h1 style='text-align: center;'>π Model Information</h1>",
    unsafe_allow_html=True,
)
st.write("halaman ini berisi mengenai informasi model yang tersedia pada aplikasi. anda bisa melihat bagaimana performa model dalam memprediksi sentiment baik dari waktu maupun hasil prediksi.")

# ---------------------------------------------------------------------------
# Section 1: scatter plot built from the pickled model table.
# ---------------------------------------------------------------------------
st.markdown("<h3>β Model Perfomance</h3>", unsafe_allow_html=True)
st.caption("Perfomance model dihitung berdasarkan akurasi dan waktu yang dibutuhkan model untuk memprediksi 100 data")
# The model table is a project asset loaded with joblib; it is handed to the
# plotting helpers in script.functions below.
model_table = joblib.load("./assets/df_model.pkl")
summary_fig = fn.plot_model_summary(model_table)
st.plotly_chart(summary_fig, use_container_width=True, theme="streamlit")

# ---------------------------------------------------------------------------
# Section 2: per-model evaluation on the validation CSV.
# ---------------------------------------------------------------------------
st.markdown("<h3>π Model Evaluation</h3>", unsafe_allow_html=True)
st.caption("Hasil evaluasi model berdasarkan data IndoNLU subset smsa pada validation split")

valid_df = pd.read_csv("./assets/valid.csv")
model_names = ["IndoBERT", 'Naive Bayes', 'Logistic Regression', 'XGBoost', 'Catboost', 'SVM', 'Random Forest']
model_choice = st.selectbox('Pilih Model', model_names, key="model1")

# Build both figures first, then place them side by side in two equal columns.
report_fig = fn.plot_clfr(model_table, model_choice, valid_df)
matrix_fig = fn.plot_confusion_matrix(model_table, model_choice, valid_df)
report_col, matrix_col = st.columns([1, 1])
with report_col:
    st.plotly_chart(report_fig, use_container_width=True, theme="streamlit")
with matrix_col:
    st.plotly_chart(matrix_fig, use_container_width=True, theme="streamlit")

# Short explanations shown underneath the two charts.
st.caption("CLassification Report : Classification report merupakan metode evaluasi yang menyedakan data mengenai akurasi klasifikasi, recall, precision, dan F1 score.")
st.caption("Confusion Matrix : mengukur jumlah prediksi benar dan salah yang dibuat oleh model yang berguna untuk menunjukkan kinerja dari model untuk setiap kelas")
|
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
pandas
|
2 |
numpy
|
3 |
matplotlib
|
@@ -7,3 +8,5 @@ transformers
|
|
7 |
sentence-transformers
|
8 |
bertopic
|
9 |
snscrape == 0.5.0.20230113
|
|
|
|
|
|
1 |
+
scikit-learn == 1.2.1
|
2 |
pandas
|
3 |
numpy
|
4 |
matplotlib
|
|
|
8 |
sentence-transformers
|
9 |
bertopic
|
10 |
snscrape == 0.5.0.20230113
|
11 |
+
xgboost == 1.7.3
|
12 |
+
catboost == 1.1.1
|
script/__pycache__/functions.cpython-310.pyc
CHANGED
Binary files a/script/__pycache__/functions.cpython-310.pyc and b/script/__pycache__/functions.cpython-310.pyc differ
|
|
script/__pycache__/text_proc.cpython-310.pyc
CHANGED
Binary files a/script/__pycache__/text_proc.cpython-310.pyc and b/script/__pycache__/text_proc.cpython-310.pyc differ
|
|
script/functions.py
CHANGED
@@ -4,6 +4,14 @@ import re
|
|
4 |
import snscrape.modules.twitter as sntwitter
|
5 |
from transformers import pipeline
|
6 |
import plotly.express as px
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
|
9 |
def get_tweets(username, length=10, option = None):
|
@@ -41,10 +49,19 @@ def get_tweets(username, length=10, option = None):
|
|
41 |
return tweets_df
|
42 |
|
43 |
|
44 |
-
def get_sentiment(df):
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
# change order sentiment to first column
|
49 |
cols = df.columns.tolist()
|
50 |
cols = cols[-1:] + cols[:-1]
|
@@ -60,7 +77,7 @@ def get_bar_chart(df):
|
|
60 |
# hide legend
|
61 |
fig.update_layout(showlegend=False)
|
62 |
# set margin top
|
63 |
-
fig.update_layout(margin=dict(t=0, b=
|
64 |
# set title in center
|
65 |
# set annotation in bar
|
66 |
fig.update_traces(textposition='outside')
|
@@ -70,3 +87,46 @@ def get_bar_chart(df):
|
|
70 |
fig.update_yaxes(title_text='Jumlah Komentar')
|
71 |
|
72 |
return fig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import snscrape.modules.twitter as sntwitter
|
5 |
from transformers import pipeline
|
6 |
import plotly.express as px
|
7 |
+
import joblib
|
8 |
+
from sklearn.metrics import classification_report,confusion_matrix
|
9 |
+
|
10 |
+
|
11 |
+
import nltk
|
12 |
+
nltk.download("punkt")
|
13 |
+
nltk.download('stopwords')
|
14 |
+
from nltk.tokenize import word_tokenize
|
15 |
|
16 |
|
17 |
def get_tweets(username, length=10, option = None):
|
|
|
49 |
return tweets_df
|
50 |
|
51 |
|
52 |
+
def get_sentiment(df,option_model):
|
53 |
+
id2label = {0: "negatif", 1: "netral", 2: "positif"}
|
54 |
+
if option_model == "IndoBERT (Accurate,Slow)":
|
55 |
+
classifier = pipeline("sentiment-analysis",model = "indobert")
|
56 |
+
df['sentiment'] = df['content'].apply(lambda x: id2label[classifier(x)[0]['label']])
|
57 |
+
elif (option_model == "Logistic Regression (Less Accurate,Fast)"):
|
58 |
+
df_model = joblib.load('assets/df_model.pkl')
|
59 |
+
classifier = df_model[df_model.model_name == "Logistic Regression"].model.values[0]
|
60 |
+
df['sentiment'] = df['content'].apply(lambda x: id2label[classifier.predict([x])[0]])
|
61 |
+
else :
|
62 |
+
df_model = joblib.load('assets/df_model.pkl')
|
63 |
+
classifier = df_model[df_model.model_name == option_model].model.values[0]
|
64 |
+
df['sentiment'] = df['content'].apply(lambda x: id2label[classifier.predict([x])[0]])
|
65 |
# change order sentiment to first column
|
66 |
cols = df.columns.tolist()
|
67 |
cols = cols[-1:] + cols[:-1]
|
|
|
77 |
# hide legend
|
78 |
fig.update_layout(showlegend=False)
|
79 |
# set margin top
|
80 |
+
fig.update_layout(margin=dict(t=0, b=150, l=0, r=0))
|
81 |
# set title in center
|
82 |
# set annotation in bar
|
83 |
fig.update_traces(textposition='outside')
|
|
|
87 |
fig.update_yaxes(title_text='Jumlah Komentar')
|
88 |
|
89 |
return fig
|
90 |
+
|
91 |
+
def plot_model_summary(df_model):
    """Build a score-versus-time scatter plot from the model table.

    Only rows whose ``set_data`` equals ``"test"`` are plotted. Each point is
    coloured by ``model_name``, which is also shown in the hover data.

    Args:
        df_model: DataFrame with at least the columns ``set_data``,
            ``score``, ``time`` and ``model_name``.

    Returns:
        The plotly figure created by ``px.scatter``.
    """
    is_test_row = df_model.set_data == "test"
    test_rows = df_model[is_test_row][["score", "time", "model_name"]]

    scatter = px.scatter(
        test_rows,
        x="time",
        y="score",
        color="model_name",
        hover_data=['model_name'],
    )

    # Axis captions: x is labelled as seconds, y as accuracy.
    scatter.update_xaxes(title_text="time (s)")
    scatter.update_yaxes(title_text="accuracy")

    # Fixed marker size and a fixed-height layout without top/side margins.
    scatter.update_traces(marker={"size": 10})
    scatter.update_layout(
        autosize=False,
        margin={"t": 0, "l": 0, "r": 0},
        height=400,
    )
    return scatter
|
104 |
+
|
105 |
+
def plot_clfr(df_model,option_model,df):
    """Render one model's classification report as an annotated heatmap.

    Args:
        df_model: Not used by this function.
        option_model: Model name; predictions are read from the
            ``f"{option_model}_pred"`` column of ``df``.
        df: DataFrame holding the true labels in ``label`` and one
            ``<model>_pred`` column per model.

    Returns:
        The plotly figure created by ``px.imshow``.
    """
    report = classification_report(
        df["label"],
        df[f"{option_model}_pred"],
        output_dict=True,
    )
    report_df = pd.DataFrame(report)
    # NOTE(review): the class names below run positif -> netral -> negatif,
    # while plot_confusion_matrix and get_sentiment's id2label in this file
    # list negatif first. Confirm which order matches the label encoding of
    # the data.
    report_df.columns = ["positif","netral","negatif","accuracy","macro_avg","weighted_avg"]

    # One row per class/average; the last column is dropped
    # (presumably ``support`` -- verify against the report layout).
    heat_df = report_df.T.iloc[:, :-1]

    fig = px.imshow(heat_df, x=heat_df.columns, y=heat_df.index)
    # Hide the colour bar, pick the colour scale and set the title.
    fig.update_layout(
        coloraxis_showscale=False,
        coloraxis_colorscale='gnbu',
        title_text="π Classification Report",
    )
    # Print each cell's value with two decimals.
    fig.update_traces(
        text=heat_df.values,
        texttemplate='%{text:.2f}',
        textfont_size=12,
    )
    return fig
|
120 |
+
|
121 |
+
def plot_confusion_matrix(df_model,option_model,df):
    """Render one model's confusion matrix as an annotated heatmap.

    Args:
        df_model: Not used by this function.
        option_model: Model name; predictions are read from the
            ``f"{option_model}_pred"`` column of ``df``.
        df: DataFrame holding the true labels in ``label`` and one
            ``<model>_pred`` column per model.

    Returns:
        The plotly figure created by ``px.imshow``.
    """
    class_names = ('negatif', 'netral', 'positif')
    matrix = confusion_matrix(df['label'], df[f"{option_model}_pred"])

    # The same class names label both axes.
    fig = px.imshow(matrix, x=list(class_names), y=list(class_names))
    # Hide the colour bar, pick the colour scale and set the title.
    fig.update_layout(
        coloraxis_showscale=False,
        coloraxis_colorscale='gnbu',
        title_text="π Confusion Matrix",
    )
    # Print the count in every cell as a whole number.
    fig.update_traces(
        text=matrix,
        texttemplate='%{text:.0f}',
        textfont_size=15,
    )
    return fig
|
script/text_proc.py
CHANGED
@@ -36,11 +36,19 @@ def get_wordcloud(df,kelas_sentiment):
|
|
36 |
tokens = tokenisasi(df[df.sentiment == kelas_sentiment])
|
37 |
tokens = tokens.apply(lambda x: ' '.join(x))
|
38 |
text = ' '.join(tokens)
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
return wordcloud
|
45 |
|
46 |
def plot_text(df,kelas,embedding_model):
|
|
|
36 |
tokens = tokenisasi(df[df.sentiment == kelas_sentiment])
|
37 |
tokens = tokens.apply(lambda x: ' '.join(x))
|
38 |
text = ' '.join(tokens)
|
39 |
+
# check if text empty or not
|
40 |
+
try :
|
41 |
+
wordcloud = WordCloud(width = 800, height = 800,
|
42 |
+
background_color ='black',
|
43 |
+
min_font_size = 10,
|
44 |
+
colormap = cmap_dict[kelas_sentiment],
|
45 |
+
mask = mask).generate(text)
|
46 |
+
except:
|
47 |
+
wordcloud = WordCloud(width = 800, height = 800,
|
48 |
+
background_color ='black',
|
49 |
+
min_font_size = 10,
|
50 |
+
colormap = cmap_dict[kelas_sentiment],
|
51 |
+
mask = mask).generate("None")
|
52 |
return wordcloud
|
53 |
|
54 |
def plot_text(df,kelas,embedding_model):
|