Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| # import numpy as np | |
| import pandas as pd | |
| from topic_modeling import TopicModeling | |
# Page-level settings: browser tab title, page icon and the wide layout.
st.set_page_config(
    page_title='تحلیلگر متن عهد',
    page_icon='./ahd_logo.png',
    layout='wide',
)
def get_model():
    """Build and return a new ``TopicModeling`` instance."""
    return TopicModeling()
# NOTE(review): the indentation of this script was lost in the copy this was
# restored from. The nesting below (what sits inside each ``with <column>:``
# block) is a reconstruction -- confirm against the deployed layout.


def _read_uploaded_table(uploaded_file):
    """Parse an uploaded spreadsheet into a DataFrame.

    Args:
        uploaded_file: Object returned by ``st.file_uploader``. Its ``name``
            attribute selects the parser and the object itself is handed to
            pandas.

    Returns:
        The result of ``pd.read_excel`` for ``.xlsx`` names or of
        ``pd.read_csv`` for ``.csv`` names.

    Raises:
        ValueError: If the file name has any other extension.
    """
    # Compare case-insensitively so names such as ``DATA.CSV`` are accepted.
    filename = uploaded_file.name.lower()
    if filename.endswith('.xlsx'):
        return pd.read_excel(uploaded_file)
    if filename.endswith('.csv'):
        return pd.read_csv(uploaded_file)
    raise ValueError('Unsupported file format')


def _select_rows(df, cols, ratio):
    """Return the leading ``ratio`` percent of ``df[cols]`` without NaN rows.

    Args:
        df: Source DataFrame.
        cols: Column labels to keep.
        ratio: Percentage (0-100) of the rows to keep, counted from the top.

    Returns:
        A DataFrame restricted to ``cols``, truncated to
        ``int(len(df) * ratio / 100)`` rows, with rows containing missing
        values dropped afterwards.
    """
    subset = df[cols]
    subset = subset.head(int(len(subset) * (ratio / 100)))
    return subset.dropna()


def _section_title(text):
    """Render ``text`` as a title in the last of three columns, then a spacer."""
    _, _, last_col = st.columns(3)
    with last_col:
        st.title(text)
    # An empty header is used purely as vertical spacing.
    st.header("")


tp_model = get_model()

# Page title, placed in the middle of three columns.
_, middle_col, _ = st.columns(3)
with middle_col:
    st.title("تحلیل اسناد متنی")

# Upload an .xlsx or .csv file.
uploaded_file = st.file_uploader("آپلود فایل")
if uploaded_file is not None:
    df = _read_uploaded_table(uploaded_file)

    # Show first 10 rows of dataframe
    st.write(df.head(10))

    # Select columns to use for topic modeling
    cols = st.multiselect("ستونهای متنی موردنظر را انتخاب نمایید", df.columns)
    ratio = st.slider('چند درصد از کل دادگان پردازش شود', min_value=0, max_value=100)

    # Put the button in the middle of five columns.
    _, _, button_col, _, _ = st.columns(5)
    with button_col:
        done_button = st.button("پردازش دادگان")

    if done_button:
        # Without these guards an empty frame reaches the model: nothing is
        # selected in the multiselect by default, and the slider starts at its
        # minimum (0) because no explicit ``value`` is passed.
        if not cols:
            st.warning('لطفاً حداقل یک ستون متنی را انتخاب نمایید')
            st.stop()

        selected = _select_rows(df, cols, ratio)
        if selected.empty:
            st.warning(
                'هیچ سطری برای پردازش باقی نمانده است؛ '
                'درصد دادگان را افزایش دهید یا ستونهای دیگری انتخاب نمایید'
            )
            st.stop()

        # Run topic modeling, reporting progress in the middle column.
        _, progress_col, _ = st.columns(3)
        with progress_col:
            with st.spinner('در حال پردازش دادگان'):
                docs = tp_model.add_data(selected)
            st.success('پردازش دادگان با موفقیت به پایان رسید')

            with st.spinner('در حال آموزش مدل'):
                tp_model.fit(docs)
            st.success('آموزش پایان یافت')

        # Topic space plot.
        _section_title(" فضای تاپیکها ")
        st.plotly_chart(
            tp_model.get_vis_topics(),
            use_container_width=True,
            theme="streamlit",
        )

        # Words per topic (bar chart).
        _section_title(" کلمات هر تاپیک ")
        st.plotly_chart(
            tp_model.get_barchart(),
            use_container_width=True,
            theme="streamlit",
        )

        # Topic list table.
        _section_title("لیست تاپیکها")
        st.write(tp_model.get_topic_info())

        # Word clouds: one figure per item returned by the model.
        _section_title(" ابر کلمات ")
        for cloud_fig in tp_model.get_wordcloud():
            st.header("")
            st.markdown('topic:')
            st.pyplot(cloud_fig)