# bsky2M_emojis_streamlit.py
"""Streamlit app charting the most frequent emojis in two million Bluesky posts.

The app loads the ``alpindale/two-million-bluesky-posts`` dataset, extracts the
emojis from each post's text, counts them, and shows the most common ones as a
dataframe and as a Plotly bar chart.
"""

# Packages required
from collections import Counter
from collections import defaultdict
from itertools import chain

import streamlit as st
from datasets import load_dataset
import emoji
from dateutil import parser
import plotly.express as px
import pandas as pd

# How many of the most frequent emojis are shown in the table and the chart.
TOP_N = 200


# Step 2 helper: Extract Emojis from Text
def extract_emojis(text):
    """Return the emojis found in ``text``, in order of appearance.

    Args:
        text: The post text. Empty or missing (``None``) text yields ``[]``
            instead of raising inside ``emoji.emoji_list``.

    Returns:
        A list of emoji strings, one entry per occurrence.
    """
    if not text:
        return []
    return [match['emoji'] for match in emoji.emoji_list(text)]


@st.cache_resource
def load_posts():
    """Load the 'train' split and add the ``emojis`` / parsed ``created_at`` columns.

    Cached as a resource because Streamlit re-executes the whole script on
    every interaction; without the cache the dataset would be reloaded and
    re-mapped on each rerun.

    Returns:
        The mapped 'train' split of the dataset.
    """
    # Step 1: Load the dataset and access the 'train' split.
    posts = load_dataset("alpindale/two-million-bluesky-posts")['train']

    # Step 2: Apply the extraction to the 'text' column; ``map`` returns the
    # updated dataset, so the result has to be re-assigned.
    posts = posts.map(lambda x: {"emojis": extract_emojis(x["text"])})

    # Step 3: Convert 'created_at' to datetime.
    # NOTE: the parsed timestamps are not used by the table/chart below; the
    # conversion is kept so ``data`` has the same columns as before.
    posts = posts.map(lambda x: {"created_at": parser.isoparse(x["created_at"])})
    return posts


@st.cache_data
def count_emojis(_posts):
    """Count how often each emoji occurs across all posts.

    Args:
        _posts: Dataset with an ``emojis`` column holding one list of emoji
            strings per post. The leading underscore tells Streamlit not to
            hash this argument when building the cache key.

    Returns:
        A ``Counter`` mapping each emoji to its number of occurrences.
    """
    # Step 4: Stream the per-post lists straight into the Counter instead of
    # first building one flat list containing every emoji occurrence.
    return Counter(chain.from_iterable(_posts["emojis"]))


# Streamlit app title
st.title(f"Top {TOP_N} Most Frequent Emojis in Bluesky Posts")
st.image('Bluesky Emoji Model 1.jpeg', caption='Bluesky Emoji Model 1')

# Steps 1-4: Load the data and count emoji frequencies (both cached).
data = load_posts()
emoji_counts = count_emojis(data)

# Step 5: Visualize Emoji Frequencies
# Get the TOP_N most common emojis as (emoji, count) pairs.
top_emojis = emoji_counts.most_common(TOP_N)

# Create a DataFrame for Plotly directly from the pairs; unlike unpacking
# ``zip(*top_emojis)``, this also works when no emojis were found.
df = pd.DataFrame(top_emojis, columns=['Emojis', 'Frequency'])

# Display the dataframe in the Streamlit app
st.image('Bluesky Emoji Model 4.jpeg', caption='Bluesky Emoji Model 4')
st.write(f'### Top {TOP_N} Emojis Dataframe')
st.dataframe(df)
st.image('Bluesky Emoji Model 2.jpeg', caption='Bluesky Emoji Model 2')

# Plot the bar chart
fig = px.bar(df, x='Emojis', y='Frequency', title=f'Top {TOP_N} Most Frequent Emojis')
fig.update_xaxes(title_text='Emojis')
fig.update_yaxes(title_text='Frequency')

# Display the plot in the Streamlit app
st.image('Bluesky Emoji Model 3.jpeg', caption='Bluesky Emoji Model 3')
st.plotly_chart(fig)