# voice-craft-ai / app.py
# Author: Vadhana (Hugging Face Spaces upload, commit 43578d8)
# NOTE: the original Spaces page header was converted to comments so the module parses.
import subprocess
import sys

import spacy
import streamlit as st
import torch
import whisper
from transformers import pipeline
# Load the spaCy English model, downloading it on first run if it is absent.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spacy.load raises OSError when the model package is not installed.
    # Use the running interpreter (sys.executable) rather than whatever
    # "python" resolves to on PATH, and fail loudly if the download fails.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")
# Hugging Face pipelines, built once and cached across Streamlit reruns.
@st.cache_resource
def load_models():
    """Create and cache the sentiment-analysis and summarization pipelines."""
    classifier = pipeline(
        "sentiment-analysis",
        model="distilbert-base-uncased-finetuned-sst-2-english",
    )
    bart = pipeline("summarization", model="facebook/bart-large-cnn")
    return classifier, bart


sentiment_analyzer, summarizer = load_models()
# Whisper speech-to-text model, cached so it is only loaded once per session.
@st.cache_resource
def load_whisper():
    """Load and cache the base Whisper speech-recognition model."""
    return whisper.load_model("base")


whisper_model = load_whisper()
# Streamlit UI
st.title("🎙️ Voice-Controlled AI Text Editor")
st.subheader("Supports Speech-to-Text, Sentiment Analysis, Summarization & POS Tagging")

# Working text for the editor. Initialized to empty so the command button and
# sidebar actions below never hit a NameError when no audio has been uploaded.
text = ""

# File Upload for Whisper
uploaded_audio = st.file_uploader("🎵 Upload an audio file", type=["wav", "mp3", "m4a"])
if uploaded_audio:
    st.audio(uploaded_audio, format="audio/wav")
    # Whisper's transcribe() takes a file path, so persist the upload to disk.
    with open("temp_audio.wav", "wb") as f:
        f.write(uploaded_audio.read())
    with st.spinner("Transcribing..."):
        result = whisper_model.transcribe("temp_audio.wav")
        text = result["text"]
    st.success("Transcription Complete!")
    st.text_area("Transcribed Text", text, height=200)
# Dispatch a recognized voice command to the matching text operation.
def process_command(command, text):
    """Route a spoken command to summarization, sentiment analysis, or deletion.

    Commands are matched case-insensitively by substring; anything that does
    not match a known command returns the text unchanged.
    """
    spoken = command.lower()
    if "summarize" in spoken:
        return summarize_text(text)
    if "analyze sentiment" in spoken:
        return analyze_sentiment(text)
    if "delete" in spoken:
        return ""
    return text
# Summarization Function
def summarize_text(text):
    """Summarize *text* with BART, refusing inputs shorter than 30 words."""
    word_count = len(text.split())
    if word_count < 30:
        return "Text is too short for summarization."
    # Only the first 1024 characters are fed in, keeping within BART's window.
    output = summarizer(text[:1024], max_length=100, min_length=30, do_sample=False)
    return output[0]["summary_text"]
# Sentiment Analysis Function
def analyze_sentiment(text):
    """Classify the sentiment of *text* and format label plus confidence."""
    # Truncate to 512 characters before handing off to the DistilBERT pipeline.
    prediction = sentiment_analyzer(text[:512])
    top = prediction[0]
    return f"Sentiment: {top['label']} (Confidence: {top['score']:.2f})"
# POS Tagging Function
def pos_tagging(text):
    """Return one "token -> POS" string per token in *text* via spaCy."""
    return [f"{tok.text} -> {tok.pos_}" for tok in nlp(text)]
# Voice Command for Summarization or Sentiment Analysis
# NOTE(review): this re-transcribes the previously uploaded file rather than
# capturing a new utterance — there is no microphone input in this app.
if st.button("🎙️ Speak Command"):
    if uploaded_audio:
        with st.spinner("Listening..."):
            result = whisper_model.transcribe("temp_audio.wav")
            command = result["text"]
        st.write(f"Command Recognized: {command}")
        # Process the command against the current transcribed text.
        processed_text = process_command(command, text)
        st.text_area("Processed Text", processed_text, height=200)
    else:
        # Without an upload there is no temp_audio.wav and no text to edit;
        # the original code crashed here with a missing file / NameError.
        st.warning("Please upload an audio file first.")
# Sidebar Options
with st.sidebar:
    st.header("⚡ Actions")
    if not uploaded_audio:
        # Guard: `text` only exists after a successful transcription, so the
        # original code raised NameError when a button was pressed early.
        st.info("Upload an audio file to enable these actions.")
    else:
        if st.button("😊 Analyze Sentiment"):
            st.success(analyze_sentiment(text))
        if st.button("📝 Summarize Text"):
            st.success(summarize_text(text))
        if st.button("🔍 Show POS Tags"):
            st.write("🔎 POS Tags:", pos_tagging(text))
        if st.button("❌ Clear Text"):
            # Rebinding only clears this run's local copy; Streamlit reruns the
            # whole script per interaction, so persistence would need
            # st.session_state — preserved original behavior here.
            text = ""
            st.success("Text cleared.")