"""Streamlit app: upload an image, get a generated caption, a short story,
and spoken audio of that story.

Pipeline chain: BLIP image captioning -> genre story generator -> SpeechT5 TTS.
"""
import time

import streamlit as st
from PIL import Image
from transformers import pipeline

# NOTE: the original module eagerly built an unused
# pipeline("text-generation", model="google/flan-t5-large") here; it was never
# referenced (and flan-t5 is a text2text model, not "text-generation"), so the
# dead — and expensive — model load has been removed.


@st.cache_resource
def _load_pipeline(task, model_name):
    """Build a transformers pipeline once and reuse it across Streamlit reruns.

    Streamlit re-executes the whole script on every user interaction;
    st.cache_resource keeps each heavy model in memory instead of reloading it
    on every call.
    """
    return pipeline(task, model=model_name)


def generate_caption(image):
    """Return a text caption for *image* (a PIL Image) via BLIP captioning."""
    image_to_text = _load_pipeline("image-to-text", "Salesforce/blip-image-captioning-base")
    return image_to_text(image)[0]["generated_text"]


def generate_story(caption):
    """Return a short generated story seeded with *caption*."""
    story_pipe = _load_pipeline("text-generation", "pranavpsv/genre-story-generator-v2")
    return story_pipe(caption)[0]["generated_text"]


def generate_audio(story):
    """Synthesize *story* to speech; returns a dict with 'audio' and 'sampling_rate'.

    NOTE(review): SpeechT5 TTS normally needs `speaker_embeddings` as a forward
    parameter; calling it without one may fail depending on the transformers
    version — confirm against the installed release.
    """
    tts_pipe = _load_pipeline("text-to-speech", "microsoft/speecht5_tts")
    return tts_pipe(story)


# --- Streamlit UI ---

# Title of the Streamlit app
st.title("Upload your image for an instant storytelling!")

# Write a description
st.write("This app tests various Streamlit elements like file uploader, image display, and audio playback.")

# File uploader for image
uploaded_file = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])

if uploaded_file is not None:
    # Display the uploaded image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Caption the image, then chain the caption into the story generator
    image_caption = generate_caption(image)
    st.write(f"Caption: {image_caption}")

    story_telling = generate_story(image_caption)
    st.write(f"Story: {story_telling}")

    # Synthesize and play the story audio
    audio = generate_audio(story_telling)
    st.write("Playing audio...")
    with st.spinner("Loading audio..."):
        time.sleep(2)  # Simulating a short delay
    st.audio(audio['audio'], format="audio/wav", start_time=0, sample_rate = audio['sampling_rate'])