avinash committed
Commit 4771966
1 Parent(s): dd75d12

added the files

Files changed (3)
  1. app.py +76 -0
  2. poem_data.txt +19 -0
  3. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,76 @@
+ import gradio as gr
+ from transformers import AutoProcessor, WhisperForConditionalGeneration
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from gtts import gTTS
+ import tempfile
+ import librosa
+ import torch
+
+ # 1. Load Whisper STT model (CPU mode)
+ processor = AutoProcessor.from_pretrained("openai/whisper-small")
+ stt_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
+ stt_model.to("cpu")  # Make sure it runs on CPU
+
+ # 2. Load TinyLlama (or similar LLM)
+ llm_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+ tokenizer = AutoTokenizer.from_pretrained(llm_name)
+ llm_model = AutoModelForCausalLM.from_pretrained(llm_name)
+ llm_model.to("cpu")
+
+ # 3. Reference poem for style
+ with open("poem_data.txt", "r") as f:
+     reference_poem = f.read().strip()
+
+
+ # 4. Transcribe using Whisper
+ def transcribe(audio_path):
+     if audio_path is None:
+         return ""
+
+     # Whisper expects a raw waveform, not a file path: load and resample to 16 kHz
+     speech, _ = librosa.load(audio_path, sr=16000)
+     input_features = processor(speech, return_tensors="pt", sampling_rate=16000).input_features
+     predicted_ids = stt_model.generate(input_features.to("cpu"))
+     transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+     return transcription.strip()
+
+
+ # 5. Generate poem
+ def generate_poem(prompt):
+     final_prompt = f"Here is a reference poem:\n{reference_poem}\n\nNow write a new poem about {prompt.strip()} in the same style."
+     inputs = tokenizer.encode(final_prompt, return_tensors="pt", truncation=True)
+     # do_sample=True so that temperature actually affects generation
+     outputs = llm_model.generate(inputs, max_new_tokens=120, do_sample=True, temperature=0.7)
+     return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+
+ # 6. Text-to-speech
+ def synthesize(text):
+     tts = gTTS(text)
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
+         tts.save(fp.name)
+     return fp.name
+
+
+ # 7. Gradio pipeline
+ def full_pipeline(audio_input, typed_object):
+     obj = typed_object or transcribe(audio_input)
+     poem = generate_poem(obj)
+     audio_poem = synthesize(poem)
+     return poem, audio_poem
+
+
+ # 8. Gradio app
+ demo = gr.Interface(
+     fn=full_pipeline,
+     inputs=[
+         gr.Audio(sources=["microphone"], type="filepath", label="Speak object"),  # sources= for Gradio 4+
+         gr.Textbox(label="Or type object name")
+     ],
+     outputs=[
+         gr.Textbox(label="Generated Poem"),
+         gr.Audio(label="Audio of Poem")
+     ],
+     title="AI Poetry Assistant",
+     description="Speak or type a topic, and the assistant generates a poem in the style of 'A Photograph'."
+ )
+
+ if __name__ == "__main__":
+     demo.launch()
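
A minimal local smoke test of the text-only path might look like the sketch below; it is not part of the commit, and it assumes app.py and poem_data.txt are in the working directory, that the model weights download successfully, and that "an old photograph" is just a placeholder topic:

# Hypothetical check: exercise the pipeline without launching the Gradio UI.
from app import full_pipeline

poem, audio_path = full_pipeline(None, "an old photograph")  # typed input only, transcribe() is skipped
print(poem)        # poem generated by TinyLlama in the style of the reference text
print(audio_path)  # path to the temporary .mp3 produced by gTTS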
poem_data.txt ADDED
@@ -0,0 +1,19 @@
+ The cardboard shows me how it was
+ When the two girl cousins went paddling
+ Each one holding one of my mother’s hands,
+ And she the big girl – some twelve years or so.
+ All three stood still to smile through their hair
+ At the uncle with the camera. A sweet face,
+ My mother’s, that was before I was born
+ And the sea, which appears to have changed less
+ Washed their terribly transient feet.
+ Some twenty-thirty years later
+ She’d laugh at the snapshot. “See Betty
+ And Dolly,” she’d say, “and look how they
+ Dressed us for the beach.” The sea holiday
+ was her past, mine is her laughter. Both wry
+ With the laboured ease of loss.
+ Now she’s been dead nearly as many years
+ As that girl lived. And of this circumstance
+ There is nothing to say at all,
+ Its silence silences.
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ transformers
+ torch
+ gradio
+ gtts
+ librosa
+ ffmpeg-python
+
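
With these packages installed (for example via `pip install -r requirements.txt`), running `python app.py` launches the Gradio interface defined above.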