saheedniyi committed on
Commit
0f25530
·
verified ·
1 Parent(s): 197b11b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +50 -29
README.md CHANGED
@@ -107,8 +107,7 @@ torchaudio.save(f"Sample.wav", audio, sample_rate=24000)
107
  ```python
108
  !git clone https://github.com/saheedniyi02/yarngpt.git
109
 
110
- # install some necessary libraries
111
- !pip install outetts uroman trafilatura pydub
112
 
113
  import os
114
  import re
@@ -128,77 +127,99 @@ from pydub.effects import normalize
128
  from transformers import AutoModelForCausalLM, AutoTokenizer
129
  from outetts.wav_tokenizer.decoder import WavTokenizer
130
 
131
-
132
  !wget https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml
133
  !wget https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt
134
 
135
- from yarngpt.audiotokenizer import AudioTokenizer
136
 
137
- tokenizer_path="saheedniyi/YarnGPT"
138
  wav_tokenizer_config_path="/content/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
139
  wav_tokenizer_model_path = "/content/wavtokenizer_large_speech_320_24k.ckpt"
140
 
141
-
142
-
143
- audio_tokenizer=AudioTokenizer(
144
  tokenizer_path,wav_tokenizer_model_path,wav_tokenizer_config_path
145
  )
146
 
147
-
148
  model = AutoModelForCausalLM.from_pretrained(tokenizer_path,torch_dtype="auto").to(audio_tokenizer.device)
149
 
150
-
151
  def split_text_into_chunks(text, word_limit=25):
152
- """
153
- Function to split a long web page into reasonable chunks
154
- """
155
  sentences=[sentence.strip() for sentence in text.split('.') if sentence.strip()]
156
  chunks=[]
157
  for sentence in sentences:
158
  chunks.append(".")
159
  sentence_splitted=sentence.split(" ")
160
  num_words=len(sentence_splitted)
161
- start_index=0
162
- if num_words>word_limit:
163
- while start_index<num_words:
164
- end_index=min(num_words,start_index+word_limit)
165
- chunks.append(" ".join(sentence_splitted[start_index:start_index+word_limit]))
166
- start_index=end_index
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  else:
168
  chunks.append(sentence)
169
  return chunks
170
 
171
- #Extracting the content of a webpage
172
- page=requests.get("https://punchng.com/expensive-feud-how-burna-boy-cubana-chief-priests-fight-led-to-dollar-rain/")
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  content=trafilatura.extract(page.text)
174
  chunks=split_text_into_chunks(content)
175
 
176
- # Looping over the chunks and creating a large `all_codes` list
177
  all_codes=[]
 
178
  for i,chunk in enumerate(chunks):
179
  print(i)
180
  print("\n")
181
  print(chunk)
182
  if chunk==".":
183
- #add silence for 0.25 seconds if we encounter a full stop
184
- all_codes.extend([453]*20)
185
  else:
186
- prompt=audio_tokenizer.create_prompt(chunk,"chinenye")
 
187
  input_ids=audio_tokenizer.tokenize_prompt(prompt)
188
  output = model.generate(
189
  input_ids=input_ids,
190
  temperature=0.1,
191
  repetition_penalty=1.1,
192
  max_length=4000,
 
193
  )
194
  codes=audio_tokenizer.get_codes(output)
195
  all_codes.extend(codes)
196
 
197
-
198
- # Converting to audio
199
  audio=audio_tokenizer.get_audio(all_codes)
200
  IPython.display.Audio(audio,rate=24000)
201
- torchaudio.save(f"news1.wav", audio, sample_rate=24000)
 
 
 
202
  ```
203
 
204
  ## Model Description
@@ -210,7 +231,7 @@ torchaudio.save(f"news1.wav", audio, sample_rate=24000)
210
  - **Repository:** [YarnGPT Github Repository](https://github.com/saheedniyi02/yarngpt)
211
  - **Paper:** IN PROGRESS.
212
  - **Demo:** 1) [Prompt YarnGPT2b notebook](https://colab.research.google.com/drive/13-o1X5F3CLeHixjqobNf2TJN1T6LWOqx?usp=sharing)
213
- 2) [Simple news reader](https://colab.research.google.com/drive/1SsXV08kly1TUJVM_NFpKqQWOZ1gUZpGe?usp=sharing)
214
 
215
 
216
 
 
107
  ```python
108
  !git clone https://github.com/saheedniyi02/yarngpt.git
109
 
110
+ pip install outetts uroman trafilatura pydub
 
111
 
112
  import os
113
  import re
 
127
  from transformers import AutoModelForCausalLM, AutoTokenizer
128
  from outetts.wav_tokenizer.decoder import WavTokenizer
129
 
 
130
  !wget https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml
131
  !wget https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt
132
 
133
+ from yarngpt.audiotokenizer import AudioTokenizerV2
134
 
135
+ tokenizer_path="saheedniyi/YarnGPT2b"
136
  wav_tokenizer_config_path="/content/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
137
  wav_tokenizer_model_path = "/content/wavtokenizer_large_speech_320_24k.ckpt"
138
 
139
+ audio_tokenizer=AudioTokenizerV2(
 
 
140
  tokenizer_path,wav_tokenizer_model_path,wav_tokenizer_config_path
141
  )
142
 
 
143
  model = AutoModelForCausalLM.from_pretrained(tokenizer_path,torch_dtype="auto").to(audio_tokenizer.device)
144
 
145
+ # Split text into chunks
146
  def split_text_into_chunks(text, word_limit=25):
 
 
 
147
  sentences=[sentence.strip() for sentence in text.split('.') if sentence.strip()]
148
  chunks=[]
149
  for sentence in sentences:
150
  chunks.append(".")
151
  sentence_splitted=sentence.split(" ")
152
  num_words=len(sentence_splitted)
153
+
154
+ if (num_words>word_limit) and (num_words<=word_limit*2):
155
+ chunks.append(" ".join(sentence_splitted[:int(num_words/2)]))
156
+ chunks.append(" ".join(sentence_splitted[int(num_words/2):]))
157
+ elif (num_words>word_limit*2) and (num_words<=word_limit*3):
158
+ chunks.append(" ".join(sentence_splitted[:int(num_words/3)]))
159
+ chunks.append(" ".join(sentence_splitted[int(num_words/3):int(2*num_words/3)]))
160
+ chunks.append(" ".join(sentence_splitted[int(2*num_words/3):]))
161
+ elif (num_words>word_limit*3) and (num_words<=word_limit*4):
162
+ chunks.append(" ".join(sentence_splitted[:int(num_words/4)]))
163
+ chunks.append(" ".join(sentence_splitted[int(num_words/4):word_limit*2]))
164
+ chunks.append(" ".join(sentence_splitted[int(2*num_words/4):int(3*num_words/4)]))
165
+ chunks.append(" ".join(sentence_splitted[int(3*num_words/4):]))
166
+ elif (num_words>word_limit*4) and (num_words<=word_limit*5):
167
+ chunks.append(" ".join(sentence_splitted[:int(num_words/5)]))
168
+ chunks.append(" ".join(sentence_splitted[int(num_words/5):int(2*num_words/5)]))
169
+ chunks.append(" ".join(sentence_splitted[int(2*num_words/5):int(3*num_words/5)]))
170
+ chunks.append(" ".join(sentence_splitted[int(3*num_words/5):int(4*num_words/5)]))
171
+ chunks.append(" ".join(sentence_splitted[int(4*num_words/5):]))
172
  else:
173
  chunks.append(sentence)
174
  return chunks
175
 
176
+ def speed_change(sound, speed=0.9):
177
+ # Manually override the frame_rate. This tells the computer how many
178
+ # samples to play per second
179
+ sound_with_altered_frame_rate = sound._spawn(sound.raw_data, overrides={
180
+ "frame_rate": int(sound.frame_rate * speed)
181
+ })
182
+ # convert the sound with altered frame rate to a standard frame rate
183
+ # so that regular playback programs will work right. They often only
184
+ # know how to play audio at standard frame rate (like 44.1k)
185
+ return sound_with_altered_frame_rate.set_frame_rate(sound.frame_rate)
186
+
187
+ #change the url
188
+ url="https://punchng.com/im-not-desperate-for-2027-presidential-ticket-obi/"
189
+
190
+ page=requests.get(url)
191
  content=trafilatura.extract(page.text)
192
  chunks=split_text_into_chunks(content)
193
 
 
194
  all_codes=[]
195
+ # Looping over the chunks and creating a large `all_codes` list
196
  for i,chunk in enumerate(chunks):
197
  print(i)
198
  print("\n")
199
  print(chunk)
200
  if chunk==".":
201
+ #add silence for 0.5 seconds if we encounter a full stop
202
+ all_codes.extend([453]*38)
203
  else:
204
+ # Change the language and voice here
205
+ prompt=audio_tokenizer.create_prompt(chunk,lang="english",speaker_name="jude")
206
  input_ids=audio_tokenizer.tokenize_prompt(prompt)
207
  output = model.generate(
208
  input_ids=input_ids,
209
  temperature=0.1,
210
  repetition_penalty=1.1,
211
  max_length=4000,
212
+ #num_beams=5,
213
  )
214
  codes=audio_tokenizer.get_codes(output)
215
  all_codes.extend(codes)
216
 
 
 
217
  audio=audio_tokenizer.get_audio(all_codes)
218
  IPython.display.Audio(audio,rate=24000)
219
+ torchaudio.save(f"news1.wav",
220
+ audio,
221
+ sample_rate=24000,
222
+ )
223
  ```
224
 
225
  ## Model Description
 
231
  - **Repository:** [YarnGPT Github Repository](https://github.com/saheedniyi02/yarngpt)
232
  - **Paper:** IN PROGRESS.
233
  - **Demo:** 1) [Prompt YarnGPT2b notebook](https://colab.research.google.com/drive/13-o1X5F3CLeHixjqobNf2TJN1T6LWOqx?usp=sharing)
234
+ 2) [Simple news reader](https://colab.research.google.com/drive/1FLTUmESJbG52Bj21XX3-AoevjaXwtmhE?usp=sharing)
235
 
236
 
237