Hameed13 commited on
Commit
6d73ed9
·
verified ·
1 Parent(s): 74a56ca

Delete yarngpt/generate.py

Browse files
Files changed (1) hide show
  1. yarngpt/generate.py +0 -151
yarngpt/generate.py DELETED
@@ -1,151 +0,0 @@
1
- import os
2
- import sys
3
- import logging
4
- import torch
5
- import torchaudio
6
- import numpy as np
7
- from transformers import AutoTokenizer, AutoProcessor, AutoModelForSpeechSeq2Seq, Speech2Text2Config
8
- from huggingface_hub import hf_hub_download
9
- import warnings
10
- import scipy.io.wavfile as wav
11
- from datetime import datetime
12
- import json
13
-
14
# Module-wide logging: timestamped INFO-level records.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# Build-time metadata baked into each TextToSpeech instance
# (recorded for traceability, not used to alter behavior).
INIT_TIMESTAMP = "2025-05-21 02:21:23"
CURRENT_USER = "Abdulhameed556"
22
-
23
class TextToSpeech:
    """Text-to-speech wrapper around a Hugging Face speech seq2seq model.

    On construction it materializes a minimal Speech2Text2 tokenizer
    (config, special-tokens map, and a 5-entry vocab) into a local cache
    directory, then loads the model onto GPU if available. ``tts`` turns a
    text string into a float32 numpy audio buffer.
    """

    def __init__(self, model_name_or_path, processor_name_or_path=None):
        """Load tokenizer and model, caching artifacts under ``/code/cache``.

        Args:
            model_name_or_path: Hub id or local path of the speech model.
            processor_name_or_path: Optional separate processor id; falls
                back to ``model_name_or_path`` when omitted.

        Raises:
            Exception: any failure during config/tokenizer/model loading is
                logged and re-raised unchanged.
        """
        self.model_name_or_path = model_name_or_path
        self.processor_name_or_path = processor_name_or_path or model_name_or_path
        # Provenance metadata only — never consulted by generation code.
        self.init_time = INIT_TIMESTAMP
        self.user = CURRENT_USER
        # Hard-coded container path; also doubles as the tokenizer directory.
        self.cache_dir = "/code/cache"

        logger.info(f"Initializing TextToSpeech with model: {model_name_or_path}")

        try:
            # Create cache directory if it doesn't exist
            os.makedirs(self.cache_dir, exist_ok=True)

            # Write tokenizer JSON files locally so AutoTokenizer can load
            # from the cache dir instead of the hub.
            self._create_tokenizer_files()

            # Initialize configuration (HF_TOKEN enables gated/private repos).
            config = Speech2Text2Config.from_pretrained(
                pretrained_model_name_or_path=self.model_name_or_path,
                cache_dir=self.cache_dir,
                token=os.getenv('HF_TOKEN')
            )

            # Initialize tokenizer from the locally created files.
            logger.info("Loading tokenizer...")
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.cache_dir,  # Use local cache directory
                config=config,
                token=os.getenv('HF_TOKEN')
            )

            # Initialize model on GPU when available, else CPU.
            logger.info("Loading model...")
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
            logger.info(f"Using device: {self.device}")

            self.model = AutoModelForSpeechSeq2Seq.from_pretrained(
                self.model_name_or_path,
                config=config,
                cache_dir=self.cache_dir,
                token=os.getenv('HF_TOKEN')
            ).to(self.device)

            logger.info("Model initialization complete")

        except Exception as e:
            logger.error(f"Error initializing TextToSpeech: {e}")
            raise

    def _create_tokenizer_files(self):
        """Write minimal tokenizer config/special-tokens/vocab JSON files
        into ``self.cache_dir`` (overwriting any existing copies)."""
        tokenizer_files = {
            "tokenizer_config.json": {
                "name_or_path": self.model_name_or_path,
                "padding_side": "right",
                "truncation_side": "right",
                "model_max_length": 1024,
                "bos_token": "<s>",
                "eos_token": "</s>",
                "unk_token": "<unk>",
                "pad_token": "<pad>",
                "mask_token": "<mask>",
                "special_tokens_map_file": "special_tokens_map.json",
                "tokenizer_class": "Speech2Text2Tokenizer"
            },
            "special_tokens_map.json": {
                "bos_token": "<s>",
                "eos_token": "</s>",
                "pad_token": "<pad>",
                "unk_token": "<unk>",
                "mask_token": "<mask>"
            },
            # NOTE(review): a 5-token vocab (special tokens only) — presumably
            # the real subword vocab ships with the model; confirm the
            # tokenizer actually works with this stub.
            "vocab.json": {
                "<s>": 0,
                "<pad>": 1,
                "</s>": 2,
                "<unk>": 3,
                "<mask>": 4
            }
        }

        logger.info("Creating tokenizer files in cache directory...")
        for filename, content in tokenizer_files.items():
            filepath = os.path.join(self.cache_dir, filename)
            with open(filepath, 'w', encoding='utf-8') as f:
                json.dump(content, f, indent=2)
            # Fix: original log line had lost its placeholder
            # ("Created (unknown)") and never reported which file was written.
            logger.info(f"Created {filepath}")

    def tts(self, text, speed=1.0):
        """Generate speech audio from ``text``.

        Args:
            text: Input string; truncated to the tokenizer's max length.
            speed: Playback-rate factor; values > 1.0 shorten the output by
                index-resampling, values < 1.0 lengthen it.

        Returns:
            A 1-D ``numpy.float32`` array of audio samples.

        Raises:
            Exception: any failure is logged and re-raised unchanged.
        """
        try:
            logger.info(f"Processing text: {text[:50]}...")

            # Tokenize text on the model's device.
            inputs = self.tokenizer(
                text,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=self.tokenizer.model_max_length
            ).to(self.device)

            # Generate with beam search, capped at 500 steps.
            with torch.no_grad():
                output = self.model.generate(
                    **inputs,
                    max_length=500,
                    num_beams=5,
                    early_stopping=True
                )

            # NOTE(review): ``generate`` on a seq2seq model normally returns
            # token ids, but this code treats the first sequence as raw audio
            # samples — confirm the model actually emits waveform values.
            audio = output[0].cpu().numpy()

            # Index-resample for speed adjustment: sampling every ``speed``-th
            # index via linear interpolation.
            if speed != 1.0:
                audio = np.interp(
                    np.arange(0, len(audio), speed),
                    np.arange(0, len(audio)),
                    audio
                )

            return audio.astype(np.float32)

        except Exception as e:
            logger.error(f"Error generating speech: {e}")
            raise