Commit 6e6b619
Parent(s): c0dd2e2

add option to save log to file

Files changed: whisper_online.py (+29 -39)

whisper_online.py CHANGED
@@ -46,10 +46,6 @@ class ASRBase:
         raise NotImplemented("must be implemented in the child class")
 
 
-## requires imports:
-#      import whisper
-#      import whisper_timestamped
-
 class WhisperTimestampedASR(ASRBase):
     """Uses whisper_timestamped library as the backend. Initially, we tested the code on this backend. It worked, but slower than faster-whisper.
     On the other hand, the installation for GPU could be easier.
@@ -64,7 +60,7 @@ class WhisperTimestampedASR(ASRBase):
 
     def load_model(self, modelsize=None, cache_dir=None, model_dir=None):
         if model_dir is not None:
-            print("ignoring model_dir, not implemented",file=sys.stderr)
+            print("ignoring model_dir, not implemented",file=self.output)
         return whisper.load_model(modelsize, download_root=cache_dir)
 
     def transcribe(self, audio, init_prompt=""):
@@ -89,9 +85,6 @@ class WhisperTimestampedASR(ASRBase):
 
 class FasterWhisperASR(ASRBase):
     """Uses faster-whisper library as the backend. Works much faster, appx 4-times (in offline mode). For GPU, it requires installation with a specific CUDNN version.
-
-    Requires imports, if used:
-        import faster_whisper
     """
 
     sep = ""
@@ -101,11 +94,8 @@ class FasterWhisperASR(ASRBase):
         import faster_whisper
 
     def load_model(self, modelsize=None, cache_dir=None, model_dir=None):
-        #from faster_whisper import WhisperModel
-
-
         if model_dir is not None:
-            print(f"Loading whisper model from model_dir {model_dir}. modelsize and cache_dir parameters are not used.",file=sys.stderr)
+            print(f"Loading whisper model from model_dir {model_dir}. modelsize and cache_dir parameters are not used.",file=self.output)
             model_size_or_path = model_dir
         elif modelsize is not None:
             model_size_or_path = modelsize
@@ -153,7 +143,8 @@ class FasterWhisperASR(ASRBase):
 
 class HypothesisBuffer:
 
-    def __init__(self):
+    def __init__(self, output=sys.stderr):
+        """output: where to store the log. Leave it unchanged to print to terminal."""
         self.commited_in_buffer = []
         self.buffer = []
         self.new = []
@@ -161,6 +152,8 @@ class HypothesisBuffer:
         self.last_commited_time = 0
         self.last_commited_word = None
 
+        self.output = output
+
     def insert(self, new, offset):
         # compare self.commited_in_buffer and new. It inserts only the words in new that extend the commited_in_buffer, it means they are roughly behind last_commited_time and new in content
         # the new tail is added to self.new
@@ -179,9 +172,9 @@ class HypothesisBuffer:
                     c = " ".join([self.commited_in_buffer[-j][2] for j in range(1,i+1)][::-1])
                     tail = " ".join(self.new[j-1][2] for j in range(1,i+1))
                     if c == tail:
-                        print("removing last",i,"words:",file=sys.stderr)
+                        print("removing last",i,"words:",file=self.output)
                         for j in range(i):
-                            print("\t",self.new.pop(0),file=sys.stderr)
+                            print("\t",self.new.pop(0),file=self.output)
                         break
 
     def flush(self):
@@ -218,12 +211,14 @@ class OnlineASRProcessor:
 
     SAMPLING_RATE = 16000
 
-    def __init__(self, asr, tokenizer):
+    def __init__(self, asr, tokenizer, output=sys.stderr):
         """asr: WhisperASR object
         tokenizer: sentence tokenizer object for the target language. Must have a method *split* that behaves like the one of MosesTokenizer.
+        output: where to store the log. Leave it unchanged to print to terminal.
         """
         self.asr = asr
         self.tokenizer = tokenizer
+        self.output = output
 
         self.init()
 
@@ -232,7 +227,7 @@ class OnlineASRProcessor:
         self.audio_buffer = np.array([],dtype=np.float32)
         self.buffer_time_offset = 0
 
-        self.transcript_buffer = HypothesisBuffer()
+        self.transcript_buffer = HypothesisBuffer(output=self.output)
         self.commited = []
         self.last_chunked_at = 0
 
@@ -263,13 +258,13 @@ class OnlineASRProcessor:
     def process_iter(self):
         """Runs on the current audio buffer.
         Returns: a tuple (beg_timestamp, end_timestamp, "text"), or (None, None, "").
-        The non-emty text is confirmed (commited) partial transcript.
+        The non-emty text is confirmed (committed) partial transcript.
         """
 
         prompt, non_prompt = self.prompt()
-        print("PROMPT:", prompt, file=sys.stderr)
-        print("CONTEXT:", non_prompt, file=sys.stderr)
-        print(f"transcribing {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f} seconds from {self.buffer_time_offset:2.2f}",file=sys.stderr)
+        print("PROMPT:", prompt, file=self.output)
+        print("CONTEXT:", non_prompt, file=self.output)
+        print(f"transcribing {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f} seconds from {self.buffer_time_offset:2.2f}",file=self.output)
         res = self.asr.transcribe(self.audio_buffer, init_prompt=prompt)
 
         # transform to [(beg,end,"word1"), ...]
@@ -278,8 +273,8 @@ class OnlineASRProcessor:
         self.transcript_buffer.insert(tsw, self.buffer_time_offset)
         o = self.transcript_buffer.flush()
         self.commited.extend(o)
-        print(">>>>COMPLETE NOW:",self.to_flush(o),file=sys.stderr,flush=True)
-        print("INCOMPLETE:",self.to_flush(self.transcript_buffer.complete()),file=sys.stderr,flush=True)
+        print(">>>>COMPLETE NOW:",self.to_flush(o),file=self.output,flush=True)
+        print("INCOMPLETE:",self.to_flush(self.transcript_buffer.complete()),file=self.output,flush=True)
 
         # there is a newly confirmed text
         if o:
@@ -298,14 +293,14 @@ class OnlineASRProcessor:
 #        elif self.transcript_buffer.complete():
 #            self.silence_iters = 0
 #        elif not self.transcript_buffer.complete():
-#        #    print("NOT COMPLETE:",to_flush(self.transcript_buffer.complete()),file=sys.stderr,flush=True)
+#        #    print("NOT COMPLETE:",to_flush(self.transcript_buffer.complete()),file=self.output,flush=True)
 #            self.silence_iters += 1
 #            if self.silence_iters >= 3:
 #                n = self.last_chunked_at
 ##                self.chunk_completed_sentence()
 ##                if n == self.last_chunked_at:
 #                self.chunk_at(self.last_chunked_at+self.chunk)
-#                print(f"\tCHUNK: 3-times silence! chunk_at {n}+{self.chunk}",file=sys.stderr)
+#                print(f"\tCHUNK: 3-times silence! chunk_at {n}+{self.chunk}",file=self.output)
 ##                self.silence_iters = 0
 
 
@@ -321,18 +316,18 @@ class OnlineASRProcessor:
             #while k>0 and self.commited[k][1] > l:
             #    k -= 1
             #t = self.commited[k][1] 
-            print(f"chunking because of len",file=sys.stderr)
+            print(f"chunking because of len",file=self.output)
             #self.chunk_at(t)
 
-        print(f"len of buffer now: {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f}",file=sys.stderr)
+        print(f"len of buffer now: {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f}",file=self.output)
         return self.to_flush(o)
 
     def chunk_completed_sentence(self):
         if self.commited == []: return
-        print(self.commited,file=sys.stderr)
+        print(self.commited,file=self.output)
         sents = self.words_to_sentences(self.commited)
         for s in sents:
-            print("\t\tSENT:",s,file=sys.stderr)
+            print("\t\tSENT:",s,file=self.output)
         if len(sents) < 2:
             return
         while len(sents) > 2:
@@ -340,7 +335,7 @@ class OnlineASRProcessor:
         # we will continue with audio processing at this timestamp
         chunk_at = sents[-2][1]
 
-        print(f"--- sentence chunked at {chunk_at:2.2f}",file=sys.stderr)
+        print(f"--- sentence chunked at {chunk_at:2.2f}",file=self.output)
         self.chunk_at(chunk_at)
 
     def chunk_completed_segment(self, res):
@@ -357,12 +352,12 @@ class OnlineASRProcessor:
                 ends.pop(-1)
                 e = ends[-2]+self.buffer_time_offset
             if e <= t:
-                print(f"--- segment chunked at {e:2.2f}",file=sys.stderr)
+                print(f"--- segment chunked at {e:2.2f}",file=self.output)
                 self.chunk_at(e)
             else:
-                print(f"--- last segment not within commited area",file=sys.stderr)
+                print(f"--- last segment not within commited area",file=self.output)
         else:
-            print(f"--- not enough segments to chunk",file=sys.stderr)
+            print(f"--- not enough segments to chunk",file=self.output)
 
 
 
@@ -408,7 +403,7 @@ class OnlineASRProcessor:
         """
         o = self.transcript_buffer.complete()
         f = self.to_flush(o)
-        print("last, noncommited:",f,file=sys.stderr)
+        print("last, noncommited:",f,file=self.output)
         return f
 
 
@@ -473,15 +468,10 @@ if __name__ == "__main__":
 
     t = time.time()
     print(f"Loading Whisper {size} model for {language}...",file=sys.stderr,end=" ",flush=True)
-    #asr = WhisperASR(lan=language, modelsize=size)
 
     if args.backend == "faster-whisper":
-        #from faster_whisper import WhisperModel
         asr_cls = FasterWhisperASR
     else:
-        #import whisper
-        #import whisper_timestamped
-    #    from whisper_timestamped_model import WhisperTimestampedASR
         asr_cls = WhisperTimestampedASR
 
     asr = asr_cls(modelsize=size, lan=language, cache_dir=args.model_cache_dir, model_dir=args.model_dir)
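
In effect, the verbose progress log (PROMPT/CONTEXT, COMPLETE/INCOMPLETE, chunking messages) from OnlineASRProcessor and its HypothesisBuffer can now be redirected to any writable text stream, while the default output=sys.stderr keeps the old print-to-terminal behaviour. A minimal usage sketch, not part of the commit: it assumes asr and a Moses-like tokenizer are constructed as in __main__, and the model size, language, and log file name below are illustrative only.

    import sys

    # Backend setup mirrors __main__ above; the argument values are examples.
    asr = FasterWhisperASR(modelsize="large-v2", lan="en",
                           cache_dir=None, model_dir=None)
    tokenizer = ...  # any object with a MosesTokenizer-like split() method

    # Default: the log keeps going to the terminal, exactly as before.
    online = OnlineASRProcessor(asr, tokenizer)

    # New option: pass an open file. OnlineASRProcessor stores it as
    # self.output and forwards it to HypothesisBuffer(output=...), so both
    # components write their log lines to the same place.
    with open("whisper_online.log", "w") as logfile:
        online = OnlineASRProcessor(asr, tokenizer, output=logfile)
        # feed 16 kHz float32 audio and poll for confirmed text as usual,
        # e.g. online.insert_audio_chunk(a); beg, end, text = online.process_iter()

Passing a stream rather than a file name keeps the change minimal: every existing print(..., file=sys.stderr) call simply switches its target, and callers remain free to pass sys.stdout, an open file, or any text-stream-like object.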