|  |  | 
					
						
						|  | from whisper_online import * | 
					
						
						|  |  | 
					
						
						|  | import sys | 
					
						
						|  | import argparse | 
					
						
						|  | import os | 
					
						
						|  | import logging | 
					
						
						|  | import numpy as np | 
					
						
						|  |  | 
					
						
						|  | parser = argparse.ArgumentParser() | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | parser.add_argument("--host", type=str, default='localhost') | 
					
						
						|  | parser.add_argument("--port", type=int, default=43007) | 
					
						
						|  |  | 
					
						
						|  | parser.add_argument("-l", "--log-level", dest="log_level", | 
					
						
						|  | choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], | 
					
						
						|  | help="Set the log level", | 
					
						
						|  | default='INFO') | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | add_shared_args(parser) | 
					
						
						|  | args = parser.parse_args() | 
					
						
						|  |  | 
					
						
						|  | if args.log_level: | 
					
						
						|  | logging.basicConfig(format='whisper-server-%(levelname)s: %(message)s', | 
					
						
						|  | level=getattr(logging, args.log_level)) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | SAMPLING_RATE = 16000 | 
					
						
						|  |  | 
					
						
						|  | size = args.model | 
					
						
						|  | language = args.lan | 
					
						
						|  |  | 
					
						
						|  | asr = asr_factory(args) | 
					
						
						|  |  | 
					
						
						|  | if args.task == "translate": | 
					
						
						|  | asr.set_translate_task() | 
					
						
						|  | tgt_language = "en" | 
					
						
						|  | else: | 
					
						
						|  | tgt_language = language | 
					
						
						|  |  | 
					
						
						|  | min_chunk = args.min_chunk_size | 
					
						
						|  |  | 
					
						
						|  | if args.buffer_trimming == "sentence": | 
					
						
						|  | tokenizer = create_tokenizer(tgt_language) | 
					
						
						|  | else: | 
					
						
						|  | tokenizer = None | 
					
						
						|  | online = OnlineASRProcessor(asr,tokenizer,buffer_trimming=(args.buffer_trimming, args.buffer_trimming_sec)) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | demo_audio_path = "cs-maji-2.16k.wav" | 
					
						
						|  | if os.path.exists(demo_audio_path): | 
					
						
						|  |  | 
					
						
						|  | logging.debug(f"Warming up on {demo_audio_path}") | 
					
						
						|  | a = load_audio_chunk(demo_audio_path,0,1) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | asr.transcribe(a) | 
					
						
						|  | logging.debug("Whisper is warmed up") | 
					
						
						|  | else: | 
					
						
						|  | logging.debug("Whisper is not warmed up") | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | import line_packet | 
					
						
						|  | import socket | 
					
						
						|  |  | 
					
						
						|  | class Connection: | 
					
						
						|  | '''it wraps conn object''' | 
					
						
						|  | PACKET_SIZE = 65536 | 
					
						
						|  |  | 
					
						
						|  | def __init__(self, conn): | 
					
						
						|  | self.conn = conn | 
					
						
						|  | self.last_line = "" | 
					
						
						|  |  | 
					
						
						|  | self.conn.setblocking(True) | 
					
						
						|  |  | 
					
						
						|  | def send(self, line): | 
					
						
						|  | '''it doesn't send the same line twice, because it was problematic in online-text-flow-events''' | 
					
						
						|  | if line == self.last_line: | 
					
						
						|  | return | 
					
						
						|  | line_packet.send_one_line(self.conn, line) | 
					
						
						|  | self.last_line = line | 
					
						
						|  |  | 
					
						
						|  | def receive_lines(self): | 
					
						
						|  | in_line = line_packet.receive_lines(self.conn) | 
					
						
						|  | return in_line | 
					
						
						|  |  | 
					
						
						|  | def non_blocking_receive_audio(self): | 
					
						
						|  | r = self.conn.recv(self.PACKET_SIZE) | 
					
						
						|  | return r | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | import io | 
					
						
						|  | import soundfile | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | class ServerProcessor: | 
					
						
						|  |  | 
					
						
						|  | def __init__(self, c, online_asr_proc, min_chunk): | 
					
						
						|  | self.connection = c | 
					
						
						|  | self.online_asr_proc = online_asr_proc | 
					
						
						|  | self.min_chunk = min_chunk | 
					
						
						|  |  | 
					
						
						|  | self.last_end = None | 
					
						
						|  |  | 
					
						
						|  | def receive_audio_chunk(self): | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | out = [] | 
					
						
						|  | while sum(len(x) for x in out) < self.min_chunk*SAMPLING_RATE: | 
					
						
						|  | raw_bytes = self.connection.non_blocking_receive_audio() | 
					
						
						|  | if not raw_bytes: | 
					
						
						|  | break | 
					
						
						|  | sf = soundfile.SoundFile(io.BytesIO(raw_bytes), channels=1,endian="LITTLE",samplerate=SAMPLING_RATE, subtype="PCM_16",format="RAW") | 
					
						
						|  | audio, _ = librosa.load(sf,sr=SAMPLING_RATE,dtype=np.float32) | 
					
						
						|  | out.append(audio) | 
					
						
						|  | if not out: | 
					
						
						|  | return None | 
					
						
						|  | return np.concatenate(out) | 
					
						
						|  |  | 
					
						
						|  | def format_output_transcript(self,o): | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | if o[0] is not None: | 
					
						
						|  | beg, end = o[0]*1000,o[1]*1000 | 
					
						
						|  | if self.last_end is not None: | 
					
						
						|  | beg = max(beg, self.last_end) | 
					
						
						|  |  | 
					
						
						|  | self.last_end = end | 
					
						
						|  | print("%1.0f %1.0f %s" % (beg,end,o[2]),flush=True,file=sys.stderr) | 
					
						
						|  | return "%1.0f %1.0f %s" % (beg,end,o[2]) | 
					
						
						|  | else: | 
					
						
						|  |  | 
					
						
						|  | return None | 
					
						
						|  |  | 
					
						
						|  | def send_result(self, o): | 
					
						
						|  | msg = self.format_output_transcript(o) | 
					
						
						|  | if msg is not None: | 
					
						
						|  | self.connection.send(msg) | 
					
						
						|  |  | 
					
						
						|  | def process(self): | 
					
						
						|  |  | 
					
						
						|  | self.online_asr_proc.init() | 
					
						
						|  | while True: | 
					
						
						|  | a = self.receive_audio_chunk() | 
					
						
						|  | if a is None: | 
					
						
						|  | break | 
					
						
						|  | self.online_asr_proc.insert_audio_chunk(a) | 
					
						
						|  | o = online.process_iter() | 
					
						
						|  | try: | 
					
						
						|  | self.send_result(o) | 
					
						
						|  | except BrokenPipeError: | 
					
						
						|  | logging.info("broken pipe -- connection closed?") | 
					
						
						|  | break | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: | 
					
						
						|  | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) | 
					
						
						|  | s.bind((args.host, args.port)) | 
					
						
						|  | s.listen(1) | 
					
						
						|  | logging.info('Listening on'+str((args.host, args.port))) | 
					
						
						|  | while True: | 
					
						
						|  | conn, addr = s.accept() | 
					
						
						|  | logging.info('Connected to client on {}'.format(addr)) | 
					
						
						|  | connection = Connection(conn) | 
					
						
						|  | proc = ServerProcessor(connection, online, min_chunk) | 
					
						
						|  | proc.process() | 
					
						
						|  | conn.close() | 
					
						
						|  | logging.info('Connection to client closed') | 
					
						
						|  | logging.info('Connection closed, terminating.') | 
					
						
						|  |  |