Avihu committed on
Commit f620137 · verified · 1 Parent(s): ef32982

Update README.md


Simplify the example code

Files changed (1)
  1. README.md +16 -45
README.md CHANGED
@@ -62,61 +62,32 @@ from huggingface_hub import hf_hub_download
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 model_name = "ibm-granite/granite-speech-3.3-8b"
-speech_granite_processor = AutoProcessor.from_pretrained(
-    model_name)
-tokenizer = speech_granite_processor.tokenizer
-speech_granite = AutoModelForSpeechSeq2Seq.from_pretrained(
-    model_name).to(device)
-
-# prepare speech and text prompt, using the appropriate prompt template
-
-audio_path = hf_hub_download(repo_id=model_name, filename='10226_10111_000000.wav')
+processor = AutoProcessor.from_pretrained(model_name)
+tokenizer = processor.tokenizer
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    model_name, device_map=device, torch_dtype=torch.bfloat16
+)
+# load audio
+audio_path = hf_hub_download(repo_id=model_name, filename="10226_10111_000000.wav")
 wav, sr = torchaudio.load(audio_path, normalize=True)
-assert wav.shape[0] == 1 and sr == 16000 # mono, 16khz
+assert wav.shape[0] == 1 and sr == 16000 # mono, 16khz
 
 # create text prompt
+system_prompt = "Knowledge Cutoff Date: April 2024.\nToday's Date: April 9, 2025.\nYou are Granite, developed by IBM. You are a helpful AI assistant"
+user_prompt = "<|audio|>can you transcribe the speech into a written format?"
 chat = [
-    {
-        "role": "system",
-        "content": "Knowledge Cutoff Date: April 2024.\nToday's Date: April 9, 2025.\nYou are Granite, developed by IBM. You are a helpful AI assistant",
-    },
-    {
-        "role": "user",
-        "content": "<|audio|>can you transcribe the speech into a written format?",
-    }
+    dict(role="system", content=system_prompt),
+    dict(role="user", content=user_prompt),
 ]
+prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
 
-text = tokenizer.apply_chat_template(
-    chat, tokenize=False, add_generation_prompt=True
-)
-
-# compute audio embeddings
-model_inputs = speech_granite_processor(
-    text,
-    wav,
-    device=device, # Computation device; returned tensors are put on CPU
-    return_tensors="pt",
-).to(device)
-
-model_outputs = speech_granite.generate(
-    **model_inputs,
-    max_new_tokens=200,
-    num_beams=4,
-    do_sample=False,
-    min_length=1,
-    top_p=1.0,
-    repetition_penalty=1.0,
-    length_penalty=1.0,
-    temperature=1.0,
-    bos_token_id=tokenizer.bos_token_id,
-    eos_token_id=tokenizer.eos_token_id,
-    pad_token_id=tokenizer.pad_token_id,
-)
+# run the processor+model
+model_inputs = processor(prompt, wav, device=device, return_tensors="pt").to(device)
+model_outputs = model.generate(**model_inputs, max_new_tokens=200, do_sample=False, num_beams=1)
 
 # Transformers includes the input IDs in the response.
 num_input_tokens = model_inputs["input_ids"].shape[-1]
 new_tokens = torch.unsqueeze(model_outputs[0, num_input_tokens:], dim=0)
-
 output_text = tokenizer.batch_decode(
     new_tokens, add_special_tokens=False, skip_special_tokens=True
 )
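
For reference, here is a minimal sketch of the simplified example assembled end to end. The import lines and the final print() call are assumptions not shown in this hunk (only the hf_hub_download import is visible in the hunk header); everything else mirrors the added lines above.

```python
# Assembled version of the simplified README example.
# NOTE: these imports are assumed; only hf_hub_download appears in the hunk header.
import torch
import torchaudio
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor
from huggingface_hub import hf_hub_download

device = "cuda" if torch.cuda.is_available() else "cpu"

model_name = "ibm-granite/granite-speech-3.3-8b"
processor = AutoProcessor.from_pretrained(model_name)
tokenizer = processor.tokenizer
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_name, device_map=device, torch_dtype=torch.bfloat16
)

# load the sample audio hosted in the model repo (mono, 16 kHz)
audio_path = hf_hub_download(repo_id=model_name, filename="10226_10111_000000.wav")
wav, sr = torchaudio.load(audio_path, normalize=True)
assert wav.shape[0] == 1 and sr == 16000  # mono, 16khz

# create text prompt; the <|audio|> placeholder in the user turn refers to the audio input
system_prompt = "Knowledge Cutoff Date: April 2024.\nToday's Date: April 9, 2025.\nYou are Granite, developed by IBM. You are a helpful AI assistant"
user_prompt = "<|audio|>can you transcribe the speech into a written format?"
chat = [
    dict(role="system", content=system_prompt),
    dict(role="user", content=user_prompt),
]
prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

# run the processor + model (greedy decoding)
model_inputs = processor(prompt, wav, device=device, return_tensors="pt").to(device)
model_outputs = model.generate(**model_inputs, max_new_tokens=200, do_sample=False, num_beams=1)

# Transformers includes the input IDs in the response, so keep only the new tokens
num_input_tokens = model_inputs["input_ids"].shape[-1]
new_tokens = torch.unsqueeze(model_outputs[0, num_input_tokens:], dim=0)
output_text = tokenizer.batch_decode(
    new_tokens, add_special_tokens=False, skip_special_tokens=True
)
print(output_text[0])  # transcription; the print is added here for illustration
```

Most of the 45 removed lines come from switching generate() from beam search (num_beams=4) to greedy decoding and dropping arguments that appear to already match their defaults, plus collapsing the processor call and chat messages onto single lines.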