Fix bugs
translate.py  CHANGED  +13 -4
```diff
@@ -54,26 +54,35 @@ def main(
     if tensorrt:
         import torch_tensorrt
 
+        device = "cuda"
+
+        model.to(device)
+
         traced_model = torch.jit.trace(
-            model, [torch.randn((batch_size, max_length)).to("cuda")]
+            model, [torch.randn((batch_size, max_length)).to("cuda", dtype=torch.long)]
         )
         model = torch_tensorrt.compile(
             traced_model,
-            inputs=[torch_tensorrt.Input((batch_size, max_length), dtype=…
+            inputs=[torch_tensorrt.Input((batch_size, max_length), dtype=torch.long)],
             enabled_precisions={dtype},
         )
     else:
         if torch.cuda.is_available():
-            …
+            device = "cuda"
+
         else:
-            …
+            device = "cpu"
             print("CUDA not available. Using CPU. This will be slow.")
+        model.to(device, dtype=dtype)
 
     with tqdm(total=total_lines, desc="Dataset translation") as pbar, open(
         output_path, "w+", encoding="utf-8"
     ) as output_file:
         with torch.no_grad():
            for batch in data_loader:
+                batch["input_ids"] = batch["input_ids"].to(device)
+                batch["attention_mask"] = batch["attention_mask"].to(device)
+
                generated_tokens = model.generate(
                    **batch, forced_bos_token_id=lang_code_to_idx
                )
```
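Taken together, the patch pins everything to one explicit `device` and then moves the model and every input tensor onto it. The sketch below is a minimal reconstruction of the fixed non-TensorRT path, assuming `model`, `data_loader`, `dtype`, and `lang_code_to_idx` are defined earlier in `translate.py`; it illustrates the pattern, it is not the file itself.

```python
import torch

# Minimal sketch of the fixed eager path. `model`, `data_loader`,
# `dtype`, and `lang_code_to_idx` are assumed to come from translate.py.
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cpu":
    print("CUDA not available. Using CPU. This will be slow.")
model.to(device, dtype=dtype)

with torch.no_grad():
    for batch in data_loader:
        # DataLoader tensors start on the CPU, while generate() runs
        # wherever the model lives, so each input tensor must be moved
        # to the same device before the forward pass.
        batch["input_ids"] = batch["input_ids"].to(device)
        batch["attention_mask"] = batch["attention_mask"].to(device)
        generated_tokens = model.generate(
            **batch, forced_bos_token_id=lang_code_to_idx
        )
```

On the TensorRT branch the same idea shows up as a dtype fix: token IDs are integers, so both the example tensor handed to `torch.jit.trace` and the `torch_tensorrt.Input` signature are declared as `torch.long`, while `enabled_precisions={dtype}` continues to control only the compute precision of the compiled engine.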