DeepMostInnovations committed on
Commit
f40b58a
·
verified ·
1 Parent(s): 7e8e702

Add inference script

Browse files
Files changed (1) hide show
  1. hindi_embeddings.py +3 -9
hindi_embeddings.py CHANGED
@@ -384,24 +384,18 @@ class SentenceEmbeddingModel(nn.Module):
384
  return pooled_output
385
 
386
  class HindiEmbedder:
387
- def __init__(self, model_path="/home/ubuntu/output/hindi-embeddings-custom-tokenizer/final", tokenizer_path=None):
388
  """
389
  Initialize the Hindi sentence embedder.
390
 
391
  Args:
392
  model_path: Path to the model directory
393
- tokenizer_path: Optional path to tokenizer. If None, will look in the model directory.
394
  """
395
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
396
  print(f"Using device: {self.device}")
397
 
398
- # Load tokenizer
399
- if tokenizer_path is None:
400
- # Try standard location in model directory
401
- tokenizer_path = os.path.join(model_path, "tokenizer.model")
402
- if not os.path.exists(tokenizer_path):
403
- # Try original location
404
- tokenizer_path = "/home/ubuntu/hindi_tokenizer/tokenizer.model"
405
 
406
  if not os.path.exists(tokenizer_path):
407
  raise FileNotFoundError(f"Could not find tokenizer at {tokenizer_path}")
 
384
  return pooled_output
385
 
386
  class HindiEmbedder:
387
+ def __init__(self, model_path="/home/ubuntu/output/hindi-embeddings-custom-tokenizer/final"):
388
  """
389
  Initialize the Hindi sentence embedder.
390
 
391
  Args:
392
  model_path: Path to the model directory
 
393
  """
394
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
395
  print(f"Using device: {self.device}")
396
 
397
+ # Load tokenizer - look for it in the model directory
398
+ tokenizer_path = os.path.join(model_path, "tokenizer.model")
 
 
 
 
 
399
 
400
  if not os.path.exists(tokenizer_path):
401
  raise FileNotFoundError(f"Could not find tokenizer at {tokenizer_path}")