cahya committed on
Commit
e10b1cf
1 Parent(s): 126d43b

add inference code

Browse files
Files changed (1) hide show
  1. README.md +34 -0
README.md CHANGED
@@ -52,3 +52,37 @@ This is Bloomz-7b1-mt model fine-tuned with multilingual instruction dataset and
52
  - Thai
53
  - Vietnamese
54
  - Chinese
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  - Thai
53
  - Vietnamese
54
  - Chinese
55
+
56
+ ## Usage
57
+
58
+ The following code shows how to run inference with this model:
59
+ ```
60
+ import torch
61
+ from peft import PeftModel, PeftConfig
62
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
63
+
64
+ peft_model_id = "cahya/bloomz-7b1-instruct"
65
+ config = PeftConfig.from_pretrained(peft_model_id)
66
+
67
+ model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True,
68
+ load_in_8bit=True, device_map='auto')
69
+ tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
70
+
71
+ # Load the LoRA adapter weights on top of the base model
72
+ model = PeftModel.from_pretrained(model, peft_model_id)
73
+
74
+ batch = tokenizer("User: How old is the universe?\nAssistant: ", return_tensors='pt').to(0)
75
+
76
+
77
+ with torch.cuda.amp.autocast():
78
+ output_tokens = model.generate(**batch, max_new_tokens=200,
79
+ min_length=50,
80
+ do_sample=True,
81
+ top_k=40,
82
+ top_p=0.9,
83
+ temperature=0.2,
84
+ repetition_penalty=1.2,
85
+ num_return_sequences=1)
86
+
87
+ print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))
88
+ ```