entai2965 committed on
Commit
536bfe4
·
verified ·
1 Parent(s): db01991

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +34 -27
README.md CHANGED
@@ -124,32 +124,6 @@ To force the target language id as the first generated token, pass the `forced_b
124
 
125
  To install `sentencepiece` run `pip install sentencepiece`
126
 
127
-
128
- ```python
129
- from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
130
-
131
- hi_text = "जीवन एक चॉकलेट बॉक्स की तरह है।"
132
- chinese_text = "生活就像一盒巧克力。"
133
-
134
- model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
135
- tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
136
-
137
- # translate Hindi to French
138
- tokenizer.src_lang = "hi"
139
- encoded_hi = tokenizer(hi_text, return_tensors="pt")
140
- generated_tokens = model.generate(**encoded_hi, forced_bos_token_id=tokenizer.get_lang_id("fr"))
141
- tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
142
- # => "La vie est comme une boîte de chocolat."
143
-
144
- # translate Chinese to English
145
- tokenizer.src_lang = "zh"
146
- encoded_zh = tokenizer(chinese_text, return_tensors="pt")
147
- generated_tokens = model.generate(**encoded_zh, forced_bos_token_id=tokenizer.get_lang_id("en"))
148
- tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
149
- # => "Life is like a box of chocolate."
150
- ```
151
-
152
-
153
  See the [model hub](https://huggingface.co/models?filter=m2m_100) to look for more fine-tuned versions.
154
 
155
 
@@ -167,4 +141,37 @@ Afrikaans (af), Amharic (am), Arabic (ar), Asturian (ast), Azerbaijani (az), Ba
167
  archivePrefix={arXiv},
168
  primaryClass={cs.CL}
169
  }
170
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
  To install `sentencepiece` run `pip install sentencepiece`
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  See the [model hub](https://huggingface.co/models?filter=m2m_100) to look for more fine-tuned versions.
128
 
129
 
 
141
  archivePrefix={arXiv},
142
  primaryClass={cs.CL}
143
  }
144
+ ```
145
+
146
+ ## How to download this model using Python
147
+ - Install Python https://www.python.org/downloads/
148
+ - `cmd`
149
+ - `python --version`
150
+ - `python -m pip install huggingface_hub`
151
+ - `python`
152
+
153
+ ```
154
+ import huggingface_hub
155
+ huggingface_hub.snapshot_download('entai2965/m2m100-418M-ctranslate2',local_dir='m2m100-418M-ctranslate2')
156
+ ```
157
+
158
+ ## How to run this model
159
+ - https://opennmt.net/CTranslate2/guides/transformers.html#m2m-100
160
+ - `cmd`
161
+ - `python -m pip install ctranslate2 transformers`
162
+ - `python`
163
+ ```
164
+ import ctranslate2
165
+ import transformers
166
+
167
+ translator = ctranslate2.Translator("m2m100-418M-ctranslate2", device="cpu")
168
+ tokenizer = transformers.AutoTokenizer.from_pretrained("m2m100-418M-ctranslate2",clean_up_tokenization_spaces=True)
169
+ tokenizer.src_lang = "en"
170
+
171
+ source = tokenizer.convert_ids_to_tokens(tokenizer.encode("Hello world!"))
172
+ target_prefix = [tokenizer.lang_code_to_token["de"]]
173
+ results = translator.translate_batch([source], target_prefix=[target_prefix])
174
+ target = results[0].hypotheses[0][1:]
175
+
176
+ print(tokenizer.decode(tokenizer.convert_tokens_to_ids(target)))
177
+ ```