Commit bdaea1d (1 parent: ab6a69a)
changed device to cpu
app.py
CHANGED
@@ -37,8 +37,8 @@ os.makedirs("data", exist_ok=True)
 # SLM: Microsoft PHI-2 model is loaded
 # It does have higher memory and compute requirements compared to TinyLlama and Falcon
 # But it gives the best results among the three
-
-DEVICE = "cuda" # or cuda
+DEVICE = "cpu" # or cuda
+# DEVICE = "cuda" # or cuda
 # MODEL_NAME = "TinyLlama/TinyLlama_v1.1"
 # MODEL_NAME = "tiiuae/falcon-rw-1b"
 MODEL_NAME = "microsoft/phi-2"
@@ -55,7 +55,7 @@ if tokenizer.pad_token is None:
 # Since the model is to be hosted on a cpu instance, we use float32
 # For GPU, we can use float16 or bfloat16
 model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME, torch_dtype=torch.
+    MODEL_NAME, torch_dtype=torch.float32, trust_remote_code=True
 ).to(DEVICE)
 model.eval()
 # model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
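
The change itself is small: DEVICE is flipped from "cuda" to "cpu" and the model is loaded in float32 with trust_remote_code=True, matching the comments about CPU hosting. As a reference point only, here is a minimal sketch (not the Space's actual app.py) of the same loading logic with device and dtype picked at runtime rather than hand-edited per deployment. The pad-token fallback and the CPU-only quantize_dynamic step are assumptions extrapolated from the diff's context lines.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "microsoft/phi-2"

# Detect the runtime instead of hard-coding DEVICE.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Match dtype to device, as the diff's comments suggest:
# float32 on CPU, float16 (or bfloat16) on GPU.
DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
if tokenizer.pad_token is None:
    # Common fallback; the Space's actual handling is outside this diff.
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=DTYPE, trust_remote_code=True
).to(DEVICE)
model.eval()

if DEVICE == "cpu":
    # The commented-out line from the diff, re-enabled: dynamic int8
    # quantization of the Linear layers, which only applies on CPU.
    model = torch.quantization.quantize_dynamic(
        model, {torch.nn.Linear}, dtype=torch.qint8
    )

Hard-coding "cpu" as the commit does is equally valid for a Space pinned to CPU hardware; the runtime check only matters if the same app.py moves between CPU and GPU instances.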
|