Spaces:
Running
on
Zero
Running
on
Zero
burtenshaw
committed on
Commit
·
3372a22
1
Parent(s):
348c664
move max_new_tokens to model definition
Browse files
- app/app.py (+3 -2)
app/app.py
CHANGED
@@ -58,12 +58,13 @@ def create_inference_client(
|
|
58 |
"""
|
59 |
if ZERO_GPU:
|
60 |
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
|
61 |
-
model = AutoModelForCausalLM.from_pretrained(
|
|
|
|
|
62 |
return pipeline(
|
63 |
"text-generation",
|
64 |
model=model,
|
65 |
tokenizer=tokenizer,
|
66 |
-
model_kwargs={"max_new_tokens": 2000},
|
67 |
)
|
68 |
else:
|
69 |
return InferenceClient(
|
|
|
58 |
"""
|
59 |
if ZERO_GPU:
|
60 |
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
|
61 |
+
model = AutoModelForCausalLM.from_pretrained(
|
62 |
+
BASE_MODEL, load_in_8bit=True, max_new_tokens=2000
|
63 |
+
)
|
64 |
return pipeline(
|
65 |
"text-generation",
|
66 |
model=model,
|
67 |
tokenizer=tokenizer,
|
|
|
68 |
)
|
69 |
else:
|
70 |
return InferenceClient(
|