burtenshaw committed on
Commit
3372a22
·
1 Parent(s): 348c664

move max_new_tokens to model definition

Browse files
Files changed (1) hide show
  1. app/app.py +3 -2
app/app.py CHANGED
@@ -58,12 +58,13 @@ def create_inference_client(
58
  """
59
  if ZERO_GPU:
60
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
61
- model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, load_in_8bit=True)
 
 
62
  return pipeline(
63
  "text-generation",
64
  model=model,
65
  tokenizer=tokenizer,
66
- model_kwargs={"max_new_tokens": 2000},
67
  )
68
  else:
69
  return InferenceClient(
 
58
  """
59
  if ZERO_GPU:
60
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
61
+ model = AutoModelForCausalLM.from_pretrained(
62
+ BASE_MODEL, load_in_8bit=True, max_new_tokens=2000
63
+ )
64
  return pipeline(
65
  "text-generation",
66
  model=model,
67
  tokenizer=tokenizer,
 
68
  )
69
  else:
70
  return InferenceClient(