rodrigomasini committed on
Commit dacf75f
1 Parent(s): 51717bc

Update app_v4.py

Files changed (1)
  1. app_v4.py +4 -2
app_v4.py CHANGED
@@ -64,7 +64,9 @@ if model_loaded:
     st.write(f"GPU Memory Info after loading the model: {gpu_memory_after}")
 
     # User input for the model
-    user_input = st.text_input("Input a phrase")
+    col1, col2 = st.columns(2)
+    user_input = col1.text_input("Input a phrase")
+    max_token = col2.number_input(label="Select max number of generated tokens", min_value=1, max_value=1024, value=350, step=5)
 
     # Generate button
     if st.button("Generate the prompt"):
@@ -74,7 +76,7 @@ if model_loaded:
         inputs = inputs.to(device)  # Move inputs to the same device as the model
         # Generate text using torch.inference_mode for better performance during inference
         with torch.inference_mode():
-            output = model.generate(**inputs, max_new_tokens=50)  # Adjust max_new_tokens if needed
+            output = model.generate(**inputs, max_new_tokens=max_token)
 
         # Cut the tokens at the input length to display only the generated text
         output_ids_cut = output[:, inputs["input_ids"].shape[1]:]
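For readers who want to try the new layout outside the full app, below is a minimal, self-contained sketch of the pattern this commit introduces: a two-column Streamlit row where the second column's number_input drives max_new_tokens. The model name ("gpt2"), the cached loader, and the final decode/display line are stand-ins that the diff does not show; app_v4.py loads its own model earlier in the file.

# Minimal sketch of the pattern added in this commit (not the full app_v4.py).
# "gpt2" is only a stand-in model; the real app loads its own model/tokenizer.
import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"

@st.cache_resource
def load_model():
    # Cache the model across reruns so widget interactions stay fast.
    tok = AutoTokenizer.from_pretrained("gpt2")
    mdl = AutoModelForCausalLM.from_pretrained("gpt2").to(device)
    return tok, mdl

tokenizer, model = load_model()

# Two side-by-side widgets: the input phrase and the generation length limit.
col1, col2 = st.columns(2)
user_input = col1.text_input("Input a phrase")
max_token = col2.number_input(
    label="Select max number of generated tokens",
    min_value=1, max_value=1024, value=350, step=5,
)

if st.button("Generate the prompt") and user_input:
    inputs = tokenizer(user_input, return_tensors="pt").to(device)
    with torch.inference_mode():  # no-grad path for faster inference
        output = model.generate(**inputs, max_new_tokens=int(max_token))
    # Keep only the newly generated tokens, dropping the prompt tokens.
    output_ids_cut = output[:, inputs["input_ids"].shape[1]:]
    st.write(tokenizer.decode(output_ids_cut[0], skip_special_tokens=True))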