kimhyunwoo committed (verified)
Commit 00c54fb · Parent(s): 82dec9c

Update app.py

Files changed (1)
  1. app.py +9 -12
app.py CHANGED
@@ -1,5 +1,6 @@
 import gradio as gr
 from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+import torch  # Import torch
 
 # --- Model Loading (Do this only once, outside the function) ---
 
@@ -7,15 +8,13 @@ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 use_pipeline = True  # Set to False to use the manual method
 
 if use_pipeline:
-    pipe = pipeline("text-generation", model="kakaocorp/kanana-nano-2.1b-base")
+    pipe = pipeline("text-generation", model="kakaocorp/kanana-nano-2.1b-base", device="cpu")  # Explicitly on CPU
 else:
     # Option 2: Manual Tokenizer and Model (More Control)
     tokenizer = AutoTokenizer.from_pretrained("kakaocorp/kanana-nano-2.1b-base")
     model = AutoModelForCausalLM.from_pretrained("kakaocorp/kanana-nano-2.1b-base")
-    # Move model to GPU if available
-    if model.device.type != 'cuda' and torch.cuda.is_available():
-        model = model.to("cuda")
-        print("Model moved to CUDA")
+    # No need to move to GPU. It will default to CPU.
+    print("Model loaded on CPU")
 
 # --- Generation Function ---
 
@@ -45,9 +44,7 @@ def generate_text(prompt, max_length=50, temperature=1.0, top_k=50, top_p=1.0, n
     else:  # Manual method
         try:
             inputs = tokenizer(prompt, return_tensors="pt")
-            # Move input tensors to the same device as the model
-            inputs = {k: v.to(model.device) for k, v in inputs.items()}
-
+            # No need to move to GPU. Inputs will default to CPU.
             outputs = model.generate(
                 **inputs,
                 max_length=max_length,
@@ -59,12 +56,12 @@ def generate_text(prompt, max_length=50, temperature=1.0, top_k=50, top_p=1.0, n
                 pad_token_id=tokenizer.eos_token_id,  # Ensure padding is correct
                 do_sample=True  # Ensure sampling happens.
             )
-
+
             generated_texts = []
             for i in range(outputs.shape[0]):
                 generated_text = tokenizer.decode(outputs[i], skip_special_tokens=True)
                 generated_texts.append(generated_text)
-
+
             return "\n\n".join(generated_texts)
         except Exception as e:
             return f"Error during generation: {e}"
@@ -90,7 +87,7 @@ with gr.Blocks() as demo:
             generate_button = gr.Button("Generate")
 
         with gr.Column():
-            output_text = gr.Textbox(label="Generated Text", readonly=True)
+            output_text = gr.Textbox(label="Generated Text", interactive=False)  # Use interactive=False
 
     generate_button.click(
         generate_text,
@@ -106,4 +103,4 @@ with gr.Blocks() as demo:
         outputs=output_text,
     )
 
-demo.launch(share=True)
+demo.launch()  # Remove share=True for local testing, add it back for deployment
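
For reference, a minimal sketch of the CPU-only path this commit settles on: the pipeline is created once with device="cpu", and the output box uses interactive=False (readonly is not a supported gr.Textbox argument, which is why the diff swaps it out). The helper name generate_cpu and the trimmed-down UI below are illustrative assumptions, not part of the original app.py.

# Minimal sketch of the CPU-only path, assuming the model and Gradio layout from the diff.
# generate_cpu and the simplified UI are illustrative, not the original app.py.
import gradio as gr
from transformers import pipeline

# Load once at module level, explicitly on CPU (mirrors the pipeline branch of the diff).
pipe = pipeline("text-generation", model="kakaocorp/kanana-nano-2.1b-base", device="cpu")

def generate_cpu(prompt, max_length=50, temperature=1.0):
    # The pipeline handles tokenization, generation, and decoding; no .to(device) calls needed.
    results = pipe(
        prompt,
        max_length=max_length,
        temperature=temperature,
        do_sample=True,
        pad_token_id=pipe.tokenizer.eos_token_id,  # same padding workaround as the manual branch
    )
    return "\n\n".join(r["generated_text"] for r in results)

with gr.Blocks() as demo:
    prompt_box = gr.Textbox(label="Prompt")
    output_text = gr.Textbox(label="Generated Text", interactive=False)  # read-only output box
    gr.Button("Generate").click(generate_cpu, inputs=prompt_box, outputs=output_text)

demo.launch()  # add share=True only when a public link is needed

Loading the pipeline at module level keeps the model in memory across requests, and dropping the manual .to("cuda") logic leaves everything on the CPU by default, which matches the intent of this commit.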