sbicy committed on
Commit
62e3b90
·
verified ·
1 Parent(s): d507155

major code adjustments to hopefully reduce latency

Browse files
Files changed (1) hide show
  1. app.py +52 -35
app.py CHANGED
@@ -1,50 +1,65 @@
1
  import torch
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import gradio as gr
 
4
 
5
- # Set Device to CPU
6
- device = torch.device('cpu')
7
 
8
- # Load the Models
9
- # Model 1: GPT-2 Medium
10
- tokenizer1 = AutoTokenizer.from_pretrained('gpt2-medium')
11
- model1 = AutoModelForCausalLM.from_pretrained('gpt2-medium')
12
  model1.to(device)
13
 
14
- # Model 2: GPT-Neo 125M
15
- tokenizer2 = AutoTokenizer.from_pretrained('EleutherAI/gpt-neo-125M')
16
- model2 = AutoModelForCausalLM.from_pretrained('EleutherAI/gpt-neo-125M')
17
  model2.to(device)
18
 
19
- # Define Text Generation Functions
20
- def generate_text_model1(prompt, temperature, top_p):
21
- inputs = tokenizer1(prompt, return_tensors='pt').to(device)
22
  with torch.no_grad():
23
  outputs = model1.generate(
24
- **inputs,
25
- max_new_tokens=30,
 
 
26
  do_sample=True,
27
  top_k=50,
28
- top_p=top_p,
29
- temperature=temperature
30
  )
31
  text = tokenizer1.decode(outputs[0], skip_special_tokens=True)
32
  return text
33
 
34
- def generate_text_model2(prompt, temperature, top_p):
35
- inputs = tokenizer2(prompt, return_tensors='pt').to(device)
36
  with torch.no_grad():
37
  outputs = model2.generate(
38
- **inputs,
39
- max_new_tokens=30,
 
 
40
  do_sample=True,
41
  top_k=50,
42
- top_p=top_p,
43
- temperature=temperature
44
  )
45
  text = tokenizer2.decode(outputs[0], skip_special_tokens=True)
46
  return text
47
 
 
 
 
 
 
 
 
 
 
 
48
  def compare_models(prompt, temperature, top_p):
49
  output1 = generate_text_model1(prompt, temperature, top_p)
50
  output2 = generate_text_model2(prompt, temperature, top_p)
@@ -55,21 +70,23 @@ def compare_models(prompt, temperature, top_p):
55
 
56
  return output1_with_params, output2_with_params
57
 
 
 
 
 
 
 
 
 
 
58
  # Create Gradio Interface
59
  iface = gr.Interface(
60
  fn=compare_models,
61
- inputs=[
62
- gr.Textbox(lines=2, placeholder='Enter a prompt here...', label='Prompt'),
63
- gr.Slider(minimum=0.1, maximum=1.0, value=0.8, label='Temperature'),
64
- gr.Slider(minimum=0.1, maximum=1.0, value=0.95, label='Top-p')
65
- ],
66
- outputs=[
67
- gr.Markdown(label='GPT-2 Medium Output'),
68
- gr.Markdown(label='GPT-Neo 125M Output')
69
- ],
70
- title='Compare Text Generation Models with Adjustable Parameters',
71
- description='Enter a prompt and adjust the temperature and top-p parameters to see how they affect the generated text.'
72
  )
73
 
74
- if __name__ == "__main__":
75
- iface.launch()
 
1
  import torch
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import gradio as gr
4
+ import concurrent.futures
5
 
6
+ # Set Device
7
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
8
 
9
+ # Load Models
10
+ # Model 1: Bloom 560M
11
+ tokenizer1 = AutoTokenizer.from_pretrained('bigscience/bloom-560m')
12
+ model1 = AutoModelForCausalLM.from_pretrained('bigscience/bloom-560m', torch_dtype=torch.float16)
13
  model1.to(device)
14
 
15
+ # Model 2: GPT-Neo 1.3B
16
+ tokenizer2 = AutoTokenizer.from_pretrained('EleutherAI/gpt-neo-1.3B')
17
+ model2 = AutoModelForCausalLM.from_pretrained('EleutherAI/gpt-neo-1.3B', torch_dtype=torch.float16)
18
  model2.to(device)
19
 
20
+ # Define Functions with Improved Parameters
21
+ def generate_text_model1(prompt):
22
+ inputs = tokenizer1.encode(prompt, return_tensors='pt').to(device)
23
  with torch.no_grad():
24
  outputs = model1.generate(
25
+ inputs,
26
+ max_length=50,
27
+ num_return_sequences=1,
28
+ no_repeat_ngram_size=2,
29
  do_sample=True,
30
  top_k=50,
31
+ top_p=0.95,
32
+ temperature=0.8
33
  )
34
  text = tokenizer1.decode(outputs[0], skip_special_tokens=True)
35
  return text
36
 
37
+ def generate_text_model2(prompt):
38
+ inputs = tokenizer2.encode(prompt, return_tensors='pt').to(device)
39
  with torch.no_grad():
40
  outputs = model2.generate(
41
+ inputs,
42
+ max_length=50,
43
+ num_return_sequences=1,
44
+ no_repeat_ngram_size=2,
45
  do_sample=True,
46
  top_k=50,
47
+ top_p=0.95,
48
+ temperature=0.8
49
  )
50
  text = tokenizer2.decode(outputs[0], skip_special_tokens=True)
51
  return text
52
 
53
+ # Use ThreadPoolExecutor to Process in Parallel
54
+ def compare_models(prompt):
55
+ with concurrent.futures.ThreadPoolExecutor() as executor:
56
+ future1 = executor.submit(generate_text_model1, prompt)
57
+ future2 = executor.submit(generate_text_model2, prompt)
58
+ output1 = future1.result()
59
+ output2 = future2.result()
60
+ return output1, output2
61
+
62
+
63
  def compare_models(prompt, temperature, top_p):
64
  output1 = generate_text_model1(prompt, temperature, top_p)
65
  output2 = generate_text_model2(prompt, temperature, top_p)
 
70
 
71
  return output1_with_params, output2_with_params
72
 
73
+ # Use ThreadPoolExecutor to Process in Parallel
74
+ def compare_models(prompt):
75
+ with concurrent.futures.ThreadPoolExecutor() as executor:
76
+ future1 = executor.submit(generate_text_model1, prompt)
77
+ future2 = executor.submit(generate_text_model2, prompt)
78
+ output1 = future1.result()
79
+ output2 = future2.result()
80
+ return output1, output2
81
+
82
  # Create Gradio Interface
83
  iface = gr.Interface(
84
  fn=compare_models,
85
+ inputs=gr.Textbox(lines=2, placeholder='Enter a prompt here...'),
86
+ outputs=[gr.Textbox(label='Bloom 560M Output'), gr.Textbox(label='GPT-Neo 1.3B Output')],
87
+ title='Compare Text Generation Models',
88
+ description='Enter a prompt and see how two different models generate text.'
 
 
 
 
 
 
 
89
  )
90
 
91
+ # Launch Interface
92
+ iface.launch()