Amir230703 committed on
Commit 377e612 · verified · 1 Parent(s): dd89c04

Update app.py

Files changed (1)
app.py  +26 -34
app.py CHANGED
@@ -1,46 +1,38 @@
-import torch
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-# Model name
+# Load the model and tokenizer
 model_name = "Amir230703/phi3-medmcqa-finetuned"
-
-# Load tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,  # Use float16 for speed if GPU available
-    device_map="auto"  # Moves model to GPU if available
-)
-
-# Function for generating responses
-def generate_response(input_text):
-    try:
-        input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(model.device)
-
-        output = model.generate(
-            input_ids,
-            max_length=200,
-            temperature=0.7,
-            top_p=0.9,
-            do_sample=True
-        )
-
-        generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
-        return generated_text
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
 
-    except Exception as e:
-        return f"Error: {str(e)}"
+def generate_answer(question):
+    # Tokenize the input question
+    input_ids = tokenizer(question, return_tensors="pt").input_ids.to(model.device)
+
+    # Generate the answer
+    output = model.generate(
+        input_ids,
+        max_length=100,  # Reduced max_length for faster response
+        temperature=0.7,
+        top_p=0.9,
+        do_sample=True,
+        num_return_sequences=1  # Only return one answer
+    )
+
+    # Decode the output
+    answer = tokenizer.decode(output[0], skip_special_tokens=True)
+    return answer
 
-# Create Gradio interface
+# Gradio Interface
 demo = gr.Interface(
-    fn=generate_response,
-    inputs=gr.Textbox(placeholder="Enter a medical question..."),
+    fn=generate_answer,
+    inputs=gr.Textbox(placeholder="Enter a medical question here..."),
     outputs=gr.Textbox(),
     title="Medical QA Model",
-    description="Enter a medical question, and the AI will provide an answer.",
+    description="Enter a medical question, and the AI will provide an answer."
 )
 
-# Run the app
-if __name__ == "__main__":
-    demo.launch()
+# Launch the Gradio app
+demo.launch()
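For anyone trying the updated code outside the Gradio UI, a minimal smoke-test sketch of the same generation call is shown below. It assumes a CUDA GPU is available (the new code loads the model in float16 with device_map="auto"); the question string is only an example, not part of the commit.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Same loading step as the updated app.py; assumes a GPU for float16.
model_name = "Amir230703/phi3-medmcqa-finetuned"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")

# Example question (placeholder input).
question = "Which vitamin deficiency causes scurvy?"
input_ids = tokenizer(question, return_tensors="pt").input_ids.to(model.device)
output = model.generate(input_ids, max_length=100, temperature=0.7, top_p=0.9, do_sample=True, num_return_sequences=1)
print(tokenizer.decode(output[0], skip_special_tokens=True))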