bhaskartripathi committed on
Commit
1e8edc1
1 Parent(s): efb89c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -194
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import urllib.request
2
  import fitz
3
  import re
4
  import numpy as np
@@ -8,208 +8,34 @@ import gradio as gr
8
  import os
9
  from sklearn.neighbors import NearestNeighbors
10
 
11
def download_pdf(url, output_path):
    """Download the PDF at ``url`` and save it to ``output_path``.

    Args:
        url: Address of the PDF; must use the ``http`` or ``https`` scheme.
        output_path: Local file path the downloaded bytes are written to.

    Raises:
        ValueError: If ``url`` does not use the ``http`` or ``https`` scheme.
    """
    from urllib.parse import urlparse

    url = url.strip()
    # urlretrieve also accepts schemes such as file://, which would let a
    # caller-supplied URL copy a local file instead of downloading one.
    scheme = urlparse(url).scheme.lower()
    if scheme not in ('http', 'https'):
        raise ValueError(
            f'Unsupported URL scheme {scheme!r}; expected http or https.'
        )
    urllib.request.urlretrieve(url, output_path)
13
 
 
 
 
14
 
15
def preprocess(text):
    """Collapse every run of whitespace in ``text`` into a single space.

    Newlines count as whitespace, so line breaks are flattened as well.

    Args:
        text: Raw text, e.g. as extracted from a PDF page.

    Returns:
        The text with each whitespace run replaced by one space. Leading and
        trailing whitespace is reduced to a single space, not removed.
    """
    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    return re.sub(r'\s+', ' ', text)
19
 
20
-
21
def pdf_to_text(path, start_page=1, end_page=None):
    """Extract the text of a PDF, one cleaned string per page.

    Args:
        path: Location of the PDF, passed to ``fitz.open``.
        start_page: 1-based number of the first page to read.
        end_page: 1-based number of the last page to read (inclusive);
            ``None`` reads through the final page.

    Returns:
        A list holding one ``preprocess``-ed string for each page read.
    """
    doc = fitz.open(path)
    try:
        total_pages = doc.page_count
        # Clamp both ends so the loop never asks for a page index that lies
        # outside the document.
        last = total_pages if end_page is None else min(end_page, total_pages)
        first = max(start_page, 1) - 1
        return [
            preprocess(doc.load_page(i).get_text("text"))
            for i in range(first, last)
        ]
    finally:
        # Release the document even when extraction fails part-way through.
        doc.close()
37
-
38
-
39
def text_to_chunks(texts, word_length=150, start_page=1):
    """Split page texts into labelled chunks of at most ``word_length`` words.

    A page's trailing chunk that is shorter than ``word_length`` is carried
    over and prepended to the next page, so only the final page can end with
    a short chunk. Each chunk is labelled with the page it was emitted from.

    Args:
        texts: Page texts in document order.
        word_length: Maximum number of words per chunk (must be >= 1).
        start_page: Page number that corresponds to ``texts[0]``.

    Returns:
        A list of strings formatted as ``[Page no. N] "words ..."``.

    Raises:
        ValueError: If ``word_length`` is smaller than 1.
    """
    if word_length < 1:
        raise ValueError('word_length must be at least 1')

    chunks = []
    carry = []  # short tail of the previous page, waiting to be merged
    last_idx = len(texts) - 1

    for idx, page_text in enumerate(texts):
        # split() ignores repeated/leading/trailing spaces, so empty tokens
        # are never counted as words and blank pages contribute nothing.
        words = carry + page_text.split()
        carry = []
        for i in range(0, len(words), word_length):
            chunk = words[i:i + word_length]
            if len(chunk) < word_length and idx != last_idx:
                # Only the final slice of a page can be short; defer it.
                carry = chunk
                continue
            body = ' '.join(chunk)
            chunks.append(f'[Page no. {idx + start_page}] "{body}"')
    return chunks
55
-
56
-
57
class SemanticSearch:
    """Nearest-neighbour search over Universal Sentence Encoder embeddings.

    Call ``fit`` with a corpus of strings, then call the instance with a
    query string to retrieve the closest corpus entries.
    """

    def __init__(self):
        """Load the sentence encoder; the index is built later by ``fit``."""
        self.use = hub.load('https://tfhub.dev/google/universal-sentence-encoder/4')
        self.fitted = False

    def fit(self, data, batch=1000, n_neighbors=5):
        """Embed ``data`` and build the nearest-neighbour index.

        Args:
            data: Sequence of strings to index.
            batch: Number of strings embedded per encoder call.
            n_neighbors: Neighbours returned per query; capped at the
                number of indexed strings.

        Raises:
            ValueError: If ``data`` is empty.
        """
        if len(data) == 0:
            raise ValueError('Cannot fit SemanticSearch on an empty corpus.')
        self.data = data
        self.embeddings = self.get_text_embedding(data, batch=batch)
        n_neighbors = min(n_neighbors, len(self.embeddings))
        self.nn = NearestNeighbors(n_neighbors=n_neighbors)
        self.nn.fit(self.embeddings)
        self.fitted = True

    def __call__(self, text, return_data=True):
        """Return the indexed entries nearest to ``text``.

        Args:
            text: Query string.
            return_data: When true, return the matching strings; otherwise
                return their indices into the fitted data.

        Raises:
            RuntimeError: If ``fit`` has not been called yet.
        """
        if not self.fitted:
            raise RuntimeError(
                'SemanticSearch.fit() must be called before searching.'
            )
        inp_emb = self.use([text])
        neighbors = self.nn.kneighbors(inp_emb, return_distance=False)[0]
        if return_data:
            return [self.data[i] for i in neighbors]
        return neighbors

    def get_text_embedding(self, texts, batch=1000):
        """Embed ``texts`` in slices of ``batch`` and stack the results.

        Returns:
            One array with a row per input string.
        """
        embeddings = [
            self.use(texts[i:i + batch])
            for i in range(0, len(texts), batch)
        ]
        return np.vstack(embeddings)
91
-
92
-
93
-
94
def load_recommender(path, start_page=1):
    """Index the PDF at ``path`` in the module-level ``recommender``.

    The PDF is read from ``start_page`` onward, split into page-labelled
    chunks, and the chunks are handed to ``recommender.fit``.

    Args:
        path: Location of the PDF to index.
        start_page: 1-based page to start reading from; also used as the
            page number of the first chunk label.

    Returns:
        The status string ``'Corpus Loaded.'``.
    """
    page_texts = pdf_to_text(path, start_page=start_page)
    recommender.fit(text_to_chunks(page_texts, start_page=start_page))
    return 'Corpus Loaded.'
100
-
101
-
102
def generate_text(openAI_key, prompt, engine="text-davinci-003"):
    """Request a single completion for ``prompt`` and return its text.

    Args:
        openAI_key: API key; stored on the ``openai`` module before the call.
        prompt: Full prompt sent to the completion endpoint.
        engine: Name of the completion engine to use.

    Returns:
        The ``text`` of the first returned choice.
    """
    openai.api_key = openAI_key
    request = dict(
        engine=engine,
        prompt=prompt,
        max_tokens=512,
        n=1,
        stop=None,
        temperature=0.7,
    )
    response = openai.Completion.create(**request)
    first_choice = response.choices[0]
    return first_choice.text
114
-
115
-
116
def generate_answer(question, openAI_key):
    """Answer ``question`` from the chunks most similar to it.

    The nearest chunks from the module-level ``recommender`` are listed as
    search results, followed by the answering instructions and the query,
    and the assembled prompt is sent to ``generate_text``.

    Args:
        question: The user's question.
        openAI_key: API key forwarded to ``generate_text``.

    Returns:
        The completion text returned by ``generate_text``.
    """
    topn_chunks = recommender(question)

    instructions = (
        "Instructions: Compose a comprehensive reply to the query using the search results given. "
        "Cite each reference using [ Page Number] notation (every result has this number at the beginning). "
        "Citation should be done at the end of each sentence. If the search results mention multiple subjects "
        "with the same name, create separate answers for each. Only include information found in the results and "
        "don't add any additional information. Make sure the answer is correct and don't output false content. "
        "If the text does not relate to the query, simply state 'Found Nothing'. Ignore outlier "
        "search results which has nothing to do with the question. Only answer what is asked. The "
        "answer should be short and concise. \n\n"
    )

    # The query is interpolated exactly once; a plain (non-f) string here
    # would send the literal text "{question}" to the model.
    prompt = ''.join([
        'search results:\n\n',
        ''.join(c + '\n\n' for c in topn_chunks),
        instructions,
        f"Query: {question}\nAnswer:",
    ])
    return generate_text(openAI_key, prompt, "text-davinci-003")
135
-
136
-
137
def question_answer(url, file, question, openAI_key):
    """Validate the form inputs, index the chosen PDF and answer the question.

    Exactly one of ``url`` and ``file`` must be provided. All inputs are
    validated before any download or indexing is started.

    Args:
        url: URL of a PDF to download; blank when a file is uploaded.
        file: Uploaded file object exposing ``.name``, or ``None``.
        question: The user's question.
        openAI_key: OpenAI API key.

    Returns:
        The generated answer, or a string starting with ``[ERROR]:`` that
        describes the invalid input.
    """
    if openAI_key.strip() == '':
        return '[ERROR]: Please enter you Open AI Key. Get your key here : https://platform.openai.com/account/api-keys'

    has_url = url.strip() != ''
    has_file = file is not None

    if not has_url and not has_file:
        return '[ERROR]: Both URL and PDF is empty. Provide atleast one.'

    if has_url and has_file:
        return '[ERROR]: Both URL and PDF is provided. Please provide only one (eiter URL or PDF).'

    # Checked before loading the corpus so an empty question triggers no
    # download, file rename or re-indexing.
    if question.strip() == '':
        return '[ERROR]: Question field is empty'

    if has_url:
        download_pdf(url, 'corpus.pdf')
        load_recommender('corpus.pdf')
    else:
        old_file_name = file.name
        # Drops the 8 characters that precede the 4-character extension --
        # presumably a suffix added by the upload widget; TODO confirm.
        file_name = old_file_name[:-12] + old_file_name[-4:]
        os.rename(old_file_name, file_name)
        load_recommender(file_name)

    return generate_answer(question, openAI_key)
162
-
163
-
164
# Single shared search index; refitted each time a new PDF is loaded.
recommender = SemanticSearch()

title = 'PDF GPT'
description = """ PDF GPT allows you to chat with your PDF file using Universal Sentence Encoder and Open AI. It gives hallucination free response than other tools as the embeddings are better than OpenAI. The returned response can even cite the page number in square brackets([]) where the information is located, adding credibility to the responses and helping to locate pertinent information quickly."""

# Canned questions offered as one-click shortcuts above the question box.
questions = [
    "what did the study investigate?",
    "what are the methods used in this study?",
    "what are the data intervals used in this study? Give me the start dates and end dates?",
    "what are the main limitations of this study?",
    "what are the main shortcomings of this study?",
    "what are the main findings of the study?",
    "what are the main results of the study?",
    "what are the input features used in this study?",
    "what is the dependent variable in this study?"
]

with gr.Blocks() as demo:
    gr.Markdown(f'<center><h1>{title}</h1></center>')
    gr.Markdown(description)

    with gr.Row():

        with gr.Group():
            gr.Markdown('<p style="text-align:center">Get your Open AI API key <a href="https://platform.openai.com/account/api-keys">here</a></p>')
            openAI_key = gr.Textbox(label='Enter your OpenAI API key here')
            url = gr.Textbox(label='Enter PDF URL here')
            gr.Markdown("<center><h4>OR</h4></center>")
            file = gr.File(label='Upload your PDF/ Research Paper / Book here', file_types=['.pdf'])

            # Components must be created inside the Blocks context to be
            # rendered, so the shortcut buttons are built here.
            question_buttons = [gr.Button(q) for q in questions]
            question = gr.Textbox(label='Enter your question here')

            # Clicking a shortcut copies its text into the question box.
            # The default argument binds each button to its own question.
            for q_btn, q_text in zip(question_buttons, questions):
                q_btn.click(lambda text=q_text: text, inputs=None, outputs=question)

            btn = gr.Button(value='Submit')
            btn.style(full_width=True)

        with gr.Group():
            answer = gr.Textbox(label='The answer to your question is :')

        btn.click(question_answer, inputs=[url, file, question, openAI_key], outputs=[answer])

demo.launch()
 
1
+ import urllib.request
2
  import fitz
3
  import re
4
  import numpy as np
 
8
  import os
9
  from sklearn.neighbors import NearestNeighbors
10
 
 
 
11
 
12
title = 'MediDiagnostix AI'
description = """MediDiagnostix AI allows you to upload medical reports for analysis. Just click a picture of your medical report or upload a pdf report, it will
extract, analyze and provide you the medical interpretations of the report, potential diagnoses, and recommended follow-up actions. Furthermore, you can save diagnosis for future reference"""


def analyze_reports(files, num_reports):
    """Placeholder handler for the 'Analyze Reports' button.

    NOTE(review): stub only -- the real extraction/analysis logic still has
    to be written. It exists so the click handler below resolves to a
    defined callable.

    Args:
        files: Uploaded files from the upload widget, or ``None``.
        num_reports: Value of the 'Number of Reports' field (unused here).

    Returns:
        A status message for the results textbox.
    """
    uploaded = files or []
    return (
        f'Received {len(uploaded)} file(s). '
        'Report analysis is not implemented yet.'
    )


with gr.Blocks(css="""#chatbot { font-size: 14px; min-height: 1200px; }""") as demo:
    gr.Markdown(f'<center><h3>{title}</h3></center>')
    gr.Markdown(description)

    with gr.Row():

        with gr.Group():
            gr.Markdown('<p style="text-align:center">Enter the number of reports to analyze</p>')
            num_reports = gr.Number(label='Number of Reports', value=1)

            with gr.Accordion("Upload Reports"):
                # file_count="multiple" is the File option for multi-upload.
                file_upload = gr.File(
                    label='Upload Reports (PDF/Image)',
                    file_types=['.pdf', '.jpg', '.png'],
                    interactive=True,
                    type="file",
                    file_count="multiple",
                )

            analyze_button = gr.Button(value='Analyze Reports')

        with gr.Group():
            analysis_results = gr.Textbox(label='Analysis Results', placeholder="Results will appear here after analysis", lines=20)

        # Event listeners take the callback as ``fn``.
        analyze_button.click(
            fn=analyze_reports,
            inputs=[file_upload, num_reports],
            outputs=[analysis_results],
        )

demo.launch()