GVAmaresh committed on
Commit
3c32de9
·
1 Parent(s): ebe2b18

dev check working

Browse files
Files changed (1) hide show
  1. app.py +35 -337
app.py CHANGED
@@ -1,353 +1,51 @@
1
- from fastapi import FastAPI
2
- import os
3
- import subprocess
4
- import gdown
5
- import h5py
6
-
7
- app = FastAPI()
8
-
9
- @app.get("/")
10
- def greet_json():
11
- return {"Hello": "World!"}
12
-
13
- os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
14
- os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
15
- os.environ["FONTCONFIG_PATH"] = "/tmp/fontconfig"
16
- os.environ["HF_HOME"] = "/tmp/huggingface_cache"
17
-
18
- os.makedirs("/tmp/matplotlib", exist_ok=True)
19
- os.makedirs("/tmp/fontconfig", exist_ok=True)
20
- os.makedirs("/tmp/huggingface_cache", exist_ok=True)
21
-
22
- from torchaudio.pipelines import WAV2VEC2_BASE
23
- bundle = WAV2VEC2_BASE
24
- model = bundle.get_model()
25
- print("Model downloaded successfully!")
26
-
27
- def reencode_audio(input_path, output_path):
28
- command = [
29
- 'ffmpeg', '-i', input_path, '-acodec', 'pcm_s16le', '-ar', '16000', '-ac', '1', output_path
30
- ]
31
- subprocess.run(command, check=True)
32
-
33
-
34
- #-----------------------------------------------------------------------------------------
35
-
36
- # import os
37
- # from dotenv import load_dotenv
38
- # from googleapiclient.discovery import build
39
- # from google.auth.transport.requests import Request
40
- # from google.oauth2.credentials import Credentials
41
- # from google.oauth2 import service_account
42
-
43
- # SCOPES = ['https://www.googleapis.com/auth/drive']
44
-
45
-
46
- # details = {
47
- # "refresh_token": "1//0gYLCF5OE4fTmCgYIARAAGBASNwF-L9Irp3Ik0q5OtsQClcLwW7sxPZSuMboe7wyjteuSuOD_WvavEHfhuTvkSjkLHitkh76XaD4",
48
- # "token": "ya29.a0ARW5m753vyDgN_C7kUnnYTkeCfknSnDDj8tuVCe99dL2ieN3IzvCPVoN5kVg49CAYDz-pS5AgpjH7whiy7dr7QhwX4EiGQreJCzu109nlH6kxultrNup5q-_W2dNepbOa5YV8iH7OwP28RjQVR7fs9IlMO7BfnA9hw-WQqXNaCgYKAXMSARMSFQHGX2MieHrC7CpySZFYpoZWln6vxA0175",
49
- # "token_uri": "https://oauth2.googleapis.com/token",
50
- # "client_id": "573421158717-a2tulr4s7gg6or7sd76336busnmk22vu.apps.googleusercontent.com",
51
- # "client_secret": "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",
52
- # "scopes": ["https://www.googleapis.com/auth/drive"],
53
- # "universe_domain": "googleapis.com",
54
- # "account": "",
55
- # }
56
-
57
-
58
- # def authenticate_with_env_vars(details):
59
- # creds = Credentials.from_authorized_user_info(details, SCOPES)
60
- # if not creds or not creds.valid:
61
- # if creds and creds.expired and creds.refresh_token:
62
- # creds.refresh(Request())
63
- # else:
64
- # raise ValueError("Credentials are invalid and cannot be refreshed.")
65
- # return creds
66
-
67
- #-----------------------------------------------------------------------------------------
68
- from fastapi import UploadFile
69
- from googleapiclient.http import MediaIoBaseUpload
70
- import io
71
- import PyPDF2
72
-
73
- Folder_Name = "Document_DB"
74
- file_metadata = {
75
- "name": "Fake",
76
- "mimeType": "application/vnd.google-apps.folder",
77
- }
78
-
79
- def check_folder(service):
80
- try:
81
- resource = service.files()
82
- result = resource.list(
83
- q=f"mimeType = 'application/vnd.google-apps.folder' and 'root' in parents",
84
- fields="nextPageToken, files(id, name)",
85
- ).execute()
86
- list_folders = result.get("files")
87
-
88
- folder_id = None
89
-
90
- for folder in list_folders:
91
- if folder["name"] == Folder_Name:
92
- folder_id = folder["id"]
93
- break
94
-
95
- if not folder_id:
96
- folder = service.files().create(body=file_metadata, fields="id").execute()
97
- folder_id = folder["id"]
98
-
99
- return folder_id, "success"
100
- except Exception as e:
101
- print(f"Error occurred while pushing file to DB: {e}")
102
- return None, str(e)
103
-
104
- def extract_text_from_pdf(pdf_file_content):
105
- extracted_text = ""
106
- try:
107
- pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file_content))
108
- num_pages = len(pdf_reader.pages)
109
- for i in range(num_pages):
110
- page = pdf_reader.pages[i]
111
- page_text = page.extract_text()
112
- if "ABSTRACT" in page_text:
113
- extracted_text += page_text + "\n"
114
- break
115
- return extracted_text
116
- except Exception as e:
117
- print("An error occurred:", e)
118
- return None
119
-
120
- async def extract_text_url(file:UploadFile):
121
- try:
122
- file_content = await file.read()
123
- extract_text = extract_text_from_pdf(file_content)
124
- return extract_text, "success"
125
- except Exception as e:
126
- print(f"Error occurred while pushing file to DB: {e}")
127
- return None, str(e)
128
-
129
-
130
- async def push_file_db(service, file: UploadFile):
131
- try:
132
- folder_id, status = check_folder(service)
133
-
134
- if not folder_id:
135
- return [None, None, status]
136
-
137
- file_content = await file.read()
138
-
139
- file_metadata = {"name": file.filename, "parents": [folder_id]}
140
- media = MediaIoBaseUpload(io.BytesIO(file_content), mimetype="application/pdf")
141
- print("hh1")
142
- new_file = (
143
- service.files()
144
- .create(body=file_metadata, media_body=media, fields="id")
145
- .execute()
146
- )
147
- print("hh2")
148
- service.permissions().create(
149
- fileId=new_file["id"],
150
- body={"role": "reader", "type": "anyone"},
151
- fields="id",
152
- ).execute()
153
-
154
- extracted_text = extract_text_from_pdf(file_content)
155
-
156
- return new_file.get("id"), extracted_text, "success"
157
-
158
- except Exception as e:
159
- print(f"Error occurred while pushing file to DB: {e}")
160
- return None, None, str(e)
161
-
162
-
163
- #-----------------------------------------------------------------------------------------
164
- import os
165
- import gdown
166
 
167
- file_id = "1zhisRgRi2qBFX73VFhzh-Ho93MORQqVa"
168
- output_dir = "./downloads"
169
- output_file = "file.h5"
 
 
 
 
 
 
 
170
 
171
- if not os.path.exists(output_dir):
172
- os.makedirs(output_dir)
173
 
174
- output_path = os.path.join(output_dir, output_file)
175
 
176
- url = f"https://drive.google.com/uc?id={file_id}"
177
 
178
- try:
179
- gdown.download(url, output_path, quiet=False)
180
- print(f"File downloaded successfully to: {output_path}")
181
- except Exception as e:
182
- print(f"Error downloading file: {e}")
183
 
184
- output_file = "file.h5"
185
- file_path = os.path.join(output_dir, output_file)
186
- #-----------------------------------------------------------------------------------------
187
 
188
- import os
189
- import gdown
 
190
 
191
- file_id = "1wIaycDFGTF3e0PpAHKk-GLnxk4cMehOU"
192
- output_dir = "./downloads"
193
- output_file = "file2.h5"
194
 
195
- if not os.path.exists(output_dir):
196
- os.makedirs(output_dir)
197
 
198
- output_path = os.path.join(output_dir, output_file)
199
 
200
- url = f"https://drive.google.com/uc?id={file_id}"
 
 
 
201
 
202
- try:
203
- gdown.download(url, output_path, quiet=False)
204
- print(f"File downloaded successfully to: {output_path}")
205
- except Exception as e:
206
- print(f"Error downloading file: {e}")
207
 
208
- output_file = "file2.h5"
209
- file_path = os.path.join(output_dir, output_file)
210
-
211
-
212
- if os.path.exists(file_path):
213
- print(f"The file '{output_file}' exists at '{file_path}'.")
214
- else:
215
- print(f"The file '{output_file}' does not exist at '{file_path}'.")
216
-
217
- #-----------------------------------------------------------------------------------------
218
- import os
219
- from dotenv import load_dotenv
220
- from googleapiclient.discovery import build
221
- from google_auth_oauthlib.flow import InstalledAppFlow
222
- from google.auth.transport.requests import Request
223
- from google.oauth2.credentials import Credentials
224
- from google.oauth2 import service_account
225
-
226
- SCOPES = ['https://www.googleapis.com/auth/drive']
227
-
228
-
229
- details = {
230
- "refresh_token": "1//0gYLCF5OE4fTmCgYIARAAGBASNwF-L9Irp3Ik0q5OtsQClcLwW7sxPZSuMboe7wyjteuSuOD_WvavEHfhuTvkSjkLHitkh76XaD4",
231
- "token": "ya29.a0ARW5m753vyDgN_C7kUnnYTkeCfknSnDDj8tuVCe99dL2ieN3IzvCPVoN5kVg49CAYDz-pS5AgpjH7whiy7dr7QhwX4EiGQreJCzu109nlH6kxultrNup5q-_W2dNepbOa5YV8iH7OwP28RjQVR7fs9IlMO7BfnA9hw-WQqXNaCgYKAXMSARMSFQHGX2MieHrC7CpySZFYpoZWln6vxA0175",
232
- "token_uri": "https://oauth2.googleapis.com/token",
233
- "client_id": "573421158717-a2tulr4s7gg6or7sd76336busnmk22vu.apps.googleusercontent.com",
234
- "client_secret": "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",
235
- "scopes": ["https://www.googleapis.com/auth/drive"],
236
- "universe_domain": "googleapis.com",
237
- "account": "",
238
- }
239
-
240
-
241
- def main():
242
- try:
243
- print(details)
244
- creds = None
245
- creds = Credentials.from_authorized_user_info(details, SCOPES)
246
-
247
- if not creds or not creds.valid:
248
- if creds and creds.expired and creds.refresh_token:
249
- creds.refresh(Request())
250
- else:
251
- flow = InstalledAppFlow.from_client_secrets_file(
252
- 'credentials.json', SCOPES)
253
- creds = flow.run_local_server(port=0)
254
-
255
- service = build('drive', 'v3', credentials=creds)
256
- return service
257
-
258
- except Exception as error:
259
- print(f'An error occurred: {error}')
260
-
261
- #-----------------------------------------------------------------------------------------
262
-
263
- from fastapi.responses import JSONResponse
264
- from pydantic import BaseModel
265
- from fastapi import FastAPI, File, UploadFile, HTTPException
266
- from typing import List
267
-
268
- service = main()
269
 
270
- @app.get("/api/check")
271
- def check_working():
272
- try:
273
- return JSONResponse(
274
- {"status": 200, "message": "Server is working fine", "data": None},
275
- status_code=200,
276
- )
277
- except Exception as e:
278
- return JSONResponse(
279
- {"status": 500, "message": "Server is not working", "error": e},
280
- )
281
 
282
  @app.get("/")
283
- def read():
284
- return JSONResponse(
285
- {"status": 200, "message": "Working Successfully"}, status_code=200
286
- )
287
-
288
- @app.post("/api/save-docs")
289
- async def save_docs(file: UploadFile = File(...)):
290
- try:
291
- print(file.filename)
292
- file_id, extracted_text, status = await push_file_db(service, file)
293
- if not file_id:
294
- return JSONResponse(
295
- {"status": 500, "message": status, "data": None}, status_code=500
296
- )
297
- print(file_id)
298
- return JSONResponse(
299
- {
300
- "status": 200,
301
- "message": "Document saved successfully",
302
- "data": {
303
- "file_id": file_id,
304
- "extracted_text": extracted_text,
305
- "title": file.filename,
306
- },
307
- },
308
- status_code=200,
309
- )
310
- except Exception as e:
311
- print(f"Error: {e}")
312
- return JSONResponse({"status": 500, "message": str(e)}, status_code=500)
313
-
314
-
315
- @app.post("/api/get-abstract")
316
- async def get_abstract(file: UploadFile = File(...)):
317
- try:
318
- print(file.filename)
319
- extracted_text, status = await extract_text_url(file)
320
- if not extracted_text:
321
- return JSONResponse(
322
- {"status": 500, "message": status, "data": None}, status_code=500
323
- )
324
- return JSONResponse(
325
- {
326
- "status": 200,
327
- "message": "Document saved successfully",
328
- "data": {"extracted_text": extracted_text, "title": file.filename},
329
- },
330
- status_code=200,
331
- )
332
- except Exception as e:
333
- print(f"Error: {e}")
334
- return JSONResponse({"status": 500, "message": str(e)}, status_code=500)
335
-
336
- class DeleteRequest(BaseModel):
337
- _id: List[str]
338
-
339
- @app.post("/api/delete-report")
340
- async def delete_report(req: DeleteRequest):
341
- print(req._id)
342
- try:
343
- delete_result = delete_reports_by_ids(req._id)
344
- return {"message": "Deleted successfully", "deleted_ids": req._id}
345
- except Exception as e:
346
- raise HTTPException(status_code=500, detail=f"Error deleting reports: {str(e)}")
347
-
348
- def delete_reports_by_ids(ids: List[str]):
349
- print(f"Deleting reports with IDs: {ids}")
350
- return True
351
-
352
-
353
-
 
1
+ from transformers import AutoTokenizer, AutoModel
2
+ import torch
3
+ import torch.nn.functional as F
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
def mean_pooling(model_output, attention_mask):
    """Average token embeddings over the positions selected by the mask.

    Args:
        model_output: Indexable model output whose element ``0`` holds the
            token embeddings (presumably shaped ``(batch, seq, hidden)`` --
            confirm against the encoder in use).
        attention_mask: Mask with one entry per token. It is given a trailing
            axis and expanded to the size of the embeddings, so positions
            holding ``0`` contribute nothing to the average.

    Returns:
        The mask-weighted sum over dimension 1 divided by the mask total
        along that same dimension.
    """
    token_embeddings = model_output[0]
    mask = attention_mask.unsqueeze(-1)
    mask = mask.expand(token_embeddings.size()).float()
    summed = torch.sum(token_embeddings * mask, 1)
    # Clamp the divisor so a fully masked row yields zeros instead of NaN.
    counts = torch.clamp(mask.sum(1), min=1e-9)
    return summed / counts
15
 
16
def cosine_similarity(u, v):
    """Return the cosine similarity of ``u`` and ``v`` along dimension 1.

    Thin wrapper around ``torch.nn.functional.cosine_similarity`` that fixes
    ``dim=1``, so both arguments need at least two dimensions.

    Args:
        u: First tensor.
        v: Second tensor, compared against ``u``.

    Returns:
        A tensor of similarities with dimension 1 reduced away.
    """
    return F.cosine_similarity(u, v, dim=1)
18
 
 
19
 
20
_DEFAULT_MODEL_NAME = "dmlls/all-mpnet-base-v2-negation"

# Loaded (tokenizer, model) pairs keyed by model name, so repeated calls to
# compare() do not rebuild the encoder each time.
_ENCODER_CACHE = {}


def _load_encoder(model_name):
    """Return the ``(tokenizer, model)`` pair for ``model_name``, loading once."""
    if model_name not in _ENCODER_CACHE:
        _ENCODER_CACHE[model_name] = (
            AutoTokenizer.from_pretrained(model_name),
            AutoModel.from_pretrained(model_name),
        )
    return _ENCODER_CACHE[model_name]


def compare(text1, text2, model_name=_DEFAULT_MODEL_NAME):
    """Score how similar two texts are using sentence embeddings.

    Both texts are tokenized together (padded, truncated), encoded by the
    model, mean-pooled over the attention mask, L2-normalized, and compared
    by cosine similarity.

    Args:
        text1: First text.
        text2: Second text.
        model_name: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the model. Defaults to the model the original code
            hard-coded.

    Returns:
        The cosine similarity of the two embeddings as a Python number.
    """
    tokenizer, model = _load_encoder(model_name)

    encoded_input = tokenizer(
        [text1, text2], padding=True, truncation=True, return_tensors="pt"
    )

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        model_output = model(**encoded_input)

    embeddings = mean_pooling(model_output, encoded_input["attention_mask"])
    embeddings = F.normalize(embeddings, p=2, dim=1)

    # Slicing keeps the leading axis that cosine_similarity (dim=1) expects.
    similarity_score = cosine_similarity(embeddings[0:1], embeddings[1:2])
    return similarity_score.item()
42
 
 
 
 
 
 
43
 
44
+ #------------------------------------------------------------
45
+ from fastapi import FastAPI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
# ASGI application object; the route below is registered on it.
app = FastAPI()


@app.get("/")
def greet_json():
    """Handle ``GET /`` by returning a fixed greeting payload."""
    return {"Hello": "World!"}