Commit cc81bd2 committed by 88hours · 1 Parent(s): 7d9878f

Changing embedding from PredictionGuard to Local

lrn_vector_embeddings.py CHANGED
```diff
@@ -102,8 +102,10 @@ def bt_with_masked_input():
 
     print(results)
     return results
-#res = bt_embeddingsl()
-#print((res['text_embeddings']))
-for img in [img1, img2, img3]:
-    embeddings = bt_embeddings_from_local(img['caption'], Image.open(img['image_path']))
-    print(embeddings['cross_modal_embeddings'][0].shape)
+
+if __name__ == "__main__":
+    #res = bt_embeddingsl()
+    #print((res['text_embeddings']))
+    for img in [img1, img2, img3]:
+        embeddings = bt_embeddings_from_local(img['caption'], Image.open(img['image_path']))
+        print(embeddings['cross_modal_embeddings'][0].shape)
```
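The body of `bt_embeddings_from_local` is not part of this diff, so the following is only a minimal sketch of how a local BridgeTower embedding call could look with Hugging Face `transformers`; the checkpoint name and the returned dictionary keys are assumptions inferred from how the function is used above.

```python
# Hypothetical sketch -- the real bt_embeddings_from_local in lrn_vector_embeddings.py
# is not shown in this commit; the checkpoint and return keys are assumptions.
import torch
from PIL import Image
from transformers import BridgeTowerModel, BridgeTowerProcessor

_CKPT = "BridgeTower/bridgetower-base"  # assumed checkpoint
_processor = BridgeTowerProcessor.from_pretrained(_CKPT)
_model = BridgeTowerModel.from_pretrained(_CKPT)

def bt_embeddings_from_local(text: str, image: Image.Image) -> dict:
    """Compute text, image, and cross-modal embeddings locally (no PredictionGuard call)."""
    inputs = _processor(images=image, text=text, return_tensors="pt")
    with torch.no_grad():
        out = _model(**inputs)
    return {
        "text_embeddings": out.text_features,           # (1, text_len, hidden)
        "image_embeddings": out.image_features,         # (1, image_len, hidden)
        "cross_modal_embeddings": [out.pooler_output],  # list, so [0].shape works above
    }
```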
mm_rag/embeddings/__pycache__/bridgetower_embeddings.cpython-311.pyc CHANGED
Binary files a/mm_rag/embeddings/__pycache__/bridgetower_embeddings.cpython-311.pyc and b/mm_rag/embeddings/__pycache__/bridgetower_embeddings.cpython-311.pyc differ
 
mm_rag/embeddings/bridgetower_embeddings.py CHANGED
```diff
@@ -3,9 +3,10 @@ from langchain_core.embeddings import Embeddings
 from langchain_core.pydantic_v1 import (
     BaseModel,
 )
+from lrn_vector_embeddings import bt_embeddings_from_local
 from utility import encode_image, bt_embedding_from_prediction_guard
 from tqdm import tqdm
-
+from PIL import Image
 class BridgeTowerEmbeddings(BaseModel, Embeddings):
     """ BridgeTower embedding model """
 
@@ -51,6 +52,6 @@ class BridgeTowerEmbeddings(BaseModel, Embeddings):
 
         embeddings = []
         for path_to_img, text in tqdm(zip(images, texts), total=len(texts)):
-            embedding = bt_embedding_from_prediction_guard(text, encode_image(path_to_img))
+            embedding = bt_embeddings_from_local(text, Image.open(path_to_img))
            embeddings.append(embedding)
         return embeddings
```
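With this change, `BridgeTowerEmbeddings` feeds each image/text pair through the local embedder instead of PredictionGuard. A small usage sketch follows; the method name `embed_image_text_pairs` is an assumption based on the class's typical interface, since the hunk above only shows the loop inside it.

```python
# Hypothetical usage sketch; the enclosing method name and the example paths
# are assumptions, as the hunk above only shows the method body.
from mm_rag.embeddings.bridgetower_embeddings import BridgeTowerEmbeddings

embedder = BridgeTowerEmbeddings()
pair_embeddings = embedder.embed_image_text_pairs(
    texts=["a person presenting slides"],
    images=["./frames/frame_0001.jpg"],  # image paths; opened with PIL inside the loop
)
print(len(pair_embeddings))
```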
requirements.txt CHANGED
```diff
@@ -14,4 +14,6 @@ whisper
 webvtt-py
 tqdm
 lancedb
-mmrag
+langchain-core
+langchain-community
+ollama
```
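The new dependencies back the imports introduced elsewhere in this commit: `langchain-core` provides `langchain_core.embeddings.Embeddings` and `langchain_core.pydantic_v1` used in bridgetower_embeddings.py, and `ollama` provides the local chat client used in utility.py. A quick import check after `pip install -r requirements.txt`:

```python
# Sanity check that the newly added packages import cleanly.
import langchain_core
import langchain_community
import ollama

print(langchain_core.__version__, langchain_community.__version__)
```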
utility.py CHANGED
```diff
@@ -9,6 +9,7 @@ from typing import Iterator, TextIO, List, Dict, Any, Optional, Sequence, Union
 from enum import auto, Enum
 import base64
 import glob
+import requests
 from tqdm import tqdm
 from pytubefix import YouTube, Stream
 import webvtt
@@ -18,6 +19,8 @@ from predictionguard import PredictionGuard
 import cv2
 import json
 import PIL
+from ollama import chat
+from ollama import ChatResponse
 from PIL import Image
 import dataclasses
 import random
@@ -234,6 +237,7 @@ def write_srt(transcript: Iterator[dict], file: TextIO, maxLineWidth=None):
     Example usage:
         from pathlib import Path
         from whisper.utils import write_srt
+        import requests
         result = transcribe(model, audio_path, temperature=temperature, **args)
         # save SRT
         audio_basename = Path(audio_path).stem
@@ -520,6 +524,29 @@ def lvlm_inference_with_conversation(conversation, max_tokens: int = 200, temper
     )
     return response['choices'][-1]['message']['content']
 
+def lvlm_inference_with_ollama(conversation, max_tokens: int = 200, temperature: float = 0.95, top_p: float = 0.1, top_k: int = 10):
+
+
+
+    # Send the request to the local Ollama server
+    #response = requests.post("http://localhost:8000/api/v1/completions", json=payload)
+
+    stream = chat(
+        model="llava-1.5-7b-hf",
+        messages= conversation,
+        stream=True,
+        temperature=temperature,
+        max_tokens=max_tokens,
+        top_p=top_p,
+        top_k=top_k
+    )
+
+    response_data = ''
+    for chunk in stream:
+        response_data += chunk['message']['content']
+
+    return response_data
+
 # function `extract_and_save_frames_and_metadata``:
 # receives as input a video and its transcript
 # does extracting and saving frames and their metadatas
```
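For reference, the committed `lvlm_inference_with_ollama` passes the sampling parameters straight to `chat()`; with the current `ollama` Python client these are normally supplied through the `options` dict, with `num_predict` playing the role of `max_tokens`. Below is a hedged sketch of that variant, assuming a LLaVA model has already been pulled into the local Ollama server (the model tag shown is an assumption; the commit uses "llava-1.5-7b-hf").

```python
from ollama import chat

def lvlm_inference_with_ollama_options(conversation, max_tokens: int = 200,
                                       temperature: float = 0.95,
                                       top_p: float = 0.1, top_k: int = 10) -> str:
    """Stream a chat completion from the local Ollama server and return the full text."""
    stream = chat(
        model="llava",                  # assumed local tag; the commit uses "llava-1.5-7b-hf"
        messages=conversation,
        stream=True,
        options={                       # sampling parameters go through `options`
            "temperature": temperature,
            "num_predict": max_tokens,  # Ollama's name for the generation budget
            "top_p": top_p,
            "top_k": top_k,
        },
    )
    response_data = ""
    for chunk in stream:
        response_data += chunk["message"]["content"]
    return response_data
```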