Update app.py
Browse files
app.py
CHANGED
@@ -23,17 +23,16 @@ def get_transcript(video_url):
|
|
23 |
return "Invalid YouTube URL"
|
24 |
|
25 |
video_id = video_id_match.group(1)
|
26 |
-
print(video_id)
|
27 |
|
28 |
# Fetch the transcript
|
29 |
transcript = YouTubeTranscriptApi.get_transcript(video_id)
|
30 |
-
|
31 |
# Join the transcript text into a single string
|
32 |
text = "\n".join([t["text"] for t in transcript])
|
33 |
return text # Return the transcript as a string
|
34 |
|
35 |
except Exception as e:
|
36 |
-
return f"Error fetching transcript:
|
37 |
|
38 |
|
39 |
|
@@ -47,12 +46,10 @@ def create_db_from_video_url(video_url, api_key):
|
|
47 |
|
48 |
# Convert transcript string into a Document
|
49 |
doc_convert = Document(page_content=transcripts)
|
50 |
-
|
51 |
-
print(doc_convert)
|
52 |
# cannot provide this directly to the model so we are splitting the transcripts into small chunks
|
53 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
54 |
docs = text_splitter.split_documents([doc_convert])
|
55 |
-
print(docs)
|
56 |
|
57 |
db = FAISS.from_documents(docs, embedding=embeddings)
|
58 |
return db
|
|
|
23 |
return "Invalid YouTube URL"
|
24 |
|
25 |
video_id = video_id_match.group(1)
|
|
|
26 |
|
27 |
# Fetch the transcript
|
28 |
transcript = YouTubeTranscriptApi.get_transcript(video_id)
|
29 |
+
|
30 |
# Join the transcript text into a single string
|
31 |
text = "\n".join([t["text"] for t in transcript])
|
32 |
return text # Return the transcript as a string
|
33 |
|
34 |
except Exception as e:
|
35 |
+
return f"Error fetching transcript: Unable to fetch subtitles."
|
36 |
|
37 |
|
38 |
|
|
|
46 |
|
47 |
# Convert transcript string into a Document
|
48 |
doc_convert = Document(page_content=transcripts)
|
49 |
+
|
|
|
50 |
# cannot provide this directly to the model so we are splitting the transcripts into small chunks
|
51 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
52 |
docs = text_splitter.split_documents([doc_convert])
|
|
|
53 |
|
54 |
db = FAISS.from_documents(docs, embedding=embeddings)
|
55 |
return db
|