Spaces:
Running
Running
Commit
·
74cf6bd
1
Parent(s):
958cc77
Vibe coded implementation (with some manual fixes)
Browse files(cherry picked from commit 255acda8c8bcb989fd72006b84dee18553468356)
- .dockerignore +49 -0
- CLAUDE.md +33 -0
- Dockerfile +41 -0
- app/__init__.py +1 -0
- app/api/__init__.py +1 -0
- app/api/router.py +6 -0
- app/api/video.py +143 -0
- app/main.py +63 -0
- app/models/__init__.py +1 -0
- app/models/video.py +32 -0
- app/services/__init__.py +1 -0
- app/services/qdrant_service.py +41 -0
- app/services/video_service.py +647 -0
- app/static/css/style.css +137 -0
- app/static/js/index.js +268 -0
- app/static/js/main.js +139 -0
- app/static/js/video.js +440 -0
- app/templates/base.html +82 -0
- app/templates/index.html +98 -0
- app/templates/video.html +62 -0
- docker-compose.yml +28 -0
- example.env +3 -0
- gunicorn.conf.py +32 -0
- poetry.lock +0 -0
- pyproject.toml +12 -1
.dockerignore
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Git
|
| 2 |
+
.git
|
| 3 |
+
.gitignore
|
| 4 |
+
|
| 5 |
+
# Python
|
| 6 |
+
__pycache__/
|
| 7 |
+
*.py[cod]
|
| 8 |
+
*$py.class
|
| 9 |
+
*.so
|
| 10 |
+
.Python
|
| 11 |
+
env/
|
| 12 |
+
build/
|
| 13 |
+
develop-eggs/
|
| 14 |
+
dist/
|
| 15 |
+
downloads/
|
| 16 |
+
eggs/
|
| 17 |
+
.eggs/
|
| 18 |
+
lib/
|
| 19 |
+
lib64/
|
| 20 |
+
parts/
|
| 21 |
+
sdist/
|
| 22 |
+
var/
|
| 23 |
+
*.egg-info/
|
| 24 |
+
.installed.cfg
|
| 25 |
+
*.egg
|
| 26 |
+
|
| 27 |
+
# Virtual environment
|
| 28 |
+
venv/
|
| 29 |
+
.env
|
| 30 |
+
.venv/
|
| 31 |
+
ENV/
|
| 32 |
+
|
| 33 |
+
# Docker
|
| 34 |
+
.dockerignore
|
| 35 |
+
Dockerfile
|
| 36 |
+
docker-compose.yml
|
| 37 |
+
|
| 38 |
+
# IDE
|
| 39 |
+
.idea/
|
| 40 |
+
.vscode/
|
| 41 |
+
*.swp
|
| 42 |
+
*.swo
|
| 43 |
+
|
| 44 |
+
# Misc
|
| 45 |
+
.DS_Store
|
| 46 |
+
.pytest_cache/
|
| 47 |
+
htmlcov/
|
| 48 |
+
.coverage
|
| 49 |
+
.tox/
|
CLAUDE.md
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Development Guidelines for Vibe Coding RAG
|
| 2 |
+
|
| 3 |
+
## Commands
|
| 4 |
+
- Build/Install: `poetry install`
|
| 5 |
+
- Run: `poetry run python -m app.main` (starts the development server with auto-reload on port 8000)
|
| 6 |
+
- Lint: `poetry run ruff check .`
|
| 7 |
+
- Format: `poetry run ruff format .`
|
| 8 |
+
- Test: `poetry run pytest`
|
| 9 |
+
- Run single test: `poetry run pytest path/to/test.py::test_function_name -v`
|
| 10 |
+
|
| 11 |
+
## Code Style
|
| 12 |
+
- **Imports**: Group standard library, third-party, and local imports
|
| 13 |
+
- **Formatting**: Use Black/Ruff compatible formatting
|
| 14 |
+
- **Types**: Use type annotations for function parameters and return values
|
| 15 |
+
- **Naming**:
|
| 16 |
+
- Variables/functions: snake_case
|
| 17 |
+
- Classes: PascalCase
|
| 18 |
+
- Constants: UPPER_SNAKE_CASE
|
| 19 |
+
- **Error Handling**: Use try/except with specific exceptions
|
| 20 |
+
- **Documentation**: Docstrings for all public functions and classes
|
| 21 |
+
|
| 22 |
+
## Technologies
|
| 23 |
+
- Vector DB: Qdrant
|
| 24 |
+
- Embeddings: SentenceTransformers with sentence-transformers/static-retrieval-mrl-en-v1
|
| 25 |
+
- API: FastAPI
|
| 26 |
+
- Frontend: HTML/CSS/JavaScript with DaisyUI components
|
| 27 |
+
|
| 28 |
+
## MCP Integration
|
| 29 |
+
- Always call the `qdrant-code-search` `find` tool before generating frontend code (HTML/CSS/JS)
|
| 30 |
+
- Store generated code snippets with the `qdrant-code-search` `store` tool for future reference
|
| 31 |
+
|
| 32 |
+
## Qdrant
|
| 33 |
+
- Point IDs must be UUIDs passed as strings (e.g. `str(uuid.uuid4())`)
|
Dockerfile
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Poetry (pinned version; --no-cache-dir keeps pip's download cache out of the layer)
RUN pip install --no-cache-dir poetry==1.8.3

# Copy poetry configuration files
COPY pyproject.toml poetry.lock poetry.toml* ./

# Configure poetry to not create a virtual environment
RUN poetry config virtualenvs.create false

# Install runtime dependencies only.
# --without dev replaces the deprecated --no-dev flag.
# --no-root skips installing the project package itself: its source is only
# copied in the next step, and PYTHONPATH below makes it importable.
RUN poetry install --without dev --no-root --no-interaction --no-ansi

# Copy application code
COPY app ./app

# Expose port
EXPOSE 8000

# Set environment variables
ENV PYTHONPATH=/app
# Inside a container, localhost is the container itself; override QDRANT_URL
# at runtime when Qdrant runs elsewhere.
ENV QDRANT_URL=http://localhost:6333
# ENV QDRANT_API_KEY=your_api_key_here (uncomment and set if needed)

# Default number of workers. This is a fixed value, not computed from the CPU
# count; the recommended formula is (2 * CPU cores) + 1, so override WORKERS to
# match the host. NOTE(review): presumably read by gunicorn.conf.py - confirm.
ENV WORKERS=4

# Copy the gunicorn config file
COPY gunicorn.conf.py ./

# Command to run the application with Gunicorn and Uvicorn workers
CMD ["gunicorn", "app.main:app", "-c", "gunicorn.conf.py"]
|
app/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Initialize app package
|
app/api/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Initialize API package
|
app/api/router.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter
|
| 2 |
+
from app.api import video
|
| 3 |
+
|
| 4 |
+
router = APIRouter()
|
| 5 |
+
|
| 6 |
+
router.include_router(video.router, prefix="/video", tags=["video"])
|
app/api/video.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, HTTPException, Query
|
| 2 |
+
from typing import List, Optional
|
| 3 |
+
from app.models.video import Video, SearchResult, VideoSegment
|
| 4 |
+
from app.services.video_service import (
|
| 5 |
+
process_video,
|
| 6 |
+
search_video_segments,
|
| 7 |
+
get_all_segments,
|
| 8 |
+
get_processed_videos,
|
| 9 |
+
get_video_by_id,
|
| 10 |
+
)
|
| 11 |
+
from pydantic import BaseModel
|
| 12 |
+
|
| 13 |
+
router = APIRouter()
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class VideoRequest(BaseModel):
    """Request body for the video processing endpoint."""

    # Passed to extract_video_id and process_video by the /process endpoint.
    url: str
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class VideoResponse(BaseModel):
    """Video record returned by the processing endpoint, plus a status flag."""

    # The stored or freshly processed video.
    video: Video
    # True when this request did the processing; False when the video was already stored.
    newly_processed: bool = False
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@router.post("/process", response_model=VideoResponse)
async def process_video_endpoint(video_request: VideoRequest) -> VideoResponse:
    """Process a YouTube video to extract and store transcript segments.

    If the video has already been processed, the stored record is returned
    without reprocessing.

    Args:
        video_request: Request body carrying the YouTube URL (or bare video ID).

    Returns:
        The video record plus a flag telling whether this call processed it
        (``newly_processed=True``) or it was already stored.

    Raises:
        HTTPException: 400 when no video ID can be extracted from the URL;
            500 for any other failure while looking up or processing the video.
    """
    import logging

    # extract_video_id is not in this module's top-level import list.
    from app.services.video_service import extract_video_id

    try:
        video_id = extract_video_id(video_request.url)
    except ValueError as e:
        # An unparseable URL is a client error, not a server failure.
        logging.warning(f"Rejected video URL {video_request.url}: {str(e)}")
        raise HTTPException(status_code=400, detail=str(e)) from e

    try:
        # Check if already processed
        existing_video = get_video_by_id(video_id)
        if existing_video is not None and existing_video.processed:
            logging.info(f"Video {video_id} already processed, returning existing data")
            return VideoResponse(video=existing_video, newly_processed=False)

        # Process the video if needed
        result = process_video(video_request.url)
        return VideoResponse(video=result, newly_processed=True)
    except Exception as e:
        # logging.exception records the traceback together with the message.
        logging.exception(f"Error processing video URL {video_request.url}: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
@router.get("/search")
async def search_video_endpoint(
    query: str = Query(..., description="Search query for video content"),
    video_id: Optional[str] = Query(
        None, description="Optional YouTube video ID to limit search"
    ),
    limit: int = Query(5, description="Maximum number of results to return"),
) -> List[SearchResult]:
    """Return the video segments that best match ``query``.

    A ``video_id`` of "undefined" or "null" (any letter case) is treated as
    absent, so the search runs across all videos instead.

    Raises:
        HTTPException: 500 when the underlying search fails.
    """
    import logging

    # Literal placeholder strings are discarded rather than used as a filter.
    placeholder_ids = ("undefined", "null")
    if video_id and video_id.lower() in placeholder_ids:
        logging.warning(f"Invalid video_id in search request: '{video_id}'")
        video_id = None

    try:
        return search_video_segments(query, video_id, limit)
    except Exception as err:
        logging.error(
            f"Error searching for query '{query}' with video_id '{video_id}': {str(err)}"
        )
        raise HTTPException(status_code=500, detail=str(err))
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
@router.get("/segments/{video_id}")
async def get_segments_endpoint(video_id: str) -> List[VideoSegment]:
    """Return every stored segment of one video.

    An empty list is returned, rather than an error, both for placeholder IDs
    ("undefined" / "null") and for videos without segments, so the frontend
    can handle these cases gracefully.

    Raises:
        HTTPException: 500 when the segments cannot be retrieved.
    """
    import logging

    # Reject empty and placeholder IDs up front.
    if not video_id or video_id.lower() in ("undefined", "null"):
        logging.warning(f"Invalid video ID requested: '{video_id}'")
        return []

    try:
        found = get_all_segments(video_id)
        # Normalise any falsy result to an empty list.
        return found or []
    except Exception as err:
        logging.error(f"Error getting segments for video {video_id}: {str(err)}")
        raise HTTPException(
            status_code=500, detail=f"Could not retrieve video segments: {str(err)}"
        )
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
@router.get("/recent")
async def get_recent_videos_endpoint(
    limit: int = Query(10, description="Maximum number of videos to return"),
) -> List[Video]:
    """Return up to ``limit`` processed videos from the video service.

    Raises:
        HTTPException: 500 when the videos cannot be retrieved.
    """
    try:
        return get_processed_videos(limit=limit)
    except Exception as err:
        import logging

        # Log the failure for debugging before reporting it to the client.
        logging.error(f"Error getting recent videos: {str(err)}")
        raise HTTPException(
            status_code=500, detail=f"Could not retrieve recent videos: {str(err)}"
        )
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
@router.get("/info/{video_id}")
async def get_video_info_endpoint(video_id: str) -> Video:
    """Return the stored metadata for one video.

    When the video is not stored, a minimal ``Video`` with a placeholder
    title is returned instead of an error.

    Raises:
        HTTPException: 500 when the lookup fails.
    """
    try:
        stored = get_video_by_id(video_id)
        # Fall back to a basic video object when nothing is stored.
        return stored if stored else Video(video_id=video_id, title=f"Video {video_id}")
    except Exception as err:
        import logging

        logging.error(f"Error getting video info for {video_id}: {str(err)}")
        raise HTTPException(
            status_code=500, detail=f"Could not retrieve video info: {str(err)}"
        )
|
app/main.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, Request
|
| 2 |
+
from fastapi.staticfiles import StaticFiles
|
| 3 |
+
from fastapi.templating import Jinja2Templates
|
| 4 |
+
from fastapi.responses import HTMLResponse, RedirectResponse
|
| 5 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 6 |
+
from app.api import router as api_router
|
| 7 |
+
from app.services.video_service import get_video_by_id
|
| 8 |
+
|
| 9 |
+
app = FastAPI(title="In-Video Search", docs_url=None, redoc_url=None, openapi_url=None)
|
| 10 |
+
|
| 11 |
+
# Enable CORS.
# Credentialed requests are disabled because they must not be combined with a
# wildcard origin: with allow_credentials=True the allowed origins have to be
# listed explicitly. Re-enable credentials only together with a concrete list.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Adjust this in production
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 19 |
+
|
| 20 |
+
# Mount static files
|
| 21 |
+
app.mount("/static", StaticFiles(directory="app/static"), name="static")
|
| 22 |
+
|
| 23 |
+
# Templates
|
| 24 |
+
templates = Jinja2Templates(directory="app/templates")
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
    """Render the landing page from the ``index.html`` template."""
    context = {"request": request, "title": "In-Video Search"}
    return templates.TemplateResponse("index.html", context)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
@app.get("/video/{video_id}", response_class=HTMLResponse)
async def video_page(request: Request, video_id: str):
    """Render the player page for one video.

    The page title is the stored video title when one exists, otherwise the
    generic "Video Player".
    """
    stored = get_video_by_id(video_id)
    # Use the stored title only when the video exists and its title is non-empty.
    page_title = stored.title if stored and stored.title else "Video Player"

    context = {"request": request, "title": page_title, "video_id": video_id}
    return templates.TemplateResponse("video.html", context)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
@app.get("/watch")
async def watch_redirect(request: Request, v: str):
    """Redirect YouTube-style ``/watch?v=<id>`` URLs to the video page.

    Args:
        request: Incoming request (unused; kept for interface compatibility).
        v: Video ID taken from the ``v`` query parameter.

    Returns:
        A redirect to ``/video/<id>``.
    """
    from urllib.parse import quote

    # Percent-encode the value so characters such as "?", "#" or "/" cannot
    # alter the path, query or fragment of the redirect target. Ordinary
    # video IDs (letters, digits, "-", "_") pass through unchanged.
    return RedirectResponse(url=f"/video/{quote(v, safe='')}")
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# Include API routers
|
| 57 |
+
app.include_router(api_router.router, prefix="/api")
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
if __name__ == "__main__":
|
| 61 |
+
import uvicorn
|
| 62 |
+
|
| 63 |
+
uvicorn.run("app.main:app", host="0.0.0.0", port=8000, reload=True)
|
app/models/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Initialize models package
|
app/models/video.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field
|
| 2 |
+
from typing import Optional
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class VideoSegment(BaseModel):
    """A transcript segment of a video. Every field is required."""

    text: str = Field(default=..., description="Transcript text of the segment")
    start: float = Field(default=..., description="Start time in seconds")
    end: float = Field(default=..., description="End time in seconds")
    segment_id: str = Field(default=..., description="Unique identifier for the segment")
    video_id: str = Field(
        default=..., description="YouTube video ID this segment belongs to"
    )
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class Video(BaseModel):
    """Metadata for a YouTube video; only ``video_id`` is required."""

    video_id: str = Field(default=..., description="YouTube video ID")
    title: Optional[str] = Field(default=None, description="Video title")
    description: Optional[str] = Field(default=None, description="Video description")
    channel: Optional[str] = Field(default=None, description="Channel name")
    processed: bool = Field(
        default=False, description="Whether the video has been processed"
    )
    created_at: Optional[int] = Field(
        default=None,
        description="Unix timestamp (seconds since epoch) when the video was processed",
    )
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class SearchResult(BaseModel):
    """One search hit: a video segment together with its similarity score."""

    score: float = Field(default=..., description="Similarity score")
    segment: VideoSegment = Field(
        default=..., description="The matching video segment"
    )
|
app/services/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Initialize services package
|
app/services/qdrant_service.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from qdrant_client import QdrantClient
|
| 3 |
+
import logging
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def get_qdrant_client() -> QdrantClient:
    """
    Build a Qdrant client from environment variables.

    Environment variables:
    - QDRANT_URL: URL for Qdrant server (default: http://localhost:6333)
    - QDRANT_API_KEY: Optional API key for authentication

    The connection is probed once with ``get_collections()``. A failed probe
    is only logged, and the client is returned regardless.

    Returns:
        QdrantClient: Configured Qdrant client
    """
    url = os.getenv("QDRANT_URL", "http://localhost:6333")
    api_key = os.getenv("QDRANT_API_KEY")

    # Pass the API key only when one is set (an empty value counts as unset).
    client_kwargs = {"location": url}
    if api_key:
        client_kwargs["api_key"] = api_key
    client = QdrantClient(**client_kwargs)

    if api_key:
        logging.info(f"Connecting to Qdrant at {url} with API key")
    else:
        logging.info(f"Connecting to Qdrant at {url}")

    # Probe the connection; a failure here is not fatal because the
    # connection will be exercised again when the client is used.
    try:
        client.get_collections()
    except Exception as err:
        logging.error(f"Failed to connect to Qdrant at {url}: {err}")
    else:
        logging.info(f"Successfully connected to Qdrant at {url}")

    return client
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# Initialize global client instance
|
| 41 |
+
qdrant_client = get_qdrant_client()
|
app/services/video_service.py
ADDED
|
@@ -0,0 +1,647 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import uuid
|
| 2 |
+
from typing import List, Dict, Any, Optional
|
| 3 |
+
import re
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
from sentence_transformers import SentenceTransformer
|
| 6 |
+
from qdrant_client.http import models
|
| 7 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
| 8 |
+
import yt_dlp
|
| 9 |
+
from app.models.video import VideoSegment, Video, SearchResult
|
| 10 |
+
from app.services.qdrant_service import qdrant_client
|
| 11 |
+
|
| 12 |
+
# Initialize the sentence transformer model
|
| 13 |
+
model = SentenceTransformer("sentence-transformers/static-retrieval-mrl-en-v1")
|
| 14 |
+
|
| 15 |
+
# Collection names
|
| 16 |
+
COLLECTION_NAME = "video_segments"
|
| 17 |
+
PROCESSED_VIDEOS_COLLECTION = "processed_videos"
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def _fetch_youtube_metadata(video_id: str, video: Optional[Video] = None) -> Video:
    """Fill in title, description and channel for a video using yt-dlp.

    Args:
        video_id: YouTube video ID to look up.
        video: Existing record to update; a new ``Video`` is created when omitted.

    Returns:
        The updated (or newly created) video. When the lookup fails and no
        title is set, the title falls back to ``"Video <video_id>"``.
    """
    import logging

    if not video:
        video = Video(video_id=video_id)

    # yt-dlp options: metadata only, no download, no console output.
    ydl_opts = {
        "skip_download": True,  # Don't download the video
        "quiet": True,  # Don't print progress
        "no_warnings": True,  # Don't print warnings
        "extract_flat": True,  # Don't extract videos in playlists
        "format": "best",  # Best quality (doesn't matter since we're not downloading)
    }
    # Maps Video attribute -> key in the yt-dlp info dictionary.
    field_sources = (
        ("title", "title"),
        ("description", "description"),
        ("channel", "uploader"),
    )

    try:
        logging.info(f"Fetching metadata for video {video_id} from YouTube")

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(
                f"https://www.youtube.com/watch?v={video_id}", download=False
            )

        # Copy only the values that are present and non-empty.
        for attribute, info_key in field_sources:
            value = info.get(info_key)
            if value:
                setattr(video, attribute, value)

        logging.info(
            f"Successfully retrieved video metadata: title='{video.title}', channel='{video.channel}'"
        )
    except Exception as meta_error:
        # Best effort: keep the existing data and make sure a title exists.
        logging.warning(f"Could not fetch metadata from YouTube: {str(meta_error)}")
        if not video.title:
            video.title = f"Video {video_id}"

    return video
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# Ensure collections exist
|
| 67 |
+
def ensure_collection_exists():
    """Create the Qdrant collections used by this module when they are missing.

    Both collections use cosine distance and the embedding model's vector
    size. The processed-videos collection also gets payload indexes on
    ``video_id`` (keyword) and ``created_at`` (integer with range support).

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    import logging

    try:
        logging.info("Checking Qdrant collections")
        listing = qdrant_client.get_collections()
        collection_names = [entry.name for entry in listing.collections]
        logging.info(f"Existing collections: {collection_names}")

        # Video segments collection
        if COLLECTION_NAME not in collection_names:
            logging.info(f"Creating collection: {COLLECTION_NAME}")
            vector_size = model.get_sentence_embedding_dimension()
            segment_vectors = models.VectorParams(
                size=vector_size,
                distance=models.Distance.COSINE,
            )
            qdrant_client.create_collection(
                collection_name=COLLECTION_NAME,
                vectors_config=segment_vectors,
            )
            logging.info(
                f"Successfully created {COLLECTION_NAME} collection with vector size {vector_size}"
            )

        # Processed videos collection, with its payload indexes
        if PROCESSED_VIDEOS_COLLECTION not in collection_names:
            logging.info(f"Creating collection: {PROCESSED_VIDEOS_COLLECTION}")
            vector_size = model.get_sentence_embedding_dimension()
            video_vectors = models.VectorParams(
                size=vector_size,
                distance=models.Distance.COSINE,
            )
            qdrant_client.create_collection(
                collection_name=PROCESSED_VIDEOS_COLLECTION,
                vectors_config=video_vectors,
            )
            qdrant_client.create_payload_index(
                collection_name=PROCESSED_VIDEOS_COLLECTION,
                field_name="video_id",
                field_schema=models.PayloadSchemaType.KEYWORD,
            )
            qdrant_client.create_payload_index(
                collection_name=PROCESSED_VIDEOS_COLLECTION,
                field_name="created_at",
                field_schema=models.IntegerIndexParams(
                    type=models.IntegerIndexType.INTEGER,
                    range=True,
                ),
            )
            logging.info(
                f"Successfully created {PROCESSED_VIDEOS_COLLECTION} collection with vector size {vector_size}"
            )
    except Exception as err:
        import traceback

        logging.error(f"Error ensuring collections exist: {str(err)}")
        logging.error(traceback.format_exc())
        raise
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def get_embeddings(text: str) -> List[float]:
    """Encode ``text`` with the module-level SentenceTransformer model.

    Returns:
        The result of ``model.encode(text)`` converted with ``.tolist()``.
    """
    vector = model.encode(text)
    return vector.tolist()
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def extract_video_id(youtube_url: str) -> str:
    """Extract the YouTube video ID from a URL or a bare video ID.

    Recognised forms:
    - ``youtube.com/watch?v=ID``, with ``v`` at any position in the query string
    - ``youtu.be/ID``
    - ``youtube.com/embed/ID``, ``/v/ID``, ``/shorts/ID``, ``/live/ID``
      (also on ``youtube-nocookie.com``)
    - a bare ID made only of letters, digits, ``_`` and ``-``

    Surrounding whitespace is ignored.

    Args:
        youtube_url: URL or video ID supplied by the caller.

    Returns:
        The extracted video ID.

    Raises:
        ValueError: If no video ID can be extracted.
    """
    import logging

    logging.info(f"Extracting video ID from URL: {youtube_url}")

    candidate = youtube_url.strip()

    patterns = [
        # The lazy optional group skips other query parameters placed before
        # "v=", while still preferring a "v=" that comes first.
        r"youtube\.com/watch\?(?:[^#]*?&)??v=([\w-]+)",
        r"youtu\.be/([\w-]+)",
        r"youtube(?:-nocookie)?\.com/(?:embed|v|shorts|live)/([\w-]+)",
    ]

    for pattern in patterns:
        match = re.search(pattern, candidate)
        if match:
            video_id = match.group(1)
            logging.info(f"Extracted video ID: {video_id}")
            return video_id

    # If no pattern matches, assume the input might be a direct video ID.
    # fullmatch (unlike "^...$" with re.match) rejects a trailing newline.
    if re.fullmatch(r"[\w-]+", candidate):
        logging.info(f"Using direct video ID: {candidate}")
        return candidate

    logging.error(f"Failed to extract video ID from URL: {youtube_url}")
    raise ValueError(f"Could not extract video ID from URL: {youtube_url}")
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def get_video_transcript(video_id: str) -> List[Dict[str, Any]]:
    """Get a transcript for a YouTube video in any available language.

    Transcripts are tried in this priority order; the first successful
    fetch is returned:

    1. English transcripts (``en`` or a regional variant such as ``en-US``)
    2. Any other translatable transcript, translated to English
    3. Any other transcript in its original language

    Args:
        video_id: The YouTube video ID.

    Returns:
        Whatever ``fetch()`` of the chosen transcript returns.

    Raises:
        ValueError: If listing fails or no transcript could be fetched.
    """
    import logging
    import traceback

    try:
        # Try to get available transcript languages
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

        # Categorize available transcripts. All English tracks are kept in
        # listing order so the first one is tried first; previously a later
        # "en" track silently overwrote an earlier one.
        # NOTE(review): assumes the library lists manually created tracks
        # before auto-generated ones -- confirm against its documentation.
        english_transcripts = []
        other_transcripts = []
        for transcript_item in transcript_list:
            code = transcript_item.language_code
            if code == "en" or code.startswith("en-"):
                english_transcripts.append(transcript_item)
            else:
                other_transcripts.append(transcript_item)

        # 1. Try English first if available
        for transcript_item in english_transcripts:
            try:
                logging.info("Found English transcript, using it directly")
                return transcript_item.fetch()
            except Exception as e:
                logging.warning(f"Failed to fetch English transcript: {str(e)}")

        # 2. Try translatable transcripts
        for transcript_item in other_transcripts:
            if not transcript_item.is_translatable:
                continue
            try:
                logging.info(
                    f"Trying to translate {transcript_item.language_code} transcript to English"
                )
                translated = transcript_item.translate("en").fetch()
                logging.info(
                    f"Successfully translated {transcript_item.language_code} transcript to English"
                )
                return translated
            except Exception as e:
                logging.warning(
                    f"Failed to translate {transcript_item.language_code} transcript: {str(e)}"
                )

        # 3. Try any transcript in original language
        for transcript_item in other_transcripts:
            try:
                logging.info(
                    f"Using non-translated {transcript_item.language_code} transcript"
                )
                return transcript_item.fetch()
            except Exception as e:
                logging.warning(
                    f"Failed to fetch {transcript_item.language_code} transcript: {str(e)}"
                )

        # If we get here, no transcripts worked
        available_langs = [
            t.language_code for t in english_transcripts + other_transcripts
        ]
        raise ValueError(
            f"No usable transcripts found for video {video_id}. Available languages: {available_langs}"
        )

    except Exception as e:
        # Every failure (including the ValueError above) is surfaced to
        # callers as a ValueError; chain the cause for debugging.
        logging.error(f"Transcript API error for video {video_id}: {str(e)}")
        logging.error(traceback.format_exc())
        raise ValueError(
            f"Could not get transcript for video {video_id}: {str(e)}"
        ) from e
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
def store_processed_video(video: Video) -> bool:
    """Store a processed video record in Qdrant.

    The point ID is derived deterministically from ``video.video_id`` so
    that storing the same video again overwrites its point instead of
    adding a second one with a fresh random ID.

    Args:
        video: The video to persist; its ``model_dump()`` becomes the payload.

    Returns:
        True if the upsert succeeded, False if any exception occurred
        (the error is logged, not raised).
    """
    import logging
    import traceback

    try:
        # Get a simple embedding for the video ID
        point_key = f"video_{video.video_id}"
        vector = get_embeddings(point_key)

        # Prepare payload
        payload = video.model_dump()

        # Store in Qdrant under a stable ID (same hex format as before)
        qdrant_client.upsert(
            collection_name=PROCESSED_VIDEOS_COLLECTION,
            points=[
                models.PointStruct(
                    id=uuid.uuid5(uuid.NAMESPACE_URL, point_key).hex,
                    vector=vector,
                    payload=payload,
                ),
            ],
        )
        return True
    except Exception as e:
        # Deliberate best-effort: report through logging (consistent with
        # store_segment) and signal failure through the return value.
        logging.error(f"Error storing processed video: {e}")
        logging.error(traceback.format_exc())
        return False
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
def get_processed_videos(limit: int = 10) -> List[Video]:
    """Get recently processed videos, most recent first.

    Points are requested from Qdrant ordered by ``created_at`` descending
    and then sorted again locally as a safeguard.

    Args:
        limit: Maximum number of videos to return.

    Returns:
        Up to ``limit`` videos; an empty list if anything fails (the error
        is logged, not raised).
    """
    import logging
    import traceback

    try:
        # Scroll through the processed videos collection
        scroll_result = qdrant_client.scroll(
            collection_name=PROCESSED_VIDEOS_COLLECTION,
            limit=limit,
            with_payload=True,
            order_by=models.OrderBy(key="created_at", direction=models.Direction.DESC),
        )

        # Convert each payload to a Video
        videos = [Video(**point.payload) for point in scroll_result[0]]

        # Sort by created_at timestamp (most recent first). Missing
        # timestamps fall back to 0, not "": mixing int and str keys raises
        # TypeError, which the handler below would turn into an empty list.
        videos.sort(key=lambda x: x.created_at or 0, reverse=True)

        return videos[:limit]
    except Exception as e:
        logging.error(f"Error getting processed videos: {e}")
        logging.error(traceback.format_exc())
        return []
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
def _normalize_transcript_items(transcript) -> List[Dict[str, Any]]:
    """Convert raw transcript items into ``{"text", "start", "duration"}`` dicts."""
    import logging

    normalized = []
    for idx, item in enumerate(transcript):
        if (
            isinstance(item, dict)
            and "text" in item
            and "start" in item
            and "duration" in item
        ):
            # Original dictionary format
            normalized.append(
                {
                    "text": item["text"],
                    "start": item["start"],
                    "duration": item["duration"],
                }
            )
        elif (
            hasattr(item, "text")
            and hasattr(item, "start")
            and hasattr(item, "duration")
        ):
            # Object with attributes
            normalized.append(
                {"text": item.text, "start": item.start, "duration": item.duration}
            )
        else:
            # Unknown format, try to extract what we can
            logging.warning(f"Encountered unknown transcript item format: {type(item)}")
            try:
                normalized.append(
                    {
                        "text": str(item),
                        # Approximate 5 seconds per item, using the item's
                        # position (enumerate avoids an O(n) .index() lookup
                        # that also returned the wrong slot for duplicates).
                        "start": float(idx * 5),
                        "duration": 5.0,
                    }
                )
            except Exception as e:
                logging.error(f"Failed to normalize transcript item: {str(e)}")
    return normalized


def _window_transcript(entries, window_seconds=30.0, overlap_seconds=10.0):
    """Group normalized entries into overlapping time windows.

    Each window starts at an entry and absorbs following entries until it
    spans at least ``window_seconds``. The next window starts at the first
    entry beginning ``window_seconds - overlap_seconds`` or more after the
    current window's start (always advancing by at least one entry).

    Returns a list of ``(first_index, start, end, text)`` tuples.
    """
    windows = []
    total = len(entries)
    stride = window_seconds - overlap_seconds
    i = 0
    while i < total:
        start_time = entries[i]["start"]
        end_time = start_time
        texts = []
        j = i
        while j < total and end_time - start_time < window_seconds:
            texts.append(entries[j]["text"])
            end_time = entries[j]["start"] + entries[j]["duration"]
            j += 1

        if texts:  # Only create a window if we have text
            windows.append((i, start_time, end_time, " ".join(texts)))

        # Skip forward so consecutive windows overlap by ~overlap_seconds.
        nxt = i + 1
        while nxt < total and entries[nxt]["start"] < start_time + stride:
            nxt += 1
        i = nxt
    return windows


def process_video(youtube_url: str) -> Video:
    """Process a YouTube video to extract and store transcript segments.

    Steps: extract the video ID, return early if the video is already
    stored as processed, fetch metadata (best-effort) and the transcript,
    split the transcript into overlapping ~30-second segments with a
    10-second overlap, store the segments, then mark and store the video.

    Args:
        youtube_url: YouTube URL (or bare video ID) to process.

    Returns:
        The processed ``Video`` (the existing one if already processed).

    Raises:
        Exception: Errors from ID extraction, transcript retrieval,
            segmentation or collection setup are logged and re-raised.
    """
    import logging
    import time
    import traceback

    logging.info(f"Processing video URL: {youtube_url}")
    transcript = None
    video_id = None

    # Extract video ID and get transcript
    try:
        video_id = extract_video_id(youtube_url)
        logging.info(f"Successfully extracted video ID: {video_id}")

        # Check if video has already been processed
        existing_video = get_video_by_id(video_id)
        if existing_video and existing_video.processed:
            logging.info(
                f"Video {video_id} has already been processed. Skipping processing."
            )
            return existing_video

        # Create basic video object with current timestamp. time.time() is a
        # true Unix epoch; datetime.utcnow().timestamp() treats the naive UTC
        # value as local time and is off by the host's UTC offset.
        current_time = int(time.time())
        video = Video(video_id=video_id, created_at=current_time)

        # Get video metadata from YouTube using the helper function
        try:
            video = _fetch_youtube_metadata(video_id, video)
        except Exception as meta_error:
            # Continue with processing even if metadata fetch fails
            logging.warning(
                f"Error fetching YouTube metadata during processing: {str(meta_error)}"
            )

        # Get transcript
        logging.info(f"Fetching transcript for video ID: {video_id}")
        transcript = get_video_transcript(video_id)
        logging.info(
            f"Successfully retrieved transcript with {len(transcript)} entries"
        )

        # If we couldn't get metadata, derive a title from the first transcript item
        if (not video.title or video.title == f"Video {video_id}") and len(
            transcript
        ) > 0:
            try:
                first_item = transcript[0]
                if isinstance(first_item, dict) and "text" in first_item:
                    snippet = first_item["text"][:30]
                elif hasattr(first_item, "text"):
                    snippet = first_item.text[:30]
                else:
                    snippet = str(first_item)[:30]
                video.title = f"{snippet}..."
                logging.info(f"Set video title from transcript: {video.title}")
            except Exception as title_error:
                logging.warning(
                    f"Could not set title from transcript: {str(title_error)}"
                )
    except Exception as e:
        logging.error(f"Error in initial video processing: {str(e)}")
        logging.error(traceback.format_exc())
        raise

    # Process transcript into segments
    try:
        logging.info(f"Processing {len(transcript)} transcript entries into segments")

        # Normalize first, then cut overlapping 30-second windows with a
        # 10-second overlap. The previous loop tried to skip ahead with
        # `i += 1` inside `for i in range(...)`, which has no effect, so it
        # emitted one window per transcript entry.
        normalized_transcript = _normalize_transcript_items(transcript)
        segments = [
            VideoSegment(
                text=text,
                start=start_time,
                end=end_time,
                segment_id=f"{video_id}_{first_index}",
                video_id=video_id,
            )
            for first_index, start_time, end_time, text in _window_transcript(
                normalized_transcript
            )
        ]

        logging.info(f"Created {len(segments)} segments from transcript")

        # Store segments in Qdrant
        logging.info("Ensuring Qdrant collections exist")
        ensure_collection_exists()

        logging.info(f"Storing {len(segments)} segments in Qdrant")
        failed = sum(1 for segment in segments if not store_segment(segment))
        if failed:
            # store_segment reports failure via its return value; surface it
            # instead of dropping it silently.
            logging.warning(
                f"Failed to store {failed} of {len(segments)} segments for video {video_id}"
            )
    except Exception as e:
        logging.error(f"Error processing transcript segments: {str(e)}")
        logging.error(traceback.format_exc())
        raise

    # Mark video as processed and store it
    try:
        logging.info(f"Marking video {video_id} as processed")
        video.processed = True

        logging.info("Storing processed video in Qdrant")
        if store_processed_video(video):
            logging.info(f"Successfully stored processed video: {video_id}")
        else:
            logging.warning(f"Failed to store processed video in Qdrant: {video_id}")

        return video
    except Exception as e:
        logging.error(f"Error storing processed video: {str(e)}")
        logging.error(traceback.format_exc())
        raise
|
| 492 |
+
|
| 493 |
+
|
| 494 |
+
def store_segment(segment: VideoSegment) -> bool:
    """Store a video segment in Qdrant.

    The point ID is derived deterministically from ``segment.segment_id``,
    so storing the same segment again (for example when processing is
    retried after a partial failure) overwrites the existing point instead
    of adding a duplicate under a new random ID.

    Args:
        segment: The segment to embed and persist; its ``model_dump()``
            becomes the payload.

    Returns:
        True if the upsert succeeded, False if any exception occurred
        (the error is logged, not raised).
    """
    import logging

    try:
        # Get embeddings
        logging.debug(f"Getting embeddings for segment {segment.segment_id}")
        vector = get_embeddings(segment.text)

        # Prepare payload
        payload = segment.model_dump()

        # Store in Qdrant under a stable ID (same hex format as before)
        point_id = uuid.uuid5(uuid.NAMESPACE_URL, f"segment_{segment.segment_id}").hex
        logging.debug(
            f"Storing segment {segment.segment_id} in Qdrant with point ID {point_id}"
        )
        qdrant_client.upsert(
            collection_name=COLLECTION_NAME,
            points=[
                models.PointStruct(
                    id=point_id,
                    vector=vector,
                    payload=payload,
                ),
            ],
        )
        return True
    except Exception as e:
        import traceback

        logging.error(f"Error storing segment {segment.segment_id}: {str(e)}")
        logging.error(traceback.format_exc())
        return False
|
| 528 |
+
|
| 529 |
+
|
| 530 |
+
def search_video_segments(
    query: str, video_id: Optional[str] = None, limit: int = 5
) -> List[SearchResult]:
    """Run a vector search over stored segments for ``query``.

    Args:
        query: Free-text query; it is embedded with ``get_embeddings``.
        video_id: When truthy, only segments whose ``video_id`` payload
            field matches are searched.
        limit: Maximum number of hits requested from Qdrant.

    Returns:
        One ``SearchResult`` (score plus ``VideoSegment``) per hit, in the
        order Qdrant returned them.
    """
    query_vector = get_embeddings(query)

    # Restrict to a single video only when an ID was supplied.
    filter_param = (
        models.Filter(
            must=[
                models.FieldCondition(
                    key="video_id",
                    match=models.MatchValue(value=video_id),
                ),
            ],
        )
        if video_id
        else None
    )

    hits = qdrant_client.search(
        collection_name=COLLECTION_NAME,
        query_vector=query_vector,
        limit=limit,
        query_filter=filter_param,
    )

    # Rebuild the segment from each hit's payload and pair it with its score.
    return [
        SearchResult(score=hit.score, segment=VideoSegment(**hit.payload))
        for hit in hits
    ]
|
| 571 |
+
|
| 572 |
+
|
| 573 |
+
def get_all_segments(video_id: str) -> List[VideoSegment]:
    """Get all segments for a specific video, ordered by start time.

    The collection is scrolled page by page until Qdrant reports no
    further offset, so the result is not capped at a single page.

    Args:
        video_id: Value the ``video_id`` payload field must match.

    Returns:
        All matching segments sorted by ascending ``start``.
    """
    # Prepare filter for the video_id
    filter_param = models.Filter(
        must=[
            models.FieldCondition(
                key="video_id",
                match=models.MatchValue(value=video_id),
            ),
        ],
    )

    # Scroll without a vector, following the next-page offset so long
    # videos are not silently truncated at one page.
    segments = []
    offset = None
    while True:
        points, offset = qdrant_client.scroll(
            collection_name=COLLECTION_NAME,
            scroll_filter=filter_param,
            limit=1000,
            offset=offset,
            with_payload=True,
        )
        # Convert payloads to VideoSegment
        segments.extend(VideoSegment(**point.payload) for point in points)
        if offset is None or not points:
            break

    # Sort by start time
    segments.sort(key=lambda x: x.start)

    return segments
|
| 603 |
+
|
| 604 |
+
|
| 605 |
+
def get_video_by_id(video_id: str) -> Optional[Video]:
    """Get a specific video by its video_id.

    Looks the video up in the processed-videos collection. If it is stored
    but has no real title, a metadata refresh from YouTube is attempted;
    a failure of that refresh keeps the stored record. If the video is not
    stored, a new ``Video`` is created and filled from YouTube metadata.

    Args:
        video_id: The YouTube video ID.

    Returns:
        The stored or freshly built ``Video``. On any other error a basic
        ``Video`` with a placeholder title is returned (the error is
        logged, not raised).
    """
    import logging

    try:
        # Create filter for the video_id
        filter_param = models.Filter(
            must=[
                models.FieldCondition(
                    key="video_id",
                    match=models.MatchValue(value=video_id),
                ),
            ],
        )

        # Search in the processed_videos collection
        scroll_result = qdrant_client.scroll(
            collection_name=PROCESSED_VIDEOS_COLLECTION,
            scroll_filter=filter_param,
            limit=1,  # We only need one result
            with_payload=True,
        )

        # Check if any results were found
        points = scroll_result[0]
        if points:
            # Convert payload to Video
            video = Video(**points[0].payload)

            # If video exists but doesn't have title, try to fetch it from YouTube
            if not video.title or video.title == f"Video {video_id}":
                try:
                    video = _fetch_youtube_metadata(video_id, video)
                except Exception as meta_error:
                    # Keep the stored record: falling through to the outer
                    # handler would return a blank Video and lose the
                    # stored `processed` state.
                    logging.warning(
                        f"Could not refresh metadata for video {video_id}: {str(meta_error)}"
                    )

            return video

        # If video not found in database, fetch basic metadata from YouTube
        logging.info(f"Video {video_id} not found in database, fetching from YouTube")
        video = Video(video_id=video_id)
        return _fetch_youtube_metadata(video_id, video)

    except Exception as e:
        logging.error(f"Error getting video by ID {video_id}: {str(e)}")
        # Return a basic video object with just the ID
        return Video(video_id=video_id, title=f"Video {video_id}")
|
app/static/css/style.css
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* ==========================================================================
   Custom styles
   ========================================================================== */

/* ---- Video carousel ------------------------------------------------------ */

/* Items snap to the centre; 200px minimum on narrow viewports (mobile-first),
   raised to 250px from 640px upward. */
.carousel-item {
    scroll-snap-align: center;
    min-width: 200px;
}

@media (min-width: 640px) {
    .carousel-item {
        min-width: 250px;
    }
}

/* Carousel container: clip horizontal overflow so arrows don't overlap
   content, and hide the scrollbar in every engine. */
.carousel {
    overflow-x: hidden;
    -ms-overflow-style: none; /* IE / Edge */
    scrollbar-width: none;    /* Firefox */
}

.carousel::-webkit-scrollbar {
    display: none; /* Chrome / Safari / Opera */
}

/* Disabled navigation arrows */
.btn-circle.btn-disabled {
    cursor: not-allowed;
    opacity: 0.5;
}

/* Video cards: vertical flex layout, fixed-size figure, stretching body */
.carousel-item .card,
.carousel-item .card .card-body {
    display: flex;
    flex-direction: column;
}

.carousel-item .card {
    height: 100%;
}

.carousel-item .card figure {
    flex: 0 0 auto;
    width: 100%;
}

.carousel-item .card .card-body {
    flex: 1 0 auto;
}

/* ---- Transcript ---------------------------------------------------------- */

/* Scrollable transcript container */
.transcript-container {
    max-height: 500px;
    overflow-y: auto;
    padding-right: 1rem;
}

/* Shared hover animation for transcript segments and search results */
.transcript-segment,
.search-result {
    transition: all 0.2s ease;
}

/* Individual transcript segments */
.transcript-segment {
    background-color: var(--base-200, #f3f4f6);
    border: 1px solid transparent;
    border-radius: 0.5rem;
    cursor: pointer;
    margin-bottom: 0.5rem;
    padding: 0.625rem;
}

.transcript-segment:hover {
    background-color: var(--base-300, #e5e7eb);
    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
    transform: translateY(-1px);
}

.transcript-segment.highlight {
    background-color: var(--primary-focus, rgba(59, 130, 246, 0.2));
    border-left: 3px solid var(--primary, #3b82f6);
    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.08);
}

.transcript-segment.hidden-segment {
    display: none;
}

/* ---- Pills: timestamp and score badge ------------------------------------ */

.timestamp,
.score-badge {
    border-radius: 9999px;
    display: inline-block;
    font-size: 0.75rem;
    padding: 0.125rem 0.5rem;
}

/* Timestamp */
.timestamp {
    background-color: var(--neutral, #e5e7eb);
    color: var(--neutral-content, #4b5563);
    font-weight: bold;
    margin-right: 0.5rem;
}

/* Score badge */
.score-badge {
    background-color: var(--primary, #3b82f6);
    color: var(--primary-content, white);
    margin-left: 0.5rem;
}

/* ---- Search results ------------------------------------------------------ */

.search-result:hover {
    transform: translateY(-2px);
}

/* ---- Metadata tags ------------------------------------------------------- */

.metadata-tags {
    display: flex;
    flex-wrap: wrap;
    gap: 0.25rem;
    margin-top: 0.5rem;
}

.metadata-tag {
    background-color: var(--accent, #d8b4fe);
    border-radius: 9999px;
    color: var(--accent-content, #581c87);
    font-size: 0.7rem;
    padding: 0.1rem 0.4rem;
    white-space: nowrap;
}
|
app/static/js/index.js
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Index page functionality
|
| 2 |
+
document.addEventListener('DOMContentLoaded', () => {
|
| 3 |
+
const youtubeUrlInput = document.getElementById('youtube-url');
|
| 4 |
+
const processButton = document.getElementById('process-button');
|
| 5 |
+
const processStatus = document.getElementById('process-status');
|
| 6 |
+
const processingIndicator = document.getElementById('processing');
|
| 7 |
+
const recentlyProcessedCard = document.getElementById('recently-processed');
|
| 8 |
+
const videoListContainer = document.getElementById('video-list');
|
| 9 |
+
|
| 10 |
+
// Example video buttons
|
| 11 |
+
const exampleButtons = document.querySelectorAll('.example-video');
|
| 12 |
+
|
| 13 |
+
// Process button click handler
|
| 14 |
+
processButton.addEventListener('click', () => processVideo());
|
| 15 |
+
|
| 16 |
+
// Enter key in input field
|
| 17 |
+
youtubeUrlInput.addEventListener('keypress', (e) => {
|
| 18 |
+
if (e.key === 'Enter') processVideo();
|
| 19 |
+
});
|
| 20 |
+
|
| 21 |
+
// Example video buttons
|
| 22 |
+
exampleButtons.forEach(button => {
|
| 23 |
+
button.addEventListener('click', () => {
|
| 24 |
+
youtubeUrlInput.value = button.dataset.url;
|
| 25 |
+
processVideo();
|
| 26 |
+
});
|
| 27 |
+
});
|
| 28 |
+
|
| 29 |
+
// Process video function
|
| 30 |
+
/**
 * Validate the URL in the input field, POST it to /api/video/process and
 * render the outcome (spinner, slow-processing notice, success or error)
 * into the status element. On success the recent-video lists are refreshed.
 */
function processVideo() {
    const youtubeUrl = youtubeUrlInput.value.trim();
    if (!youtubeUrl) {
        processStatus.innerHTML = '<div class="alert alert-warning">Please enter a YouTube URL</div>';
        return;
    }

    // Client-side sanity check only: the request below sends the URL, not the ID.
    if (!extractVideoId(youtubeUrl)) {
        processStatus.innerHTML = '<div class="alert alert-error">Invalid YouTube URL</div>';
        return;
    }

    // Show loading indicator with spinner and text
    processStatus.innerHTML = `
        <div class="flex items-center justify-center my-4">
            <span class="loading loading-spinner loading-md text-primary"></span>
            <span class="ml-2">Processing video... This may take a few moments</span>
        </div>
    `;

    // After 20 seconds, replace the spinner with a "still working" notice
    const timeoutId = setTimeout(() => {
        processStatus.innerHTML = `
            <div class="alert alert-warning">
                <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
                    <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z" />
                </svg>
                <span>Processing is taking longer than expected. Please wait...</span>
            </div>
        `;
    }, 20000); // 20 seconds

    // Send request to process the video
    fetch('/api/video/process', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        body: JSON.stringify({ url: youtubeUrl })
    })
    .then(response => {
        if (!response.ok) {
            throw new Error('Failed to process video');
        }
        return response.json();
    })
    .then(data => {
        // Clear timeout for long-running process
        clearTimeout(timeoutId);

        // Extract video ID from response (handles both old and new API formats).
        // Named distinctly so it does not shadow any outer identifier.
        const processedId = data.video ? data.video.video_id : data.video_id;
        const isNewlyProcessed = data.newly_processed !== undefined ? data.newly_processed : true;

        if (!processedId) {
            throw new Error('Invalid response: Missing video ID');
        }

        // Log for debugging
        console.log('Process response:', {videoId: processedId, isNewlyProcessed, data});

        // Show success message. The ID is URL-encoded before it is placed in
        // the href because it is inserted into markup via innerHTML.
        processStatus.innerHTML = `
            <div role="alert" class="alert alert-success">
                <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
                    <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
                </svg>
                <span>${isNewlyProcessed ? 'Video processed successfully!' : 'Video was already processed!'}</span>
                <div>
                    <a href="/video/${encodeURIComponent(processedId)}" class="btn btn-sm btn-primary">
                        <svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4 mr-1" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14.752 11.168l-3.197-2.132A1 1 0 0010 9.87v4.263a1 1 0 001.555.832l3.197-2.132a1 1 0 000-1.664z" />
                            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
                        </svg>
                        Open Video
                    </a>
                </div>
            </div>
        `;

        // Update recent videos lists
        displayRecentVideos();
        loadFooterRecentVideos(); // Update footer videos as well
    })
    .catch(error => {
        // Clear timeout for long-running process
        clearTimeout(timeoutId);

        // Show error message
        console.error('Process error:', error);
        processStatus.innerHTML = handleError(error);
    });
}
|
| 128 |
+
|
| 129 |
+
// Display recently processed videos
|
| 130 |
+
/**
 * Render the "recently processed" carousel from /api/video/recent.
 *
 * Shows a spinner while loading, then up to five video cards. Prev/next
 * arrows are shown only when there is more than one card. The card is hidden
 * when the server returns no videos; an error alert is shown when the
 * request (or rendering) fails.
 *
 * This function is called again after every processed video, so everything
 * it wires up must be safe to wire up repeatedly.
 */
function displayRecentVideos() {
    // Titles come from outside this page; never interpolate them into
    // innerHTML without escaping.
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    })[ch]);

    // Show loading state
    recentlyProcessedCard.classList.remove('hidden');
    videoListContainer.innerHTML = `
        <div class="flex justify-center items-center p-4">
            <span class="loading loading-spinner loading-md"></span>
            <span class="ml-2">Loading recent videos...</span>
        </div>
    `;

    const carouselPrev = document.getElementById('carousel-prev');
    const carouselNext = document.getElementById('carousel-next');

    const hideArrows = () => {
        carouselPrev.classList.add('hidden');
        carouselNext.classList.add('hidden');
    };

    // Fetch recent videos from server
    fetch('/api/video/recent?limit=5')
        .then(response => {
            if (!response.ok) {
                throw new Error('Failed to fetch recent videos');
            }
            return response.json();
        })
        .then(videos => {
            if (!(videos && videos.length > 0)) {
                recentlyProcessedCard.classList.add('hidden');
                hideArrows();
                return;
            }

            // Limit to 5 videos even if the server returned more
            const limitedVideos = videos.slice(0, 5);

            // Generate carousel items
            const carouselItems = limitedVideos.map((video, index) => {
                // created_at is a Unix timestamp in seconds; Date wants milliseconds
                let formattedDate = '';
                if (video.created_at) {
                    formattedDate = new Date(video.created_at * 1000).toLocaleDateString();
                }

                const videoTitle = escapeHtml(video.title || `Video ${video.video_id}`);
                // The id is placed in URL paths, so URL-encode it
                const safeId = encodeURIComponent(video.video_id);

                return `
                    <div id="video-${index}" class="carousel-item">
                        <a href="/video/${safeId}" class="card bg-base-100 shadow-sm hover:shadow-md transition-all w-64 md:w-72 flex flex-col">
                            <figure class="w-full h-36 overflow-hidden">
                                <img src="https://img.youtube.com/vi/${safeId}/mqdefault.jpg" alt="Thumbnail" class="w-full h-full object-cover">
                            </figure>
                            <div class="card-body p-3">
                                <h3 class="card-title text-sm line-clamp-2">${videoTitle}</h3>
                                <div class="text-xs opacity-70">${escapeHtml(formattedDate)}</div>
                            </div>
                        </a>
                    </div>
                `;
            }).join('');

            // Add carousel items to container
            videoListContainer.innerHTML = carouselItems;

            if (limitedVideos.length <= 1) {
                // A single card needs no navigation
                hideArrows();
                return;
            }

            // Multiple videos: show and wire the navigation arrows
            let currentIndex = 0;
            const maxIndex = limitedVideos.length - 1;

            carouselPrev.classList.remove('hidden');
            carouselNext.classList.remove('hidden');

            const prevButton = carouselPrev.querySelector('button');
            const nextButton = carouselNext.querySelector('button');

            // Disable the arrow that cannot move any further
            const updateButtonStates = () => {
                prevButton.classList.toggle('btn-disabled', currentIndex === 0);
                nextButton.classList.toggle('btn-disabled', currentIndex === maxIndex);
            };

            const scrollToCurrent = () => {
                document.getElementById(`video-${currentIndex}`).scrollIntoView({
                    behavior: 'smooth',
                    block: 'nearest',
                    inline: 'center'
                });
            };

            // Assign via onclick rather than addEventListener: this function
            // runs on every refresh, and addEventListener would stack one more
            // handler (each with its own currentIndex) per refresh.
            prevButton.onclick = () => {
                if (currentIndex > 0) {
                    currentIndex--;
                    scrollToCurrent();
                    updateButtonStates();
                }
            };

            nextButton.onclick = () => {
                if (currentIndex < maxIndex) {
                    currentIndex++;
                    scrollToCurrent();
                    updateButtonStates();
                }
            };

            // Start at the first card; this also clears a stale disabled state
            // left on the next button by a previous render.
            updateButtonStates();
        })
        .catch(error => {
            console.error('Error fetching recent videos:', error);
            videoListContainer.innerHTML = `
                <div class="alert alert-error">
                    <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
                        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" />
                    </svg>
                    <span>Failed to load recent videos</span>
                </div>
            `;
            hideArrows();
        });
}
|
| 265 |
+
|
| 266 |
+
// Display recent videos on page load
|
| 267 |
+
displayRecentVideos();
|
| 268 |
+
});
|
app/static/js/main.js
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Common functionality
|
| 2 |
+
|
| 3 |
+
// Initialize on page load
|
| 4 |
+
document.addEventListener('DOMContentLoaded', () => {
    // Populate the footer's recent-videos list as soon as the DOM is ready.
    loadFooterRecentVideos();

    const rootElement = document.documentElement;
    const applyTheme = (themeName) => rootElement.setAttribute('data-theme', themeName);

    // Clicking a theme entry applies it immediately and persists the choice.
    for (const themeEntry of document.querySelectorAll('.theme-item')) {
        themeEntry.addEventListener('click', () => {
            const chosenTheme = themeEntry.dataset.theme;
            applyTheme(chosenTheme);
            localStorage.setItem('theme', chosenTheme);
        });
    }

    // Restore the theme stored by an earlier visit, if there is one.
    const rememberedTheme = localStorage.getItem('theme');
    if (rememberedTheme) {
        applyTheme(rememberedTheme);
    }
});
|
| 24 |
+
|
| 25 |
+
// Format seconds to MM:SS format
|
| 26 |
+
/**
 * Format a duration in seconds as zero-padded "MM:SS".
 *
 * Minutes are not wrapped into hours, so 3725 seconds renders as "62:05".
 * Fractional seconds are truncated with Math.floor.
 */
function formatTime(seconds) {
    const twoDigits = (value) => String(value).padStart(2, '0');
    const wholeMinutes = Math.floor(seconds / 60);
    const leftoverSeconds = Math.floor(seconds % 60);
    return [twoDigits(wholeMinutes), twoDigits(leftoverSeconds)].join(':');
}
|
| 31 |
+
|
| 32 |
+
// Error handling function
|
| 33 |
+
/**
 * Log an error and build the HTML for an error alert with a "Retry" button.
 *
 * @param {*} error - Usually an Error; anything with a `message` works.
 *     null/undefined or a missing message falls back to a generic text.
 * @returns {string} HTML markup for the alert. The message is HTML-escaped,
 *     because callers assign the result to innerHTML.
 */
function handleError(error) {
    console.error('Error:', error);

    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    })[ch]);

    // `error && ...` keeps a null/undefined argument from throwing here
    const detail = escapeHtml((error && error.message) || 'Something went wrong');

    return `<div role="alert" class="alert alert-error">
        <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" />
        </svg>
        <span>Error: ${detail}</span>
        <div>
            <button class="btn btn-sm btn-ghost" onclick="window.location.reload()">Retry</button>
        </div>
    </div>`;
}
|
| 45 |
+
|
| 46 |
+
// Toast notification function
|
| 47 |
+
/**
 * Show a toast notification in the bottom-right corner of the page.
 *
 * The toast carries an icon chosen by `type`, the message, and a close
 * button. It starts fading out after 3 seconds and is removed 500 ms later.
 *
 * @param {string} message - Inserted as HTML into the toast body.
 *     NOTE(review): the value is not escaped here; confirm callers only pass
 *     trusted text.
 * @param {string} [type='info'] - 'success', 'warning', 'error' or anything
 *     else (rendered with the info icon). Also used in the `alert-${type}`
 *     CSS class.
 */
function showToast(message, type = 'info') {
    // Icon markup per toast type; unknown types fall back to the info icon.
    const infoIcon = `<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" class="stroke-current shrink-0 w-6 h-6">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path>
</svg>`;
    const iconsByType = new Map([
        ['success', `<svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
</svg>`],
        ['warning', `<svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z" />
</svg>`],
        ['error', `<svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" />
</svg>`]
    ]);
    const icon = iconsByType.has(type) ? iconsByType.get(type) : infoIcon;

    const toast = document.createElement('div');
    toast.className = `alert alert-${type} fixed bottom-4 right-4 max-w-xs z-50 shadow-lg`;
    toast.innerHTML = `
${icon}
<span>${message}</span>
<div>
<button class="btn btn-sm btn-ghost" onclick="this.parentElement.parentElement.remove()">
<svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M6 18L18 6M6 6l12 12" />
</svg>
</button>
</div>
`;
    document.body.appendChild(toast);

    // Auto-dismiss: fade for 500 ms starting at the 3 second mark, then remove.
    const fadeOutAndRemove = () => {
        toast.classList.add('opacity-0', 'transition-opacity', 'duration-500');
        setTimeout(() => toast.remove(), 500);
    };
    setTimeout(fadeOutAndRemove, 3000);
}
|
| 94 |
+
|
| 95 |
+
// Extract video ID from YouTube URL
|
| 96 |
+
/**
 * Extract the 11-character YouTube video id from a URL.
 *
 * Every URL the original pattern accepted still yields the same id. On top
 * of that, this version:
 *   - returns null instead of throwing for non-string input;
 *   - finds `v=` when it is not the first query parameter;
 *   - handles path-style ids that begin with "v" (the legacy pattern's
 *     optional "v" swallowed their first character);
 *   - recognises `/shorts/<id>` links.
 *
 * @param {string} url - Candidate YouTube URL.
 * @returns {string|null} The video id, or null when none is found.
 */
function extractVideoId(url) {
    if (typeof url !== 'string') {
        return null;
    }

    // Legacy pattern, kept verbatim and tried first so previously accepted
    // URLs keep producing exactly the same result.
    const regExp = /^.*((youtu.be\/)|(v\/)|(\/u\/\w\/)|(embed\/)|(watch\?))\??v?=?([^#&?]*).*/;
    const match = url.match(regExp);
    if (match && match[7].length === 11) {
        return match[7];
    }

    // Fallback 1: a `v` query parameter anywhere in the query string.
    const byQuery = url.match(/[?&]v=([\w-]{11})(?:[&#]|$)/);
    if (byQuery) {
        return byQuery[1];
    }

    // Fallback 2: path-style links (short links, embeds, /v/, shorts).
    const byPath = url.match(/(?:youtu\.be\/|\/embed\/|\/shorts\/|\/v\/)([\w-]{11})(?:[?&#\/]|$)/);
    return byPath ? byPath[1] : null;
}
|
| 101 |
+
|
| 102 |
+
// Load recent videos into the footer from the API
|
| 103 |
+
/**
 * Fill the footer's "recent videos" list from /api/video/recent?limit=3.
 *
 * Does nothing when the page has no #footer-recent-videos element. Shows a
 * loading line first, then either one link per video, a "No recent videos"
 * note, or a failure note if the request fails.
 */
function loadFooterRecentVideos() {
    const footerRecentVideos = document.getElementById('footer-recent-videos');
    if (!footerRecentVideos) return;

    // Titles come from outside this page; escape them before they reach innerHTML.
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    })[ch]);

    // Show loading state
    footerRecentVideos.innerHTML = '<p class="text-sm opacity-70">Loading recent videos...</p>';

    // Fetch recent videos from server API
    fetch('/api/video/recent?limit=3')
        .then(response => {
            if (!response.ok) {
                throw new Error('Failed to fetch recent videos');
            }
            return response.json();
        })
        .then(videos => {
            if (videos && videos.length > 0) {
                // Generate HTML for recent videos
                const videoLinks = videos.map(video => {
                    const label = escapeHtml(video.title || `Video ${video.video_id}`);
                    // The id becomes part of a URL path, so URL-encode it
                    const safeId = encodeURIComponent(video.video_id);
                    return `
                        <a href="/video/${safeId}" class="link link-hover block py-1 truncate">
                            <span class="text-xs text-primary">▶</span> ${label}
                        </a>
                    `;
                }).join('');

                // Add videos to the footer
                footerRecentVideos.innerHTML = videoLinks;
            } else {
                footerRecentVideos.innerHTML = '<p class="text-sm opacity-70">No recent videos</p>';
            }
        })
        .catch(error => {
            console.error('Error loading footer videos:', error);
            footerRecentVideos.innerHTML = '<p class="text-sm opacity-70">Failed to load recent videos</p>';
        });
}
|
app/static/js/video.js
ADDED
|
@@ -0,0 +1,440 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Video page functionality
|
| 2 |
+
document.addEventListener('DOMContentLoaded', () => {
|
| 3 |
+
// DOM handles used throughout the video page script.
const playerElement = document.getElementById('youtube-player');
const searchInput = document.getElementById('search-input');
const searchButton = document.getElementById('search-button');
const transcriptContainer = document.getElementById('transcript-container');
const loadingIndicator = document.getElementById('loading');
// NOTE(review): toggleTranscriptButton is not referenced anywhere else in this script.
const toggleTranscriptButton = document.getElementById('toggle-transcript');

// Mutable page state shared by the functions below.
let transcriptSegments = [];   // segments last returned by /api/video/segments/<id>
let ytPlayer = null;           // YT.Player instance, set by createYouTubePlayer
let isProcessingUrl = false;   // true while showProcessingIndicator is polling

// Check if there's a search query in the URL
const urlParams = new URLSearchParams(window.location.search);
const searchQuery = urlParams.get('q');              // optional initial search text
const processingUrl = urlParams.get('processing');   // compared against 'true' by showProcessingIndicator
|
| 18 |
+
|
| 19 |
+
// Format time to display as HH:MM:SS
|
| 20 |
+
/**
 * Format a duration in seconds as "H:MM:SS", or "M:SS" when under an hour.
 *
 * The leading unit is not zero-padded; fractional seconds are truncated.
 */
function formatTime(seconds) {
    const twoDigits = (value) => String(value).padStart(2, '0');

    const hours = Math.floor(seconds / 3600);
    const mins = Math.floor((seconds % 3600) / 60);
    const secs = Math.floor(seconds % 60);

    // Include the hours field only when it is non-zero
    const fields = hours > 0 ? [hours, twoDigits(mins)] : [mins];
    fields.push(twoDigits(secs));
    return fields.join(':');
}
|
| 31 |
+
|
| 32 |
+
// Handle error display
|
| 33 |
+
/**
 * Log an error and build the HTML for an inline error alert.
 *
 * @param {*} error - Usually an Error. null/undefined or a missing message
 *     falls back to a generic text.
 * @returns {string} Alert markup. The message is HTML-escaped because the
 *     result is assigned to innerHTML.
 */
function handleError(error) {
    console.error(error);

    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    })[ch]);

    const detail = escapeHtml((error && error.message) || 'Something went wrong');
    return `<div class="alert alert-error">Error: ${detail}</div>`;
}
|
| 37 |
+
|
| 38 |
+
// Initialize YouTube iframe API
|
| 39 |
+
/**
 * Attach the YouTube IFrame API to the existing player element.
 *
 * If the API (window.YT) is already present the player is created right
 * away. Otherwise the API script is injected before the first <script> tag
 * and the player is created from the onYouTubeIframeAPIReady callback.
 */
function initYouTubePlayer() {
    // The player object is bound to the existing element by its id
    const iframeId = playerElement.getAttribute('id');

    if (window.YT) {
        createYouTubePlayer(iframeId);
        return;
    }

    // API not loaded yet: inject the script ...
    const apiScript = document.createElement('script');
    apiScript.src = 'https://www.youtube.com/iframe_api';
    const firstScript = document.getElementsByTagName('script')[0];
    firstScript.parentNode.insertBefore(apiScript, firstScript);

    // ... and create the player once the API invokes this global callback
    window.onYouTubeIframeAPIReady = function() {
        createYouTubePlayer(iframeId);
    };
}
|
| 57 |
+
|
| 58 |
+
// Create YouTube player object
|
| 59 |
+
/**
 * Create the YT.Player for the given element id and store it in `ytPlayer`.
 *
 * @param {string} iframeId - id of the element hosting the player.
 */
function createYouTubePlayer(iframeId) {
    const playerOptions = {
        events: {
            'onReady': onPlayerReady
        }
    };
    ytPlayer = new YT.Player(iframeId, playerOptions);
}
|
| 66 |
+
|
| 67 |
+
// When player is ready
|
| 68 |
+
// 'onReady' handler registered in createYouTubePlayer; it only logs and
// does not use the event argument.
function onPlayerReady(event) {
    console.log('Player ready');
}
|
| 71 |
+
|
| 72 |
+
// Load transcript segments
|
| 73 |
+
/**
 * Fetch the transcript segments for the current video and render them.
 *
 * Shows a spinner, validates the global `videoId`, then requests
 * /api/video/segments/<id>. The result is stored in `transcriptSegments`
 * (always as an array) and passed to displayTranscript; an info alert is
 * shown when there are no segments, and an error alert with hints when the
 * request fails.
 */
function loadTranscript() {
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    })[ch]);

    transcriptContainer.innerHTML = '<div class="flex justify-center my-4"><span class="loading loading-spinner loading-md"></span><span class="ml-2">Loading transcript...</span></div>';

    // Check if video ID is valid before making API call. The string forms
    // 'undefined'/'null' are rejected explicitly as well.
    if (!videoId || videoId === 'undefined' || videoId === 'null') {
        transcriptContainer.innerHTML = `
            <div class="alert alert-error">
                <div>
                    <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" /></svg>
                    <span>Invalid video ID. Please return to the home page and select a valid video.</span>
                </div>
            </div>
        `;
        return;
    }

    fetch(`/api/video/segments/${encodeURIComponent(videoId)}`)
        .then(response => {
            if (!response.ok) {
                throw new Error('Failed to load transcript: ' + response.status);
            }
            return response.json();
        })
        .then(segments => {
            // Keep the shared state an array even if the server answers
            // null: other code reads transcriptSegments.length.
            transcriptSegments = Array.isArray(segments) ? segments : [];

            if (transcriptSegments.length === 0) {
                transcriptContainer.innerHTML = `
                    <div class="alert alert-info">
                        <div>
                            <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" class="stroke-current shrink-0 w-6 h-6"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path></svg>
                            <span>No transcript available for this video. Try processing the video first from the home page.</span>
                        </div>
                    </div>
                `;
            } else {
                displayTranscript(transcriptSegments);
            }
        })
        .catch(error => {
            console.error('Error loading transcript:', error);
            transcriptContainer.innerHTML = `
                <div class="alert alert-error">
                    <div>
                        <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" /></svg>
                        <span>Error loading transcript: ${escapeHtml(error.message)}</span>
                    </div>
                </div>
                <p class="mt-4">This may happen if:</p>
                <ul class="list-disc ml-8 mt-2">
                    <li>The video hasn't been processed yet</li>
                    <li>The video ID is incorrect</li>
                    <li>The server is experiencing issues</li>
                </ul>
                <p class="mt-4">Try processing this video from the home page first.</p>
            `;
        });
}
|
| 131 |
+
|
| 132 |
+
// Display transcript segments
|
| 133 |
+
/**
 * Render transcript segments into the transcript container and make each
 * one clickable to seek the player to its start time.
 *
 * @param {Array} segments - Objects with `start`, `end` and `text`.
 *     A null/undefined value is treated as an empty list.
 */
function displayTranscript(segments) {
    // Transcript text originates outside this page; escape it before it
    // reaches innerHTML.
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    })[ch]);

    const html = (segments || []).map((segment, index) => {
        const formattedTime = formatTime(segment.start);

        return `
            <div class="transcript-segment" data-start="${escapeHtml(segment.start)}" data-end="${escapeHtml(segment.end)}" data-index="${index}">
                <span class="timestamp">${escapeHtml(formattedTime)}</span>
                <span class="segment-text">${escapeHtml(segment.text)}</span>
            </div>
        `;
    }).join('');

    transcriptContainer.innerHTML = html;

    // Add click handlers to segments: clicking seeks to the segment start
    document.querySelectorAll('.transcript-segment').forEach(segment => {
        segment.addEventListener('click', () => {
            const startTime = parseFloat(segment.dataset.start);
            seekToTime(startTime);
        });
    });
}
|
| 155 |
+
|
| 156 |
+
// Seek to specific time in the video
|
| 157 |
+
/**
 * Seek the YouTube player to `seconds`, try to start playback, and
 * highlight the transcript segment at that time.
 *
 * Logs an error and does nothing else when the player is not available.
 *
 * @param {number|string} seconds - Target time; coerced with parseFloat.
 */
function seekToTime(seconds) {
    console.log('Seeking to time:', seconds);

    const playerUsable = ytPlayer && typeof ytPlayer.seekTo === 'function';
    if (!playerUsable) {
        console.error('YouTube player is not ready yet or seekTo method is not available');
        return;
    }

    try {
        // Ensure the target is a number
        seconds = parseFloat(seconds);

        // Second argument is passed as `true`, as in the original call
        ytPlayer.seekTo(seconds, true);

        // Playback may be blocked by browser autoplay policies; that is
        // not treated as a seek failure.
        try {
            ytPlayer.playVideo();
        } catch (e) {
            console.warn('Could not autoplay video:', e);
        }

        highlightSegment(seconds);
    } catch (error) {
        console.error('Error seeking to time:', error);
    }
}
|
| 184 |
+
|
| 185 |
+
// Highlight segment containing the current time
|
| 186 |
+
/**
 * Highlight the transcript segment that contains `time` and scroll it into
 * view.
 *
 * The first segment with start <= time <= end wins. If none matches, a
 * segment whose data-start attribute equals `time` exactly is used. When
 * neither exists, all highlights are simply cleared.
 *
 * @param {number} time - Playback position in seconds.
 */
function highlightSegment(time) {
    const allSegments = document.querySelectorAll('.transcript-segment');

    // Clear any previous highlight
    allSegments.forEach(element => element.classList.remove('highlight'));

    // Range lookup first
    let target = Array.from(allSegments).find(element => {
        const from = parseFloat(element.dataset.start);
        const to = parseFloat(element.dataset.end);
        return time >= from && time <= to;
    }) || null;

    // Fallback: exact match on the start attribute
    if (!target) {
        target = document.querySelector(`.transcript-segment[data-start="${time}"]`);
    }

    if (!target) {
        return;
    }

    target.classList.add('highlight');
    target.scrollIntoView({ behavior: 'smooth', block: 'center' });
}
|
| 220 |
+
|
| 221 |
+
// Search functionality
|
| 222 |
+
// Run a search when the button is clicked or Enter is pressed in the input.
searchButton.addEventListener('click', performSearch);
searchInput.addEventListener('keypress', (keyEvent) => {
    if (keyEvent.key !== 'Enter') {
        return;
    }
    performSearch();
});
|
| 226 |
+
|
| 227 |
+
/**
 * Run a search for the text in the search input against the current video.
 *
 * Validates the query and the global `videoId`, calls /api/video/search,
 * then shows the matching segments (via filterTranscript) under a header
 * with the result count, or a "no results" notice. Both views carry a
 * "Show all transcript" link that restores the full transcript. Failures
 * are rendered with handleError.
 */
function performSearch() {
    // The query is user input echoed back into the page; escape it before
    // it is placed in innerHTML.
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    })[ch]);

    const query = searchInput.value.trim();
    if (!query) {
        transcriptContainer.innerHTML = '<div class="alert alert-warning">Please enter a search query</div>';
        return;
    }

    // Validate video ID before searching
    if (!videoId || videoId === 'undefined' || videoId === 'null') {
        transcriptContainer.innerHTML = `
            <div class="alert alert-error">
                <div>
                    <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" /></svg>
                    <span>Invalid video ID. Please return to the home page and select a valid video.</span>
                </div>
            </div>
        `;
        return;
    }

    const safeQuery = escapeHtml(query);

    // Wire the "Show all transcript" link that both result views render
    const bindResetLink = () => {
        document.getElementById('reset-search').addEventListener('click', (e) => {
            e.preventDefault();
            resetTranscriptFilter();
            displayTranscript(transcriptSegments);
        });
    };

    // Show loading indicator
    loadingIndicator.classList.remove('hidden');

    // Send search request; both parameters are URL-encoded
    fetch(`/api/video/search?query=${encodeURIComponent(query)}&video_id=${encodeURIComponent(videoId)}`)
        .then(response => {
            if (!response.ok) {
                throw new Error('Search failed');
            }
            return response.json();
        })
        .then(results => {
            // Hide loading indicator
            loadingIndicator.classList.add('hidden');

            if (results.length === 0) {
                // Show "no results" message in transcript container
                transcriptContainer.innerHTML = `
                    <div role="alert" class="alert alert-info">
                        <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" class="stroke-current shrink-0 w-6 h-6">
                            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path>
                        </svg>
                        <span>No results found for "${safeQuery}". <a href="#" id="reset-search" class="link link-primary">Show all transcript</a></span>
                    </div>`;

                bindResetLink();
                return;
            }

            // Display search results as filtered transcript
            filterTranscript(results);

            // Add a header with search info and reset option
            const searchInfoHeader = document.createElement('div');
            searchInfoHeader.className = 'mb-4 flex justify-between items-center';
            searchInfoHeader.innerHTML = `
                <div class="badge badge-accent">${results.length} results for "${safeQuery}"</div>
                <a href="#" id="reset-search" class="link link-primary text-sm">Show all transcript</a>
            `;

            // Insert the header before transcript segments
            transcriptContainer.insertBefore(searchInfoHeader, transcriptContainer.firstChild);

            bindResetLink();
        })
        .catch(error => {
            // Hide loading indicator
            loadingIndicator.classList.add('hidden');

            // Show error
            transcriptContainer.innerHTML = handleError(error);
        });
}
|
| 311 |
+
|
| 312 |
+
// Filter transcript to show only matching segments
|
| 313 |
+
/**
 * Replace the transcript view with the segments returned by a search.
 *
 * Each result is rendered with its timestamp, its score as a percentage,
 * and its text; clicking a result seeks the player to the segment start.
 *
 * @param {Array} results - Objects with `segment` ({segment_id, start,
 *     end, text}) and a numeric `score` that is multiplied by 100 for display.
 */
function filterTranscript(results) {
    // Segment text originates outside this page; escape it before it
    // reaches innerHTML.
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    })[ch]);

    const html = results.map(result => {
        const segment = result.segment;
        const formattedTime = formatTime(segment.start);
        const score = (result.score * 100).toFixed(0);
        // Position of this segment in the full transcript (-1 if not found)
        const index = transcriptSegments.findIndex(s => s.segment_id === segment.segment_id);

        return `
            <div class="transcript-segment search-result" data-start="${escapeHtml(segment.start)}" data-end="${escapeHtml(segment.end)}" data-index="${index}">
                <div class="flex justify-between items-center">
                    <span class="timestamp">${escapeHtml(formattedTime)}</span>
                    <div class="badge badge-primary">${escapeHtml(score)}% match</div>
                </div>
                <span class="segment-text mt-1">${escapeHtml(segment.text)}</span>
            </div>
        `;
    }).join('');

    // Replace transcript with filtered results
    transcriptContainer.innerHTML = html;

    // Add click handlers to segments
    document.querySelectorAll('.transcript-segment').forEach(segment => {
        segment.addEventListener('click', () => {
            const startTime = parseFloat(segment.dataset.start);
            seekToTime(startTime);
        });
    });
}
|
| 343 |
+
|
| 344 |
+
// Transcript is always visible - toggle functionality removed
|
| 345 |
+
|
| 346 |
+
// Reset transcript filter to show all segments
|
| 347 |
+
// Clears the search box only. It does not re-render the transcript;
// callers in this script call displayTranscript(transcriptSegments) right after.
function resetTranscriptFilter() {
    searchInput.value = '';
}
|
| 350 |
+
|
| 351 |
+
// Show processing indicator if URL was just processed
|
| 352 |
+
/**
 * When the page was opened with ?processing=true, show a "processing"
 * spinner and poll for transcript segments until they exist.
 *
 * Polls /api/video/segments/<id> every 2 seconds. As soon as segments are
 * returned, polling stops and loadTranscript() renders them. After 2
 * minutes without segments a warning is shown instead. An invalid video id
 * stops polling immediately and shows an error.
 *
 * @returns {boolean} true when polling was started (the caller must not
 *     load the transcript itself), false otherwise.
 */
function showProcessingIndicator() {
    if (processingUrl !== 'true') {
        return false;
    }

    isProcessingUrl = true;
    transcriptContainer.innerHTML = `
        <div class="flex items-center justify-center my-4">
            <span class="loading loading-spinner loading-md text-primary"></span>
            <span class="ml-2">Processing video from URL... This may take a few moments</span>
        </div>
    `;

    let giveUpTimer = null;

    // End polling for good: stop both timers and clear the processing flag
    // so the give-up warning can never overwrite a final state.
    const stopPolling = () => {
        clearInterval(processingInterval);
        clearTimeout(giveUpTimer);
        isProcessingUrl = false;
    };

    // Check for segments every 2 seconds
    const processingInterval = setInterval(() => {
        // Validate video ID before making API call
        if (!videoId || videoId === 'undefined' || videoId === 'null') {
            stopPolling();
            transcriptContainer.innerHTML = `
                <div class="alert alert-error">
                    <div>
                        <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" /></svg>
                        <span>Invalid video ID. Please return to the home page and select a valid video.</span>
                    </div>
                </div>
            `;
            return;
        }

        fetch(`/api/video/segments/${encodeURIComponent(videoId)}`)
            .then(response => {
                // A non-OK answer is treated as "not ready yet"; keep polling
                if (!response.ok) {
                    return null;
                }
                return response.json();
            })
            .then(segments => {
                if (segments && segments.length > 0) {
                    stopPolling();
                    loadTranscript();
                }
            })
            .catch(error => {
                // Errors on a single poll are logged and polling continues
                console.error('Error checking segments:', error);
            });
    }, 2000);

    // Stop checking after 2 minutes
    giveUpTimer = setTimeout(() => {
        clearInterval(processingInterval);
        if (isProcessingUrl) {
            transcriptContainer.innerHTML = `
                <div class="alert alert-warning">
                    <div>
                        <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" class="stroke-current shrink-0 w-6 h-6"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path></svg>
                        <span>Processing is taking longer than expected. Refresh the page to check progress.</span>
                    </div>
                </div>
            `;
            isProcessingUrl = false;
        }
    }, 120000);

    return true;
}
|
| 417 |
+
|
| 418 |
+
// Initialize
|
| 419 |
+
initYouTubePlayer();
|
| 420 |
+
|
| 421 |
+
// Show processing indicator or load transcript
|
| 422 |
+
if (!showProcessingIndicator()) {
|
| 423 |
+
loadTranscript();
|
| 424 |
+
}
|
| 425 |
+
|
| 426 |
+
// If there's a search query in the URL, apply it after transcript loads
|
| 427 |
+
if (searchQuery) {
|
| 428 |
+
const checkTranscriptInterval = setInterval(() => {
|
| 429 |
+
if (transcriptSegments.length > 0) {
|
| 430 |
+
clearInterval(checkTranscriptInterval);
|
| 431 |
+
// Set the search input value and trigger search
|
| 432 |
+
searchInput.value = searchQuery;
|
| 433 |
+
performSearch();
|
| 434 |
+
}
|
| 435 |
+
}, 500);
|
| 436 |
+
|
| 437 |
+
// Set timeout to stop checking after 10 seconds
|
| 438 |
+
setTimeout(() => clearInterval(checkTranscriptInterval), 10000);
|
| 439 |
+
}
|
| 440 |
+
});
|
app/templates/base.html
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en" data-theme="light">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>{{ title }}</title>
|
| 7 |
+
<link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/full.css" rel="stylesheet" type="text/css" />
|
| 8 |
+
<script src="https://cdn.tailwindcss.com"></script>
|
| 9 |
+
<link rel="stylesheet" href="{{ url_for('static', path='/css/style.css') }}">
|
| 10 |
+
</head>
|
| 11 |
+
<body class="min-h-screen flex flex-col">
|
| 12 |
+
<!-- Header/Navbar -->
|
| 13 |
+
<div class="navbar bg-base-200 shadow-md">
|
| 14 |
+
<div class="navbar-start">
|
| 15 |
+
<a href="/" class="btn btn-ghost text-xl">
|
| 16 |
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6 mr-2" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
| 17 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 10l4.553-2.276A1 1 0 0121 8.618v6.764a1 1 0 01-1.447.894L15 14M5 18h8a2 2 0 002-2V8a2 2 0 00-2-2H5a2 2 0 00-2 2v8a2 2 0 002 2z" />
|
| 18 |
+
</svg>
|
| 19 |
+
In-Video Search
|
| 20 |
+
</a>
|
| 21 |
+
</div>
|
| 22 |
+
<div class="navbar-center">
|
| 23 |
+
<div class="form-control">
|
| 24 |
+
<div class="join">
|
| 25 |
+
<input type="text" id="global-search" placeholder="Search videos..." class="input input-bordered join-item w-full md:w-96" />
|
| 26 |
+
<button id="global-search-button" class="btn btn-primary join-item">
|
| 27 |
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
| 28 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
|
| 29 |
+
</svg>
|
| 30 |
+
</button>
|
| 31 |
+
</div>
|
| 32 |
+
</div>
|
| 33 |
+
</div>
|
| 34 |
+
<div class="navbar-end">
|
| 35 |
+
<div class="dropdown dropdown-end">
|
| 36 |
+
<div tabindex="0" role="button" class="btn btn-ghost btn-circle">
|
| 37 |
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
| 38 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 3v1m0 16v1m9-9h-1M4 12H3m15.364 6.364l-.707-.707M6.343 6.343l-.707-.707m12.728 0l-.707.707M6.343 17.657l-.707.707M16 12a4 4 0 11-8 0 4 4 0 018 0z" />
|
| 39 |
+
</svg>
|
| 40 |
+
</div>
|
| 41 |
+
<ul tabindex="0" class="dropdown-content z-[1] menu p-2 shadow bg-base-100 rounded-box w-52">
|
| 42 |
+
<li><button class="theme-item" data-theme="light">Light</button></li>
|
| 43 |
+
<li><button class="theme-item" data-theme="dark">Dark</button></li>
|
| 44 |
+
<li><button class="theme-item" data-theme="cupcake">Cupcake</button></li>
|
| 45 |
+
<li><button class="theme-item" data-theme="synthwave">Synthwave</button></li>
|
| 46 |
+
</ul>
|
| 47 |
+
</div>
|
| 48 |
+
</div>
|
| 49 |
+
</div>
|
| 50 |
+
|
| 51 |
+
<!-- Main Content -->
|
| 52 |
+
<main class="container mx-auto px-4 py-8 flex-grow">
|
| 53 |
+
{% block content %}{% endblock %}
|
| 54 |
+
</main>
|
| 55 |
+
|
| 56 |
+
<!-- Footer -->
|
| 57 |
+
<footer class="footer p-10 bg-base-200 text-base-content">
|
| 58 |
+
<div>
|
| 59 |
+
<span class="footer-title">In-Video Search</span>
|
| 60 |
+
<p>Powered by Qdrant & FastAPI</p>
|
| 61 |
+
<p>Search through video content semantically</p>
|
| 62 |
+
</div>
|
| 63 |
+
<div>
|
| 64 |
+
<span class="footer-title">Recent Videos</span>
|
| 65 |
+
<div id="footer-recent-videos">
|
| 66 |
+
<!-- Recent videos will be loaded here by JavaScript -->
|
| 67 |
+
<p class="text-sm opacity-70">No recent videos</p>
|
| 68 |
+
</div>
|
| 69 |
+
</div>
|
| 70 |
+
<div>
|
| 71 |
+
<span class="footer-title">Resources</span>
|
| 72 |
+
<a class="link link-hover" href="https://qdrant.tech/" target="_blank">Qdrant</a>
|
| 73 |
+
<a class="link link-hover" href="https://fastapi.tiangolo.com/" target="_blank">FastAPI</a>
|
| 74 |
+
<a class="link link-hover" href="https://daisyui.com/" target="_blank">DaisyUI</a>
|
| 75 |
+
</div>
|
| 76 |
+
</footer>
|
| 77 |
+
|
| 78 |
+
<!-- Scripts -->
|
| 79 |
+
<script src="{{ url_for('static', path='/js/main.js') }}"></script>
|
| 80 |
+
{% block scripts %}{% endblock %}
|
| 81 |
+
</body>
|
| 82 |
+
</html>
|
app/templates/index.html
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block content %}
|
| 4 |
+
<div class="max-w-4xl mx-auto">
|
| 5 |
+
<div class="card bg-base-100 shadow-xl">
|
| 6 |
+
<div class="card-body">
|
| 7 |
+
<h2 class="card-title">Process YouTube Video</h2>
|
| 8 |
+
<p class="text-gray-600 mb-4">Enter a YouTube URL to process its transcript for searching</p>
|
| 9 |
+
|
| 10 |
+
<div class="form-control">
|
| 11 |
+
<label class="label">
|
| 12 |
+
<span class="label-text">Enter YouTube URL</span>
|
| 13 |
+
</label>
|
| 14 |
+
<div class="join w-full">
|
| 15 |
+
<input type="text" id="youtube-url" placeholder="https://www.youtube.com/watch?v=..." class="input input-bordered join-item w-full" />
|
| 16 |
+
<button id="process-button" class="btn btn-primary join-item">Process</button>
|
| 17 |
+
</div>
|
| 18 |
+
</div>
|
| 19 |
+
|
| 20 |
+
<div class="mt-4" id="process-status">
|
| 21 |
+
<!-- Processing status messages will appear here -->
|
| 22 |
+
</div>
|
| 23 |
+
|
| 24 |
+
<div class="divider">OR</div>
|
| 25 |
+
|
| 26 |
+
<h3 class="font-bold mb-2">Example Videos</h3>
|
| 27 |
+
<div class="grid grid-cols-1 md:grid-cols-3 gap-2">
|
| 28 |
+
<button class="btn btn-outline btn-accent btn-sm example-video w-full h-auto" data-url="https://www.youtube.com/watch?v=zjkBMFhNj_g">
|
| 29 |
+
<div class="flex items-center w-full">
|
| 30 |
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4 mr-2 flex-shrink-0" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
| 31 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14.752 11.168l-3.197-2.132A1 1 0 0010 9.87v4.263a1 1 0 001.555.832l3.197-2.132a1 1 0 000-1.664z" />
|
| 32 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
|
| 33 |
+
</svg>
|
| 34 |
+
<span class="truncate text-left">Intro to Large Language Models</span>
|
| 35 |
+
</div>
|
| 36 |
+
</button>
|
| 37 |
+
<button class="btn btn-outline btn-accent btn-sm example-video w-full h-auto" data-url="https://www.youtube.com/watch?v=7xTGNNLPyMI">
|
| 38 |
+
<div class="flex items-center w-full">
|
| 39 |
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4 mr-2 flex-shrink-0" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
| 40 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14.752 11.168l-3.197-2.132A1 1 0 0010 9.87v4.263a1 1 0 001.555.832l3.197-2.132a1 1 0 000-1.664z" />
|
| 41 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
|
| 42 |
+
</svg>
|
| 43 |
+
<span class="truncate text-left">Deep Dive into LLMs like ChatGPT</span>
|
| 44 |
+
</div>
|
| 45 |
+
</button>
|
| 46 |
+
<button class="btn btn-outline btn-accent btn-sm example-video w-full h-auto" data-url="https://www.youtube.com/watch?v=EWvNQjAaOHw">
|
| 47 |
+
<div class="flex items-center w-full">
|
| 48 |
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4 mr-2 flex-shrink-0" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
| 49 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14.752 11.168l-3.197-2.132A1 1 0 0010 9.87v4.263a1 1 0 001.555.832l3.197-2.132a1 1 0 000-1.664z" />
|
| 50 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
|
| 51 |
+
</svg>
|
| 52 |
+
<span class="truncate text-left">How I use LLMs</span>
|
| 53 |
+
</div>
|
| 54 |
+
</button>
|
| 55 |
+
</div>
|
| 56 |
+
</div>
|
| 57 |
+
</div>
|
| 58 |
+
|
| 59 |
+
<div class="card bg-base-100 shadow-xl mt-6 hidden" id="recently-processed">
|
| 60 |
+
<div class="card-body">
|
| 61 |
+
<h2 class="card-title">Recently Processed Videos</h2>
|
| 62 |
+
<div class="mt-4">
|
| 63 |
+
<!-- Video carousel with navigation arrows -->
|
| 64 |
+
<div class="flex items-center gap-2">
|
| 65 |
+
<!-- Left arrow navigation -->
|
| 66 |
+
<div class="hidden md:block" id="carousel-prev">
|
| 67 |
+
<button class="btn btn-circle btn-primary btn-disabled">
|
| 68 |
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
| 69 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 19l-7-7 7-7" />
|
| 70 |
+
</svg>
|
| 71 |
+
</button>
|
| 72 |
+
</div>
|
| 73 |
+
|
| 74 |
+
<!-- Carousel content -->
|
| 75 |
+
<div class="carousel carousel-center rounded-box w-full p-2 overflow-x-auto">
|
| 76 |
+
<div id="video-list" class="flex space-x-4 items-stretch">
|
| 77 |
+
<!-- Video cards will be populated here as carousel items -->
|
| 78 |
+
</div>
|
| 79 |
+
</div>
|
| 80 |
+
|
| 81 |
+
<!-- Right arrow navigation -->
|
| 82 |
+
<div class="hidden md:block" id="carousel-next">
|
| 83 |
+
<button class="btn btn-circle btn-primary">
|
| 84 |
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
| 85 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7" />
|
| 86 |
+
</svg>
|
| 87 |
+
</button>
|
| 88 |
+
</div>
|
| 89 |
+
</div>
|
| 90 |
+
</div>
|
| 91 |
+
</div>
|
| 92 |
+
</div>
|
| 93 |
+
</div>
|
| 94 |
+
{% endblock %}
|
| 95 |
+
|
| 96 |
+
{% block scripts %}
|
| 97 |
+
<script src="{{ url_for('static', path='/js/index.js') }}"></script>
|
| 98 |
+
{% endblock %}
|
app/templates/video.html
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block content %}
|
| 4 |
+
<div class="grid grid-cols-1 lg:grid-cols-2 gap-6">
|
| 5 |
+
<div class="lg:col-span-1">
|
| 6 |
+
<div class="card bg-base-100 shadow-xl">
|
| 7 |
+
<div class="card-body p-4">
|
| 8 |
+
<div class="aspect-video">
|
| 9 |
+
<iframe id="youtube-player" class="w-full h-full"
|
| 10 |
+
src="https://www.youtube.com/embed/{{ video_id }}?enablejsapi=1"
|
| 11 |
+
frameborder="0"
|
| 12 |
+
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
|
| 13 |
+
allowfullscreen>
|
| 14 |
+
</iframe>
|
| 15 |
+
</div>
|
| 16 |
+
</div>
|
| 17 |
+
</div>
|
| 18 |
+
</div>
|
| 19 |
+
|
| 20 |
+
<div class="lg:col-span-1">
|
| 21 |
+
<div class="card bg-base-100 shadow-xl sticky top-4">
|
| 22 |
+
<div class="card-body">
|
| 23 |
+
<div class="flex justify-between items-center">
|
| 24 |
+
<h2 class="card-title">Video Transcript</h2>
|
| 25 |
+
</div>
|
| 26 |
+
|
| 27 |
+
<div class="form-control mb-4">
|
| 28 |
+
<label class="label">
|
| 29 |
+
<span class="label-text">Search in transcript</span>
|
| 30 |
+
</label>
|
| 31 |
+
<div class="join w-full">
|
| 32 |
+
<input type="text" id="search-input" placeholder="Search in this video..." class="input input-bordered join-item w-full" />
|
| 33 |
+
<button id="search-button" class="btn btn-primary join-item">
|
| 34 |
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
| 35 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
|
| 36 |
+
</svg>
|
| 37 |
+
Search
|
| 38 |
+
</button>
|
| 39 |
+
</div>
|
| 40 |
+
</div>
|
| 41 |
+
|
| 42 |
+
<div id="loading" class="hidden mt-2 mb-2">
|
| 43 |
+
<span class="loading loading-spinner loading-md"></span>
|
| 44 |
+
<span class="ml-2">Searching...</span>
|
| 45 |
+
</div>
|
| 46 |
+
|
| 47 |
+
<div id="transcript-container" class="mt-2 transcript-container">
|
| 48 |
+
<!-- Transcript will be loaded here -->
|
| 49 |
+
</div>
|
| 50 |
+
</div>
|
| 51 |
+
</div>
|
| 52 |
+
</div>
|
| 53 |
+
</div>
|
| 54 |
+
{% endblock %}
|
| 55 |
+
|
| 56 |
+
{% block scripts %}
|
| 57 |
+
<script>
|
| 58 |
+
// Store the video ID in a JavaScript variable
|
| 59 |
+
const videoId = "{{ video_id }}";
|
| 60 |
+
</script>
|
| 61 |
+
<script src="{{ url_for('static', path='/js/video.js') }}"></script>
|
| 62 |
+
{% endblock %}
|
docker-compose.yml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: '3.8'
|
| 2 |
+
|
| 3 |
+
services:
|
| 4 |
+
app:
|
| 5 |
+
build: .
|
| 6 |
+
ports:
|
| 7 |
+
- "8000:8000"
|
| 8 |
+
environment:
|
| 9 |
+
- QDRANT_URL=http://qdrant:6333
|
| 10 |
+
- WORKERS=4 # Set number of workers
|
| 11 |
+
# - QDRANT_API_KEY=your_api_key_here (uncomment and set if needed)
|
| 12 |
+
depends_on:
|
| 13 |
+
- qdrant
|
| 14 |
+
restart: unless-stopped
|
| 15 |
+
healthcheck:
|
| 16 |
+
test: ["CMD", "curl", "-f", "http://localhost:8000/"]
|
| 17 |
+
interval: 30s
|
| 18 |
+
timeout: 10s
|
| 19 |
+
retries: 3
|
| 20 |
+
start_period: 40s
|
| 21 |
+
|
| 22 |
+
qdrant:
|
| 23 |
+
image: qdrant/qdrant:v1.13.5
|
| 24 |
+
volumes:
|
| 25 |
+
- qdrant_data:/qdrant/storage
|
| 26 |
+
|
| 27 |
+
volumes:
|
| 28 |
+
qdrant_data:
|
example.env
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Qdrant Configuration
|
| 2 |
+
QDRANT_URL=http://localhost:6333
|
| 3 |
+
QDRANT_API_KEY=
|
gunicorn.conf.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import multiprocessing
|
| 3 |
+
|
| 4 |
+
# Get the number of workers from environment variable or calculate based on CPU cores
|
| 5 |
+
workers_env = os.getenv("WORKERS")
|
| 6 |
+
if workers_env:
|
| 7 |
+
workers = int(workers_env)
|
| 8 |
+
else:
|
| 9 |
+
# Use the recommended formula: (2 * CPU cores) + 1
|
| 10 |
+
workers = (2 * multiprocessing.cpu_count()) + 1
|
| 11 |
+
|
| 12 |
+
# Use Uvicorn worker class for ASGI support
|
| 13 |
+
worker_class = "uvicorn.workers.UvicornWorker"
|
| 14 |
+
|
| 15 |
+
# Bind to 0.0.0.0:8000
|
| 16 |
+
bind = "0.0.0.0:8000"
|
| 17 |
+
|
| 18 |
+
# Logging
|
| 19 |
+
accesslog = "-" # Log to stdout
|
| 20 |
+
errorlog = "-" # Log to stderr
|
| 21 |
+
loglevel = "info"
|
| 22 |
+
|
| 23 |
+
# Timeout configuration
|
| 24 |
+
timeout = 120 # 2 minutes
|
| 25 |
+
graceful_timeout = 30
|
| 26 |
+
|
| 27 |
+
# Worker settings
|
| 28 |
+
worker_connections = 1000 # Maximum number of connections each worker can handle
|
| 29 |
+
keepalive = 5 # Seconds to wait between client requests before closing connection
|
| 30 |
+
|
| 31 |
+
# Process name shown by tools like ps/top (takes effect only if setproctitle is installed)
|
| 32 |
+
proc_name = "vibe-coding-rag"
|
poetry.lock
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pyproject.toml
CHANGED
|
@@ -7,16 +7,27 @@ readme = "README.md"
|
|
| 7 |
package-mode = false
|
| 8 |
|
| 9 |
[tool.poetry.dependencies]
|
| 10 |
-
python = "^3.10"
|
| 11 |
torch = {version = "^2.6.0+cpu", source = "pytorch-cpu"}
|
| 12 |
sentence-transformers = "^3.4.1"
|
| 13 |
qdrant-client = "^1.13.3"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
[[tool.poetry.source]]
|
| 16 |
name = "pytorch-cpu"
|
| 17 |
url = "https://download.pytorch.org/whl/cpu"
|
| 18 |
priority = "explicit"
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
[build-system]
|
| 21 |
requires = ["poetry-core"]
|
| 22 |
build-backend = "poetry.core.masonry.api"
|
|
|
|
| 7 |
package-mode = false
|
| 8 |
|
| 9 |
[tool.poetry.dependencies]
|
| 10 |
+
python = "^3.10,<3.14"
|
| 11 |
torch = {version = "^2.6.0+cpu", source = "pytorch-cpu"}
|
| 12 |
sentence-transformers = "^3.4.1"
|
| 13 |
qdrant-client = "^1.13.3"
|
| 14 |
+
fastapi = "^0.115.11"
|
| 15 |
+
uvicorn = "^0.34.0"
|
| 16 |
+
gunicorn = "^21.2.0"
|
| 17 |
+
jinja2 = "^3.1.6"
|
| 18 |
+
youtube-transcript-api = "^1.0.2"
|
| 19 |
+
pytube = "^15.0.0"
|
| 20 |
+
yt-dlp = "^2025.2.19"
|
| 21 |
|
| 22 |
[[tool.poetry.source]]
|
| 23 |
name = "pytorch-cpu"
|
| 24 |
url = "https://download.pytorch.org/whl/cpu"
|
| 25 |
priority = "explicit"
|
| 26 |
|
| 27 |
+
|
| 28 |
+
[tool.poetry.group.dev.dependencies]
|
| 29 |
+
ruff = "^0.11.0"
|
| 30 |
+
|
| 31 |
[build-system]
|
| 32 |
requires = ["poetry-core"]
|
| 33 |
build-backend = "poetry.core.masonry.api"
|