# 📌 Notebook 1: Embed and Store Shot Data in Qdrant

This notebook loads your cleaned shot data, embeds it using `bge-base-en-v1.5` (the default; `e5-base-v2` is also supported), and stores the embeddings in Qdrant for use in retrieval and recommendation.

# Initial Setup

In [1]:
# # Step 1: Initial Setup:

# NOTE(review): every line in this cell is commented out, so running it does nothing.
# The encoder this notebook actually uses is created by `load_embedding_model`
# in the "Embed Shot Data" section. The GTE-small snippet below looks like a
# leftover from an earlier iteration — confirm, then consider deleting the cell.
# # Load sentence-transformers and GTE-small model
# from sentence_transformers import SentenceTransformer
# model = SentenceTransformer('thenlper/gte-small')

## Load Shot Data From .csv

In [2]:
# Step 2: Read the cleaned shot export into a DataFrame and preview it
import pandas as pd

shot_data = pd.read_csv(filepath_or_buffer='../data/raw/cleaned_shot_data.csv')

# Bare last expression so the notebook renders the first five rows
shot_data.head(n=5)

Unnamed: 0,Date,Club Type,Club Description,Carry Distance,Total Distance,Ball Speed,Club Speed,Spin Rate,Attack Angle,Descent Angle,Shot Classification
0,2025-02-04 12:41:00,Driver,TopGolf - Driver (+1; N; 2.75T),124.33,171.19,122.16,85.92,1154,2.95,11.33,Hook
1,2025-02-04 12:41:42,Driver,TopGolf - Driver (+1; N; 2.75T),104.75,150.95,120.35,84.2,1666,2.45,8.19,Push Hook
2,2025-02-04 12:42:17,Driver,TopGolf - Driver (+1; N; 2.75T),163.45,195.51,115.05,86.28,1227,4.3,23.02,Push
3,2025-02-04 12:43:05,Driver,TopGolf - Driver (+1; N; 2.75T),162.57,192.56,110.91,81.96,1783,1.74,24.87,Push
4,2025-02-04 12:44:18,Driver,TopGolf - Driver (+1; N; 2.75T),105.3,152.0,118.83,80.78,1478,1.29,8.67,Push Draw


## Embed Shot Data

In [3]:
# Step 3: Format shot data into text chunks for embedding

def create_embedding_text(row):
    """Render one shot record as a natural-language passage for embedding.

    Args:
        row: A mapping-like object indexed by column name (the notebook passes
            each DataFrame row via ``DataFrame.apply(..., axis=1)``). It must
            provide 'Date', 'Total Distance', 'Carry Distance', 'Club Type',
            'Club Description', 'Shot Classification', 'Ball Speed',
            'Club Speed', 'Spin Rate', 'Attack Angle' and 'Descent Angle'.

    Returns:
        A single string: a sentence describing the shot, a sentence with its
        classification, then one short sentence per launch metric.
    """
    opening = (
        f"On {row['Date']}, the golfer hit a shot {row['Total Distance']} yards "
        f"with a carry of {row['Carry Distance']} yards "
        f"using a {row['Club Type']} ({row['Club Description']})."
    )
    outcome = f"The shot was classified as {row['Shot Classification']}."
    factors_intro = "The known contributing factors to this result were:"

    # (label shown in the text, source column, unit) — order fixes the sentence order
    metric_specs = (
        ("Ball speed", "Ball Speed", "mph"),
        ("Club speed", "Club Speed", "mph"),
        ("Spin rate", "Spin Rate", "rpm"),
        ("Attack angle", "Attack Angle", "degrees"),
        ("Descent angle", "Descent Angle", "degrees"),
    )
    metric_sentences = [
        f"{label}: {row[column]} {unit}." for label, column, unit in metric_specs
    ]

    # Sentences are separated by exactly one space
    return " ".join([opening, outcome, factors_intro, *metric_sentences])

# Build one passage per shot by running the formatter over every row
# (axis="columns" is pandas' named alias for axis=1), then unwrap to a plain list.
texts = (
    shot_data
    .apply(create_embedding_text, axis="columns")
    .tolist()
)


In [4]:
# Embedding model used for this run.
# Supported values: 'e5-base-v2', 'bge-base-en-v1.5'
embedding_model_choice = 'bge-base-en-v1.5'

# Collection name is derived from the model name (hyphens become underscores),
# so vectors produced by different models land in different collections.
descriptive_collection_name = "golf_shot_vectors_" + embedding_model_choice.replace('-', '_')

from sentence_transformers import SentenceTransformer

def load_embedding_model(name: str):
    """Return a SentenceTransformer for one of the supported short model names.

    Args:
        name: Short model name; 'e5-base-v2' or 'bge-base-en-v1.5'.

    Returns:
        A ``SentenceTransformer`` constructed from the matching model id.

    Raises:
        ValueError: If ``name`` is not one of the supported short names.
    """
    # Short name -> full model id passed to SentenceTransformer
    supported_models = (
        ('e5-base-v2', 'intfloat/e5-base-v2'),
        ('bge-base-en-v1.5', 'BAAI/bge-base-en-v1.5'),
    )
    for short_name, model_id in supported_models:
        if name == short_name:
            return SentenceTransformer(model_id)
    raise ValueError(f'Unsupported model: {name}')

# Instantiate the encoder selected by `embedding_model_choice`; the embedding
# cell below calls `model.encode(...)` on this object.
model = load_embedding_model(embedding_model_choice)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Generate embeddings with model-specific passage formatting:
# when the chosen model name starts with 'e5', every text is prefixed with
# 'passage: '; otherwise the texts are encoded unchanged.
texts_for_embedding = (
    [f'passage: {shot_text}' for shot_text in texts]
    if embedding_model_choice.startswith('e5')
    else texts
)
embeddings = model.encode(texts_for_embedding, show_progress_bar=True)

Batches: 100%|██████████| 16/16 [02:35<00:00,  9.73s/it]


## Upload Embedded Shot Data to Qdrant

In [6]:
# Collect the Qdrant API key interactively so it is never stored in the notebook.
# getpass does not echo the typed value.
from getpass import getpass

# Prompt text repaired: the emoji had been mis-decoded into mojibake
qdrant_api_key = getpass('🔑 Enter your Qdrant API Key: ')


In [7]:
# Qdrant setup
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, PointStruct, Distance

# Connect to the hosted Qdrant cluster using the key collected by the getpass prompt
client = QdrantClient(
    url='https://6f592f43-f667-4234-ad3a-4f15ed5882ef.us-west-2-0.aws.cloud.qdrant.io:6333',
    api_key=qdrant_api_key
)

# Take the vector size from the loaded encoder instead of hard-coding it, so the
# collection always matches whichever model `embedding_model_choice` selected.
vector_size = model.get_sentence_embedding_dimension()

# Flush old data: drop the collection if it already exists, then create it fresh.
# This replaces the deprecated `recreate_collection` call with the explicit
# collection_exists / delete_collection / create_collection sequence.
if client.collection_exists(collection_name=descriptive_collection_name):
    client.delete_collection(collection_name=descriptive_collection_name)

# Last expression of the cell, so the notebook displays the returned status
client.create_collection(
    collection_name=descriptive_collection_name,
    vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE)
)

  client.recreate_collection(


True

In [8]:
# Upload embedded vectors to Qdrant
points = [
    PointStruct(id=i, vector=embeddings[i], payload={'text': texts[i]})
    for i in range(len(embeddings))
]
client.upsert(collection_name=descriptive_collection_name, points=points)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)