Spaces:
Sleeping
Sleeping
from sklearn.metrics.pairwise import cosine_similarity | |
import numpy as np | |
import gradio as gr | |
import pandas as pd | |
from sentence_transformers import SentenceTransformer | |
# Load the sentence-embedding model.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Load the movie dataset from the link.
url = "https://huggingface.co/datasets/Pablinho/movies-dataset/resolve/main/9000plus.csv"
print("Loading dataset...")
dataset = pd.read_csv(url)

# Verify that the required columns exist. Raise explicitly rather than using
# `assert`, which is removed when Python runs with -O.
_missing_columns = [name for name in ("Title", "Overview") if name not in dataset.columns]
if _missing_columns:
    raise ValueError(f"Dataset is missing required column(s): {_missing_columns}")

# Drop rows whose Overview is missing or is not a string.
dataset = dataset.dropna(subset=["Overview"])
dataset = dataset[dataset["Overview"].apply(lambda x: isinstance(x, str))]

# Limit to the first 500 movies. Copy so the "embeddings" column below is
# added to an independent frame instead of a slice of the filtered one.
MAX_MOVIES = 500
dataset = dataset.head(MAX_MOVIES).copy()

print(f"Encoding {len(dataset)} movie descriptions...")
# Encode every overview in one batched call instead of one model call per row,
# then store each vector as a plain list in its row (aligned on the index).
_overview_embeddings = np.asarray(model.encode(dataset["Overview"].tolist()))
dataset["embeddings"] = pd.Series(_overview_embeddings.tolist(), index=dataset.index)
print("Done encoding!")
def recommend_similar_movies(input_text, top_n=5):
    """Return the movies whose overviews are most similar to ``input_text``.

    The query is embedded with the module-level ``model`` and compared, by
    cosine similarity, against the vectors stored in the ``embeddings``
    column of the module-level ``dataset``.

    Args:
        input_text: Free-text description of the desired movie.
        top_n: Maximum number of movies to return. ``None`` returns every
            movie, ordered from most to least similar.

    Returns:
        A string with one entry per movie (title line followed by its
        overview), entries separated by a blank line. Returns an empty
        string when ``top_n`` is zero or negative, and a short prompt
        message when ``input_text`` is empty or only whitespace.
    """
    # A non-positive count means "no results"; without this guard a negative
    # value would slice off the tail of the ranking instead.
    if top_n is not None and top_n <= 0:
        return ""

    # Do not embed an empty query: its nearest neighbours are meaningless.
    if not input_text or not input_text.strip():
        return "Please describe a movie to get recommendations."

    input_embedding = model.encode([input_text])
    movie_embeddings = np.vstack(dataset["embeddings"].to_numpy())
    similarities = cosine_similarity(input_embedding, movie_embeddings)[0]

    # Indices of the highest-scoring movies, best match first.
    top_indices = similarities.argsort()[::-1][:top_n]
    results = dataset.iloc[top_indices][["Title", "Overview"]]

    # \U0001F3AC is the clapper-board emoji, written as an escape so the
    # literal survives encoding mix-ups.
    return "\n\n".join(
        f"\U0001F3AC **{row['Title']}**\n{row['Overview']}"
        for _, row in results.iterrows()
    )
# Free-text input where the user describes the kind of movie they want.
_query_box = gr.Textbox(placeholder="Describe a movie...", lines=2)

# Web UI that passes the text box's content to recommend_similar_movies and
# shows the returned string as plain text.
demo = gr.Interface(
    title="Movie Recommender",
    description="Get movie recommendations based on your description. Powered by sentence-transformers and cosine similarity.",
    fn=recommend_similar_movies,
    inputs=_query_box,
    outputs="text",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()