Commit · 3291f62
Parent(s): cc9afbc
Files changed:
- Netflix_Recommendation_Notebook_Code +42 -0
- README.md +6 -5
- app.py +101 -0
- netflix_embeddings.npy +3 -0
- netflix_metadata.csv +0 -0
- requirements.txt +159 -0
Netflix_Recommendation_Notebook_Code
ADDED
@@ -0,0 +1,42 @@
# ran on Kaggle
!pip install sentence-transformers
!pip install torch
import torch
from sentence_transformers import SentenceTransformer
import numpy as np
import pandas as pd
from tqdm import tqdm  # for tracking progress in batches

# check if GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# load dataset
dataset = pd.read_csv('/kaggle/input/d/infamouscoder/dataset-netflix-shows/netflix_titles.csv')

# load model to GPU if available
model = SentenceTransformer("all-MiniLM-L6-v2").to(device)

# combine fields (title, genre, description) for embeddings
def combine_description_title_and_genre(description, listed_in, title):
    return f"{description} Genre: {listed_in} Title: {title}"

# create combined text column
dataset['combined_text'] = dataset.apply(lambda row: combine_description_title_and_genre(row['description'], row['listed_in'], row['title']), axis=1)

# generate embeddings in batches to save memory
batch_size = 32
embeddings = []

for i in tqdm(range(0, len(dataset), batch_size), desc="Generating Embeddings"):
    batch_texts = dataset['combined_text'][i:i+batch_size].tolist()
    batch_embeddings = model.encode(batch_texts, convert_to_tensor=True, device=device)
    embeddings.extend(batch_embeddings.cpu().numpy())  # move to CPU to save memory

# convert list to numpy array
embeddings = np.array(embeddings)

# save embeddings and metadata
np.save("/kaggle/working/netflix_embeddings.npy", embeddings)
dataset[['show_id', 'title', 'description', 'listed_in']].to_csv("/kaggle/working/netflix_metadata.csv", index=False)
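A quick sanity check on the saved artifacts is worthwhile before wiring them into the app: the embedding matrix and the metadata CSV must stay row-aligned, because app.py indexes one with positions derived from the other. A minimal sketch (not part of the commit; the paths assume the Kaggle output directory used above):

import numpy as np
import pandas as pd

embeddings = np.load("/kaggle/working/netflix_embeddings.npy")
metadata = pd.read_csv("/kaggle/working/netflix_metadata.csv")

# one embedding per title; all-MiniLM-L6-v2 produces 384-dimensional vectors
assert embeddings.shape[0] == len(metadata)
print(embeddings.shape)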
README.md
CHANGED
@@ -1,12 +1,13 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: Netflix Recommendation
+emoji: 📈
+colorFrom: green
+colorTo: purple
 sdk: gradio
-sdk_version: 5.
+sdk_version: 5.5.0
 app_file: app.py
 pinned: false
+short_description: Recommends Netflix Show/Movie based on description and genre
 ---

 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,101 @@
# Gradio Interface
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

sentence_model = SentenceTransformer("all-MiniLM-L6-v2")

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
image_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

def generate_input(input_type, image=None, text=None, response_amount=3):
    # initialize input variable
    combined_input = ""

    # handle image input if chosen
    if input_type == "Image" and image:
        inputs = processor(images=image, return_tensors="pt")  # process image with BlipProcessor
        out = image_model.generate(**inputs)  # generate caption with the BLIP model
        image_caption = processor.decode(out[0], skip_special_tokens=True)  # decode output with the processor
        combined_input += image_caption  # add the image caption to the input

    # handle text input if chosen
    elif input_type == "Text" and text:
        combined_input += text  # add the text to the input

    # handle both text and image input if chosen
    elif input_type == "Both" and image and text:
        inputs = processor(images=image, return_tensors="pt")
        out = image_model.generate(**inputs)
        image_caption = processor.decode(out[0], skip_special_tokens=True)  # repeat image processing, caption generation, and decoding
        combined_input += image_caption + " and " + text  # combine image caption and text

    # if no input, fall back
    if not combined_input:
        combined_input = "No input provided."
    if response_amount is None:
        response_amount = 3

    return vector_search(combined_input, response_amount)  # search the embedded documents with the input

# load embeddings and metadata (both created with sentence_transformers on Kaggle)
embeddings = np.load("netflix_embeddings.npy")
metadata = pd.read_csv("netflix_metadata.csv")

# vector search function
def vector_search(query, top_n=3):
    query_embedding = sentence_model.encode(query)  # encode input with Sentence Transformers
    similarities = cosine_similarity([query_embedding], embeddings)[0]  # similarity score against every title
    if top_n is None:
        top_n = 3
    top_indices = similarities.argsort()[-top_n:][::-1]  # top n indices based on the chosen output amount
    results = metadata.iloc[top_indices]  # get metadata
    # loop through the results to collect title, description, and genre for the top n outputs
    # (joining avoids the original off-by-one check against the dataframe index,
    # which was the row label, not the loop position)
    result_text = "\n\n".join(
        f"Title: {row['title']} Description: {row['description']} Genre: {row['listed_in']}"
        for _, row in results.iterrows()
    )
    return result_text


def set_response_amount(response_amount):  # set response amount
    if response_amount is None:
        return 3
    return response_amount

# based on the selected input type, make the appropriate inputs visible
def update_inputs(input_type):
    if input_type == "Image":
        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)
    elif input_type == "Text":
        return gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)
    elif input_type == "Both":
        return gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)

with gr.Blocks() as demo:
    gr.Markdown("# Netflix Recommendation System")
    gr.Markdown("Enter a query to receive Netflix show recommendations based on title, description, and genre.")

    input_type = gr.Radio(["Image", "Text", "Both"], label="Select Input Type", type="value")
    response_type = gr.Dropdown(choices=[3, 5, 10, 25], type="value", label="Select Response Amount", visible=False)
    image_input = gr.Image(label="Upload Image", type="pil", visible=False)  # hidden initially
    text_input = gr.Textbox(label="Enter Text Query", placeholder="Enter a description or query here", visible=False)  # hidden initially

    input_type.change(fn=update_inputs, inputs=input_type, outputs=[image_input, text_input, response_type])

    # state variable to store the selected response amount;
    # generate_input falls back to 3 when it is still unset, so no extra
    # None check is needed here (a gr.State object itself is never None)
    selected_response_amount = gr.State()

    # capture the response amount immediately when the dropdown changes
    response_type.change(fn=set_response_amount, inputs=response_type, outputs=selected_response_amount)

    submit_button = gr.Button("Submit")
    output = gr.Textbox(label="Recommendations")

    submit_button.click(fn=generate_input, inputs=[input_type, image_input, text_input, selected_response_amount], outputs=output)

demo.launch()
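For readers tracing the retrieval logic, the sketch below spells out what vector_search computes, using plain numpy in place of scikit-learn's cosine_similarity. The query string is invented for illustration, and the two files are assumed to sit next to the script, as in app.py:

import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = np.load("netflix_embeddings.npy")            # shape: (num_titles, 384)

query_vec = model.encode("feel-good baking competition")  # shape: (384,)

# cosine similarity: dot(a, b) / (|a| * |b|) for every row of the matrix
scores = embeddings @ query_vec / (
    np.linalg.norm(embeddings, axis=1) * np.linalg.norm(query_vec)
)
top3 = scores.argsort()[-3:][::-1]  # positions of the 3 most similar titles
print(top3, scores[top3])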
netflix_embeddings.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:07256cdb32b19dd08130152df55edbeb2da75211ba101fe4305020679bf1e225
size 13527680
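Because the array is stored through Git LFS, only this pointer file lives in the repository; the hash and size identify the real object. A hypothetical way to confirm that a downloaded copy matches the pointer:

import hashlib
import os

path = "netflix_embeddings.npy"  # assumed local download location
digest = hashlib.sha256(open(path, "rb").read()).hexdigest()
print(digest == "07256cdb32b19dd08130152df55edbeb2da75211ba101fe4305020679bf1e225")
print(os.path.getsize(path) == 13527680)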
netflix_metadata.csv
ADDED
The diff for this file is too large to render; see the raw diff.
requirements.txt
ADDED
@@ -0,0 +1,159 @@
accelerate==1.1.1
aiofiles==23.2.1
aiohappyeyeballs==2.4.3
aiohttp==3.10.10
aiosignal==1.3.1
annotated-types==0.7.0
anyio==4.6.2.post1
attrs==24.2.0
audioread==3.0.1
certifi==2024.8.30
cffi==1.17.1
charset-normalizer==3.4.0
click==8.1.7
contourpy==1.3.0
cycler==0.12.1
datasets==3.1.0
decorator==5.1.1
dill==0.3.8
fastapi==0.115.4
ffmpy==0.4.0
filelock==3.16.1
fonttools==4.54.1
frozenlist==1.5.0
fsspec==2024.9.0
gradio==5.5.0
gradio_client==1.4.2
h11==0.14.0
httpcore==1.0.6
httpx==0.27.2
huggingface-hub==0.26.2
idna==3.10
Jinja2==3.1.4
joblib==1.4.2
kiwisolver==1.4.7
lazy_loader==0.4
librosa==0.10.2.post1
llvmlite==0.43.0
markdown-it-py==3.0.0
MarkupSafe==2.1.5
matplotlib==3.9.2
mdurl==0.1.2
mpmath==1.3.0
msgpack==1.1.0
multidict==6.1.0
multiprocess==0.70.16
networkx==3.4.2
numba==0.60.0
numpy==2.0.2
orjson==3.10.11
packaging==24.1
pandas==2.2.3
pillow==11.0.0
platformdirs==4.3.6
pooch==1.8.2
propcache==0.2.0
psutil==6.1.0
pyarrow==18.0.0
pycparser==2.22
pydantic==2.9.2
pydantic_core==2.23.4
pydub==0.25.1
Pygments==2.18.0
pyparsing==3.2.0
python-dateutil==2.9.0.post0
python-multipart==0.0.12
pytz==2024.2
PyYAML==6.0.2
regex==2024.11.6
requests==2.32.3
rich==13.9.4
ruff==0.7.2
safehttpx==0.1.1
safetensors==0.4.5
scikit-learn==1.5.2
scipy==1.14.1
semantic-version==2.10.0
sentence_transformers
shellingham==1.5.4
six==1.16.0
sniffio==1.3.1
soundfile==0.12.1
soxr==0.5.0.post1
starlette==0.41.2
sympy==1.13.1
threadpoolctl==3.5.0
tokenizers==0.20.3
tomlkit==0.12.0
torch==2.5.1
tqdm==4.67.0
transformers==4.46.2
typer==0.12.5
typing_extensions==4.12.2
tzdata==2024.2
urllib3==2.2.3
uvicorn==0.32.0
websockets==12.0
xxhash==3.5.0
yarl==1.17.1