File size: 9,210 Bytes
b5a06b6 f863d36 0245eb1 b5a06b6 8fafcd4 b5a06b6 258beac b5a06b6 5422f92 b5a06b6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 |
import pandas as pd
import numpy as np
import subprocess
import sys
def install_package(package):
    """Install ``package`` with pip, using the interpreter running this script.

    The call blocks until pip finishes. ``subprocess.check_call`` raises
    ``subprocess.CalledProcessError`` when pip exits with a non-zero status.
    """
    # Invoke pip as a module of the current interpreter so the package lands
    # in the same environment this script is executing in.
    pip_command = [sys.executable, "-m", "pip", "install", package]
    subprocess.check_call(pip_command)
# Install scikit-learn at start-up, before the sklearn import that follows.
install_package('scikit-learn')
from sklearn.metrics.pairwise import cosine_similarity
from typing_extensions import Doc
import gradio as gr
# Course catalogue. read_csv without index_col yields a default 0..n-1 index.
df = pd.read_csv('dataframe.csv')
# Course numbers are compared against text typed into the UI, so keep them as
# strings rather than whatever dtype the CSV parser inferred.
df['Course Number'] = df['Course Number'].astype(str)

# Precomputed document vectors, one CSV per model, loaded as plain NumPy arrays.
# NOTE(review): row i of each matrix is assumed to describe row i of ``df`` --
# confirm against the script that exported these files.
tfidf_matrix = pd.read_csv('tfidf_matrix.csv', header=None).to_numpy()
word2vec_matrix = pd.read_csv('word2vecmatrix.csv', header=None).to_numpy()
sbert1_matrix = pd.read_csv('sentencetransformer1.csv', header=None).to_numpy()
sbert2_matrix = pd.read_csv('sentencetransformer2.csv', header=None).to_numpy()
def course_recommendation(model, course_subject_code, course_number, whether_not_lower_level=False, whether_only_sameorlower_level = False, whether_not_same_subject=False, whether_only_same_subject=False, recomendations_number = 5):
    """Recommend the courses whose description vectors are closest to a course.

    Args:
        model: Which precomputed matrix to use: "tf-idf", "word2vec",
            "sbert1" or "sbert2".
        course_subject_code: Subject code of the input course, matched
            against the 'Course Subject Code' column of ``df``.
        course_number: Number of the input course. It is converted to a
            stripped string because 'Course Number' is stored as text.
        whether_not_lower_level: Keep only courses at the input course's
            level or higher.
        whether_only_sameorlower_level: Keep only courses at the input
            course's level or lower.
        whether_not_same_subject: Drop courses sharing the input subject code.
        whether_only_same_subject: Keep only courses sharing the input
            subject code.
        recomendations_number: Maximum number of recommendations to return.

    Returns:
        A DataFrame with columns 'Similar Rank', 'Course Code',
        'Course Title' and 'Course Description Text'. Row 0 (rank 0) is the
        input course; the following rows are the recommendations in
        decreasing cosine similarity. When the request cannot be served
        (unknown course, contradictory filters, unrecognised course level
        with a level filter active) a one-row DataFrame with a single
        'Message' column is returned instead.

    Raises:
        ValueError: If ``model`` is not one of the supported model names.
    """
    display_columns = ['Course Code', 'Course Title', 'Course Description Text']
    level_order = ['100-level', '200-level', '300-level', '400-level', 'Graduate level']
    matrices = {
        "tf-idf": tfidf_matrix,
        "word2vec": word2vec_matrix,
        "sbert1": sbert1_matrix,
        "sbert2": sbert2_matrix,
    }

    # 'Course Number' holds strings, so an int or a padded textbox value would
    # otherwise never match.
    course_number = str(course_number).strip()

    subjects = df['Course Subject Code']
    is_input_course = ((subjects == course_subject_code) & (df['Course Number'] == course_number)).to_numpy()

    # Check if the course exists in the dataframe
    if not is_input_course.any():
        return pd.DataFrame({'Message': ["The course you input does not exist in this semester or we do not have enough course description information about it. Please try another course. "]})
    # Each pair of checkboxes is mutually exclusive.
    if (whether_not_lower_level and whether_only_sameorlower_level) or (whether_not_same_subject and whether_only_same_subject):
        return pd.DataFrame({'Message': ["There seems to be a conflict in the filtering logic. Please double-check the checkboxes for filtering carefully."]})

    try:
        docmatrix = matrices[model]
    except KeyError:
        raise ValueError(f"Unknown model {model!r}; expected one of {sorted(matrices)}") from None

    # Work with row positions throughout so the matrix rows and dataframe rows
    # are addressed the same way. If the course is listed more than once, the
    # first listing is the reference row.
    course_position = int(np.flatnonzero(is_input_course)[0])

    # Treat "First-year Student Seminar" as "100-level" for the input course
    # AND for the candidates, so seminars are not silently dropped by the
    # level filters.
    levels = df['Course Level'].replace("First-year Student Seminar", "100-level")
    course_level = levels.iloc[course_position]

    # Never recommend the input course (or a duplicate listing of it) to itself.
    keep = ~is_input_course
    if whether_not_same_subject:
        keep &= (subjects != course_subject_code).to_numpy()
    if whether_only_same_subject:
        keep &= (subjects == course_subject_code).to_numpy()
    if whether_not_lower_level or whether_only_sameorlower_level:
        if course_level not in level_order:
            return pd.DataFrame({'Message': ["The level of the course you input is not recognized, so the level filters cannot be applied. Please uncheck the level filters and try again."]})
        level_rank = level_order.index(course_level)
        if whether_not_lower_level:
            allowed_levels = level_order[level_rank:]  # current and higher levels
        else:
            allowed_levels = level_order[:level_rank + 1]  # current and lower levels
        keep &= levels.isin(allowed_levels).to_numpy()

    candidate_positions = np.flatnonzero(keep)
    top_n = max(int(recomendations_number), 0)

    if candidate_positions.size and top_n:
        course_vector = docmatrix[course_position].reshape(1, -1)
        similarities = cosine_similarity(docmatrix[candidate_positions], course_vector).ravel()
        # Stable sort keeps tied courses in catalogue order, so results are
        # reproducible between calls.
        best_first = np.argsort(-similarities, kind="stable")[:top_n]
        ranked_positions = candidate_positions[best_first]
    else:
        # Nothing survived the filters (or zero results were requested):
        # return only the input course instead of crashing.
        ranked_positions = candidate_positions[:0]

    # Input course first (rank 0), followed by the recommendations.
    row_positions = np.concatenate(([course_position], ranked_positions))
    result_df = df.iloc[row_positions][display_columns].reset_index(drop=True)
    result_df.insert(0, 'Similar Rank', range(len(result_df)))
    return result_df
import gradio as gr
import pandas as pd
from functools import partial
def highlight_first_row(s, props=''):
    """Return one CSS string per cell of ``s`` for row-wise Styler.apply.

    Every cell receives ``props`` when the row's label (``s.name``) is 0;
    otherwise every cell receives an empty string (no styling).
    """
    cell_style = props if s.name == 0 else ''
    return [cell_style] * len(s)
def recommend(model_name, course_subject_code, course_number, exclude_lower_levels, exclude_upper_levels, exclude_same_subject, exclude_other_subject, recomendations_number):
    """Run ``course_recommendation`` and return its table as a pandas Styler.

    The four ``exclude_*`` flags are forwarded, in order, to the
    ``whether_not_lower_level``, ``whether_only_sameorlower_level``,
    ``whether_not_same_subject`` and ``whether_only_same_subject`` parameters
    of ``course_recommendation``. The row labelled 0 of the returned table is
    given an orange background.
    """
    recommendations = course_recommendation(
        model_name,
        course_subject_code,
        course_number,
        whether_not_lower_level=exclude_lower_levels,
        whether_only_sameorlower_level=exclude_upper_levels,
        whether_not_same_subject=exclude_same_subject,
        whether_only_same_subject=exclude_other_subject,
        recomendations_number=recomendations_number,
    )
    styler = recommendations.style
    return styler.apply(highlight_first_row, props='background-color: orange;', axis=1)
def main():
    """Build the Gradio Blocks interface and return it without launching.

    The page has a header, a row of course inputs and filter checkboxes, a
    single "Recommend" button, and one tab per model plus an ABOUT tab. The
    button is wired once per model tab, so a single click fills the results
    table of every model.
    """
    # (tab title, model key passed to ``recommend``), in display order.
    model_tabs = (
        ("Word2Vec Model", "word2vec"),
        ("TF-IDF Model", "tf-idf"),
        ("SBERT Model1", "sbert1"),
        ("SBERT Model2", "sbert2"),
    )
    project_blurb = "This project provides course recommendations using different NLP models. Select a model and enter course details to see recommendations."
    header_lines = (
        "# Course Recommendation System - For UIUC fall 2024 semester",
        project_blurb,
        "Want to know how these models work? Check out the **ABOUT** tab:)",
        "*Here are some courses that you may want try: STAT 107 - Data Science Discovery, SOC 100 - Intro to Sociology. Any course that you are interested!!",
    )

    blue_theme = gr.themes.Default(primary_hue="blue")
    with gr.Blocks(theme=blue_theme) as demo:
        for header_line in header_lines:
            gr.Markdown(header_line)

        with gr.Row():
            # Left column: which course to explore and how many results.
            with gr.Column(scale=2):
                gr.Markdown("*Choose the course you want to explore:*")
                with gr.Row():
                    subject_choices = sorted(df['Course Subject Code'].unique())
                    subject = gr.Dropdown(choices=subject_choices, label="Course Subject Code")
                    number = gr.Textbox(label="Course Number")
                recommendation_no = gr.Slider(3, 100, step=1, label="Recommendation Number", info="Choose between 3 and 100")
            # Right column: optional level / subject filters.
            with gr.Column(scale=1):
                gr.Markdown("*You may want to add a filter:*")
                with gr.Row():
                    exclude_lower = gr.Checkbox(label="Only Upper Level", info="Same level and higher level courses will be shown")
                    exclude_upper = gr.Checkbox(label="Only Lower Level", info="Same level and lower level courses will be shown")
                with gr.Row():
                    exclude_same = gr.Checkbox(label="Only Different Subject")
                    exclude_other = gr.Checkbox(label="Only Same Subject")

        tf_idf_submit = gr.Button("Recommend", variant="primary")

        with gr.Tabs():
            # Setting up the interface for each model
            for tab_title, model_key in model_tabs:
                with gr.Tab(tab_title):
                    results_table = gr.Dataframe(wrap=True, column_widths=["10%", "10%", "20%", "63%"])
                    tf_idf_submit.click(
                        fn=partial(recommend, model_key),
                        inputs=[subject, number, exclude_lower, exclude_upper, exclude_same, exclude_other, recommendation_no],
                        outputs=results_table,
                    )
            with gr.Tab("ABOUT"):
                gr.Markdown(project_blurb)
    return demo
# Build and launch the interface only when this file is run as a script.
if __name__ == "__main__":
    demo = main()
    demo.launch(share=True)
|