File size: 9,210 Bytes
b5a06b6
 
f863d36
 
0245eb1
 
 
 
b5a06b6
 
 
 
8fafcd4
b5a06b6
 
 
 
 
 
 
 
258beac
b5a06b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5422f92
b5a06b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import pandas as pd
import numpy as np
import subprocess
import sys
def install_package(package):
    """Install ``package`` with pip, run through the current interpreter.

    The command is ``<sys.executable> -m pip install <package>``, executed
    with ``subprocess.check_call`` (which raises ``CalledProcessError`` when
    the command exits with a non-zero status).
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    subprocess.check_call(pip_command)
# Install scikit-learn at import time, before the sklearn import that follows.
install_package('scikit-learn')
from sklearn.metrics.pairwise import cosine_similarity
from typing_extensions import Doc
import gradio as gr
# Course catalogue. Course numbers are stored as strings because they are
# compared against the text entered in the "Course Number" textbox.
df = pd.read_csv('dataframe.csv')
df['Course Number'] = df['Course Number'].astype(str)

# One document matrix per model, loaded without a header row.
# course_recommendation indexes these matrices with row positions taken from
# df, so each matrix is assumed to be row-aligned with df -- TODO confirm.
tfidf_matrix = pd.read_csv('tfidf_matrix.csv', header=None).values
word2vec_matrix = pd.read_csv('word2vecmatrix.csv', header=None).values
sbert1_matrix = pd.read_csv('sentencetransformer1.csv', header=None).values
sbert2_matrix = pd.read_csv('sentencetransformer2.csv', header=None).values

def course_recommendation(model, course_subject_code, course_number, whether_not_lower_level=False, whether_only_sameorlower_level = False, whether_not_same_subject=False, whether_only_same_subject=False, recomendations_number = 5):
    """Recommend the courses most similar to one input course.

    Similarity is the cosine similarity between the input course's row in the
    selected model's document matrix and the rows of every candidate course
    that survives the optional filters. The input course itself is never a
    candidate.

    Args:
        model: One of "tf-idf", "word2vec", "sbert1" or "sbert2".
        course_subject_code: Matched against df['Course Subject Code'].
        course_number: Matched against df['Course Number'] after being
            converted to a string and stripped of surrounding whitespace.
        whether_not_lower_level: Keep only candidates at the input course's
            level or higher.
        whether_only_sameorlower_level: Keep only candidates at the input
            course's level or lower.
        whether_not_same_subject: Drop candidates with the input course's
            subject code.
        whether_only_same_subject: Keep only candidates with the input
            course's subject code.
        recomendations_number: Maximum number of recommendations to return.

    Returns:
        On success, a DataFrame with columns 'Similar Rank', 'Course Code',
        'Course Title' and 'Course Description Text'. Row 0 (rank 0) is the
        input course, followed by the recommendations in decreasing
        similarity; there may be fewer than requested (possibly none) when
        the filters leave too few candidates.
        When the input cannot be processed (unknown course, contradictory
        filters, or a level filter on a course whose level is not
        recognised), a one-column 'Message' DataFrame explaining why.

    Raises:
        ValueError: If ``model`` is not one of the supported model names.
    """
    level_order = ['100-level', '200-level', '300-level', '400-level', 'Graduate level']
    seminar_level = "First-year Student Seminar"
    output_columns = ['Course Code', 'Course Title', 'Course Description Text']
    conflict_message = "There seems to be a conflict in the filtering logic. Please double-check the checkboxes for filtering carefully."

    # The number comes from a free-text box; tolerate stray whitespace and
    # non-string input (df['Course Number'] holds strings).
    course_number = str(course_number).strip()

    # Check if the course exists in the dataframe
    same_subject = (df['Course Subject Code'] == course_subject_code).to_numpy()
    is_input_course = same_subject & (df['Course Number'] == course_number).to_numpy()
    if not is_input_course.any():
        return pd.DataFrame({'Message': ["The course you input does not exist in this semester or we do not have enough course description information about it. Please try another course. "]})

    if whether_not_lower_level and whether_only_sameorlower_level:
        return pd.DataFrame({'Message': [conflict_message]})
    if whether_not_same_subject and whether_only_same_subject:
        return pd.DataFrame({'Message': [conflict_message]})

    matrices = {
        "tf-idf": tfidf_matrix,
        "word2vec": word2vec_matrix,
        "sbert1": sbert1_matrix,
        "sbert2": sbert2_matrix,
    }
    if model not in matrices:
        raise ValueError(f"Unknown model {model!r}; expected one of {sorted(matrices)}.")
    docmatrix = matrices[model]

    # Row position (not index label) of the first matching course; the
    # document matrices are indexed by position.
    course_pos = int(np.flatnonzero(is_input_course)[0])
    course_level = df['Course Level'].iloc[course_pos]
    # Normalize "First-year Student Seminar" to "100-level"
    if course_level == seminar_level:
        course_level = '100-level'

    # Start from every course except the input course, then narrow down.
    # Excluding the input course here (rather than only when it happens to
    # rank first) keeps it out of the results even when another course ties
    # with it.
    keep = ~is_input_course
    if whether_not_same_subject:
        keep &= ~same_subject
    if whether_only_same_subject:
        keep &= same_subject

    if whether_not_lower_level or whether_only_sameorlower_level:
        if course_level not in level_order:
            return pd.DataFrame({'Message': ["We cannot apply level filters to this course because its course level is not recognized. Please uncheck the level filters and try again."]})
        level_rank = level_order.index(course_level)
        if whether_not_lower_level:
            allowed_levels = level_order[level_rank:]  # Current and higher levels
        else:
            allowed_levels = level_order[:level_rank + 1]  # Current and lower levels
        # Apply the same seminar -> 100-level normalization to the candidates
        # so first-year seminars are not silently dropped by every level filter.
        candidate_levels = df['Course Level'].replace(seminar_level, '100-level')
        keep &= candidate_levels.isin(allowed_levels).to_numpy()

    top_n = max(int(recomendations_number), 0)
    candidate_positions = np.flatnonzero(keep)
    if candidate_positions.size and top_n:
        course_vector = docmatrix[course_pos].reshape(1, -1)
        similarities = cosine_similarity(docmatrix[candidate_positions], course_vector).ravel()
        # Stable sort: ties keep catalogue order, so results are reproducible.
        best = np.argsort(-similarities, kind="stable")[:top_n]
        recommended_positions = candidate_positions[best]
    else:
        # Nothing to rank: return only the input course row.
        recommended_positions = candidate_positions[:0]

    # Input course first (rank 0), then the recommendations in rank order.
    result_positions = np.concatenate(([course_pos], recommended_positions))
    result_df = df.iloc[result_positions][output_columns].reset_index(drop=True)
    result_df.insert(0, 'Similar Rank', range(len(result_df)))
    return result_df

import gradio as gr
import pandas as pd
from functools import partial

def highlight_first_row(s, props=''):
    """Return one CSS string per element of ``s`` for ``Styler.apply``.

    Every element gets ``props`` when ``s.name == 0`` and an empty string
    otherwise.
    """
    width = len(s)
    if not width:
        return []
    cell_style = props if s.name == 0 else ''
    return [cell_style] * width

def recommend(model_name, course_subject_code, course_number, exclude_lower_levels, exclude_upper_levels, exclude_same_subject, exclude_other_subject, recomendations_number):
    """Run ``course_recommendation`` and return its table styled for display.

    All arguments are forwarded positionally, in order. The four ``exclude_*``
    flags therefore land on ``whether_not_lower_level``,
    ``whether_only_sameorlower_level``, ``whether_not_same_subject`` and
    ``whether_only_same_subject`` respectively.

    Returns:
        The result of ``DataFrame.style.apply`` with ``highlight_first_row``
        applied row-wise, which gives the row labelled 0 an orange background.
    """
    recommendations = course_recommendation(
        model_name,
        course_subject_code,
        course_number,
        exclude_lower_levels,
        exclude_upper_levels,
        exclude_same_subject,
        exclude_other_subject,
        recomendations_number,
    )
    return recommendations.style.apply(
        highlight_first_row,
        props='background-color: orange;',
        axis=1,
    )



def main():
    """Build and return the Gradio Blocks app for the course recommender.

    The layout has a header, a row with the course inputs and the filter
    checkboxes, and one tab per model plus an ABOUT tab. The single
    "Recommend" button gets one click handler per model tab, each writing to
    that tab's own Dataframe.
    """
    # (tab label, model key passed to recommend), in display order.
    model_tabs = (
        ("Word2Vec Model", "word2vec"),
        ("TF-IDF Model", "tf-idf"),
        ("SBERT Model1", "sbert1"),
        ("SBERT Model2", "sbert2"),
    )

    with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
        gr.Markdown("# Course Recommendation System - For UIUC fall 2024 semester")
        gr.Markdown("This project provides course recommendations using different NLP models. Select a model and enter course details to see recommendations.")
        gr.Markdown("Want to know how these models work? Check out the **ABOUT** tab:)")
        gr.Markdown("*Here are some courses that you may want try: STAT 107 - Data Science Discovery, SOC 100 - Intro to Sociology. Any course that you are interested!!")

        with gr.Row():
            # Left column: which course to explore and how many results.
            with gr.Column(scale=2):
                gr.Markdown("*Choose the course you want to explore:*" )
                with gr.Row():
                    subject_codes = sorted(df['Course Subject Code'].unique())
                    subject = gr.Dropdown(choices=subject_codes, label="Course Subject Code")
                    number = gr.Textbox(label="Course Number")
                recommendation_no = gr.Slider(3, 100, step = 1, label="Recommendation Number", info="Choose between 3 and 100")

            # Right column: optional filters and the submit button.
            with gr.Column(scale=1):
                gr.Markdown("*You may want to add a filter:*")
                with gr.Row():
                    exclude_lower = gr.Checkbox(label="Only Upper Level", info = "Same level and higher level courses will be shown")
                    exclude_upper = gr.Checkbox(label="Only Lower Level", info = "Same level and lower level courses will be shown")
                with gr.Row():
                    exclude_same = gr.Checkbox(label="Only Different Subject")
                    exclude_other = gr.Checkbox(label="Only Same Subject")
                submit_button = gr.Button("Recommend", variant="primary")

        with gr.Tabs():
            # Setting up the interface for each model
            for tab_label, model_key in model_tabs:
                with gr.Tab(tab_label):
                    results_table = gr.Dataframe(wrap = True, column_widths = ["10%","10%", "20%", "63%"])
                    submit_button.click(
                        fn=partial(recommend, model_key),
                        inputs=[subject, number, exclude_lower, exclude_upper, exclude_same, exclude_other, recommendation_no],
                        outputs=results_table,
                    )
            with gr.Tab("ABOUT"):
                gr.Markdown("This project provides course recommendations using different NLP models. Select a model and enter course details to see recommendations.")
    return demo

# Build the interface and launch it when this file is run as a script.
if __name__ == "__main__":
    app = main()
    app.launch(share=True)