Spaces:
Sleeping
Sleeping
| import requests | |
| import random | |
| import time | |
| import pandas as pd | |
| import gradio as gr | |
| import numpy as np | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| from transformers import pipeline | |
| import shap | |
def read1(lang, num_selected_former):
    """Pick a random usable review from the data file for the guessing game.

    The data file is read as pairs of lines; the first line of each pair is
    evaluated to a mapping whose 'text' entry is the review. A review is
    usable when it has more than ``min_len`` space-separated words (4 for
    English, 2 otherwise) and contains neither a backslash nor '//'.

    Args:
        lang: 'en' selects 'data1_en.txt'; any other value selects
            'data1_nl_10.txt'.
        num_selected_former: Index of the previously shown record. It is
            only returned again when no other usable record exists.

    Returns:
        A tuple ``(res, lang, index_selected)`` where ``res`` is
        ``{"original": review, "interpretation": [(word, 0), ...]}`` with one
        pair per space-separated word, and ``index_selected`` is the integer
        record index that was chosen.

    Raises:
        ValueError: If the file contains no usable review.
    """
    fname = 'data1_en.txt' if lang in ['en'] else 'data1_nl_10.txt'
    with open(fname, encoding='utf-8') as f:
        content = f.readlines()

    min_len = 4 if lang == 'en' else 2
    # Integer division: random index helpers reject float bounds on
    # Python 3.12+, and list indices must be ints anyway.
    num_records = len(content) // 2

    index_selected = None
    review = None
    # Visiting the records in a shuffled order keeps the pick uniform among
    # usable records while guaranteeing the search terminates.
    for candidate in random.sample(range(num_records), num_records):
        # NOTE(review): eval() executes whatever the data file contains; this
        # is only acceptable while the file is trusted, project-owned data.
        candidate_text = eval(content[candidate * 2])['text']
        if (len(candidate_text.split(' ')) <= min_len
                or '\\' in candidate_text
                or '//' in candidate_text):
            continue
        index_selected, review = candidate, candidate_text
        if candidate != num_selected_former:
            break
        # The previous record is kept only as a fallback; keep searching.

    if index_selected is None:
        raise ValueError("no usable review found in " + fname)

    res_tmp = [(word, 0) for word in review.split(' ')]
    res = {"original": review, "interpretation": res_tmp}
    return res, lang, index_selected
def read1_written(lang):
    """Return the text of a random usable review from the data file.

    The data file is read as pairs of lines; the first line of each pair is
    evaluated to a mapping whose 'text' entry is the review. A review is
    usable when it has more than ``min_len`` space-separated words (4 for
    English, 2 otherwise) and contains neither a backslash nor '//'.

    Args:
        lang: 'en' selects 'data1_en.txt'; any other value selects
            'data1_nl_10.txt'.

    Returns:
        The review text (``str``) of the chosen record.

    Raises:
        ValueError: If the file contains no usable review.
    """
    fname = 'data1_en.txt' if lang in ['en'] else 'data1_nl_10.txt'
    with open(fname, encoding='utf-8') as f:
        content = f.readlines()

    min_len = 4 if lang == 'en' else 2
    # Integer division: random index helpers reject float bounds on
    # Python 3.12+, and list indices must be ints anyway.
    num_records = len(content) // 2

    # A shuffled pass is uniform among usable records and always terminates,
    # unlike retrying random indices until one happens to qualify.
    for index_selected in random.sample(range(num_records), num_records):
        # NOTE(review): eval() executes whatever the data file contains; this
        # is only acceptable while the file is trusted, project-owned data.
        review = eval(content[index_selected * 2])['text']
        too_short = len(review.split(' ')) <= min_len
        if not (too_short or '\\' in review or '//' in review):
            return review

    raise ValueError("no usable review found in " + fname)
def _func1_star_classifier():
    """Return the star-rating sentiment pipeline, building it on first use.

    Loading the tokenizer and model is slow, so the pipeline is built once
    and kept on this function object for later calls.
    """
    classifier = getattr(_func1_star_classifier, "_cached", None)
    if classifier is None:
        tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
        model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
        device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
        print(device)
        classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer, device=device)
        _func1_star_classifier._cached = classifier
    return classifier


def func1(lang_selected, num_selected, human_predict, num1, num2, user_important):
    """Score one round of the human-vs-AI sentiment guessing game.

    The selected record is read from the data file, the AI rating is taken
    from the star-rating classifier (mapped onto 0-10), and the round is
    judged. Note that the announced "correct answer" and the returned AI
    score are deliberately adjusted (partly at random) depending on how the
    human and AI guesses compare, so they are display values rather than the
    raw label and raw model output.

    Args:
        lang_selected: 'en' selects 'data1_en.txt' (label scaled by 2.5);
            any other value selects 'data1_nl_10.txt' (label scaled by 10).
        num_selected: Index of the record to judge; line ``num_selected * 2``
            of the data file is evaluated.
        human_predict: The player's numeric guess (compared against values on
            the 0-10 scale used here).
        num1: Running human score; incremented when the human wins the round.
        num2: Running AI score; incremented when the AI wins the round.
        user_important: Text the player marked as important; an empty string
            means nothing was marked.

    Returns:
        A tuple ``(ai_predict, chatbot, num1, num2, tot_scores)`` where
        ``chatbot`` is a one-element list holding a (verdict, follow-up)
        message pair and ``tot_scores`` is a markdown/HTML score banner.
    """
    chatbot = []
    if lang_selected in ['en']:
        fname = 'data1_en.txt'
    else:
        fname = 'data1_nl_10.txt'
    # Explicit UTF-8 so the reviews decode the same way on every platform.
    with open(fname, encoding='utf-8') as f:
        content = f.readlines()
    # NOTE(review): eval() executes whatever the data file contains; this is
    # only acceptable while the file is trusted, project-owned data.
    text = eval(content[int(num_selected*2)])
    if lang_selected in ['en']:
        golden_label = text['label'] * 2.5
    else:
        golden_label = text['label'] * 10

    output = _func1_star_classifier()([text['text']])
    star2num = {
        "5 stars": 10,
        "4 stars": 7.5,
        "3 stars": 5,
        "2 stars": 2.5,
        "1 star": 0,
    }
    print(output)
    out = output[0]
    ai_predict = star2num[out['label']]

    user_select = "You focused on "
    if user_important == "":
        user_select += "nothing. Interesting! "
    else:
        user_select += "'" + user_important + "'. "
    user_select += "Wanna see how the AI made the guess? Click here. ⬅️"

    if lang_selected in ['en']:
        if ai_predict == golden_label:
            if abs(human_predict - golden_label) <= 2:  # Both correct
                golden_label = int((human_predict + ai_predict) / 2)
                ai_predict = golden_label
                chatbot.append(("The correct answer is " + str(golden_label) + ". Congratulations! 🎉 Both of you get the correct answer!", user_select))
                num1 += 1
                num2 += 1
            else:
                # Jitter the announced answer by half steps, staying in 0-10.
                golden_label += random.randint(-1, 1) * 0.5
                while golden_label > 10 or golden_label < 0:
                    golden_label += random.randint(-1, 1) * 0.5
                golden_label = int(golden_label)
                ai_predict = golden_label
                chatbot.append(("The correct answer is " + str(golden_label) + ". Sorry.. AI wins in this round.", user_select))
                num2 += 1
        else:
            if abs(human_predict - golden_label) < abs(ai_predict - golden_label):
                if abs(human_predict - golden_label) < 2:
                    golden_label = int((golden_label + human_predict) / 2)
                    ai_predict += random.randint(-1, 1) * 0.5
                    ai_predict = int(ai_predict)
                    chatbot.append(("The correct answer is " + str(golden_label) + ". Great! 🎉 You are closer to the answer and better than AI!", user_select))
                    num1 += 1
                else:
                    golden_label = int(golden_label)
                    ai_predict = int(ai_predict)
                    chatbot.append(("The correct answer is " + str(golden_label) + ". Both wrong... Maybe next time you'll win!", user_select))
            else:
                golden_label = int(golden_label)
                ai_predict = int(ai_predict)
                chatbot.append(("The correct answer is " + str(golden_label) + ". Sorry.. No one gets the correct answer. But nice try! 😉", user_select))
    else:
        if golden_label == 10:
            if ai_predict > 5 and human_predict > 5:
                golden_label = int((human_predict + ai_predict)/2) + random.randint(-1, 1)
                while golden_label > 10:
                    golden_label = int((human_predict + ai_predict)/2) + random.randint(-1, 1)
                ai_predict = int((golden_label + ai_predict) / 2)
                chatbot.append(("The correct answer is " + str(golden_label) + ". Congratulations! 🎉 Both of you get the correct answer!", user_select))
                num1 += 1
                num2 += 1
            elif ai_predict > 5 and human_predict <= 5:
                golden_label -= random.randint(0, 3)
                ai_predict = 7 + random.randint(-1, 2)
                chatbot.append(("The correct answer is " + str(golden_label) + ". Sorry.. AI wins in this round.", user_select))
                num2 += 1
            elif ai_predict <= 5 and human_predict > 5:
                golden_label = human_predict + random.randint(-1, 1)
                while golden_label > 10:
                    golden_label = human_predict + random.randint(-1, 1)
                ai_predict = int(ai_predict)
                golden_label = int(golden_label)
                chatbot.append(("The correct answer is " + str(golden_label) + ". Great! 🎉 You are close to the answer and better than AI!", user_select))
                num1 += 1
            else:
                golden_label = int(golden_label)
                ai_predict = int(ai_predict)
                chatbot.append(("The correct answer is " + str(golden_label) + ". Sorry... No one gets the correct answer. But nice try! 😉", user_select))
        else:
            if ai_predict < 5 and human_predict < 5:
                golden_label = int((human_predict + ai_predict)/2) + random.randint(-1, 1)
                while golden_label < 0:
                    golden_label = int((human_predict + ai_predict)/2) + random.randint(-1, 1)
                ai_predict = int((golden_label + ai_predict) / 2)
                chatbot.append(("The correct answer is " + str(golden_label) + ". Congratulations! 🎉 Both of you get the correct answer!", user_select))
                num1 += 1
                num2 += 1
            elif ai_predict < 5 and human_predict >= 5:
                golden_label += random.randint(0, 3)
                ai_predict = 3 + random.randint(-2, 1)
                chatbot.append(("The correct answer is " + str(golden_label) + ". Sorry.. AI wins in this round.", user_select))
                num2 += 1
            elif ai_predict >= 5 and human_predict < 5:
                golden_label = human_predict + random.randint(-1, 1)
                while golden_label < 0:
                    golden_label = human_predict + random.randint(-1, 1)
                ai_predict = int(ai_predict)
                chatbot.append(("The correct answer is " + str(golden_label) + ". Great! 🎉 You are close to the answer and better than AI!", user_select))
                num1 += 1
            else:
                golden_label = int(golden_label)
                ai_predict = int(ai_predict)
                chatbot.append(("The correct answer is " + str(golden_label) + ". Sorry... No one gets the correct answer. But nice try! 😉", user_select))

    tot_scores = ''' #### <p style="text-align: center;"> Today's Scores:     🤖 Machine   <span style="color: red;">''' + str(int(num2)) + '''</span>   VS   <span style="color: red;">''' + str(int(num1)) + '''</span>   Human 🙋 </p>'''
    return ai_predict, chatbot, num1, num2, tot_scores
def _interpre1_merge_subwords(interpretation, is_subword):
    """Join tokens flagged as word continuations and average their scores.

    Args:
        interpretation: Sequence of (token_text, score) entries.
        is_subword: Flags aligned with ``interpretation``; True means the
            token continues the previous one.

    Returns:
        A list of (text, score) pairs. Merged entries carry the mean score of
        their pieces. The final input entry is never emitted on its own; the
        list always ends with ``('', 0.0)``.
    """
    def continues_word(position):
        # Bounds-checked so a length mismatch between the two sequences
        # degrades to "not a sub-word" instead of raising IndexError.
        return (position < len(interpretation)
                and position < len(is_subword)
                and is_subword[position])

    combined = []
    index_tmp = 0
    while index_tmp < (len(interpretation) - 1):
        text_combined = interpretation[index_tmp][0]
        score_combined = interpretation[index_tmp][1]
        length = 1
        while continues_word(index_tmp + length):
            text_combined += interpretation[index_tmp + length][0]
            score_combined += interpretation[index_tmp + length][1]
            length += 1
        if length == 1:
            # Stand-alone token: keep the original entry untouched.
            combined.append(interpretation[index_tmp])
        else:
            combined.append((text_combined, score_combined / length))
        index_tmp += length
    combined.append(('', 0.0))
    return combined


def interpre1(lang_selected, num_selected):
    """Return the stored per-token interpretation of a record, merged per word.

    The record's text and its stored interpretation are read from the data
    file (lines ``num_selected * 2`` and ``num_selected * 2 + 1``). The text
    is tokenized to obtain character offsets; a token whose start offset
    equals the previous token's end offset is treated as a continuation and
    merged into the preceding entry.

    Args:
        lang_selected: 'en' selects 'data1_en.txt' and the English tokenizer;
            any other value selects 'data1_nl_10.txt' and the Dutch one.
        num_selected: Index of the record to explain.

    Returns:
        ``{"original": text, "interpretation": [(text, score), ...]}``.
    """
    if lang_selected in ['en']:
        fname = 'data1_en.txt'
        tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
    else:
        fname = 'data1_nl_10.txt'
        tokenizer = AutoTokenizer.from_pretrained("DTAI-KULeuven/robbert-v2-dutch-sentiment")
    # Explicit UTF-8 so the reviews decode the same way on every platform.
    with open(fname, encoding='utf-8') as f:
        content = f.readlines()
    # NOTE(review): eval() executes whatever the data file contains; this is
    # only acceptable while the file is trusted, project-owned data.
    text = eval(content[int(num_selected*2)])
    # presumably one (token, score) entry per tokenizer token, special tokens
    # included — TODO confirm against the script that produced the data file
    interpretation = eval(content[int(num_selected*2+1)])

    encodings = tokenizer(text['text'], return_offsets_mapping=True)
    offsets = encodings['offset_mapping']
    print(offsets)
    # The first two positions are never treated as continuations.
    is_subword = [False, False]
    for i in range(2, len(offsets)):
        is_subword.append(offsets[i][0] == offsets[i-1][1])
    print(is_subword)

    interpretation_combined = _interpre1_merge_subwords(interpretation, is_subword)
    print(interpretation_combined)
    res = {"original": text['text'], "interpretation": interpretation_combined}
    return res
# Models built by func1_written, kept so later calls do not reload them.
_FUNC1_WRITTEN_MODELS = {}


def _func1_written_cached(key, factory):
    """Return the object cached under *key*, creating it with *factory* once."""
    if key not in _FUNC1_WRITTEN_MODELS:
        _FUNC1_WRITTEN_MODELS[key] = factory()
    return _FUNC1_WRITTEN_MODELS[key]


def _func1_written_merge_subwords(interpretation, is_subword):
    """Join tokens flagged as word continuations and average their scores.

    Args:
        interpretation: Sequence of (token_text, score) entries.
        is_subword: Flags aligned with ``interpretation``; True means the
            token continues the previous one.

    Returns:
        A list of (text, score) pairs. Merged entries carry the mean score of
        their pieces. The final input entry is never emitted on its own; the
        list always ends with ``('', 0.0)``.
    """
    def continues_word(position):
        # Bounds-checked so a length mismatch between the two sequences
        # degrades to "not a sub-word" instead of raising IndexError.
        return (position < len(interpretation)
                and position < len(is_subword)
                and is_subword[position])

    combined = []
    index_tmp = 0
    while index_tmp < (len(interpretation) - 1):
        text_combined = interpretation[index_tmp][0]
        score_combined = interpretation[index_tmp][1]
        length = 1
        while continues_word(index_tmp + length):
            text_combined += interpretation[index_tmp + length][0]
            score_combined += interpretation[index_tmp + length][1]
            length += 1
        if length == 1:
            # Stand-alone token: keep the original entry untouched.
            combined.append(interpretation[index_tmp])
        else:
            combined.append((text_combined, score_combined / length))
        index_tmp += length
    combined.append(('', 0.0))
    return combined


def func1_written(text_written, human_predict, lang_written):
    """Rate user-written text with the AI and explain the rating per word.

    The star-rating classifier's label is mapped onto 0-10. If that score is
    within 2 of the human score it is truncated to an int; otherwise it is
    nudged by a random -1/0/+1 step (repeated while it falls outside 0-10).
    A SHAP explanation is then computed with a language-specific sentiment
    classifier, and tokens whose start offset equals the previous token's end
    offset are merged into the preceding entry.

    Args:
        text_written: The text entered by the user.
        human_predict: The user's own numeric score for the text.
        lang_written: "Dutch" selects the Dutch sentiment model; any other
            value selects the English one.

    Returns:
        A tuple ``(res, ai_predict, chatbot)`` where ``res`` is
        ``{"original": text_written, "interpretation": [(text, score), ...]}``
        and ``chatbot`` is a one-element list holding a message pair.
    """
    chatbot = []
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    classifier = _func1_written_cached(
        "stars",
        lambda: pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment", device=device),
    )
    output = classifier([text_written])
    star2num = {
        "5 stars": 10,
        "4 stars": 7.5,
        "3 stars": 5,
        "2 stars": 2.5,
        "1 star": 0,
    }
    print(output)
    out = output[0]
    ai_predict = star2num[out['label']]

    if abs(ai_predict - human_predict) <= 2:
        ai_predict = int(ai_predict)
        chatbot.append(("AI gives it a close score! 🎉", "⬅️ Feel free to try another one! This time let’s see if you can trick the AI into giving a wrong rating. ⬅️"))
    else:
        ai_predict += int(random.randint(-1, 1))
        while ai_predict > 10 or ai_predict < 0:
            ai_predict += int(random.randint(-1, 1))
        chatbot.append(("AI thinks in a different way from human. 😉", "⬅️ Feel free to try another one! ⬅️"))

    if lang_written == "Dutch":
        model_name = 'DTAI-KULeuven/robbert-v2-dutch-sentiment'
    else:
        model_name = 'distilbert-base-uncased-finetuned-sst-2-english'
    sentiment_classifier = _func1_written_cached(
        ("explain", model_name),
        lambda: pipeline("text-classification", model=model_name, return_all_scores=True, device=device),
    )
    tokenizer = _func1_written_cached(
        ("tokenizer", model_name),
        lambda: AutoTokenizer.from_pretrained(model_name),
    )

    explainer = shap.Explainer(sentiment_classifier)
    shap_values = explainer([text_written])
    # Pair every token with its SHAP value for output column 1.
    interpretation = list(zip(shap_values.data[0], shap_values.values[0, :, 1]))

    encodings = tokenizer(text_written, return_offsets_mapping=True)
    offsets = encodings['offset_mapping']
    print(offsets)
    # The first two positions are never treated as continuations.
    is_subword = [False, False]
    for i in range(2, len(offsets)):
        is_subword.append(offsets[i][0] == offsets[i-1][1])
    print(is_subword)

    interpretation_combined = _func1_written_merge_subwords(interpretation, is_subword)
    print(interpretation_combined)
    res = {"original": text_written, "interpretation": interpretation_combined}
    print(res)
    return res, ai_predict, chatbot