Spaces:
Sleeping
Sleeping
Update game1.py
Browse files
game1.py
CHANGED
|
@@ -312,6 +312,7 @@ def interpre1(lang_selected, num_selected):
|
|
| 312 |
interpretation_combined.append((text_combined, score_combinded/length))
|
| 313 |
index_tmp += length
|
| 314 |
|
|
|
|
| 315 |
print(interpretation_combined)
|
| 316 |
res = {"original": text['text'], "interpretation": interpretation_combined}
|
| 317 |
# pos = []
|
|
@@ -408,15 +409,49 @@ def func1_written(text_written, human_predict, lang_written):
|
|
| 408 |
|
| 409 |
if lang_written == "Dutch":
|
| 410 |
sentiment_classifier = pipeline("text-classification", model='DTAI-KULeuven/robbert-v2-dutch-sentiment', return_all_scores=True, device=device)
|
|
|
|
| 411 |
else:
|
| 412 |
sentiment_classifier = pipeline("text-classification", model='distilbert-base-uncased-finetuned-sst-2-english', return_all_scores=True, device=device)
|
|
|
|
| 413 |
|
| 414 |
explainer = shap.Explainer(sentiment_classifier)
|
| 415 |
|
| 416 |
shap_values = explainer([text_written])
|
| 417 |
interpretation = list(zip(shap_values.data[0], shap_values.values[0, :, 1]))
|
|
|
|
| 418 |
|
| 419 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 420 |
print(res)
|
| 421 |
|
| 422 |
return res, ai_predict, chatbot
|
|
|
|
| 312 |
interpretation_combined.append((text_combined, score_combinded/length))
|
| 313 |
index_tmp += length
|
| 314 |
|
| 315 |
+
interpretation_combined.append(('', 0.0))
|
| 316 |
print(interpretation_combined)
|
| 317 |
res = {"original": text['text'], "interpretation": interpretation_combined}
|
| 318 |
# pos = []
|
|
|
|
| 409 |
|
| 410 |
if lang_written == "Dutch":
|
| 411 |
sentiment_classifier = pipeline("text-classification", model='DTAI-KULeuven/robbert-v2-dutch-sentiment', return_all_scores=True, device=device)
|
| 412 |
+
tokenizer = AutoTokenizer.from_pretrained("DTAI-KULeuven/robbert-v2-dutch-sentiment")
|
| 413 |
else:
|
| 414 |
sentiment_classifier = pipeline("text-classification", model='distilbert-base-uncased-finetuned-sst-2-english', return_all_scores=True, device=device)
|
| 415 |
+
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
|
| 416 |
|
| 417 |
explainer = shap.Explainer(sentiment_classifier)
|
| 418 |
|
| 419 |
shap_values = explainer([text_written])
|
| 420 |
interpretation = list(zip(shap_values.data[0], shap_values.values[0, :, 1]))
|
| 421 |
+
|
| 422 |
|
| 423 |
+
encodings = tokenizer(text_written, return_offsets_mapping=True)
|
| 424 |
+
|
| 425 |
+
print(encodings['offset_mapping'])
|
| 426 |
+
is_subword = [False, False]
|
| 427 |
+
for i in range(2, len(encodings['offset_mapping'])):
|
| 428 |
+
if encodings['offset_mapping'][i][0] == encodings['offset_mapping'][i-1][1]:
|
| 429 |
+
is_subword.append(True)
|
| 430 |
+
else:
|
| 431 |
+
is_subword.append(False)
|
| 432 |
+
print(is_subword)
|
| 433 |
+
interpretation_combined = []
|
| 434 |
+
|
| 435 |
+
index_tmp = 0
|
| 436 |
+
while index_tmp < (len(interpretation) - 1):
|
| 437 |
+
if not is_subword[index_tmp+1]:
|
| 438 |
+
interpretation_combined.append(interpretation[index_tmp])
|
| 439 |
+
index_tmp += 1
|
| 440 |
+
else:
|
| 441 |
+
text_combined = interpretation[index_tmp][0]
|
| 442 |
+
score_combinded = interpretation[index_tmp][1]
|
| 443 |
+
length = 1
|
| 444 |
+
while is_subword[index_tmp+length]:
|
| 445 |
+
text_combined += interpretation[index_tmp+length][0]
|
| 446 |
+
score_combinded += interpretation[index_tmp+length][1]
|
| 447 |
+
length += 1
|
| 448 |
+
interpretation_combined.append((text_combined, score_combinded/length))
|
| 449 |
+
index_tmp += length
|
| 450 |
+
|
| 451 |
+
interpretation_combined.append(('', 0.0))
|
| 452 |
+
print(interpretation_combined)
|
| 453 |
+
|
| 454 |
+
res = {"original": text_written, "interpretation": interpretation_combined}
|
| 455 |
print(res)
|
| 456 |
|
| 457 |
return res, ai_predict, chatbot
|