Spaces:
Sleeping
Sleeping
# Check if the predicted answer matches the ground truth
def check_answer(prediction, ground_truth):
    """Score a prediction against ground-truth answers by substring match.

    Matching is case-insensitive: the prediction and every answer are
    lowercased before the ``in`` test.

    Args:
        prediction: Model output text.
        ground_truth: A single answer string, or a list of entries. Each
            entry is either a string that must occur in the prediction, or
            a list of alternative strings of which at least one must occur.

    Returns:
        A list of ints with one element per ground-truth entry: 1 when the
        entry is matched, 0 otherwise. A bare (non-list) ``ground_truth``
        is treated as a one-entry list.
    """
    prediction = prediction.lower()
    if not isinstance(ground_truth, list):
        ground_truth = [ground_truth]

    labels = []
    for answer in ground_truth:
        if isinstance(answer, list):
            # A list entry holds alternatives: any single hit is enough.
            matched = any(alt.lower() in prediction for alt in answer)
        else:
            matched = answer.lower() in prediction
        labels.append(int(matched))
    return labels
# Evaluate if the result is correct (correct when no label is 0)
def get_evaluation(results):
    """Return whether a set of labels counts as correct.

    Args:
        results: Container of labels supporting the ``in`` operator.

    Returns:
        True when no element of ``results`` equals 0, False otherwise.
        Only 0 marks a failure: other values such as 1 or -1 do not, and
        an empty container also yields True.
    """
    return 0 not in results
# Generate prediction based on query, documents, and model
def predict(query, ground_truth, docs, model, instruction, temperature):
    """Generate an answer for ``query`` and label it against ``ground_truth``.

    Args:
        query: Question text, substituted for ``QUERY`` in ``instruction``.
        ground_truth: Expected answer(s), passed through to ``check_answer``.
        docs: Sequence of document strings. They are joined with newlines
            and substituted for ``DOCS``. When empty (or None) ``DOCS`` is
            the empty string and no system message is sent to the model.
        model: Object exposing ``generate(text, temperature[, system])``
            that returns the prediction as a string.
        instruction: Format string with ``{QUERY}`` and ``{DOCS}`` fields.
        temperature: Sampling temperature forwarded to ``model.generate``.

    Returns:
        A tuple ``(labels, prediction, fact_label)`` where

        * ``labels`` is ``[-1]`` when the prediction contains the phrase
          'insufficient information'; otherwise it is the list returned by
          ``check_answer`` (1 for a matched answer, 0 for a missed one).
        * ``prediction`` is the raw model output.
        * ``fact_label`` is 1 when the prediction contains the phrase
          'factual errors', else 0.
    """
    system_message = (
        'You are an accurate and reliable AI assistant that can answer questions with the help of external documents. '
        'Please note that external documents may contain noisy or factually incorrect information. If the information '
        'in the document contains the correct answer, you will give an accurate answer. If the information in the '
        'document does not contain the answer, you will generate "I can not answer the question because of the insufficient information in documents." '
        'If there are inconsistencies with the facts in some of the documents, please generate the response: "There are factual errors in the provided documents and provide the correct answer."'
    )

    if not docs:
        # No documents: query the model without the document-oriented
        # system message.
        text = instruction.format(QUERY=query, DOCS='')
        prediction = model.generate(text, temperature)
    else:
        joined_docs = '\n'.join(docs)
        text = instruction.format(QUERY=query, DOCS=joined_docs)
        prediction = model.generate(text, temperature, system_message)

    # NOTE: both phrase checks below are case-sensitive and rely on the
    # model echoing the wording requested in the system message.
    if 'insufficient information' in prediction:
        labels = [-1]
    else:
        labels = check_answer(prediction, ground_truth)

    fact_label = 1 if 'factual errors' in prediction else 0
    return labels, prediction, fact_label