Spaces:
Sleeping
Sleeping
# Check if the predicted answer matches the ground truth
def check_answer(prediction, ground_truth):
    """Score a prediction against each expected answer by substring match.

    Matching is case-insensitive: the prediction and every expected answer
    are lower-cased before the ``in`` test.

    Args:
        prediction: Model output text.
        ground_truth: A single expected answer or a list of expected
            answers. Each entry is either a string, which must occur in the
            prediction, or a list of alternative strings, of which at least
            one must occur in the prediction.

    Returns:
        A list with one int per expected answer: 1 if it was found in the
        prediction, 0 otherwise. A bare (non-list) ``ground_truth`` yields a
        one-element list; an empty list of alternatives scores 0.
    """
    prediction = prediction.lower()
    # isinstance (not an exact type() comparison) so list subclasses are
    # handled the same way here as in the per-answer check below.
    if not isinstance(ground_truth, list):
        ground_truth = [ground_truth]

    labels = []
    for instance in ground_truth:
        if isinstance(instance, list):
            # Group of alternatives: any single hit is enough.
            found = any(alt.lower() in prediction for alt in instance)
        else:
            found = instance.lower() in prediction
        labels.append(int(found))
    return labels
# Evaluate if the result is correct (non-zero indicates correctness)
def get_evaluation(results):
    """Return True when no entry in ``results`` equals 0.

    Only the presence of a 0 makes the outcome False, so an empty
    collection and a collection holding only non-zero values (for example
    ``[-1]``) both evaluate to True.
    """
    contains_zero = 0 in results
    return not contains_zero
# Generate prediction based on query, documents, and model
def predict(query, ground_truth, docs, model, instruction, temperature):
    """Ask the model a question and score its answer.

    Args:
        query: Question text substituted for ``QUERY`` in ``instruction``.
        ground_truth: Expected answer(s), passed unchanged to
            ``check_answer``.
        docs: Document strings, joined with newlines and substituted for
            ``DOCS`` in ``instruction``. ``None`` or an empty collection
            means "no documents".
        model: Object with a ``generate`` method. It is called as
            ``generate(text, temperature)`` when there are no documents and
            as ``generate(text, temperature, system_message)`` otherwise.
        instruction: Template string formatted with the ``QUERY`` and
            ``DOCS`` keyword arguments.
        temperature: Value passed through to ``model.generate``.

    Returns:
        A tuple ``(labels, prediction, fact_label)``:

        * ``labels`` is ``[-1]`` when the prediction contains the phrase
          'insufficient information'; otherwise it is the list returned by
          ``check_answer`` (1 = expected answer found in the prediction,
          0 = not found).
        * ``prediction`` is the raw value returned by ``model.generate``.
        * ``fact_label`` is 1 when the prediction contains the phrase
          'factual errors', else 0.
    """
    # NOTE(review): in the last sentence the closing quote sits after
    # "and provide the correct answer." -- confirm whether the quoted
    # response was meant to end at "documents.". Left unchanged here because
    # editing the prompt changes what the model is asked to produce.
    system_message = (
        'You are an accurate and reliable AI assistant that can answer questions with the help of external documents. '
        'Please note that external documents may contain noisy or factually incorrect information. If the information '
        'in the document contains the correct answer, you will give an accurate answer. If the information in the '
        'document does not contain the answer, you will generate "I can not answer the question because of the insufficient information in documents." '
        'If there are inconsistencies with the facts in some of the documents, please generate the response: "There are factual errors in the provided documents and provide the correct answer."'
    )

    if not docs:
        # No documents (empty or None): the system message is not sent.
        text = instruction.format(QUERY=query, DOCS='')
        prediction = model.generate(text, temperature)
    else:
        text = instruction.format(QUERY=query, DOCS='\n'.join(docs))
        prediction = model.generate(text, temperature, system_message)

    # Check if the prediction contains the 'insufficient information' phrase
    if 'insufficient information' in prediction:
        labels = [-1]
    else:
        labels = check_answer(prediction, ground_truth)

    # Check for factual errors in the prediction
    fact_label = int('factual errors' in prediction)

    return labels, prediction, fact_label