Spaces:
Sleeping
Sleeping
| from rest_framework.views import APIView | |
| from adrf.views import APIView as AsyncAPIView | |
| import tempfile, os | |
| from rest_framework.response import Response | |
| from _utils.resumo_completo_cursor import ( | |
| get_llm_summary_answer_by_cursor_complete, | |
| test_ragas, | |
| ) | |
| from _utils.resumo_simples_cursor import get_llm_summary_answer_by_cursor | |
| from _utils.utils import DEFAULT_SYSTEM_PROMPT | |
| from .serializer import ( | |
| RagasFromTextSerializer, | |
| ResumoCursorCompeltoSerializer, | |
| ResumoPDFSerializer, | |
| ResumoCursorSerializer, | |
| RagasSerializer, | |
| ) | |
| from _utils.main import get_llm_answer_summary, get_llm_answer_summary_with_embedding | |
| from setup.environment import default_model | |
| from rest_framework.parsers import MultiPartParser | |
| from drf_spectacular.utils import extend_schema | |
class ResumoView(APIView):
    """Accept uploaded PDF files and return an LLM-generated summary."""

    parser_classes = [MultiPartParser]

    def post(self, request):
        """Persist the uploaded PDFs to temp files, summarize, clean up.

        Returns ``Response({"resposta": <summary>})``; invalid payloads
        raise a DRF ValidationError (HTTP 400) via ``raise_exception=True``.
        """
        serializer = ResumoPDFSerializer(data=request.data)
        # raise_exception=True raises on bad input, so no "invalid" branch
        # (the original wrapped everything in a redundant `if`).
        serializer.is_valid(raise_exception=True)
        data = serializer.validated_data
        model = data.get("model", default_model)

        lista_pdfs = []
        try:
            # Downstream code reads PDFs by filesystem path, so each upload
            # is written to a NamedTemporaryFile kept alive with delete=False.
            for file in data["files"]:
                file.seek(0)
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".pdf"
                ) as temp_file:
                    for chunk in file.chunks():
                        temp_file.write(chunk)
                lista_pdfs.append(temp_file.name)

            system_prompt = data.get("system_prompt", DEFAULT_SYSTEM_PROMPT)
            resposta_llm = get_llm_answer_summary(
                system_prompt,
                data["user_message"],
                lista_pdfs,
                model=model,
                isIterativeRefinement=data["iterative_refinement"],
            )
        finally:
            # Always delete the temp files — the original leaked them when
            # the summary call raised.
            for path in lista_pdfs:
                os.remove(path)

        return Response({"resposta": resposta_llm})
class ResumoEmbeddingView(APIView):
    """Accept uploaded PDF files and return an embedding-based LLM summary."""

    parser_classes = [MultiPartParser]

    def post(self, request):
        """Persist the uploaded PDFs to temp files, summarize with
        embeddings, and clean up.

        Returns ``Response({"resposta": <summary>})``; invalid payloads
        raise a DRF ValidationError (HTTP 400) via ``raise_exception=True``.
        """
        serializer = ResumoPDFSerializer(data=request.data)
        # raise_exception=True raises on bad input — no explicit branch needed.
        serializer.is_valid(raise_exception=True)
        data = serializer.validated_data
        model = data.get("model", default_model)

        lista_pdfs = []
        try:
            # The embedding pipeline reads PDFs by path, so write each
            # upload to a temp file kept alive with delete=False.
            for file in data["files"]:
                file.seek(0)
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".pdf"
                ) as temp_file:
                    for chunk in file.chunks():
                        temp_file.write(chunk)
                lista_pdfs.append(temp_file.name)

            system_prompt = data.get("system_prompt", DEFAULT_SYSTEM_PROMPT)
            resposta_llm = get_llm_answer_summary_with_embedding(
                system_prompt,
                data["user_message"],
                lista_pdfs,
                model=model,
                isIterativeRefinement=data["iterative_refinement"],
            )
        finally:
            # Always delete the temp files — the original leaked them when
            # the summary call raised.
            for path in lista_pdfs:
                os.remove(path)

        return Response({"resposta": resposta_llm})
class ResumoSimplesCursorView(APIView):
    """Accept uploaded PDFs and summarize them via the simple cursor pipeline."""

    parser_classes = [MultiPartParser]

    def post(self, request):
        """Persist the uploaded PDFs to temp files, summarize, clean up.

        Returns ``Response({"resposta": <summary>})``; invalid payloads
        raise a DRF ValidationError (HTTP 400) via ``raise_exception=True``.
        """
        serializer = ResumoCursorSerializer(data=request.data)
        # raise_exception=True raises on bad input — no explicit branch needed.
        serializer.is_valid(raise_exception=True)
        data = serializer.validated_data

        lista_pdfs = []
        try:
            # The cursor pipeline reads PDFs by path, so write each upload
            # to a temp file kept alive with delete=False.
            for file in data["files"]:
                file.seek(0)
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".pdf"
                ) as temp_file:
                    for chunk in file.chunks():
                        temp_file.write(chunk)
                lista_pdfs.append(temp_file.name)

            resposta_llm = get_llm_summary_answer_by_cursor(data, lista_pdfs)
        finally:
            # Always delete the temp files — the original leaked them when
            # the summary call raised.
            for path in lista_pdfs:
                os.remove(path)

        return Response({"resposta": resposta_llm})
class ResumoSimplesCursorCompletoView(AsyncAPIView):
    """Async view: summarize uploaded PDFs via the 'complete' cursor pipeline."""

    parser_classes = [MultiPartParser]

    async def post(self, request):
        """Persist the uploaded PDFs to temp files, await the complete
        cursor summary, and clean up.

        Returns ``Response({"resposta": <summary>})``; invalid payloads
        raise a DRF ValidationError (HTTP 400) via ``raise_exception=True``.
        """
        serializer = ResumoCursorCompeltoSerializer(data=request.data)
        # raise_exception=True raises on bad input — no explicit branch needed.
        serializer.is_valid(raise_exception=True)
        data = serializer.validated_data

        lista_pdfs = []
        try:
            # The pipeline reads PDFs by path, so write each upload to a
            # temp file kept alive with delete=False.
            for file in data["files"]:
                file.seek(0)
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".pdf"
                ) as temp_file:
                    for chunk in file.chunks():
                        temp_file.write(chunk)
                lista_pdfs.append(temp_file.name)

            resposta_llm = await get_llm_summary_answer_by_cursor_complete(
                data, lista_pdfs
            )
        finally:
            # Always delete the temp files — the original leaked them when
            # the awaited call raised.
            for path in lista_pdfs:
                os.remove(path)

        return Response({"resposta": resposta_llm})
class RagasView(APIView):
    """Accept uploaded PDFs and run a RAGAS evaluation over them."""

    parser_classes = [MultiPartParser]

    def post(self, request):
        """Persist the uploaded PDFs to temp files, run ``test_ragas``,
        and clean up.

        Returns ``Response({"msg": <result>})``; invalid payloads raise a
        DRF ValidationError (HTTP 400) via ``raise_exception=True``.
        """
        serializer = RagasSerializer(data=request.data)
        # raise_exception=True raises on bad input — no explicit branch needed.
        serializer.is_valid(raise_exception=True)

        lista_pdfs = []
        try:
            # test_ragas reads PDFs by path, so write each upload to a
            # temp file kept alive with delete=False.
            for file in serializer.validated_data["files"]:
                file.seek(0)
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".pdf"
                ) as temp_file:
                    for chunk in file.chunks():
                        temp_file.write(chunk)
                lista_pdfs.append(temp_file.name)

            # test_ragas takes the serializer itself, not validated_data.
            result = test_ragas(serializer, lista_pdfs)
        finally:
            # Always delete the temp files — the original leaked them when
            # test_ragas raised.
            for path in lista_pdfs:
                os.remove(path)

        return Response({"msg": result})
class RagasFromTextView(APIView):
    """Run a RAGAS evaluation over a CSV test set using the cursor pipeline."""

    def post(self, request):
        """Build a RAGAS dataset from a CSV of questions/references plus one
        seed example, generate answers via the cursor pipeline, and return
        the evaluation table as a string.

        Invalid payloads raise a DRF ValidationError (HTTP 400).
        """
        serializer = RagasFromTextSerializer(data=request.data)
        # raise_exception=True raises on bad input — no explicit branch needed.
        serializer.is_valid(raise_exception=True)

        # Heavy dependencies imported lazily so the module imports without them.
        from datasets import Dataset
        from ragas import evaluate
        from ragas.metrics import faithfulness
        import pandas as pd

        # TODO(review): hard-coded absolute Windows path — move to settings
        # or an environment variable.
        df_pandas = pd.read_csv(
            "D:/repositorios/projetos-pessoais/projeto-y-backend-hugginf-face-teste-01/vella-backend/_utils/files/ragas_testset.csv"
        )

        # Seed example plus every row from the CSV.
        data = {
            "user_input": [
                "What is the capital of France?",
            ],
            "response": [],
            "retrieved_contexts": [],
        }
        reference = [
            "Paris is the capital of France. It is a major European city known for its culture."
        ]
        data["user_input"].extend(df_pandas["user_input"])
        reference.extend(df_pandas["reference"])

        for i in range(len(reference)):
            serializer.validated_data["user_message"] = data["user_input"][i]
            # NOTE(review): this same function is awaited in the async view
            # elsewhere in this file; calling it without await here may yield
            # a coroutine instead of a dict — confirm which variant this is.
            resposta_llm = get_llm_summary_answer_by_cursor_complete(
                serializer.validated_data, contexto=reference[i]
            )
            data["response"].append(resposta_llm["texto_completo"])
            data["retrieved_contexts"].append(
                [x["source"]["text"] for x in resposta_llm["resultado"]]
            )

        dataset = Dataset.from_dict(data)
        # Only faithfulness is evaluated; the other metrics
        # (answer_relevancy, answer_correctness, context_precision,
        # context_recall) were disabled in the original run.
        metrics = [faithfulness]
        results = evaluate(dataset, metrics)
        return Response({"resposta": results.to_pandas().to_string()})