import numpy as np import os import re from io import BytesIO import datetime import time import openai, tenacity import argparse import configparser import json import tiktoken import PyPDF2 import gradio # 定义Reviewer类 class Reviewer: # 初始化方法,设置属性 def __init__(self, api, review_format, paper_pdf, language): self.api = api self.review_format = review_format self.language = language self.paper_pdf = paper_pdf self.max_token_num = 4097 self.encoding = tiktoken.get_encoding("gpt2") def review_by_chatgpt(self, paper_list): text = self.extract_chapter(self.paper_pdf) chat_review_text, total_token_used = self.chat_review(text=text) return chat_review_text, total_token_used @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10), stop=tenacity.stop_after_attempt(5), reraise=True) def chat_review(self, text): openai.api_key = self.api # 读取api review_prompt_token = 1000 text_token = len(self.encoding.encode(text)) input_text_index = int(len(text)*(self.max_token_num-review_prompt_token)/(text_token+1)) input_text = "This is the paper for your review:" + text[:input_text_index] messages=[ {"role": "system", "content": "You are a professional reviewer. Now I will give you a paper. You need to give a complete review opinion according to the following requirements and format:"+ self.review_format +" Must be output in {}.".format(self.language)}, {"role": "user", "content": input_text}, ] response = openai.ChatCompletion.create( model="gpt-3.5-turbo", messages=messages, ) result = '' for choice in response.choices: result += choice.message.content print("********"*10) print(result) print("********"*10) print("prompt_token_used:", response.usage.prompt_tokens) print("completion_token_used:", response.usage.completion_tokens) print("total_token_used:", response.usage.total_tokens) print("response_time:", response.response_ms/1000.0, 's') return result, response.usage.total_tokens def extract_chapter(self, pdf_path): file_object = BytesIO(pdf_path) # 创建一个PDF阅读器对象 pdf_reader = PyPDF2.PdfReader(file_object) # 获取PDF的总页数 num_pages = len(pdf_reader.pages) # 初始化提取状态和提取文本 extraction_started = False extracted_text = "" # 遍历PDF中的每一页 for page_number in range(num_pages): page = pdf_reader.pages[page_number] page_text = page.extract_text() # 如果找到了章节标题,开始提取 if 'Abstract'.lower() in page_text.lower() and not extraction_started: extraction_started = True page_number_start = page_number # 如果提取已开始,将页面文本添加到提取文本中 if extraction_started: extracted_text += page_text # 如果找到下一章节标题,停止提取 if page_number_start + 1 < page_number: break return extracted_text def main(api, review_format, paper_pdf, language): start_time = time.time() if not api or not review_format or not paper_pdf: return "请输入完整内容!" # 判断PDF文件 else: # 创建一个Reader对象 reviewer1 = Reviewer(api, review_format, paper_pdf, language) # 开始判断是路径还是文件: comments, total_token_used = reviewer1.review_by_chatgpt(paper_list=paper_pdf) time_used = time.time() - start_time output2 ="使用token数:"+ str(total_token_used)+"\n花费时间:"+ str(round(time_used, 2)) +"秒" return comments, output2 ######################################################################################################## # 标题 title = "🤖ChatReviewer🤖" # 描述 description = '''
ChatReviewer是一款基于ChatGPT-3.5的API开发的论文自动评审AI助手。其用途如下:
⭐️对论文进行快速总结和评审,提高科研人员的文献阅读和理解的效率,紧跟研究前沿。
⭐️对自己的论文进行评审,根据ChatReviewer生成的审稿意见进行查漏补缺,进一步提高自己的论文质量。
⭐️辅助论文审稿,给出参考意见,提高审稿效率和质量。(🈲:禁止直接复制生成的评论用于任何论文审稿工作!)
如果觉得很卡,可以点击右上角的Duplicate this Space,把ChatReviewer复制到你自己的Space中!
本项目的[Github](https://github.com/nishiwen1214/ChatReviewer),欢迎Star和Fork,也欢迎大佬赞助让本项目快速成长!💗([获取Api Key](https://chatgpt.cn.obiscr.com/blog/posts/2023/How-to-get-api-key/))