Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	| import asyncio | |
| from metagpt.config2 import config | |
| from metagpt.const import EXAMPLE_DATA_PATH | |
| from metagpt.logs import logger | |
| from metagpt.rag.parsers import OmniParse | |
| from metagpt.rag.schema import OmniParseOptions, OmniParseType, ParseResultType | |
| from metagpt.utils.omniparse_client import OmniParseClient | |
# Sample inputs for the examples below, all located under EXAMPLE_DATA_PATH/omniparse.
TEST_DOCX = EXAMPLE_DATA_PATH / "omniparse/test01.docx"  # word document
TEST_PDF = EXAMPLE_DATA_PATH / "omniparse/test02.pdf"  # pdf document
TEST_VIDEO = EXAMPLE_DATA_PATH / "omniparse/test03.mp4"  # video clip
TEST_AUDIO = EXAMPLE_DATA_PATH / "omniparse/test04.mp3"  # audio clip
async def omniparse_client_example():
    """Send each sample file (docx, pdf, video, audio) through OmniParseClient and log the result."""
    omni_client = OmniParseClient(base_url=config.omniparse.base_url)

    # docx: handed over as raw bytes together with an explicit filename
    with open(TEST_DOCX, "rb") as docx_file:
        docx_bytes = docx_file.read()
    logger.info(await omni_client.parse_document(file_input=docx_bytes, bytes_filename="test_01.docx"))

    # pdf: handed over by path
    logger.info(await omni_client.parse_pdf(file_input=TEST_PDF))

    # video
    logger.info(await omni_client.parse_video(file_input=TEST_VIDEO))

    # audio
    logger.info(await omni_client.parse_audio(file_input=TEST_AUDIO))
async def omniparse_example():
    """Load data through the OmniParse parser and log what it returns.

    First loads a single PDF with the synchronous ``load_data``; then sets the
    parser's ``parse_type`` to ``DOCUMENT`` and loads a list containing the
    docx and pdf samples with ``aload_data``.
    """
    pdf_options = OmniParseOptions(
        parse_type=OmniParseType.PDF,
        result_type=ParseResultType.MD,
        max_timeout=120,
        num_workers=3,
    )
    omni_parser = OmniParse(
        api_key=config.omniparse.api_key,
        base_url=config.omniparse.base_url,
        parse_options=pdf_options,
    )

    # Single file, synchronous entry point.
    single_result = omni_parser.load_data(file_path=TEST_PDF)
    logger.info(single_result)

    # Several files, asynchronous entry point, after switching the parse type.
    omni_parser.parse_type = OmniParseType.DOCUMENT
    batch_result = await omni_parser.aload_data(file_path=[TEST_DOCX, TEST_PDF])
    logger.info(batch_result)
async def main():
    """Run the client-level example, then the parser-level example, in that order."""
    for run_example in (omniparse_client_example, omniparse_example):
        await run_example()


if __name__ == "__main__":
    # Script entry point: run the async examples via asyncio.run.
    asyncio.run(main())
