Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	File size: 1,125 Bytes
			
			| a1fe67d 5c0a088 a1fe67d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | import requests
import random
from functools import lru_cache
class GROBID_OFFLINE_EXCEPTION(Exception): pass
def get_avail_grobid_url():
    from toolbox import get_conf
    GROBID_URLS, = get_conf('GROBID_URLS')
    if len(GROBID_URLS) == 0: return None
    try:
        _grobid_url = random.choice(GROBID_URLS) # 随机负载均衡
        if _grobid_url.endswith('/'): _grobid_url = _grobid_url.rstrip('/')
        res = requests.get(_grobid_url+'/api/isalive')
        if res.text=='true': return _grobid_url
        else: return None
    except:
        return None
@lru_cache(maxsize=32)
def parse_pdf(pdf_path, grobid_url):
    import scipdf   # pip install scipdf_parser
    if grobid_url.endswith('/'): grobid_url = grobid_url.rstrip('/')
    try:
        article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url)
    except GROBID_OFFLINE_EXCEPTION:
        raise GROBID_OFFLINE_EXCEPTION("GROBID服务不可用,请修改config中的GROBID_URL,可修改成本地GROBID服务。")
    except:
        raise RuntimeError("解析PDF失败,请检查PDF是否损坏。")
    return article_dict
 |