Spaces:
Sleeping
Sleeping
| from cnocr import CnOcr | |
| import pandas as pd | |
def check_telecode(input_string):
    """Return True if *input_string* looks like two or three 4-digit telecodes.

    A candidate is accepted only when it is exactly 8 or 12 characters long
    and every character is an ASCII digit (0-9).

    Args:
        input_string: Candidate text to validate.

    Returns:
        bool: True for an 8- or 12-character all-digit string, otherwise False.
    """
    if len(input_string) not in (8, 12):
        return False
    # A plain int() conversion is too permissive here: it also accepts signs,
    # digit-group underscores and non-ASCII digits (e.g. "-1234567",
    # "1_234_56"), none of which split into valid 4-digit codes.
    return all(ch in "0123456789" for ch in input_string)
def extract_integers(input_string):
    """Split a telecode string into its 4-character codes.

    The result is always a 3-tuple so callers can unpack it uniformly:

    * 12 characters -> ``(w1, w2, w3)``
    * 8 characters  -> ``(w1, w2, None)``
    * anything else -> ``(None, None, None)``

    Args:
        input_string: The telecode text to split. The content is not
            validated here; only the length is inspected.

    Returns:
        tuple: Three items, each either a 4-character substring or None.
    """
    if len(input_string) == 12:
        return input_string[:4], input_string[4:8], input_string[8:]
    if len(input_string) == 8:
        # Pad with None so the arity matches the other branches; a 2-tuple
        # here makes a three-way unpack at the call site raise ValueError.
        return input_string[:4], input_string[4:], None
    return None, None, None
def get_chinese_name(path):
    """OCR an image and translate the first telecode line found into words.

    Each recognised text line has its spaces removed and is tested with
    ``check_telecode``. The first line that passes, and whose codes are all
    present in ``hkTelecode.csv``, is translated code by code via the CSV's
    ``code`` -> ``word`` columns.

    Args:
        path: Image location, passed straight to ``CnOcr.ocr``.

    Returns:
        list: One ``word`` entry per 4-digit code (two or three items), or an
        empty list when no OCR line yields a fully translatable telecode.
    """
    # Alternative recognition model tried previously: 'densenet_lite_136-fc'.
    ocr = CnOcr(rec_model_name='en_PP-OCRv3')
    out = ocr.ocr(path)
    # Codes are read as strings so leading zeros survive the CSV parse.
    df = pd.read_csv('hkTelecode.csv', dtype={'code': str}, index_col=False)

    for data in out:
        text = data['text'].replace(' ', '')
        if not check_telecode(text):
            continue

        # Drop None placeholders so this works whether the splitter returns
        # two codes or three slots for an 8-digit telecode.
        codes = [code for code in extract_integers(text) if code is not None]
        for code in codes:
            print(code)

        chinese_name = []
        for code in codes:
            matches = df.loc[df['code'] == code, 'word']
            if matches.empty:
                # Likely an OCR misread: the code is not in the table, so
                # give up on this line instead of raising IndexError.
                break
            chinese_name.append(matches.iloc[0])
        else:
            return chinese_name

    return []
# Script entry: translate the telecodes in a hard-coded image and show them.
sample_names = get_chinese_name('dontTouchMe/IMG_4495.jpg')
print(sample_names)