Spaces:

lingyu98
/

CiJiang

Sleeping

App Files Files Community

lingyu98 committed on Aug 4

Commit

77ad230

verified ·

1 Parent(s): f4134b8

Create utils.py

Browse files

Files changed (1) hide show

cijiang/utils.py +103 -0

cijiang/utils.py ADDED Viewed

	@@ -0,0 +1,103 @@

import json
from collections import defaultdict

from colorama import Fore, Style, init

# Ask colorama to reset styling automatically after each print call.
init(autoreset=True)

# Load the whitespace-separated syllable inventory once, at import time.
# The path is relative to the current working directory.
with open('rules/ALL_SYLLABLES.txt', 'r', encoding='utf-8') as f:
    ALL_SYLLABLES = [
        syllable for syllable in f.read().strip().split() if syllable
    ]
# Recognised finals (yunmu), with 'v' standing in for 'ü'.  Besides the plain
# finals, the list also contains a number of whole syllables (e.g. 'zhi',
# 'yi', 'wu', 'yuan') which get_yunmu() returns unchanged.
# The order matters: it breaks ties between finals of equal length in the
# suffix fallback of get_yunmu().
YUNMU_LIST = [
    'a', 'o', 'e', 'i', 'u', 'v',
    'ai', 'ei', 'ao', 'ou', 'ia', 'ie', 'iao', 'iu', 'ua', 'uo', 'uai', 'ui', 've',
    'an', 'en', 'in', 'un', 'vn', 'ian', 'uan', 'vuan',
    'ang', 'eng', 'ing', 'ong',
    'zhi', 'chi', 'shi', 'ri', 'zi', 'ci', 'si',
    'yi', 'wu', 'yu', 'yin', 'yun', 'ye', 'yue', 'yuan', 'ying',
]

# Lookup structures derived once from YUNMU_LIST instead of on every call.
_YUNMU_SET = frozenset(YUNMU_LIST)
# Longest finals first; the sort is stable, so equal-length finals keep their
# YUNMU_LIST order.
_YUNMU_LONGEST_FIRST = tuple(sorted(YUNMU_LIST, key=len, reverse=True))

# Initials (shengmu), two-letter ones first so 'zh' is tried before 'z'.
_SHENGMU_LONGEST_FIRST = (
    'zh', 'ch', 'sh', 'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h',
    'j', 'q', 'x', 'z', 'c', 's', 'r', 'y', 'w',
)
# Initials after which a leading 'u' in the remainder may be read as 'v' (ü).
_U_AS_V_INITIALS = frozenset(('j', 'q', 'x', 'y'))


def get_yunmu(syllable):
    """Return the final (yunmu) label for a pinyin syllable, or None.

    The syllable is lower-cased and 'ü' is rewritten to 'v' first.  The
    lookup then proceeds in this order:

    1. If the whole syllable is listed in YUNMU_LIST, it is returned as is.
    2. For every initial the syllable starts with (longest first), the
       remainder is returned if it is a listed final.  Otherwise, for the
       initials j/q/x/y, a remainder starting with 'u' is retried with that
       'u' replaced by 'v' (e.g. 'jue' -> 've').
    3. As a fallback, the longest listed final that the syllable ends with
       is returned (e.g. 'jiang' -> 'ang').

    NOTE(review): because step 2 checks the unmodified remainder first,
    'ju', 'jun' and 'juan' resolve to 'u', 'un' and 'uan' rather than to the
    'v' variants.  This is kept unchanged so existing outputs stay the same;
    confirm whether it is intended.

    Args:
        syllable: Pinyin syllable without tone marks; case-insensitive.

    Returns:
        The matching entry of YUNMU_LIST, or None when nothing matches.
    """
    syllable = syllable.lower().replace('ü', 'v')

    if syllable in _YUNMU_SET:
        return syllable

    for shengmu in _SHENGMU_LONGEST_FIRST:
        if not syllable.startswith(shengmu):
            continue
        remainder = syllable[len(shengmu):]
        if remainder in _YUNMU_SET:
            return remainder
        if shengmu in _U_AS_V_INITIALS and remainder.startswith('u'):
            remainder_as_v = 'v' + remainder[1:]
            if remainder_as_v in _YUNMU_SET:
                return remainder_as_v

    # No initial/final split worked: fall back to the longest matching suffix.
    for yunmu in _YUNMU_LONGEST_FIRST:
        if syllable.endswith(yunmu):
            return yunmu
    return None
def print_results(rhymer, text, target_rhyme, top_results=8, beam_width=20, num_candidates=5000):
    """Print the top rhyme candidates for a masked text, highlighted in red.

    Args:
        rhymer: Object exposing ``get_rhymes(text, target_rhyme,
            beam_width=..., num_candidates=...)``; its result is sliced and
            iterated as ``(seq, log_prob)`` pairs.
        text: Input containing ``[M]`` placeholders; everything before the
            first placeholder is printed as the context.
        target_rhyme: Rhyme target, passed through to the rhymer and shown
            in the header line.
        top_results: Maximum number of candidates to print.
        beam_width: Forwarded to ``rhymer.get_rhymes``.
        num_candidates: Forwarded to ``rhymer.get_rhymes``.
    """
    candidates = rhymer.get_rhymes(
        text,
        target_rhyme,
        beam_width=beam_width,
        num_candidates=num_candidates,
    )
    mask_count = text.count("[M]")
    context = text.partition('[M]')[0]

    print(f"======= 韵脚: |{target_rhyme}|")
    for position, (seq, log_prob) in enumerate(candidates[:top_results]):
        # Keep the last `mask_count` items of the candidate (presumably the
        # filled-in placeholders). With no placeholder at all, the slice
        # [-0:] is the whole candidate.
        tail_parts = seq[-mask_count:].split()
        rendered = []
        for idx, part in enumerate(tail_parts):
            if idx < mask_count:
                rendered.append(Fore.RED + part + Style.RESET_ALL)
            else:
                rendered.append(part)
        highlighted = ''.join(rendered)
        print(f"{position + 1}. {context}{highlighted} (score: {log_prob:.3f})")
    print("=" + "=" * 40)
if __name__ == "__main__":
    # Script mode: rebuild the syllable -> yunmu table and write it as JSON.
    # Syllables for which get_yunmu() returns nothing are left out.
    syllable_to_yunmu = defaultdict(str)
    resolved = ((syllable, get_yunmu(syllable)) for syllable in ALL_SYLLABLES)
    syllable_to_yunmu.update(
        (syllable, yunmu) for syllable, yunmu in resolved if yunmu
    )
    with open('rules/syllable_to_yunmu.json', 'w', encoding='utf-8') as f:
        json.dump(syllable_to_yunmu, f, ensure_ascii=False, indent=4)