fixed spaces in dictionary text display
Browse files - lsj_dict.py +4 -3
lsj_dict.py
CHANGED
|
@@ -28,8 +28,8 @@ def extract_entry_info(entry):
|
|
| 28 |
"""
|
| 29 |
definitions = defaultdict(dict)
|
| 30 |
|
| 31 |
-
# Save the lemma in the dictionary
|
| 32 |
-
lemma = entry.get('key')
|
| 33 |
|
| 34 |
# Save the orthographies in the dictionary
|
| 35 |
orthographies = [orth.text for orth in entry.findall('orth')]
|
|
@@ -108,7 +108,7 @@ def format_text(data):
|
|
| 108 |
text = data['definitions']['text']
|
| 109 |
|
| 110 |
# Change <tr> tags to bold
|
| 111 |
-
text = text.replace("<tr>", "**").replace("</tr>", "**")
|
| 112 |
|
| 113 |
# Change [SENSE_SEPARATOR] to integers
|
| 114 |
for i in range(len(text.split("[SENSE_SEPARATOR]"))):
|
|
@@ -143,6 +143,7 @@ def main():
|
|
| 143 |
for word, info in xml_info.items():
|
| 144 |
# Merge dictionaries, assuming word is unique across all files
|
| 145 |
merged_info.setdefault(word, {}).update(info)
|
|
|
|
| 146 |
|
| 147 |
# Store merged dictionaries as .json file with pretty print
|
| 148 |
with open("lsj_dict.json", "w", encoding="utf-8") as file:
|
|
|
|
| 28 |
"""
|
| 29 |
definitions = defaultdict(dict)
|
| 30 |
|
| 31 |
+
# Save the lemma in the dictionary and remove digits
|
| 32 |
+
lemma = ''.join([i for i in entry.get('key') if not i.isdigit()])
|
| 33 |
|
| 34 |
# Save the orthographies in the dictionary
|
| 35 |
orthographies = [orth.text for orth in entry.findall('orth')]
|
|
|
|
| 108 |
text = data['definitions']['text']
|
| 109 |
|
| 110 |
# Change <tr> tags to bold
|
| 111 |
+
text = text.replace("<tr>", "**").replace("</tr>", "**").replace(",", ", ").replace(";", "; ").replace(":", ": ").replace("(", " (").replace(")", ") ").replace("[", " [").replace("]", "] ").replace(" ,", ", ").replace(" ; ", "; ").replace(" : ", ": ").replace(" ." , ". ")
|
| 112 |
|
| 113 |
# Change [SENSE_SEPARATOR] to integers
|
| 114 |
for i in range(len(text.split("[SENSE_SEPARATOR]"))):
|
|
|
|
| 143 |
for word, info in xml_info.items():
|
| 144 |
# Merge dictionaries, assuming word is unique across all files
|
| 145 |
merged_info.setdefault(word, {}).update(info)
|
| 146 |
+
|
| 147 |
|
| 148 |
# Store merged dictionaries as .json file with pretty print
|
| 149 |
with open("lsj_dict.json", "w", encoding="utf-8") as file:
|