Spaces:
Running
Running
import enum
import subprocess
import sys
from difflib import ndiff
from typing import List, Union, Tuple

import spacy
import pyinflect
| # BES auxiliary “be” Let it **be**. | |
| # HVS forms of “have” I**’ve** seen the Queen | |
| # MD verb, modal auxiliary VerbType=mod This **could** work. | |
| # VB verb, base form VerbForm=inf I want to **go**. | |
| # VBD verb, past tense VerbForm=fin Tense=past This **was** a sentence. | |
| # VBG verb, gerund or present participle VerbForm=part Tense=pres Aspect=prog I am **going**. | |
| # VBN verb, past participle VerbForm=part Tense=past Aspect=perf The treasure was **lost**. | |
| # VBP verb, non-3rd person singular present VerbForm=fin Tense=pres I **want** to go. | |
| # VBZ verb, 3rd person singular present VerbForm=fin Tense=pres Number=sing Person=3 He **wants** to go. | |
class APVoice:
    """Rule-based converter of English sentences from active to passive voice.

    Uses a spaCy dependency parse to locate the subject, verb and direct
    object of a sentence, and pyinflect to produce the past participle of
    the main verb.
    """

    class Tense(enum.Enum):
        # Each member describes one verbal tense:
        #   'aux' : fine-grained POS tags accepted for the auxiliary verb
        #           (None means "no auxiliary present")
        #   'main': fine-grained POS tags accepted for the main (ROOT) verb
        #   'tobe': template of the passive "to be" form, keyed by the
        #           grammatical number of the passive subject (NN = singular,
        #           NNS = plural); the '{}' slot receives " not" when the
        #           active sentence is negated, "" otherwise.
        # NOTE(review): get_verbal_tense iterates the members in definition
        # order and returns the FIRST match, so do not reorder them.
        simple_present = {
            'aux':[None,'VBZ'],
            'main':['VBZ','VBP', 'VB'],
            'tobe':{'NN':'is{}','NNS':'are{}'}
        }
        simple_past = {
            'aux':[None, 'VBD'],
            'main':['VBD', 'VB'],
            'tobe':{'NN':'was{}','NNS':'were{}'}
        }
        future_simple = {
            'aux':['MD'],
            'main':['VB'],
            'tobe':{'NN':'will{} be','NNS':'will{} be'}
        }
        present_cont = {
            'aux':['VBP','VBZ'],
            'main':['VBG'],
            'tobe':{'NN':'is{} being','NNS':'are{} being'}
        }
        past_cont = {
            'aux':['VBD'],
            'main':['VBG'],
            'tobe':{'NN':'was{} being','NNS':'were{} being'}
        }
        present_perfect = {
            'aux':['VBP','VBZ'],
            'main':['VBN'],
            'tobe':{'NN':'has{} been','NNS':'have{} been'}
        }
| def __init__( | |
| self | |
| ) -> None: | |
| self.parser = None | |
| self.__init_parser(model="en_core_web_sm") | |
| def __init_parser( | |
| self, | |
| model: str | |
| ) -> None: | |
| self.parser = None | |
| try: | |
| self.parser = spacy.load(model) | |
| except: | |
| print(f"* Downloading {model} model...") | |
| _ = subprocess.Popen( | |
| f"python -m spacy download {model}", | |
| stdout=subprocess.PIPE, | |
| shell=True).communicate() | |
| self.parser = spacy.load(model) | |
| def verb2participle( | |
| self, | |
| verb: str | |
| ) -> str: | |
| tk = self.parser(verb)[0] | |
| return tk._.inflect('VBN') | |
| def subjp2objp( | |
| self, | |
| pronoun: str | |
| ) -> str: | |
| """ | |
| Convert Subject pronouns to Object pronouns. | |
| """ | |
| mapping = {"i":"me","you":"you","we":"us","they":"them","he":"him","she":"her", "it":"it"} | |
| return mapping.get(pronoun.lower(), None) | |
| def get_gramatical_number( | |
| self, | |
| dobj_data: List[List[Tuple[str,str,str]]] | |
| ) -> Union[str, None]: | |
| result = [tag for _,dep,tag in dobj_data if dep == 'dobj'] | |
| if len(result) == 0: | |
| result = None | |
| else: | |
| result = result[0].replace('NNP', 'NN') | |
| return result | |
| def get_verbal_tense( | |
| self, | |
| verb_data: List[List[Tuple[str,str,str,int]]] | |
| ) -> Union[str, None]: | |
| aux, neg, root = verb_data | |
| root = root[0][2] if len(root) > 0 else None | |
| aux = aux[0][2] if len(aux) > 0 else None | |
| tense_name = None | |
| for tense in self.Tense: | |
| if aux in tense.value['aux'] and root in tense.value['main']: | |
| tense_name = tense.name | |
| break | |
| return tense_name | |
| def get_subj( | |
| self, | |
| sentence: str, | |
| ) -> Tuple[ List[Tuple[str,str,str]], str]: | |
| out_data = [] | |
| for tk in self.parser(sentence): | |
| if "subj" in tk.dep_: | |
| out_data = [(t,t.dep_,t.tag_) for t in tk.subtree] | |
| break | |
| out_str = ' '.join([t.text for t,_,_ in out_data]) | |
| return out_data, out_str | |
| def get_verb( | |
| self, | |
| sentence: str, | |
| ) -> Tuple[ List[List[Tuple[str,str,str,int]]], str]: | |
| main_data = [] | |
| aux_data = [] | |
| neg_data = [] | |
| out_data = [] | |
| for tk in self.parser(sentence): | |
| if "ROOT" in tk.dep_: | |
| main_data = [ (tk,tk.dep_,tk.tag_,tk.i)] | |
| aux_data = [(t,t.dep_,t.tag_,t.i) for t in tk.children if t.dep_ == "aux"] | |
| neg_data = [(t,t.dep_,t.tag_,t.i) for t in tk.children if t.dep_ == "neg"] | |
| out_data = [aux_data, neg_data, main_data] | |
| break | |
| out_str = sorted([tup for list_ in out_data for tup in list_], key=lambda x: x[3]) | |
| out_str = ' '.join([t.text for t,_,_,_ in out_str]) | |
| return out_data, out_str | |
| def get_dobj( | |
| self, | |
| sentence: str, | |
| ) -> Tuple[ List[Tuple[str,str,str]], str]: | |
| out_data = [] | |
| for tk in self.parser(sentence): | |
| if "dobj" in tk.dep_: | |
| out_data = [(t,t.dep_,t.tag_)for t in tk.subtree] | |
| break | |
| out_str = ' '.join([t.text for t,_,_ in out_data]) | |
| return out_data, out_str | |
| def get_complement( | |
| self, | |
| subj: str, | |
| verb: str, | |
| dobj: str, | |
| full_sentence: str, | |
| ) -> str: | |
| concat_sentence = subj + ' ' + verb + ' ' + dobj | |
| diff = "" | |
| for tk in ndiff(concat_sentence.split(), full_sentence.split()): | |
| mark, word = tk[0], tk[2:] | |
| if mark == '+': | |
| diff += word + " " | |
| return diff.strip() | |
| def active2passive( | |
| self, | |
| active_sentence: str, | |
| debug: bool=False | |
| ) -> List[str]: | |
| active_sentence = active_sentence.strip() | |
| if active_sentence == "": | |
| raise RuntimeError( | |
| f"Error: The sentence does not be empty!" | |
| ) | |
| subj_data, subj_str = self.get_subj(active_sentence) | |
| if debug: print(subj_data) | |
| if subj_str == "": | |
| raise RuntimeError( | |
| f"Error: The sentence's subject has not been found or the sentence does not be the correct format!" | |
| ) | |
| verb_data, verb_str = self.get_verb(active_sentence) | |
| if debug: print(verb_data) | |
| if verb_str == "": | |
| raise RuntimeError( | |
| f"Error: The sentence's verb has not been found or the sentence does not be the correct format!" | |
| ) | |
| dobj_data, dobj_str = self.get_dobj(active_sentence) | |
| if debug: print(dobj_data) | |
| if dobj_str == "": | |
| raise RuntimeError( | |
| f"Error: The sentence's direct object has not been found or the sentence does not be the correct format!" | |
| ) | |
| complement = self.get_complement(subj_str, verb_str, dobj_str, active_sentence) | |
| # Get pasive subject | |
| p_subj = dobj_str | |
| # Get tense + participle verb | |
| verbal_tense = self.get_verbal_tense(verb_data) | |
| if debug: print(verbal_tense) | |
| if verbal_tense is None: | |
| raise RuntimeError( | |
| f"Error: The sentence does not be the correct format or the verbal tense has not been implemented yet!" | |
| ) | |
| _, neg_data, main_data = verb_data | |
| neg = " not" if len(neg_data) > 0 else "" | |
| gramatical_number = self.get_gramatical_number(dobj_data) | |
| if debug: print(gramatical_number) | |
| p_tobe = self.Tense[verbal_tense].value['tobe'][gramatical_number].format(neg) | |
| p_verb = self.verb2participle(main_data[0][0].text) | |
| # Convert active_object to pasive_agent | |
| p_agent = "by " | |
| for tk,_,tag in subj_data: | |
| word = tk.text | |
| if tag == 'PRP': | |
| word = self.subjp2objp(word) | |
| p_agent += word + " " | |
| # return f"{p_subj.capitalize()} {p_tobe} {p_verb} {p_agent.strip().lower()} {complement}" | |
| return [p_subj.capitalize(), p_tobe, p_verb, p_agent.strip().lower(), complement] |