Spaces:
Running
Running
import enum
import subprocess
import sys
from difflib import ndiff
from typing import List, Union, Tuple

import spacy
import pyinflect
| # BES auxiliary “be” Let it **be**. | |
| # HVS forms of “have” I**’ve** seen the Queen | |
| # MD verb, modal auxiliary VerbType=mod This **could** work. | |
| # VB verb, base form VerbForm=inf I want to **go**. | |
| # VBD verb, past tense VerbForm=fin Tense=past This **was** a sentence. | |
| # VBG verb, gerund or present participle VerbForm=part Tense=pres Aspect=prog I am **going**. | |
| # VBN verb, past participle VerbForm=part Tense=past Aspect=perf The treasure was **lost**. | |
| # VBP verb, non-3rd person singular present VerbForm=fin Tense=pres I **want** to go. | |
| # VBZ verb, 3rd person singular present VerbForm=fin Tense=pres Number=sing Person=3 He **wants** to go. | |
class APVoice:
    """Rule-based converter of English sentences from active to passive voice.

    Uses a spaCy dependency parse to locate the subject, verb and direct
    object of a sentence, and pyinflect to produce the past participle of
    the main verb.
    """

    class Tense(enum.Enum):
        # Each member describes one verbal tense:
        #   'aux' : fine-grained POS tags accepted for the auxiliary verb
        #           (None means "no auxiliary present")
        #   'main': fine-grained POS tags accepted for the main (ROOT) verb
        #   'tobe': template of the passive "to be" form, keyed by the
        #           grammatical number of the passive subject (NN = singular,
        #           NNS = plural); the '{}' slot receives " not" when the
        #           active sentence is negated, "" otherwise.
        # NOTE(review): get_verbal_tense iterates the members in definition
        # order and returns the FIRST match, so do not reorder them.
        simple_present = {
            'aux':[None,'VBZ'],
            'main':['VBZ','VBP', 'VB'],
            'tobe':{'NN':'is{}','NNS':'are{}'}
        }
        simple_past = {
            'aux':[None, 'VBD'],
            'main':['VBD', 'VB'],
            'tobe':{'NN':'was{}','NNS':'were{}'}
        }
        future_simple = {
            'aux':['MD'],
            'main':['VB'],
            'tobe':{'NN':'will{} be','NNS':'will{} be'}
        }
        present_cont = {
            'aux':['VBP','VBZ'],
            'main':['VBG'],
            'tobe':{'NN':'is{} being','NNS':'are{} being'}
        }
        past_cont = {
            'aux':['VBD'],
            'main':['VBG'],
            'tobe':{'NN':'was{} being','NNS':'were{} being'}
        }
        present_perfect = {
            'aux':['VBP','VBZ'],
            'main':['VBN'],
            'tobe':{'NN':'has{} been','NNS':'have{} been'}
        }
| def __init__( | |
| self | |
| ) -> None: | |
| self.parser = None | |
| self.__init_parser(model="en_core_web_sm") | |
| def __init_parser( | |
| self, | |
| model: str | |
| ) -> None: | |
| self.parser = None | |
| try: | |
| self.parser = spacy.load(model) | |
| except: | |
| print(f"* Downloading {model} model...") | |
| _ = subprocess.Popen( | |
| f"python -m spacy download {model}", | |
| stdout=subprocess.PIPE, | |
| shell=True).communicate() | |
| self.parser = spacy.load(model) | |
| def verb2participle( | |
| self, | |
| verb: str | |
| ) -> str: | |
| tk = self.parser(verb)[0] | |
| return tk._.inflect('VBN') | |
| def subjp2objp( | |
| self, | |
| pronoun: str | |
| ) -> str: | |
| """ | |
| Convert Subject pronouns to Object pronouns. | |
| """ | |
| mapping = {"i":"me","you":"you","we":"us","they":"them","he":"him","she":"her", "it":"it"} | |
| return mapping.get(pronoun.lower(), None) | |
| def get_gramatical_number( | |
| self, | |
| dobj_data: List[List[Tuple[str,str,str]]] | |
| ) -> Union[str, None]: | |
| result = [tag for _,dep,tag in dobj_data if dep == 'dobj'] | |
| if len(result) == 0: | |
| result = None | |
| else: | |
| result = result[0].replace('NNP', 'NN') | |
| return result | |
| def get_verbal_tense( | |
| self, | |
| verb_data: List[List[Tuple[str,str,str,int]]] | |
| ) -> Union[str, None]: | |
| aux, neg, root = verb_data | |
| root = root[0][2] if len(root) > 0 else None | |
| aux = aux[0][2] if len(aux) > 0 else None | |
| tense_name = None | |
| for tense in self.Tense: | |
| if aux in tense.value['aux'] and root in tense.value['main']: | |
| tense_name = tense.name | |
| break | |
| return tense_name | |
| def get_subj( | |
| self, | |
| sentence: str, | |
| ) -> Tuple[ List[Tuple[str,str,str]], str]: | |
| out_data = [] | |
| for tk in self.parser(sentence): | |
| if "subj" in tk.dep_: | |
| out_data = [(t,t.dep_,t.tag_) for t in tk.subtree] | |
| break | |
| out_str = ' '.join([t.text for t,_,_ in out_data]) | |
| return out_data, out_str | |
| def get_verb( | |
| self, | |
| sentence: str, | |
| ) -> Tuple[ List[List[Tuple[str,str,str,int]]], str]: | |
| main_data = [] | |
| aux_data = [] | |
| neg_data = [] | |
| out_data = [] | |
| for tk in self.parser(sentence): | |
| if "ROOT" in tk.dep_: | |
| main_data = [ (tk,tk.dep_,tk.tag_,tk.i)] | |
| aux_data = [(t,t.dep_,t.tag_,t.i) for t in tk.children if t.dep_ == "aux"] | |
| neg_data = [(t,t.dep_,t.tag_,t.i) for t in tk.children if t.dep_ == "neg"] | |
| out_data = [aux_data, neg_data, main_data] | |
| break | |
| out_str = sorted([tup for list_ in out_data for tup in list_], key=lambda x: x[3]) | |
| out_str = ' '.join([t.text for t,_,_,_ in out_str]) | |
| return out_data, out_str | |
| def get_dobj( | |
| self, | |
| sentence: str, | |
| ) -> Tuple[ List[Tuple[str,str,str]], str]: | |
| out_data = [] | |
| for tk in self.parser(sentence): | |
| if "dobj" in tk.dep_: | |
| out_data = [(t,t.dep_,t.tag_)for t in tk.subtree] | |
| break | |
| out_str = ' '.join([t.text for t,_,_ in out_data]) | |
| return out_data, out_str | |
| def get_complement( | |
| self, | |
| subj: str, | |
| verb: str, | |
| dobj: str, | |
| full_sentence: str, | |
| ) -> str: | |
| concat_sentence = subj + ' ' + verb + ' ' + dobj | |
| diff = "" | |
| for tk in ndiff(concat_sentence.split(), full_sentence.split()): | |
| mark, word = tk[0], tk[2:] | |
| if mark == '+': | |
| diff += word + " " | |
| return diff.strip() | |
| def active2passive( | |
| self, | |
| active_sentence: str, | |
| debug: bool=False | |
| ) -> List[str]: | |
| active_sentence = active_sentence.strip() | |
| if active_sentence == "": | |
| raise RuntimeError( | |
| f"Error: The sentence does not be empty!" | |
| ) | |
| subj_data, subj_str = self.get_subj(active_sentence) | |
| if debug: print(subj_data) | |
| if subj_str == "": | |
| raise RuntimeError( | |
| f"Error: The sentence's subject has not been found or the sentence does not be the correct format!" | |
| ) | |
| verb_data, verb_str = self.get_verb(active_sentence) | |
| if debug: print(verb_data) | |
| if verb_str == "": | |
| raise RuntimeError( | |
| f"Error: The sentence's verb has not been found or the sentence does not be the correct format!" | |
| ) | |
| dobj_data, dobj_str = self.get_dobj(active_sentence) | |
| if debug: print(dobj_data) | |
| if dobj_str == "": | |
| raise RuntimeError( | |
| f"Error: The sentence's direct object has not been found or the sentence does not be the correct format!" | |
| ) | |
| complement = self.get_complement(subj_str, verb_str, dobj_str, active_sentence) | |
| # Get pasive subject | |
| p_subj = dobj_str | |
| # Get tense + participle verb | |
| verbal_tense = self.get_verbal_tense(verb_data) | |
| if debug: print(verbal_tense) | |
| if verbal_tense is None: | |
| raise RuntimeError( | |
| f"Error: The sentence does not be the correct format or the verbal tense has not been implemented yet!" | |
| ) | |
| _, neg_data, main_data = verb_data | |
| neg = " not" if len(neg_data) > 0 else "" | |
| gramatical_number = self.get_gramatical_number(dobj_data) | |
| if debug: print(gramatical_number) | |
| p_tobe = self.Tense[verbal_tense].value['tobe'][gramatical_number].format(neg) | |
| p_verb = self.verb2participle(main_data[0][0].text) | |
| # Convert active_object to pasive_agent | |
| p_agent = "by " | |
| for tk,_,tag in subj_data: | |
| word = tk.text | |
| if tag == 'PRP': | |
| word = self.subjp2objp(word) | |
| p_agent += word + " " | |
| # return f"{p_subj.capitalize()} {p_tobe} {p_verb} {p_agent.strip().lower()} {complement}" | |
| return [p_subj.capitalize(), p_tobe, p_verb, p_agent.strip().lower(), complement] |