Spaces:
Runtime error
Runtime error
"""
Pruthi2019: Combating Adversarial Misspellings with Robust Word Recognition
=============================================================================
"""
from textattack import Attack | |
from textattack.constraints.overlap import MaxWordsPerturbed | |
from textattack.constraints.pre_transformation import ( | |
MinWordLength, | |
RepeatModification, | |
StopwordModification, | |
) | |
from textattack.goal_functions import UntargetedClassification | |
from textattack.search_methods import GreedySearch | |
from textattack.transformations import ( | |
CompositeTransformation, | |
WordSwapNeighboringCharacterSwap, | |
WordSwapQWERTY, | |
WordSwapRandomCharacterDeletion, | |
WordSwapRandomCharacterInsertion, | |
) | |
from .attack_recipe import AttackRecipe | |
class Pruthi2019(AttackRecipe):
    """An implementation of the attack used in "Combating Adversarial
    Misspellings with Robust Word Recognition", Pruthi et al., 2019.

    This attack focuses on a small number of character-level changes that
    simulate common typos. It combines:

        - Swapping neighboring characters
        - Deleting characters
        - Inserting characters
        - Swapping characters for adjacent keys on a QWERTY keyboard.

    https://arxiv.org/abs/1905.11268

    :param model_wrapper: Model wrapper to attack; it is handed to the
        untargeted-classification goal function.
    :param max_num_word_swaps: Maximum number of modifications to allow.
    """

    # ``build`` takes no ``self``/``cls``; marking it as a static method keeps
    # ``Pruthi2019.build(...)`` working exactly as before and additionally
    # makes calls through an instance safe (the instance is no longer bound
    # to ``model_wrapper``).
    @staticmethod
    def build(model_wrapper, max_num_word_swaps=1):
        """Assemble the Pruthi et al. (2019) character-level typo attack.

        :param model_wrapper: Model wrapper passed to
            ``UntargetedClassification``.
        :param max_num_word_swaps: Upper bound given to ``MaxWordsPerturbed``.
            Defaults to 1.
        :return: An ``Attack`` built from the goal function, constraints,
            composite transformation and greedy search method created here.
        """
        # Every character-level transformation shares the same settings:
        # ignore the first and last letter of each word, as in the paper.
        char_edit_options = {
            "random_one": False,
            "skip_first_char": True,
            "skip_last_char": True,
        }

        # a combination of 4 different character-based transforms
        transformation = CompositeTransformation(
            [
                WordSwapNeighboringCharacterSwap(**char_edit_options),
                WordSwapRandomCharacterDeletion(**char_edit_options),
                WordSwapRandomCharacterInsertion(**char_edit_options),
                WordSwapQWERTY(**char_edit_options),
            ]
        )

        # only edit words of length >= 4, edit max_num_word_swaps words.
        # note that we also are not editing the same word twice, so
        # max_num_word_swaps is really the max number of character
        # changes that can be made. The paper looks at 1 and 2 char attacks.
        constraints = [
            MinWordLength(min_length=4),
            StopwordModification(),
            MaxWordsPerturbed(max_num_words=max_num_word_swaps),
            RepeatModification(),
        ]

        # untargeted attack
        goal_function = UntargetedClassification(model_wrapper)

        search_method = GreedySearch()

        return Attack(goal_function, constraints, transformation, search_method)