""" | |
CLARE Recipe | |
============= | |
(Contextualized Perturbation for Textual Adversarial Attack) | |
""" | |

import transformers

from textattack import Attack
from textattack.constraints.pre_transformation import (
    RepeatModification,
    StopwordModification,
)
from textattack.constraints.semantics.sentence_encoders import UniversalSentenceEncoder
from textattack.goal_functions import UntargetedClassification
from textattack.search_methods import GreedySearch
from textattack.transformations import (
    CompositeTransformation,
    WordInsertionMaskedLM,
    WordMergeMaskedLM,
    WordSwapMaskedLM,
)

from .attack_recipe import AttackRecipe


class CLARE2020(AttackRecipe):
    """Li, Zhang, Peng, Chen, Brockett, Sun, Dolan.

    "Contextualized Perturbation for Textual Adversarial Attack" (Li et al., 2020)

    https://arxiv.org/abs/2009.07502

    This method uses greedy search with replace, merge, and insertion
    transformations that leverage a pretrained masked language model. It also
    uses the Universal Sentence Encoder (USE) similarity constraint.
    """

    @staticmethod
    def build(model_wrapper):
# "This paper presents CLARE, a ContextuaLized AdversaRial Example generation model | |
# that produces fluent and grammatical outputs through a mask-then-infill procedure. | |
# CLARE builds on a pre-trained masked language model and modifies the inputs in a context-aware manner. | |
# We propose three contex-tualized perturbations, Replace, Insert and Merge, allowing for generating outputs of | |
# varied lengths." | |
# | |
# "We experiment with a distilled version of RoBERTa (RoBERTa_{distill}; Sanh et al., 2019) | |
# as the masked language model for contextualized infilling." | |
# Because BAE and CLARE both use similar replacement papers, we use BAE's replacement method here. | |
        # The mask-then-infill procedure needs masked-LM logits at the [MASK] position,
        # so the checkpoint is loaded with AutoModelForMaskedLM (not AutoModelForCausalLM).
        shared_masked_lm = transformers.AutoModelForMaskedLM.from_pretrained(
            "distilroberta-base"
        )
        shared_tokenizer = transformers.AutoTokenizer.from_pretrained(
            "distilroberta-base"
        )
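
        # As a concrete illustration of the three perturbations on the input
        # "the movie was very good" (the infills below are hypothetical examples,
        # not outputs from the paper):
        #   Replace: "the movie was very [MASK]"      -> "the movie was very dull"
        #   Insert:  "the movie was [MASK] very good" -> "the movie was not very good"
        #   Merge:   "the movie was [MASK]"           -> "the movie was fine"
        #            (the two adjacent words "very good" collapse into one mask)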
        transformation = CompositeTransformation(
            [
                WordSwapMaskedLM(
                    method="bae",
                    masked_language_model=shared_masked_lm,
                    tokenizer=shared_tokenizer,
                    max_candidates=50,
                    min_confidence=5e-4,
                ),
                WordInsertionMaskedLM(
                    masked_language_model=shared_masked_lm,
                    tokenizer=shared_tokenizer,
                    max_candidates=50,
                    min_confidence=0.0,
                ),
                WordMergeMaskedLM(
                    masked_language_model=shared_masked_lm,
                    tokenizer=shared_tokenizer,
                    max_candidates=50,
                    min_confidence=5e-3,
                ),
            ]
        )
        #
        # Don't modify the same word twice or modify stopwords.
        #
        constraints = [RepeatModification(), StopwordModification()]
# "A common choice of sim(·,·) is to encode sentences using neural networks, | |
# and calculate their cosine similarity in the embedding space (Jin et al., 2020)." | |
# The original implementation uses similarity of 0.7. | |
use_constraint = UniversalSentenceEncoder( | |
threshold=0.7, | |
metric="cosine", | |
compare_against_original=True, | |
window_size=15, | |
skip_text_shorter_than_window=True, | |
) | |
constraints.append(use_constraint) | |
        # Goal is untargeted classification.
        # "The score is then the negative probability of predicting the gold label from f,
        # using [x_{adv}] as the input."
        goal_function = UntargetedClassification(model_wrapper)
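        # In other words, each candidate x_adv is scored as
        #   score(x_adv) = -P_f(y_gold | x_adv),
        # so maximizing the score minimizes the model's confidence in the gold label.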
# "To achieve this, we iteratively apply the actions, | |
# and first select those minimizing the probability of outputting the gold label y from f." | |
# | |
# "Only one of the three actions can be applied at each position, and we select the one with the highest score." | |
# | |
# "Actions are iteratively applied to the input, until an adversarial example is found or a limit of actions T | |
# is reached. | |
# Each step selects the highest-scoring action from the remaining ones." | |
# | |
search_method = GreedySearch() | |
return Attack(goal_function, constraints, transformation, search_method) | |
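

# A minimal usage sketch (run it from a separate script, since this module uses a
# relative import). It assumes a recent textattack version and the public
# "textattack/bert-base-uncased-SST-2" checkpoint; any HuggingFace
# sequence-classification model works in its place:
#
#     import transformers
#     from textattack.attack_recipes import CLARE2020
#     from textattack.models.wrappers import HuggingFaceModelWrapper
#
#     model = transformers.AutoModelForSequenceClassification.from_pretrained(
#         "textattack/bert-base-uncased-SST-2"
#     )
#     tokenizer = transformers.AutoTokenizer.from_pretrained(
#         "textattack/bert-base-uncased-SST-2"
#     )
#     model_wrapper = HuggingFaceModelWrapper(model, tokenizer)
#
#     attack = CLARE2020.build(model_wrapper)
#     # Attack.attack takes the input text and the gold label, and returns an
#     # AttackResult describing the perturbed text (or the failure).
#     result = attack.attack("this movie was surprisingly enjoyable", 1)
#     print(result)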