Update app.py
app.py CHANGED
@@ -23,7 +23,8 @@ class GlobalState:
     model : Optional[PreTrainedModel] = None
     hidden_states : Optional[torch.Tensor] = None
     interpretation_prompt_template : str = '{prompt}'
-    original_prompt_template : str = '{prompt}'
+    original_prompt_template : str = 'User: [X]\n\nAnswer: {prompt}'
+    layers_format : str = 'model.layers.{k}'
 
 
 suggested_interpretation_prompts = [
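Both new fields are plain Python format strings: `original_prompt_template` now defaults to a chat-style wrapper that keeps a literal `[X]` placeholder, and `layers_format` names the model's decoder layers by index. A tiny illustration of how such strings expand with `str.format` (the values are the new defaults; the call site for the prompt template appears in a later hunk, while `layers_format` is consumed inside `InterpretationPrompt`, which this diff does not show):

```python
original_prompt_template = 'User: [X]\n\nAnswer: {prompt}'
layers_format = 'model.layers.{k}'

# Only '{prompt}' is a format field; '[X]' is not, so it stays in the text.
print(original_prompt_template.format(prompt='The capital of France is'))
# User: [X]
#
# Answer: The capital of France is

# The layer pattern turns an index into a dotted module path.
print(layers_format.format(k=3))   # model.layers.3
```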
@@ -46,6 +47,7 @@ def reset_model(model_name, *extra_components):
     model_path = model_args.pop('model_path')
     global_state.original_prompt_template = model_args.pop('original_prompt_template')
     global_state.interpretation_prompt_template = model_args.pop('interpretation_prompt_template')
+    global_state.layers_format = model_args.pop('layers_format')
     tokenizer_path = model_args.pop('tokenizer') if 'tokenizer' in model_args else model_path
     use_ctransformers = model_args.pop('ctransformers', False)
     AutoModelClass = CAutoModelForCausalLM if use_ctransformers else AutoModelForCausalLM
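`reset_model` now pops a mandatory `layers_format` key from the per-model arguments, alongside `model_path`, the two prompt templates, and the optional `tokenizer` and `ctransformers` entries, so every entry in `model_info` has to provide it. The `model_info` table itself is not part of this diff; a hypothetical entry shaped to match these pops (display names, paths, and values are illustrative only):

```python
# Hypothetical model_info entries; the keys mirror the pops in reset_model,
# the display names and values are illustrative only.
model_info = {
    'LLaMA 2 7B Chat': dict(
        model_path='meta-llama/Llama-2-7b-chat-hf',
        original_prompt_template='User: [X]\n\nAnswer: {prompt}',
        interpretation_prompt_template='{prompt}',
        layers_format='model.layers.{k}',       # LLaMA-style decoder layer names
        # tokenizer='...',                      # optional: falls back to model_path
        # ctransformers=True,                   # optional: use the CTransformers loader
    ),
    'GPT-2': dict(
        model_path='gpt2',
        original_prompt_template='User: [X]\n\nAnswer: {prompt}',
        interpretation_prompt_template='{prompt}',
        layers_format='transformer.h.{k}',      # GPT-2-style decoder layer names
    ),
}
```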
@@ -96,7 +98,7 @@ def run_interpretation(raw_interpretation_prompt, max_new_tokens, do_sample,
 
     # create an InterpretationPrompt object from raw_interpretation_prompt (after putting it in the right template)
     interpretation_prompt = global_state.interpretation_prompt_template.format(prompt=raw_interpretation_prompt, repeat=5)
-    interpretation_prompt = InterpretationPrompt(global_state.tokenizer, interpretation_prompt)
+    interpretation_prompt = InterpretationPrompt(global_state.tokenizer, interpretation_prompt, layers_format=global_state.layers_format)
 
     # generate the interpretations
     # generate = generate_interpretation_gpu if use_gpu else lambda interpretation_prompt, *args, **kwargs: interpretation_prompt.generate(*args, **kwargs)
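`InterpretationPrompt` now also receives `layers_format`, presumably so it can look up decoder blocks by name instead of assuming one architecture's module layout. Its implementation is not shown in this diff; a minimal sketch of the kind of lookup the format string enables, using PyTorch's `get_submodule`:

```python
import torch.nn as nn

def get_decoder_layer(model: nn.Module, layers_format: str, k: int) -> nn.Module:
    """Resolve the k-th decoder block by name, e.g. 'model.layers.{k}' for
    LLaMA-style models or 'transformer.h.{k}' for GPT-2-style models."""
    return model.get_submodule(layers_format.format(k=k))

# Usage (assuming a loaded causal LM bound to `model` and the new global field):
#   layer_5 = get_decoder_layer(model, global_state.layers_format, 5)
#   handle = layer_5.register_forward_pre_hook(...)   # attach a patching hook here
```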
@@ -138,23 +140,24 @@ with gr.Blocks(theme=gr.themes.Default(), css='styles.css') as demo:
 
     gr.Markdown(
         '''
-
-
-
-
+**👾 The idea is really simple: models are able to understand their own hidden states by nature! 👾**
+In line with the residual stream view ([nostalgebraist, 2020](https://www.lesswrong.com/posts/AcKRB8wDpdaN6v6ru/interpreting-gpt-the-logit-lens)), internal representations from different layers are transferable between layers.
+So we can inject a representation from (roughly) any layer into any layer! If we give a model a prompt of the form ``User: [X] Assistant: Sure! I'll repeat your message`` and replace the internal representation of ``[X]`` *during computation* with the hidden state we want to understand,
+we expect to get back a summary of the information that exists inside the hidden state, despite being from a different layer and a different run!! How cool is that! 💯💯💯
         ''', line_breaks=True)
 
     # with gr.Column(scale=1):
     #     gr.Markdown('<span style="font-size:180px;">🤖</span>')
 
     with gr.Group():
-        model_chooser = gr.Radio(label='Model', choices=list(model_info.keys()), value=model_name)
+        model_chooser = gr.Radio(label='Choose Your Model', choices=list(model_info.keys()), value=model_name)
 
     with gr.Blocks() as demo_blocks:
         gr.Markdown('## Choose Your Interpretation Prompt')
         with gr.Group('Interpretation'):
             interpretation_prompt = gr.Text(suggested_interpretation_prompts[0], label='Interpretation Prompt')
-            gr.Examples([[p] for p in suggested_interpretation_prompts],
+            interpretation_prompt_examples = gr.Examples([[p] for p in suggested_interpretation_prompts],
+                                                         [interpretation_prompt], cache_examples=False)
 
 
         gr.Markdown('## The Prompt to Analyze')
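The new intro text describes the mechanism: take a hidden state from one forward pass, then substitute it for the representation of a placeholder token in a second pass and let the model describe what it sees. A self-contained sketch of that two-pass procedure, with everything model-specific hedged: the model, prompts, layer and position choices are illustrative, the injection here targets every decoder layer's input, and the app's own `InterpretationPrompt` may do this differently (requires PyTorch ≥ 2.0 for `with_kwargs=True` hooks):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = 'gpt2'                                  # illustrative; any causal LM works
layers_format = 'transformer.h.{k}'                  # GPT-2-style layer naming
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).eval()

# Pass 1: run the prompt we want to analyze and keep one hidden state to inspect.
enc = tokenizer('The Eiffel Tower is located in', return_tensors='pt')
with torch.no_grad():
    out = model(**enc, output_hidden_states=True)
source_layer, source_pos = 6, -1                     # arbitrary choices for illustration
hidden = out.hidden_states[source_layer][0, source_pos]   # shape: (hidden_dim,)

# Pass 2: run an interpretation prompt and overwrite the placeholder token's
# representation at the input of every decoder layer, then let the model continue.
interp_enc = tokenizer('User: X\n\nAnswer: Sure! Your message says', return_tensors='pt')
ids = interp_enc['input_ids'][0].tolist()
placeholder_pos = next(i for i, t in enumerate(ids)
                       if tokenizer.decode([t]).strip() == 'X')

def make_pre_hook(replacement, pos):
    def pre_hook(module, args, kwargs):
        hidden_states = args[0].clone()              # (batch, seq_len, hidden_dim)
        if hidden_states.shape[1] > pos:             # only patch during the prefill pass
            hidden_states[:, pos] = replacement.to(hidden_states.dtype)
        return (hidden_states, *args[1:]), kwargs
    return pre_hook

handles = [
    model.get_submodule(layers_format.format(k=k))
         .register_forward_pre_hook(make_pre_hook(hidden, placeholder_pos), with_kwargs=True)
    for k in range(model.config.num_hidden_layers)
]
try:
    with torch.no_grad():
        generated = model.generate(**interp_enc, max_new_tokens=20, do_sample=False)
finally:
    for h in handles:
        h.remove()

print(tokenizer.decode(generated[0], skip_special_tokens=True))
```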
@@ -198,8 +201,8 @@ with gr.Blocks(theme=gr.themes.Default(), css='styles.css') as demo:
 
 
     # event listeners
-    extra_components = [
-
+    extra_components = [interpretation_prompt, interpretation_prompt_examples, original_prompt_raw, *tokens_container,
+                        original_prompt_btn, *interpretation_bubbles]
     model_chooser.change(reset_model, [model_chooser, *extra_components], extra_components)
 
     for i, btn in enumerate(tokens_container):
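The rebuilt `extra_components` list is passed to `model_chooser.change` as both inputs and outputs, so `reset_model` can read the current widgets and return replacements when a different model is selected. A stripped-down sketch of that wiring pattern; the component set and the no-op update function are placeholders, and the real list also contains the `gr.Examples` dataset, which needs extra care that this sketch skips:

```python
import gradio as gr

def reset_model_stub(model_name, *extra_components):
    # Stand-in for app.py's reset_model: it receives the current values of the extra
    # components and must return one updated value per output component, in order.
    return [gr.update() for _ in extra_components]   # no-op updates in this sketch

with gr.Blocks() as sketch:
    model_chooser = gr.Radio(['model-a', 'model-b'], label='Choose Your Model', value='model-a')
    prompt_box = gr.Textbox(label='Interpretation Prompt')
    analyze_btn = gr.Button('Analyze')
    extra_components = [prompt_box, analyze_btn]
    # Same wiring shape as the commit: the components appear in both inputs and outputs,
    # so selecting a different model can rebuild them in one event.
    model_chooser.change(reset_model_stub, [model_chooser, *extra_components], extra_components)
```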