Update app.py
app.py
CHANGED
@@ -192,17 +192,19 @@ with gr.Blocks(theme=gr.themes.Default(), css=css) as demo:
     We will follow the SelfIE implementation in this space for concreteness. Patchscopes are so general that they encompass many other interpretation techniques too!!!
     ''', line_breaks=True)
 
-    gr.Markdown('**👾 The idea is really simple: models are able to understand their own hidden states by nature! 👾**',
-                # elem_classes=['explanation_accordion']
-                )
+    # gr.Markdown('**👾 The idea is really simple: models are able to understand their own hidden states by nature! 👾**',
+    #             # elem_classes=['explanation_accordion']
+    #             )
     gr.Markdown(
-    '''
+    '''
+    **👾 The idea is really simple: models are able to understand their own hidden states by nature! 👾**
+    According to the residual stream view ([nostalgebraist, 2020](https://www.lesswrong.com/posts/AcKRB8wDpdaN6v6ru/interpreting-gpt-the-logit-lens)), internal representations from different layers are transferable between layers.
     So we can inject a representation from (roughly) any layer to any layer! If I give a model a prompt of the form ``User: [X] Assistant: Sure, I'll repeat your message`` and replace the internal representation of ``[X]`` *during computation* with the hidden state we want to understand,
     we expect to get back a summary of the information that exists inside the hidden state. Since the model uses a roughly common latent space, it can understand representations from different layers and different runs!! How cool is that! 💯💯💯
     ''', line_breaks=True)
 
-    with gr.Column(scale=1):
-        gr.Markdown('<span style="font-size:180px;">🤗</span>')
+    # with gr.Column(scale=1):
+    #     gr.Markdown('<span style="font-size:180px;">🤗</span>')
 
     with gr.Group('Interpretation'):
         interpretation_prompt = gr.Text(suggested_interpretation_prompts[0], label='Interpretation Prompt')

@@ -233,7 +235,7 @@ with gr.Blocks(theme=gr.themes.Default(), css=css) as demo:
     use_gpu = False  # gr.Checkbox(value=False, label='Use GPU')
     progress_dummy = gr.Markdown('', elem_id='progress_dummy')
 
-    interpretation_bubbles = [gr.Textbox('', container=False, visible=False, elem_classes=['bubble',
+    interpretation_bubbles = [gr.Textbox('', label=f'Layer {i}', container=False, visible=False, elem_classes=['bubble',
                                          'even_bubble' if i % 2 == 0 else 'odd_bubble'])
                               for i in range(model.config.num_hidden_layers)]
 
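The new Markdown text describes the core trick: overwrite the placeholder's hidden state during the forward pass of an interpretation prompt. As a rough illustration (not this Space's implementation), here is a minimal hook-based sketch; the model name `gpt2`, the prompts, the layer indices, and the hook itself are assumptions for the example only.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumption: any decoder-only HF model works similarly; gpt2 is illustrative.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# 1) Run the text we want to inspect and keep every layer's hidden states.
source_ids = tokenizer("The Eiffel Tower is in Paris", return_tensors="pt").input_ids
with torch.no_grad():
    states = model(source_ids, output_hidden_states=True).hidden_states
vector = states[8][0, -1]  # hidden state to interpret (layer 8, last token)

# 2) Interpretation prompt with a placeholder token "X" to overwrite.
interp_ids = tokenizer("User: X\nAssistant: Sure, I'll repeat your message:",
                       return_tensors="pt").input_ids
patch_pos = next(i for i, t in enumerate(interp_ids[0])
                 if tokenizer.decode(t).strip() == "X")

# 3) Swap in the vector *during computation* via a forward hook on a block.
def patch(module, args, output):
    hidden = output[0]
    if hidden.shape[1] > patch_pos:      # patch only the full-prompt pass
        hidden[0, patch_pos] = vector    # inject the layer-8 state into layer 3
    return output

handle = model.transformer.h[3].register_forward_hook(patch)
try:
    out = model.generate(interp_ids, max_new_tokens=30)
finally:
    handle.remove()
print(tokenizer.decode(out[0, interp_ids.shape[1]:]))
```

The continuation generated after the patched pass should verbalize what the injected state encodes, which is what the app displays once per layer.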
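The second hunk gives every bubble a distinct `label=f'Layer {i}'`, tying each textbox to the layer it interprets. A self-contained toy sketch of how such hidden bubbles are typically driven in Gradio (the dummy interpreter, the button, and `num_layers = 12` are assumptions, not this Space's code):

```python
import gradio as gr

num_layers = 12  # assumption; the app uses model.config.num_hidden_layers

def interpret(prompt):
    # Hypothetical stand-in for the real per-layer interpretation.
    texts = [f"[Layer {i}] interpretation of: {prompt}" for i in range(num_layers)]
    # One update per bubble: fill in the text and reveal the textbox.
    return [gr.update(value=t, visible=True) for t in texts]

with gr.Blocks() as demo:
    interpretation_prompt = gr.Text(label="Interpretation Prompt")
    btn = gr.Button("Interpret")
    interpretation_bubbles = [gr.Textbox("", label=f"Layer {i}", container=False,
                                         visible=False,
                                         elem_classes=["bubble",
                                                       "even_bubble" if i % 2 == 0
                                                       else "odd_bubble"])
                              for i in range(num_layers)]
    btn.click(interpret, inputs=[interpretation_prompt],
              outputs=interpretation_bubbles)

demo.launch()
```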