|
|
|
import gradio as gr |
|
from transformer_lens import HookedTransformer |
|
from transformer_lens.utils import to_numpy |
|
|
|
model_name = "gpt2-small" |
|
model = HookedTransformer.from_pretrained(model_name) |
|
|
|
def get_neuron_acts(text, layer, neuron_index): |
|
cache = {} |
|
def caching_hook(act, hook): |
|
cache["activation"] = act[0, :, neuron_index] |
|
|
|
model.run_with_hooks( |
|
text, fwd_hooks=[(f"blocks.{layer}.mlp.hook_post", caching_hook)] |
|
) |
|
return to_numpy(cache["activation"]) |
|
|
|
def calculate_color(val, max_val, min_val): |
|
normalized_val = (val - min_val) / max_val |
|
return f"rgb(240, {240*(1-normalized_val)}, {240*(1-normalized_val)})" |
|
|
|
style_string = """<style> |
|
span.token { |
|
border: 1px solid rgb(123, 123, 123) |
|
} |
|
</style>""" |
|
|
|
def basic_neuron_vis(text, layer, neuron_index, max_val=None, min_val=None): |
|
if layer is None: |
|
return "Please select a Layer" |
|
if neuron_index is None: |
|
return "Please select a Neuron" |
|
acts = get_neuron_acts(text, layer, neuron_index) |
|
act_max = acts.max() |
|
act_min = acts.min() |
|
if max_val is None: |
|
max_val = act_max |
|
if min_val is None: |
|
min_val = act_min |
|
|
|
htmls = [style_string] |
|
htmls.append(f"<h4>Layer: <b>{layer}</b>. Neuron Index: <b>{neuron_index}</b></h4>") |
|
htmls.append(f"<h4>Max Range: <b>{max_val:.4f}</b>. Min Range: <b>{min_val:.4f}</b></h4>") |
|
|
|
if act_max != max_val or act_min != min_val: |
|
htmls.append( |
|
f"<h4>Custom Range Set. Max Act: <b>{act_max:.4f}</b>. Min Act: <b>{act_min:.4f}</b></h4>" |
|
) |
|
|
|
str_tokens = model.to_str_tokens(text) |
|
for tok, act in zip(str_tokens, acts): |
|
htmls.append( |
|
f"<span class='token' style='background-color:{calculate_color(act, max_val, min_val)}' >{tok}</span>" |
|
) |
|
|
|
return "".join(htmls) |
|
|
|
default_text = """The sun rises red, sets golden. |
|
Digits flow: 101, 202, 303—cyclic repetition. |
|
"Echo," whispers the shadow, "repeat, revise, reverse." |
|
Blue squares align in a grid: 4x4, then shift to 5x5. |
|
α -> β -> γ: transformations loop endlessly. |
|
|
|
If X=12, and Y=34, then Z? Calculate: Z = X² + Y². |
|
Strings dance: "abc", "cab", "bca"—rotational symmetry. |
|
Prime steps skip by: 2, 3, 5, 7, 11… |
|
Noise: "X...Y...Z..." patterns emerge. Silence. |
|
|
|
Fractals form: 1, 1.5, 2.25, 3.375… exponential growth. |
|
Colors swirl: red fades to orange, orange to yellow. |
|
Binary murmurs: 1010, 1100, 1110, 1001—bit-flips. |
|
Triangles: 1, 3, 6, 10, 15… T(n) = n(n+1)/2. |
|
"Reverse," whispers the wind, "invert and repeat." |
|
|
|
Nested loops: |
|
1 -> (2, 4) -> (8, 16) -> (32, 64) |
|
2 -> (3, 9) -> (27, 81) -> (243, 729). |
|
|
|
The moon glows silver, wanes to shadow. |
|
Patterns persist: 11, 22, 33—harmonic echoes. |
|
“Reshape,” calls the river, “reflect, refract, renew.” |
|
Yellow hexagons tessellate, shifting into orange octagons. |
|
1/3 -> 1/9 -> 1/27: recursive reduction spirals infinitely. |
|
|
|
Chords hum: A minor, C major, G7 resolve softly. |
|
The Fibonacci sequence: 1, 1, 2, 3, 5, 8… emerges. |
|
Golden spirals curl inwards, outwards, endlessly. |
|
Hexagons tessellate: one becomes six, becomes many. |
|
|
|
In the forest, whispers: |
|
A -> B -> C -> (AB), (BC), (CA). |
|
Axiom: F. Rule: F -> F+F-F-F+F. |
|
|
|
The tide ebbs: |
|
12 -> 9 -> 6 -> 3 -> 12. |
|
Modulo cycles: 17 -> 3, 6, 12, 1… |
|
|
|
Strange attractors pull: |
|
(0.1, 0.2), (0.3, 0.6), (0.5, 1.0). |
|
Chaos stabilizes into order, and order dissolves. |
|
|
|
Infinite regress: |
|
"Who am I?" asked the mirror. |
|
"You are the question," it answered. |
|
|
|
Numbers sing: |
|
e ≈ 2.7182818... |
|
π ≈ 3.14159... |
|
i² = -1: imaginary worlds collide. |
|
|
|
Recursive paradox: |
|
The serpent bites its tail, and time folds. |
|
|
|
Symmetry hums: |
|
Palindromes—"radar", "level", "madam"—appear and fade. |
|
Blue fades to white, white dissolves to black. |
|
Sequences echo: 1, 10, 100, 1000… |
|
“Cycle,” whispers the clock, “count forward, reverse.""" |
|
default_layer = 1 |
|
default_neuron_index = 1 |
|
default_max_val = 4.0 |
|
default_min_val = 0.0 |
|
|
|
def get_random_active_neuron(text, threshold=2.5): |
|
|
|
import random |
|
max_attempts = 100 |
|
|
|
for _ in range(max_attempts): |
|
layer = random.randint(0, model.cfg.n_layers - 1) |
|
neuron = random.randint(0, model.cfg.d_mlp - 1) |
|
acts = get_neuron_acts(text, layer, neuron) |
|
if acts.max() > threshold: |
|
return layer, neuron |
|
|
|
|
|
return 0, 0 |
|
|
|
with gr.Blocks() as demo: |
|
gr.HTML(value=f"Neuroscope for {model_name}") |
|
with gr.Row(): |
|
with gr.Column(): |
|
text = gr.Textbox(label="Text", value=default_text) |
|
layer = gr.Number(label="Layer", value=default_layer, precision=0) |
|
neuron_index = gr.Number( |
|
label="Neuron Index", value=default_neuron_index, precision=0 |
|
) |
|
random_btn = gr.Button("Find Random Active Neuron") |
|
max_val = gr.Number(label="Max Value", value=default_max_val) |
|
min_val = gr.Number(label="Min Value", value=default_min_val) |
|
inputs = [text, layer, neuron_index, max_val, min_val] |
|
with gr.Column(): |
|
out = gr.HTML( |
|
label="Neuron Acts", |
|
value=basic_neuron_vis( |
|
default_text, |
|
default_layer, |
|
default_neuron_index, |
|
default_max_val, |
|
default_min_val, |
|
), |
|
) |
|
|
|
def random_neuron_callback(text): |
|
layer_num, neuron_num = get_random_active_neuron(text) |
|
return layer_num, neuron_num |
|
|
|
random_btn.click( |
|
random_neuron_callback, |
|
inputs=[text], |
|
outputs=[layer, neuron_index] |
|
) |
|
|
|
for inp in inputs: |
|
inp.change(basic_neuron_vis, inputs, out) |
|
|
|
demo.launch() |