percepta-computer-as-llm/manim_project/scene.py

from manim import *
import numpy as np

# Color palette shared by every scene in this file (hex RGB strings).
BG = "#0f0f1a"  # scene background; assigned to camera.background_color in each construct()
ACCENT = "#4fc3f7"  # primary accent: most scene titles, the LLM/transformer boxes, key dots
ACCENT2 = "#ab47bc"  # secondary accent: only used for the row numbers in the recap scene
ACCENT3 = "#66bb6a"  # "positive" color: strengths, in-model column, hull-attention curve
WARN = "#ff7043"  # "negative" color: weaknesses, tool-use column, standard-attention curve
TEXT_COL = "#e0e0e0"  # default body-text color
DIM = "#666677"  # de-emphasized elements: subtitles, axes, dividers, connector arrows
# NOTE: defined after `from manim import *`, so this binding takes precedence over
# any GOLD name brought in by the star import.
GOLD = "#ffd54f"  # highlight color: breakthrough title, trace tokens, convex hull


class Scene1_Intro(Scene):
    """Opening scene: title card, then LLM strengths contrasted with weaknesses."""

    def construct(self):
        """Play the title card followed by the "great at" / "terrible at" lists."""
        self.camera.background_color = BG

        # Title card: headline with a dimmed tagline directly underneath.
        headline = Text("Can LLMs Be Computers?", font_size=56, color=ACCENT, weight=BOLD)
        tagline = Text("Executing programs inside transformers", font_size=28, color=DIM)
        tagline.next_to(headline, DOWN, buff=0.4)

        self.play(Write(headline, run_time=1.5))
        self.play(FadeIn(tagline, shift=UP * 0.2))
        self.wait(1.5)

        # Clear the title card before the comparison lists appear.
        self.play(FadeOut(headline), FadeOut(tagline))

        # Strengths: heading pinned to the top edge, bullets stacked below it.
        strengths_heading = Text("LLMs are great at…", font_size=32, color=ACCENT3)
        strengths_heading.to_edge(UP, buff=0.6)
        strength_lines = (
            "✓ Solving hard math (IMO gold-medal level)",
            "✓ Writing code & reasoning about algorithms",
            "✓ Understanding natural language",
        )
        strengths = VGroup(
            *(Text(line, font_size=24, color=TEXT_COL) for line in strength_lines)
        )
        strengths.arrange(DOWN, aligned_edge=LEFT, buff=0.25)
        strengths.next_to(strengths_heading, DOWN, buff=0.4)

        self.play(Write(strengths_heading))
        for bullet in strengths:
            self.play(FadeIn(bullet, shift=RIGHT * 0.3), run_time=0.5)
        self.wait(1)

        # Weaknesses: same layout, placed underneath the strengths list.
        weaknesses_heading = Text(
            "…but terrible at simple computation", font_size=32, color=WARN
        )
        weaknesses_heading.next_to(strengths, DOWN, buff=0.6)
        weakness_lines = (
            "✗ Multiplying two numbers reliably",
            "✗ Solving even easy Sudoku puzzles",
            "✗ Any task needing many exact steps",
        )
        weaknesses = VGroup(
            *(Text(line, font_size=24, color=WARN) for line in weakness_lines)
        )
        weaknesses.arrange(DOWN, aligned_edge=LEFT, buff=0.25)
        weaknesses.next_to(weaknesses_heading, DOWN, buff=0.4)

        self.play(Write(weaknesses_heading))
        for bullet in weaknesses:
            self.play(FadeIn(bullet, shift=RIGHT * 0.3), run_time=0.5)
        self.wait(2)

        # Fade out whatever is still on screen.
        self.play(*[FadeOut(mob) for mob in self.mobjects])


class Scene2_ToolUse(Scene):
    """How LLMs currently handle computation: tool use (the airplane analogy).

    The scene has two parts played back to back:

    1. A "Human -> Airplane" diagram with two captions underneath.
    2. An "LLM <-> Interpreter" diagram with a "sends code" arrow, a
       "returns result" arrow, and a closing note.
    """

    def construct(self):
        """Build and animate both diagrams, clearing the screen after each."""
        self.camera.background_color = BG

        # --- Airplane analogy ---
        analogy_title = Text("The Airplane Analogy", font_size=36, color=ACCENT, weight=BOLD)
        analogy_title.to_edge(UP, buff=0.5)
        self.play(Write(analogy_title))

        # Each labelled box is grouped first and then moved, so the box and
        # its label travel together and the label stays centered.
        human_label = Text("Human", font_size=28, color=TEXT_COL)
        human_box = RoundedRectangle(
            corner_radius=0.15, width=2.2, height=1.2, color=ACCENT
        )
        human_grp = VGroup(human_box, human_label).move_to(LEFT * 3)

        plane_label = Text("Airplane", font_size=28, color=TEXT_COL)
        plane_box = RoundedRectangle(
            corner_radius=0.15, width=2.2, height=1.2, color=ACCENT3
        )
        plane_grp = VGroup(plane_box, plane_label).move_to(RIGHT * 3)

        # The arrow is created after the boxes were moved, so it uses their
        # final edge positions.
        arrow = Arrow(human_box.get_right(), plane_box.get_left(), color=DIM, buff=0.2)
        arrow_label = Text("delegates flying", font_size=20, color=DIM).next_to(arrow, UP, buff=0.15)

        cant = Text("Humans can't fly.", font_size=24, color=WARN).next_to(
            VGroup(human_grp, plane_grp), DOWN, buff=0.8
        )
        but = Text("Building airplanes doesn't change that.", font_size=24, color=WARN).next_to(
            cant, DOWN, buff=0.25
        )

        self.play(FadeIn(human_grp), FadeIn(plane_grp))
        self.play(GrowArrow(arrow), FadeIn(arrow_label))
        self.wait(0.5)
        self.play(Write(cant))
        self.play(Write(but))
        self.wait(2)

        # Fade the diagram first and the title in a separate step. The title
        # is excluded by identity (`is not`): the intent is "every mobject
        # except this exact object", not a value comparison.
        self.play(*[FadeOut(m) for m in self.mobjects if m is not analogy_title])
        self.play(FadeOut(analogy_title))

        # --- Tool use diagram ---
        tool_title = Text("Today: LLMs Use External Tools", font_size=34, color=ACCENT, weight=BOLD)
        tool_title.to_edge(UP, buff=0.5)
        self.play(Write(tool_title))

        llm_box = RoundedRectangle(corner_radius=0.2, width=2.5, height=1.4, color=ACCENT)
        llm_label = Text("LLM", font_size=30, color=ACCENT, weight=BOLD)
        llm_grp = VGroup(llm_box, llm_label).move_to(LEFT * 3.5)

        interp_box = RoundedRectangle(corner_radius=0.2, width=3, height=1.4, color=ACCENT3)
        interp_label = Text("Interpreter", font_size=26, color=ACCENT3)
        interp_grp = VGroup(interp_box, interp_label).move_to(RIGHT * 3)

        # Request and response arrows share the same endpoints; shifting one
        # up and the other down by 0.2 keeps them from overlapping.
        a1 = Arrow(llm_box.get_right(), interp_box.get_left(), color=GOLD, buff=0.2).shift(UP * 0.2)
        a1_label = Text("sends code", font_size=18, color=GOLD).next_to(a1, UP, buff=0.1)
        a2 = Arrow(interp_box.get_left(), llm_box.get_right(), color=ACCENT3, buff=0.2).shift(DOWN * 0.2)
        a2_label = Text("returns result", font_size=18, color=ACCENT3).next_to(a2, DOWN, buff=0.1)

        note = Text(
            "The LLM describes computation\nbut never executes it.",
            font_size=22, color=WARN, line_spacing=1.3,
        ).next_to(VGroup(llm_grp, interp_grp), DOWN, buff=0.9)

        self.play(FadeIn(llm_grp), FadeIn(interp_grp))
        self.play(GrowArrow(a1), FadeIn(a1_label))
        self.play(GrowArrow(a2), FadeIn(a2_label))
        self.wait(0.5)
        self.play(Write(note))
        self.wait(2.5)

        # Clear everything that is still on screen.
        self.play(*[FadeOut(m) for m in self.mobjects])


class Scene3_KeyIdea(Scene):
    """Diagram of a virtual CPU and memory drawn inside a transformer box."""

    def construct(self):
        """Show the headline claim, then draw the nested block diagram."""
        self.camera.background_color = BG

        heading = Text("The Breakthrough", font_size=40, color=GOLD, weight=BOLD)
        heading.to_edge(UP, buff=0.5)
        self.play(Write(heading))

        claim = Text("Build a computer INSIDE the transformer.", font_size=30, color=TEXT_COL)
        claim.next_to(heading, DOWN, buff=0.6)
        self.play(Write(claim))
        self.wait(1)

        # Outer container representing the transformer, with its caption on top.
        outer = RoundedRectangle(
            corner_radius=0.25, width=9, height=4.5, color=ACCENT, stroke_width=3
        )
        outer.shift(DOWN * 0.5)
        outer_caption = Text("Transformer", font_size=22, color=ACCENT)
        outer_caption.next_to(outer, UP, buff=0.15)

        # Both inner components are positioned relative to the container's center.
        center = outer.get_center()

        cpu_frame = RoundedRectangle(
            corner_radius=0.15, width=3, height=2, color=GOLD, stroke_width=2
        )
        cpu_frame.move_to(center + LEFT * 2.2)
        cpu_caption = Text(
            "Virtual CPU\n(WebAssembly)", font_size=20, color=GOLD, line_spacing=1.2
        )
        cpu_caption.move_to(cpu_frame)

        memory_frame = RoundedRectangle(
            corner_radius=0.15, width=2.5, height=2, color=ACCENT3, stroke_width=2
        )
        memory_frame.move_to(center + RIGHT * 2)
        memory_caption = Text(
            "Memory\n& Stack", font_size=20, color=ACCENT3, line_spacing=1.2
        )
        memory_caption.move_to(memory_frame)

        # Connector from the CPU box to the memory box.
        link = Arrow(cpu_frame.get_right(), memory_frame.get_left(), color=DIM, buff=0.15)

        # Reveal each frame together with its caption, outermost first.
        for frame, caption in (
            (outer, outer_caption),
            (cpu_frame, cpu_caption),
            (memory_frame, memory_caption),
        ):
            self.play(Create(frame), Write(caption))
        self.play(GrowArrow(link))

        summary = Text(
            "Arbitrary C programs → tokens → executed by the model itself",
            font_size=22, color=ACCENT,
        )
        summary.next_to(outer, DOWN, buff=0.5)
        self.play(Write(summary))
        self.wait(2.5)

        self.play(*[FadeOut(mob) for mob in self.mobjects])


class Scene4_ToolVsInModel(Scene):
    """Two-column comparison of tool use and in-model execution for 3+5."""

    def construct(self):
        """Lay out both columns, then reveal the left rows followed by the right rows."""
        self.camera.background_color = BG

        heading = Text("Tool Use  vs  In-Model Execution", font_size=34, color=ACCENT, weight=BOLD)
        heading.to_edge(UP, buff=0.4)
        self.play(Write(heading))

        # Vertical dashed line separating the two columns.
        separator = DashedLine(UP * 2.5, DOWN * 2.5, color=DIM, dash_length=0.15)
        separator.shift(DOWN * 0.3)
        self.play(Create(separator))

        # ---- Left column: tool use ----
        tool_heading = Text("Tool Use", font_size=26, color=WARN, weight=BOLD)
        tool_heading.move_to(LEFT * 3.5 + UP * 2)
        self.play(Write(tool_heading))

        # (caption, optional code snippet, caption color, snippet color)
        tool_script = (
            ("LLM:", 'print(3+5)', ACCENT, GOLD),
            ("→ send to interpreter", "", DIM, DIM),
            ("← result: 8", "", ACCENT3, ACCENT3),
        )
        tool_rows = VGroup()
        row_y = 1.0
        for caption, snippet, caption_color, snippet_color in tool_script:
            caption_mob = Text(caption, font_size=20, color=caption_color)
            caption_mob.move_to(LEFT * 5 + UP * row_y)
            # Pin every caption's left edge to the same x so the rows line up.
            caption_mob.align_to(LEFT * 5.5, LEFT)
            parts = [caption_mob]
            if snippet:
                snippet_mob = Text(snippet, font_size=18, color=snippet_color, font="Monospace")
                snippet_mob.next_to(caption_mob, RIGHT, buff=0.2)
                parts.append(snippet_mob)
            tool_rows.add(VGroup(*parts))
            row_y -= 0.7

        tool_footer = Text(
            "Execution is opaque\n(black box)", font_size=18, color=WARN, line_spacing=1.2
        )
        tool_footer.move_to(LEFT * 3.5 + DOWN * 1.5)

        # ---- Right column: in-model execution ----
        model_heading = Text("In-Model", font_size=26, color=ACCENT3, weight=BOLD)
        model_heading.move_to(RIGHT * 3.5 + UP * 2)
        self.play(Write(model_heading))

        model_rows = VGroup()
        row_y = 1.0
        for instruction in ("i32.const 03", "i32.const 05", "i32.add → 08", "output(08)", "halt"):
            instruction_mob = Text(instruction, font_size=18, color=ACCENT3, font="Monospace")
            instruction_mob.move_to(RIGHT * 3.5 + UP * row_y)
            model_rows.add(instruction_mob)
            row_y -= 0.55

        model_footer = Text(
            "Every step visible\nin the token stream", font_size=18, color=ACCENT3, line_spacing=1.2
        )
        model_footer.move_to(RIGHT * 3.5 + DOWN * 2)

        # ---- Playback: left column first, then the right column ----
        for tool_row in tool_rows:
            self.play(FadeIn(tool_row, shift=RIGHT * 0.2), run_time=0.5)
        self.play(Write(tool_footer))
        self.wait(0.5)

        for model_row in model_rows:
            self.play(FadeIn(model_row, shift=LEFT * 0.2), run_time=0.4)
        self.play(Write(model_footer))
        self.wait(2.5)

        self.play(*[FadeOut(mob) for mob in self.mobjects])


class Scene5_AppendOnlyTrace(Scene):
    """Notebook-style view of a prompt row and the trace row generated from it."""

    def construct(self):
        """Draw the notebook, the prompt words, and the trace tokens one by one."""
        self.camera.background_color = BG

        heading = Text("How It Works: The Append-Only Notebook", font_size=32, color=ACCENT, weight=BOLD)
        heading.to_edge(UP, buff=0.4)
        self.play(Write(heading))

        # Notebook frame with its caption on top.
        nb_rect = Rectangle(width=7, height=4.5, color=DIM, stroke_width=1.5)
        nb_rect.shift(DOWN * 0.4)
        nb_caption = Text("Notebook (token stream)", font_size=18, color=DIM)
        nb_caption.next_to(nb_rect, UP, buff=0.1)
        self.play(Create(nb_rect), Write(nb_caption))

        # Prompt row: seven words, one unit apart, centered on the frame.
        # "runs" and "jumps" get the highlight color; the rest are dimmed.
        prompt = (
            ("the", DIM),
            ("cat", DIM),
            ("runs", ACCENT3),
            ("and", DIM),
            ("dog", DIM),
            ("jumps", ACCENT3),
            ("over", DIM),
        )
        prompt_caption = Text("PROMPT (input words):", font_size=18, color=ACCENT)
        prompt_caption.move_to(nb_rect.get_top() + DOWN * 0.4)
        self.play(Write(prompt_caption))

        word_mobs = VGroup(*[
            Text(word, font_size=22, color=shade, font="Monospace").move_to(
                nb_rect.get_top() + DOWN * 0.8 + RIGHT * (col - 3) * 1.0
            )
            for col, (word, shade) in enumerate(prompt)
        ])
        self.play(*[FadeIn(word_mob) for word_mob in word_mobs], run_time=0.8)
        self.wait(0.5)

        # Trace row caption.
        trace_caption = Text("EXECUTION TRACE (generated tokens):", font_size=18, color=GOLD)
        trace_caption.move_to(nb_rect.get_center() + UP * 0.3)
        self.play(Write(trace_caption))

        # One trace token per prompt word, laid out in the same columns.
        # The displayed value flips at the two highlighted words.
        trace_values = ("odd=0", "odd=0", "odd=1", "odd=1", "odd=1", "odd=0", "odd=0")
        trace_mobs = VGroup(*[
            Text(value, font_size=20, color=GOLD, font="Monospace").move_to(
                nb_rect.get_center() + DOWN * 0.3 + RIGHT * (col - 3) * 1.0
            )
            for col, value in enumerate(trace_values)
        ])

        rule = Text(
            "Each new token looks back at (1) the input word and (2) the previous trace token",
            font_size=17, color=TEXT_COL,
        )
        rule.move_to(nb_rect.get_bottom() + DOWN * 0.05)

        # Reveal each trace token with an arrow from its input word and, from
        # the second token on, another arrow from the preceding trace token.
        thin_arrow = dict(stroke_width=1.5, buff=0.08, max_tip_length_to_length_ratio=0.15)
        previous_token = None
        for word_mob, token in zip(word_mobs, trace_mobs):
            from_word = Arrow(word_mob.get_bottom(), token.get_top(), color=ACCENT, **thin_arrow)
            step = [FadeIn(token), GrowArrow(from_word)]
            if previous_token is not None:
                from_previous = Arrow(
                    previous_token.get_right(), token.get_left(), color=GOLD, **thin_arrow
                )
                step.append(GrowArrow(from_previous))
            self.play(*step, run_time=0.6)
            previous_token = token

        self.play(Write(rule))

        takeaway = Text(
            "Key: only 2 lookbacks per step — cost doesn't grow with length!",
            font_size=20, color=ACCENT3, weight=BOLD,
        )
        takeaway.next_to(nb_rect, DOWN, buff=0.6)
        self.play(Write(takeaway))
        self.wait(2.5)

        self.play(*[FadeOut(mob) for mob in self.mobjects])


class Scene6_QuadraticProblem(Scene):
    """The quadratic cost problem of standard attention.

    Plots two illustrative per-step cost curves on shared axes: a straight
    line labelled as standard attention (O(t) per step) and a logarithmic
    curve labelled as hull attention (O(log t) per step), followed by a
    caption comparing the two at one million steps.
    """

    def construct(self):
        """Draw the axes, both curves with their labels, and the closing caption."""
        self.camera.background_color = BG

        title = Text("The Problem: Attention Cost Grows", font_size=34, color=ACCENT, weight=BOLD)
        title.to_edge(UP, buff=0.5)
        self.play(Write(title))

        # Axes without tick numbers; the curves are qualitative, not to scale.
        ax = Axes(
            x_range=[0, 10, 2], y_range=[0, 100, 20],
            x_length=5, y_length=3.5,
            axis_config={"color": DIM, "include_numbers": False},
        ).shift(DOWN * 0.5)
        x_lab = Text("tokens generated (t)", font_size=18, color=DIM).next_to(ax.x_axis, DOWN, buff=0.3)
        # Rotate BEFORE positioning. next_to() measures the buff from the
        # mobject's current bounding box, so placing the label while it is
        # still horizontal and rotating afterwards leaves it roughly half its
        # text width further from the axis than the requested 0.3.
        y_lab = Text("work per step", font_size=18, color=DIM).rotate(PI / 2)
        y_lab.next_to(ax.y_axis, LEFT, buff=0.3)

        self.play(Create(ax), Write(x_lab), Write(y_lab))

        # Standard: linear cost per step → quadratic total
        std_graph = ax.plot(lambda x: x * 10, x_range=[0.1, 10], color=WARN, stroke_width=3)
        std_label = Text("Standard attention: O(t) per step", font_size=18, color=WARN)
        std_label.next_to(ax, RIGHT, buff=0.3).shift(UP * 1)

        self.play(Create(std_graph), Write(std_label), run_time=1.5)
        self.wait(1)

        # Log cost; the factor 8 only scales the curve so it is visible
        # against the 0-100 y-range.
        log_graph = ax.plot(lambda x: np.log2(x + 1) * 8, x_range=[0.1, 10], color=ACCENT3, stroke_width=3)
        log_label = Text("Hull attention: O(log t) per step", font_size=18, color=ACCENT3)
        log_label.next_to(std_label, DOWN, buff=0.4)

        self.play(Create(log_graph), Write(log_label), run_time=1.5)

        # Worked example for the caption: log2(1,000,000) is about 20.
        gap_text = Text(
            "At 1 million steps:\nstandard = 1,000,000 ops/step\nhull = ~20 ops/step",
            font_size=18, color=GOLD, line_spacing=1.3,
        ).next_to(ax, DOWN, buff=0.7)
        self.play(Write(gap_text))
        self.wait(3)

        # Clear everything that is still on screen.
        self.play(*[FadeOut(m) for m in self.mobjects])


class Scene7_2DHeads(Scene):
    """The key unlock: 2D attention heads and convex hull queries.

    Scatters 20 pseudo-random 2D "key" points on a number plane, outlines
    their convex hull, drops in a single "query" point, and closes with a
    caption about answering the lookup on the hull in O(log t).
    """

    def construct(self):
        """Draw the key scatter, its convex hull, the query point and the caption."""
        self.camera.background_color = BG

        title = Text("The Key Unlock: 2D Attention Heads", font_size=34, color=ACCENT, weight=BOLD)
        title.to_edge(UP, buff=0.4)
        self.play(Write(title))

        # Explain head dimension
        expl = Text(
            "Restrict each attention head to dimension 2\n→ keys & queries become 2D points",
            font_size=22, color=TEXT_COL, line_spacing=1.3,
        ).next_to(title, DOWN, buff=0.5)
        self.play(Write(expl))
        self.wait(1)

        # 2D scatter of keys. A private RandomState seeded with 42 produces
        # the same point cloud as seeding NumPy's global generator with 42,
        # but leaves the process-wide random state untouched.
        n_pts = 20
        rng = np.random.RandomState(42)
        pts = rng.randn(n_pts, 2) * 1.2
        plane = NumberPlane(
            x_range=[-3, 3, 1], y_range=[-3, 3, 1],
            x_length=5, y_length=4,
            background_line_style={"stroke_color": DIM, "stroke_width": 0.5},
            axis_config={"stroke_color": DIM},
        ).shift(DOWN * 0.7)

        # c2p maps plane coordinates to scene coordinates.
        dots = VGroup(*[
            Dot(plane.c2p(x, y), radius=0.06, color=ACCENT) for x, y in pts
        ])

        key_label = Text("Keys (past tokens)", font_size=16, color=ACCENT).next_to(plane, LEFT, buff=0.3)
        self.play(Create(plane), Write(key_label))
        self.play(*[FadeIn(d, scale=0.5) for d in dots], run_time=1)
        self.wait(0.5)

        # Convex hull. The import stays local so SciPy is only loaded when
        # this scene is actually rendered.
        from scipy.spatial import ConvexHull

        hull = ConvexHull(pts)
        # hull.vertices indexes the input points that lie on the hull, in
        # boundary order for 2D input. Polygon closes its own path, so the
        # first vertex must not be repeated at the end (that would add a
        # zero-length edge).
        hull_pts = [plane.c2p(x, y) for x, y in pts[hull.vertices]]
        hull_poly = Polygon(*hull_pts, color=GOLD, stroke_width=2, fill_opacity=0.08, fill_color=GOLD)

        hull_label = Text("Convex Hull", font_size=18, color=GOLD).next_to(plane, RIGHT, buff=0.3).shift(UP)
        self.play(Create(hull_poly), Write(hull_label))
        self.wait(0.5)

        # Query point
        q_pt = plane.c2p(1.5, 0.5)
        q_dot = Dot(q_pt, radius=0.1, color=WARN)
        q_label = Text("Query", font_size=16, color=WARN).next_to(q_dot, UR, buff=0.1)
        self.play(FadeIn(q_dot, scale=2), Write(q_label))

        insight = Text(
            "Finding the best-matching key = a geometric query on the hull\n"
            "→ binary search in O(log t) instead of scanning all t keys",
            font_size=18, color=ACCENT3, line_spacing=1.3,
        ).next_to(plane, DOWN, buff=0.5)
        self.play(Write(insight))
        self.wait(3)

        # Clear everything that is still on screen.
        self.play(*[FadeOut(m) for m in self.mobjects])


class Scene8_Results(Scene):
    """Three headline result cards followed by a Sudoku callout."""

    def construct(self):
        """Reveal the result cards left to right, then write the callout."""
        self.camera.background_color = BG

        heading = Text("Results: What This Enables", font_size=36, color=ACCENT, weight=BOLD)
        heading.to_edge(UP, buff=0.5)
        self.play(Write(heading))

        # (headline figure, caption, card color)
        card_specs = (
            ("30,000+ tok/s", "Execution speed on CPU", ACCENT3),
            ("Millions of steps", "Correct execution length", GOLD),
            ("100% accuracy", "On Sudoku benchmarks\n(incl. world's hardest)", ACCENT),
        )
        cards = VGroup(*[self._result_card(*spec) for spec in card_specs])
        cards.arrange(RIGHT, buff=0.6)
        cards.next_to(heading, DOWN, buff=0.8)

        for card in cards:
            self.play(FadeIn(card, shift=UP * 0.3), run_time=0.7)
            self.wait(0.5)

        # Callout placed underneath the row of cards.
        callout = Text(
            "Solves Arto Inkala's Sudoku (\"world's hardest\")\nin under 3 minutes — fully inside the transformer",
            font_size=20, color=TEXT_COL, line_spacing=1.3,
        )
        callout.next_to(cards, DOWN, buff=0.8)
        self.play(Write(callout))
        self.wait(2.5)

        self.play(*[FadeOut(mob) for mob in self.mobjects])

    def _result_card(self, big_text, small_text, color):
        """Return a rounded frame grouped with a bold headline over a small caption.

        ``big_text`` is drawn in ``color`` (as is the frame); ``small_text``
        uses the default body-text color. The result is a ``VGroup`` of
        ``(frame, text_stack)``.
        """
        headline = Text(big_text, font_size=28, color=color, weight=BOLD)
        caption = Text(small_text, font_size=16, color=TEXT_COL, line_spacing=1.2)
        text_stack = VGroup(headline, caption)
        text_stack.arrange(DOWN, buff=0.3)
        frame = RoundedRectangle(
            corner_radius=0.15, width=3.5, height=2.5, color=color, stroke_width=2
        )
        return VGroup(frame, text_stack)


class Scene9_Recap(Scene):
    """Closing scene: six numbered recap points and a final takeaway line."""

    def construct(self):
        """Reveal the numbered points one at a time, then write the takeaway."""
        self.camera.background_color = BG

        heading = Text("Recap", font_size=40, color=ACCENT, weight=BOLD)
        heading.to_edge(UP, buff=0.5)
        self.play(Write(heading))

        # (number, sentence, sentence color); numbers share one accent color.
        recap_points = (
            ("1", "LLMs struggle with long, exact computation", TEXT_COL),
            ("2", "Today we bolt on external tools (like airplanes for humans)", DIM),
            ("3", "This team built a real computer inside a transformer", ACCENT3),
            ("4", "C code → WebAssembly tokens → executed by the model itself", GOLD),
            ("5", "2D attention heads enable O(log t) lookups (exponentially faster)", ACCENT),
            ("6", "Result: millions of correct steps, 30k+ tokens/sec, 100% Sudoku", ACCENT3),
        )
        rows = VGroup(*[
            VGroup(
                Text(number, font_size=24, color=ACCENT2, weight=BOLD),
                Text(sentence, font_size=21, color=shade),
            ).arrange(RIGHT, buff=0.3)
            for number, sentence, shade in recap_points
        ])
        rows.arrange(DOWN, aligned_edge=LEFT, buff=0.35)
        rows.next_to(heading, DOWN, buff=0.6)

        for entry in rows:
            self.play(FadeIn(entry, shift=RIGHT * 0.3), run_time=0.6)
            self.wait(0.3)

        self.wait(1)

        closing = Text(
            "The model stops being a coordinator of computation\nand becomes a computer itself.",
            font_size=24, color=GOLD, weight=BOLD, line_spacing=1.3,
        )
        closing.next_to(rows, DOWN, buff=0.7)
        self.play(Write(closing, run_time=2))
        self.wait(3)

        self.play(*[FadeOut(mob) for mob in self.mobjects])
Interactive explainer and Manim visualization of Percepta's "Can LLMs Be Computers?"
- Interactive web app (interactive/) explaining how transformer weights execute deterministic WASM programs: softmax sharpening, the 2D parabola trick for exact memory lookup, a stack-machine step-through, and a full execution-trace visualization.
- Manim animation script (manim_project/scene.py) with 9 scenes covering the article's key concepts.
Co-authored-by: Ona <no-reply@ona.com>
2026-03-22 21:16:16 +00:00
			`from manim import *`
			`import numpy as np`

			`# Color palette`
			`BG = "#0f0f1a"`
			`ACCENT = "#4fc3f7"`
			`ACCENT2 = "#ab47bc"`
			`ACCENT3 = "#66bb6a"`
			`WARN = "#ff7043"`
			`TEXT_COL = "#e0e0e0"`
			`DIM = "#666677"`
			`GOLD = "#ffd54f"`


			`class Scene1_Intro(Scene):`
			`"""The big question: Can LLMs be computers?"""`

			`def construct(self):`
			`self.camera.background_color = BG`

			`title = Text("Can LLMs Be Computers?", font_size=56, color=ACCENT, weight=BOLD)`
			`sub = Text(`
			`"Executing programs inside transformers",`
			`font_size=28, color=DIM,`
			`).next_to(title, DOWN, buff=0.4)`
			`self.play(Write(title, run_time=1.5))`
			`self.play(FadeIn(sub, shift=UP * 0.2))`
			`self.wait(1.5)`

			`# Fade out title`
			`self.play(FadeOut(title), FadeOut(sub))`

			`# --- What LLMs are good at vs bad at ---`
			`good_title = Text("LLMs are great at…", font_size=32, color=ACCENT3)`
			`good_title.to_edge(UP, buff=0.6)`
			`good_items = VGroup(`
			`Text("✓ Solving hard math (IMO gold-medal level)", font_size=24, color=TEXT_COL),`
			`Text("✓ Writing code & reasoning about algorithms", font_size=24, color=TEXT_COL),`
			`Text("✓ Understanding natural language", font_size=24, color=TEXT_COL),`
			`).arrange(DOWN, aligned_edge=LEFT, buff=0.25).next_to(good_title, DOWN, buff=0.4)`

			`self.play(Write(good_title))`
			`for item in good_items:`
			`self.play(FadeIn(item, shift=RIGHT * 0.3), run_time=0.5)`
			`self.wait(1)`

			`bad_title = Text("…but terrible at simple computation", font_size=32, color=WARN)`
			`bad_title.next_to(good_items, DOWN, buff=0.6)`
			`bad_items = VGroup(`
			`Text("✗ Multiplying two numbers reliably", font_size=24, color=WARN),`
			`Text("✗ Solving even easy Sudoku puzzles", font_size=24, color=WARN),`
			`Text("✗ Any task needing many exact steps", font_size=24, color=WARN),`
			`).arrange(DOWN, aligned_edge=LEFT, buff=0.25).next_to(bad_title, DOWN, buff=0.4)`

			`self.play(Write(bad_title))`
			`for item in bad_items:`
			`self.play(FadeIn(item, shift=RIGHT * 0.3), run_time=0.5)`
			`self.wait(2)`
			`self.play(*[FadeOut(m) for m in self.mobjects])`


			`class Scene2_ToolUse(Scene):`
			`"""How LLMs currently handle computation: tool use (the airplane analogy)."""`

			`def construct(self):`
			`self.camera.background_color = BG`

			`# --- Airplane analogy ---`
			`analogy_title = Text("The Airplane Analogy", font_size=36, color=ACCENT, weight=BOLD)`
			`analogy_title.to_edge(UP, buff=0.5)`
			`self.play(Write(analogy_title))`

			`human_label = Text("Human", font_size=28, color=TEXT_COL)`
			`human_box = RoundedRectangle(`
			`corner_radius=0.15, width=2.2, height=1.2, color=ACCENT`
			`)`
			`human_grp = VGroup(human_box, human_label).move_to(LEFT * 3)`

			`plane_label = Text("Airplane", font_size=28, color=TEXT_COL)`
			`plane_box = RoundedRectangle(`
			`corner_radius=0.15, width=2.2, height=1.2, color=ACCENT3`
			`)`
			`plane_grp = VGroup(plane_box, plane_label).move_to(RIGHT * 3)`

			`arrow = Arrow(human_box.get_right(), plane_box.get_left(), color=DIM, buff=0.2)`
			`arrow_label = Text("delegates flying", font_size=20, color=DIM).next_to(arrow, UP, buff=0.15)`

			`cant = Text("Humans can't fly.", font_size=24, color=WARN).next_to(`
			`VGroup(human_grp, plane_grp), DOWN, buff=0.8`
			`)`
			`but = Text("Building airplanes doesn't change that.", font_size=24, color=WARN).next_to(`
			`cant, DOWN, buff=0.25`
			`)`

			`self.play(FadeIn(human_grp), FadeIn(plane_grp))`
			`self.play(GrowArrow(arrow), FadeIn(arrow_label))`
			`self.wait(0.5)`
			`self.play(Write(cant))`
			`self.play(Write(but))`
			`self.wait(2)`
			`self.play(*[FadeOut(m) for m in self.mobjects if m != analogy_title])`
			`self.play(FadeOut(analogy_title))`

			`# --- Tool use diagram ---`
			`tool_title = Text("Today: LLMs Use External Tools", font_size=34, color=ACCENT, weight=BOLD)`
			`tool_title.to_edge(UP, buff=0.5)`
			`self.play(Write(tool_title))`

			`llm_box = RoundedRectangle(corner_radius=0.2, width=2.5, height=1.4, color=ACCENT)`
			`llm_label = Text("LLM", font_size=30, color=ACCENT, weight=BOLD)`
			`llm_grp = VGroup(llm_box, llm_label).move_to(LEFT * 3.5)`

			`interp_box = RoundedRectangle(corner_radius=0.2, width=3, height=1.4, color=ACCENT3)`
			`interp_label = Text("Interpreter", font_size=26, color=ACCENT3)`
			`interp_grp = VGroup(interp_box, interp_label).move_to(RIGHT * 3)`

			`a1 = Arrow(llm_box.get_right(), interp_box.get_left(), color=GOLD, buff=0.2).shift(UP * 0.2)`
			`a1_label = Text("sends code", font_size=18, color=GOLD).next_to(a1, UP, buff=0.1)`
			`a2 = Arrow(interp_box.get_left(), llm_box.get_right(), color=ACCENT3, buff=0.2).shift(DOWN * 0.2)`
			`a2_label = Text("returns result", font_size=18, color=ACCENT3).next_to(a2, DOWN, buff=0.1)`

			`note = Text(`
			`"The LLM describes computation\nbut never executes it.",`
			`font_size=22, color=WARN, line_spacing=1.3,`
			`).next_to(VGroup(llm_grp, interp_grp), DOWN, buff=0.9)`

			`self.play(FadeIn(llm_grp), FadeIn(interp_grp))`
			`self.play(GrowArrow(a1), FadeIn(a1_label))`
			`self.play(GrowArrow(a2), FadeIn(a2_label))`
			`self.wait(0.5)`
			`self.play(Write(note))`
			`self.wait(2.5)`
			`self.play(*[FadeOut(m) for m in self.mobjects])`


			`class Scene3_KeyIdea(Scene):`
			`"""The breakthrough: building a computer INSIDE the transformer."""`

			`def construct(self):`
			`self.camera.background_color = BG`

			`title = Text("The Breakthrough", font_size=40, color=GOLD, weight=BOLD)`
			`title.to_edge(UP, buff=0.5)`
			`self.play(Write(title))`

			`idea = Text(`
			`"Build a computer INSIDE the transformer.",`
			`font_size=30, color=TEXT_COL,`
			`).next_to(title, DOWN, buff=0.6)`
			`self.play(Write(idea))`
			`self.wait(1)`

			`# Big transformer box`
			`tf_box = RoundedRectangle(`
			`corner_radius=0.25, width=9, height=4.5, color=ACCENT, stroke_width=3`
			`).shift(DOWN * 0.5)`
			`tf_label = Text("Transformer", font_size=22, color=ACCENT).next_to(tf_box, UP, buff=0.15)`

			`# CPU inside`
			`cpu_box = RoundedRectangle(`
			`corner_radius=0.15, width=3, height=2, color=GOLD, stroke_width=2`
			`).move_to(tf_box.get_center() + LEFT * 2.2)`
			`cpu_label = Text("Virtual CPU\n(WebAssembly)", font_size=20, color=GOLD, line_spacing=1.2).move_to(cpu_box)`

			`# Memory inside`
			`mem_box = RoundedRectangle(`
			`corner_radius=0.15, width=2.5, height=2, color=ACCENT3, stroke_width=2`
			`).move_to(tf_box.get_center() + RIGHT * 2)`
			`mem_label = Text("Memory\n& Stack", font_size=20, color=ACCENT3, line_spacing=1.2).move_to(mem_box)`

			`conn = Arrow(cpu_box.get_right(), mem_box.get_left(), color=DIM, buff=0.15)`

			`self.play(Create(tf_box), Write(tf_label))`
			`self.play(Create(cpu_box), Write(cpu_label))`
			`self.play(Create(mem_box), Write(mem_label))`
			`self.play(GrowArrow(conn))`

			`result = Text(`
			`"Arbitrary C programs → tokens → executed by the model itself",`
			`font_size=22, color=ACCENT,`
			`).next_to(tf_box, DOWN, buff=0.5)`
			`self.play(Write(result))`
			`self.wait(2.5)`
			`self.play(*[FadeOut(m) for m in self.mobjects])`


			`class Scene4_ToolVsInModel(Scene):`
			`"""Side-by-side: tool use vs in-model execution for 3+5."""`

			`def construct(self):`
			`self.camera.background_color = BG`

			`title = Text("Tool Use vs In-Model Execution", font_size=34, color=ACCENT, weight=BOLD)`
			`title.to_edge(UP, buff=0.4)`
			`self.play(Write(title))`

			`divider = DashedLine(UP * 2.5, DOWN * 2.5, color=DIM, dash_length=0.15).shift(DOWN * 0.3)`
			`self.play(Create(divider))`

			`# LEFT: tool use`
			`left_title = Text("Tool Use", font_size=26, color=WARN, weight=BOLD).move_to(LEFT * 3.5 + UP * 2)`
			`self.play(Write(left_title))`

			`left_steps = [`
			`("LLM:", 'print(3+5)', ACCENT, GOLD),`
			`("→ send to interpreter", "", DIM, DIM),`
			`("← result: 8", "", ACCENT3, ACCENT3),`
			`]`
			`left_grp = VGroup()`
			`y = 1.0`
			`for label_text, code_text, lc, cc in left_steps:`
			`row = VGroup()`
			`lab = Text(label_text, font_size=20, color=lc).move_to(LEFT * 5 + UP * y)`
			`lab.align_to(LEFT * 5.5, LEFT)`
			`row.add(lab)`
			`if code_text:`
			`cd = Text(code_text, font_size=18, color=cc, font="Monospace").next_to(lab, RIGHT, buff=0.2)`
			`row.add(cd)`
			`left_grp.add(row)`
			`y -= 0.7`

			`opaque = Text("Execution is opaque\n(black box)", font_size=18, color=WARN, line_spacing=1.2)`
			`opaque.move_to(LEFT * 3.5 + DOWN * 1.5)`

			`# RIGHT: in-model`
			`right_title = Text("In-Model", font_size=26, color=ACCENT3, weight=BOLD).move_to(RIGHT * 3.5 + UP * 2)`
			`self.play(Write(right_title))`

			`right_lines = [`
			`"i32.const 03",`
			`"i32.const 05",`
			`"i32.add → 08",`
			`"output(08)",`
			`"halt",`
			`]`
			`right_grp = VGroup()`
			`y = 1.0`
			`for line in right_lines:`
			`t = Text(line, font_size=18, color=ACCENT3, font="Monospace").move_to(RIGHT * 3.5 + UP * y)`
			`right_grp.add(t)`
			`y -= 0.55`

			`transparent = Text("Every step visible\nin the token stream", font_size=18, color=ACCENT3, line_spacing=1.2)`
			`transparent.move_to(RIGHT * 3.5 + DOWN * 2)`

			`for row in left_grp:`
			`self.play(FadeIn(row, shift=RIGHT * 0.2), run_time=0.5)`
			`self.play(Write(opaque))`
			`self.wait(0.5)`

			`for t in right_grp:`
			`self.play(FadeIn(t, shift=LEFT * 0.2), run_time=0.4)`
			`self.play(Write(transparent))`
			`self.wait(2.5)`
			`self.play(*[FadeOut(m) for m in self.mobjects])`


			class Scene5_AppendOnlyTrace(Scene):
			    """Animate a running-parity trace written into an append-only "notebook".

			    A row of prompt words is shown above a row of generated ``odd=…`` trace
			    tokens.  Each trace token fades in together with an arrow from the word
			    above it and, from the second token on, an arrow from the trace token
			    before it.
			    """

			    def construct(self):
			        self.camera.background_color = BG

			        heading = Text(
			            "How It Works: The Append-Only Notebook",
			            font_size=32, color=ACCENT, weight=BOLD,
			        )
			        heading.to_edge(UP, buff=0.4)
			        self.play(Write(heading))

			        # Notebook frame with its caption sitting just above it.
			        nb_rect = Rectangle(width=7, height=4.5, color=DIM, stroke_width=1.5)
			        nb_rect.shift(DOWN * 0.4)
			        nb_caption = Text("Notebook (token stream)", font_size=18, color=DIM)
			        nb_caption.next_to(nb_rect, UP, buff=0.1)
			        self.play(Create(nb_rect), Write(nb_caption))

			        # The frame is not moved again, so read its anchor points once.
			        nb_top = nb_rect.get_top()
			        nb_mid = nb_rect.get_center()

			        # Prompt row.  The two ACCENT3 words sit at the same positions where
			        # the trace below flips its "odd" value.
			        prompt = [
			            ("the", DIM),
			            ("cat", DIM),
			            ("runs", ACCENT3),
			            ("and", DIM),
			            ("dog", DIM),
			            ("jumps", ACCENT3),
			            ("over", DIM),
			        ]
			        prompt_caption = Text("PROMPT (input words):", font_size=18, color=ACCENT)
			        prompt_caption.move_to(nb_top + DOWN * 0.4)
			        self.play(Write(prompt_caption))

			        # Seven slots spaced one unit apart; slot 3 is horizontally centred.
			        word_row = nb_top + DOWN * 0.8
			        word_mobs = VGroup(*[
			            Text(word, font_size=22, color=shade, font="Monospace").move_to(
			                word_row + RIGHT * (slot - 3) * 1.0
			            )
			            for slot, (word, shade) in enumerate(prompt)
			        ])
			        self.play(*(FadeIn(mob) for mob in word_mobs), run_time=0.8)
			        self.wait(0.5)

			        # Trace row, laid out on the same seven slots as the words.
			        trace_caption = Text(
			            "EXECUTION TRACE (generated tokens):", font_size=18, color=GOLD,
			        )
			        trace_caption.move_to(nb_mid + UP * 0.3)
			        self.play(Write(trace_caption))

			        trace_states = ["odd=0", "odd=0", "odd=1", "odd=1", "odd=1", "odd=0", "odd=0"]
			        trace_row = nb_mid + DOWN * 0.3
			        trace_mobs = VGroup(*[
			            Text(state, font_size=20, color=GOLD, font="Monospace").move_to(
			                trace_row + RIGHT * (slot - 3) * 1.0
			            )
			            for slot, state in enumerate(trace_states)
			        ])

			        # Reveal one trace token per play() call, with its lookback arrows.
			        arrow_style = {
			            "stroke_width": 1.5,
			            "buff": 0.08,
			            "max_tip_length_to_length_ratio": 0.15,
			        }
			        previous = None
			        for word_mob, trace_mob in zip(word_mobs, trace_mobs):
			            from_word = Arrow(
			                word_mob.get_bottom(), trace_mob.get_top(),
			                color=ACCENT, **arrow_style,
			            )
			            step = [FadeIn(trace_mob), GrowArrow(from_word)]
			            if previous is not None:
			                # Every token after the first also points back at its predecessor.
			                from_prev = Arrow(
			                    previous.get_right(), trace_mob.get_left(),
			                    color=GOLD, **arrow_style,
			                )
			                step.append(GrowArrow(from_prev))
			            self.play(*step, run_time=0.6)
			            previous = trace_mob

			        rule = Text(
			            "Each new token looks back at (1) the input word and (2) the previous trace token",
			            font_size=17, color=TEXT_COL,
			        )
			        rule.move_to(nb_rect.get_bottom() + DOWN * 0.05)
			        self.play(Write(rule))

			        key_insight = Text(
			            "Key: only 2 lookbacks per step — cost doesn't grow with length!",
			            font_size=20, color=ACCENT3, weight=BOLD,
			        )
			        key_insight.next_to(nb_rect, DOWN, buff=0.6)
			        self.play(Write(key_insight))
			        self.wait(2.5)

			        # Clear everything currently on screen.
			        self.play(*(FadeOut(mob) for mob in self.mobjects))


			class Scene6_QuadraticProblem(Scene):
			    """Contrast per-step attention work: a linear curve against a log curve.

			    Draws unlabeled axes ("tokens generated (t)" vs. "work per step"), plots
			    ``10 * x`` for standard attention and ``8 * log2(x + 1)`` for hull
			    attention, then shows a text call-out comparing both at one million steps.
			    """

			    def construct(self):
			        self.camera.background_color = BG

			        title = Text("The Problem: Attention Cost Grows", font_size=34, color=ACCENT, weight=BOLD)
			        title.to_edge(UP, buff=0.5)
			        self.play(Write(title))

			        # Axes
			        ax = Axes(
			            x_range=[0, 10, 2], y_range=[0, 100, 20],
			            x_length=5, y_length=3.5,
			            axis_config={"color": DIM, "include_numbers": False},
			        ).shift(DOWN * 0.5)
			        x_lab = Text("tokens generated (t)", font_size=18, color=DIM).next_to(ax.x_axis, DOWN, buff=0.3)
			        # Rotate BEFORE positioning.  next_to() aligns bounding boxes, so
			        # placing the still-horizontal text and rotating it afterwards leaves
			        # the label about half its text length away from the axis instead of
			        # the requested buff.
			        y_lab = Text("work per step", font_size=18, color=DIM).rotate(PI / 2)
			        y_lab.next_to(ax.y_axis, LEFT, buff=0.3)

			        self.play(Create(ax), Write(x_lab), Write(y_lab))

			        # Standard: linear cost per step → quadratic total
			        std_graph = ax.plot(lambda x: x * 10, x_range=[0.1, 10], color=WARN, stroke_width=3)
			        std_label = Text("Standard attention: O(t) per step", font_size=18, color=WARN)
			        std_label.next_to(ax, RIGHT, buff=0.3).shift(UP * 1)

			        self.play(Create(std_graph), Write(std_label), run_time=1.5)
			        self.wait(1)

			        # Log cost (scaled by 8 so the curve is visible on the same y-range)
			        log_graph = ax.plot(lambda x: np.log2(x + 1) * 8, x_range=[0.1, 10], color=ACCENT3, stroke_width=3)
			        log_label = Text("Hull attention: O(log t) per step", font_size=18, color=ACCENT3)
			        log_label.next_to(std_label, DOWN, buff=0.4)

			        self.play(Create(log_graph), Write(log_label), run_time=1.5)

			        # Concrete numbers for the gap between the two curves.
			        gap_text = Text(
			            "At 1 million steps:\nstandard = 1,000,000 ops/step\nhull = ~20 ops/step",
			            font_size=18, color=GOLD, line_spacing=1.3,
			        ).next_to(ax, DOWN, buff=0.7)
			        self.play(Write(gap_text))
			        self.wait(3)

			        # Clear everything currently on screen.
			        self.play(*[FadeOut(m) for m in self.mobjects])


			class Scene7_2DHeads(Scene):
			    """Show 2-D attention keys, their convex hull, and a query point.

			    Twenty seeded random points are scattered on a number plane as "keys",
			    the convex hull of those points is outlined, a "Query" dot is added, and
			    a caption states the O(log t) lookup claim.
			    """

			    def construct(self):
			        self.camera.background_color = BG

			        title = Text("The Key Unlock: 2D Attention Heads", font_size=34, color=ACCENT, weight=BOLD)
			        title.to_edge(UP, buff=0.4)
			        self.play(Write(title))

			        # Explain head dimension
			        expl = Text(
			            "Restrict each attention head to dimension 2\n→ keys & queries become 2D points",
			            font_size=22, color=TEXT_COL, line_spacing=1.3,
			        ).next_to(title, DOWN, buff=0.5)
			        self.play(Write(expl))
			        self.wait(1)

			        # 2D scatter of keys.  The fixed seed keeps the picture identical
			        # from one render to the next.
			        np.random.seed(42)
			        n_pts = 20
			        pts = np.random.randn(n_pts, 2) * 1.2
			        plane = NumberPlane(
			            x_range=[-3, 3, 1], y_range=[-3, 3, 1],
			            x_length=5, y_length=4,
			            background_line_style={"stroke_color": DIM, "stroke_width": 0.5},
			            axis_config={"stroke_color": DIM},
			        ).shift(DOWN * 0.7)

			        dots = VGroup(*[
			            Dot(plane.c2p(x, y), radius=0.06, color=ACCENT) for x, y in pts
			        ])

			        key_label = Text("Keys (past tokens)", font_size=16, color=ACCENT).next_to(plane, LEFT, buff=0.3)
			        self.play(Create(plane), Write(key_label))
			        self.play(*[FadeIn(d, scale=0.5) for d in dots], run_time=1)
			        self.wait(0.5)

			        # Convex hull.  scipy is imported here so only this scene needs it.
			        from scipy.spatial import ConvexHull

			        hull = ConvexHull(pts)
			        # hull.vertices indexes into pts; scipy documents the 2-D order as
			        # counter-clockwise, so the corners can be passed on as they come.
			        # The first corner is NOT repeated at the end: Polygon closes its own
			        # outline, and a duplicate would add a zero-length edge that Create()
			        # spends part of its run time "drawing".
			        hull_pts = [plane.c2p(x, y) for x, y in pts[hull.vertices]]
			        hull_poly = Polygon(*hull_pts, color=GOLD, stroke_width=2, fill_opacity=0.08, fill_color=GOLD)

			        hull_label = Text("Convex Hull", font_size=18, color=GOLD).next_to(plane, RIGHT, buff=0.3).shift(UP)
			        self.play(Create(hull_poly), Write(hull_label))
			        self.wait(0.5)

			        # Query point, given in plane coordinates.
			        q_pt = plane.c2p(1.5, 0.5)
			        q_dot = Dot(q_pt, radius=0.1, color=WARN)
			        q_label = Text("Query", font_size=16, color=WARN).next_to(q_dot, UR, buff=0.1)
			        self.play(FadeIn(q_dot, scale=2), Write(q_label))

			        insight = Text(
			            "Finding the best-matching key = a geometric query on the hull\n"
			            "→ binary search in O(log t) instead of scanning all t keys",
			            font_size=18, color=ACCENT3, line_spacing=1.3,
			        ).next_to(plane, DOWN, buff=0.5)
			        self.play(Write(insight))
			        self.wait(3)

			        # Clear everything currently on screen.
			        self.play(*[FadeOut(m) for m in self.mobjects])


			class Scene8_Results(Scene):
			    """Show three headline-result cards followed by a Sudoku call-out."""

			    def construct(self):
			        self.camera.background_color = BG

			        heading = Text("Results: What This Enables", font_size=36, color=ACCENT, weight=BOLD)
			        heading.to_edge(UP, buff=0.5)
			        self.play(Write(heading))

			        # (headline, caption, colour) for each card, in left-to-right order.
			        card_specs = (
			            ("30,000+ tok/s", "Execution speed on CPU", ACCENT3),
			            ("Millions of steps", "Correct execution length", GOLD),
			            ("100% accuracy", "On Sudoku benchmarks\n(incl. world's hardest)", ACCENT),
			        )
			        results = VGroup(*[self._result_card(*spec) for spec in card_specs])
			        results.arrange(RIGHT, buff=0.6)
			        results.next_to(heading, DOWN, buff=0.8)

			        # Reveal the cards one at a time, pausing after each.
			        for card in results:
			            self.play(FadeIn(card, shift=UP * 0.3), run_time=0.7)
			            self.wait(0.5)

			        # Sudoku callout
			        sudoku_note = Text(
			            "Solves Arto Inkala's Sudoku (\"world's hardest\")\nin under 3 minutes — fully inside the transformer",
			            font_size=20, color=TEXT_COL, line_spacing=1.3,
			        )
			        sudoku_note.next_to(results, DOWN, buff=0.8)
			        self.play(Write(sudoku_note))
			        self.wait(2.5)

			        # Clear everything currently on screen.
			        self.play(*(FadeOut(mob) for mob in self.mobjects))

			    def _result_card(self, big_text, small_text, color):
			        """Build one result card.

			        Args:
			            big_text: Headline string, drawn bold in ``color``.
			            small_text: Caption string, drawn in ``TEXT_COL`` below the headline.
			            color: Colour used for both the outline and the headline.

			        Returns:
			            ``VGroup(outline, VGroup(headline, caption))``.
			        """
			        headline = Text(big_text, font_size=28, color=color, weight=BOLD)
			        caption = Text(small_text, font_size=16, color=TEXT_COL, line_spacing=1.2)
			        labels = VGroup(headline, caption)
			        labels.arrange(DOWN, buff=0.3)
			        outline = RoundedRectangle(
			            corner_radius=0.15, width=3.5, height=2.5, color=color, stroke_width=2,
			        )
			        return VGroup(outline, labels)


			class Scene9_Recap(Scene):
			    """Closing scene: six numbered recap lines, then the takeaway sentence."""

			    def construct(self):
			        self.camera.background_color = BG

			        heading = Text("Recap", font_size=40, color=ACCENT, weight=BOLD)
			        heading.to_edge(UP, buff=0.5)
			        self.play(Write(heading))

			        # (number, sentence, sentence colour) for each recap line.
			        recap = [
			            ("1", "LLMs struggle with long, exact computation", TEXT_COL),
			            ("2", "Today we bolt on external tools (like airplanes for humans)", DIM),
			            ("3", "This team built a real computer inside a transformer", ACCENT3),
			            ("4", "C code → WebAssembly tokens → executed by the model itself", GOLD),
			            ("5", "2D attention heads enable O(log t) lookups (exponentially faster)", ACCENT),
			            ("6", "Result: millions of correct steps, 30k+ tokens/sec, 100% Sudoku", ACCENT3),
			        ]

			        # Each row is a bold number followed by its sentence; rows are then
			        # stacked with their left edges aligned under the heading.
			        rows = VGroup(*[
			            VGroup(
			                Text(label, font_size=24, color=ACCENT2, weight=BOLD),
			                Text(sentence, font_size=21, color=shade),
			            ).arrange(RIGHT, buff=0.3)
			            for label, sentence, shade in recap
			        ])
			        rows.arrange(DOWN, aligned_edge=LEFT, buff=0.35)
			        rows.next_to(heading, DOWN, buff=0.6)

			        # Reveal the rows one at a time with a short pause after each.
			        for line in rows:
			            self.play(FadeIn(line, shift=RIGHT * 0.3), run_time=0.6)
			            self.wait(0.3)

			        self.wait(1)

			        takeaway = Text(
			            "The model stops being a coordinator of computation\nand becomes a computer itself.",
			            font_size=24, color=GOLD, weight=BOLD, line_spacing=1.3,
			        )
			        takeaway.next_to(rows, DOWN, buff=0.7)
			        self.play(Write(takeaway, run_time=2))
			        self.wait(3)

			        # Clear everything currently on screen.
			        self.play(*(FadeOut(mob) for mob in self.mobjects))