"""Manim scenes for the explainer video "Can LLMs Be Computers?".

Each ``SceneN_*`` class is an independent Manim ``Scene``; its ``construct``
method builds the on-screen objects and plays the animations in order.
All scenes share the colour palette defined below and end by fading out
every mobject still on screen.
"""

from manim import *  # noqa: F401,F403 - Manim scripts conventionally star-import
import numpy as np

# Color palette
BG = "#0f0f1a"
ACCENT = "#4fc3f7"
ACCENT2 = "#ab47bc"
ACCENT3 = "#66bb6a"
WARN = "#ff7043"
TEXT_COL = "#e0e0e0"
DIM = "#666677"
GOLD = "#ffd54f"


class Scene1_Intro(Scene):
    """The big question: Can LLMs be computers?"""

    def construct(self) -> None:
        """Show the title card, then the "great at" / "terrible at" lists."""
        self.camera.background_color = BG

        title = Text("Can LLMs Be Computers?", font_size=56, color=ACCENT, weight=BOLD)
        sub = Text(
            "Executing programs inside transformers",
            font_size=28,
            color=DIM,
        ).next_to(title, DOWN, buff=0.4)

        self.play(Write(title, run_time=1.5))
        self.play(FadeIn(sub, shift=UP * 0.2))
        self.wait(1.5)

        # Fade out title
        self.play(FadeOut(title), FadeOut(sub))

        # --- What LLMs are good at vs bad at ---
        good_title = Text("LLMs are great at…", font_size=32, color=ACCENT3)
        good_title.to_edge(UP, buff=0.6)
        good_items = VGroup(
            Text("✓ Solving hard math (IMO gold-medal level)", font_size=24, color=TEXT_COL),
            Text("✓ Writing code & reasoning about algorithms", font_size=24, color=TEXT_COL),
            Text("✓ Understanding natural language", font_size=24, color=TEXT_COL),
        ).arrange(DOWN, aligned_edge=LEFT, buff=0.25).next_to(good_title, DOWN, buff=0.4)

        self.play(Write(good_title))
        for item in good_items:
            self.play(FadeIn(item, shift=RIGHT * 0.3), run_time=0.5)
        self.wait(1)

        bad_title = Text("…but terrible at simple computation", font_size=32, color=WARN)
        bad_title.next_to(good_items, DOWN, buff=0.6)
        bad_items = VGroup(
            Text("✗ Multiplying two numbers reliably", font_size=24, color=WARN),
            Text("✗ Solving even easy Sudoku puzzles", font_size=24, color=WARN),
            Text("✗ Any task needing many exact steps", font_size=24, color=WARN),
        ).arrange(DOWN, aligned_edge=LEFT, buff=0.25).next_to(bad_title, DOWN, buff=0.4)

        self.play(Write(bad_title))
        for item in bad_items:
            self.play(FadeIn(item, shift=RIGHT * 0.3), run_time=0.5)
        self.wait(2)

        self.play(*[FadeOut(m) for m in self.mobjects])


class Scene2_ToolUse(Scene):
    """How LLMs currently handle computation: tool use (the airplane analogy)."""

    def construct(self) -> None:
        """Play the airplane analogy, then the LLM <-> interpreter diagram."""
        self.camera.background_color = BG

        # --- Airplane analogy ---
        analogy_title = Text("The Airplane Analogy", font_size=36, color=ACCENT, weight=BOLD)
        analogy_title.to_edge(UP, buff=0.5)
        self.play(Write(analogy_title))

        human_label = Text("Human", font_size=28, color=TEXT_COL)
        human_box = RoundedRectangle(
            corner_radius=0.15, width=2.2, height=1.2, color=ACCENT
        )
        human_grp = VGroup(human_box, human_label).move_to(LEFT * 3)

        plane_label = Text("Airplane", font_size=28, color=TEXT_COL)
        plane_box = RoundedRectangle(
            corner_radius=0.15, width=2.2, height=1.2, color=ACCENT3
        )
        plane_grp = VGroup(plane_box, plane_label).move_to(RIGHT * 3)

        arrow = Arrow(human_box.get_right(), plane_box.get_left(), color=DIM, buff=0.2)
        arrow_label = Text("delegates flying", font_size=20, color=DIM).next_to(arrow, UP, buff=0.15)

        cant = Text("Humans can't fly.", font_size=24, color=WARN).next_to(
            VGroup(human_grp, plane_grp), DOWN, buff=0.8
        )
        but = Text("Building airplanes doesn't change that.", font_size=24, color=WARN).next_to(
            cant, DOWN, buff=0.25
        )

        self.play(FadeIn(human_grp), FadeIn(plane_grp))
        self.play(GrowArrow(arrow), FadeIn(arrow_label))
        self.wait(0.5)
        self.play(Write(cant))
        self.play(Write(but))
        self.wait(2)

        # Clear the analogy first and the heading afterwards; the heading is
        # excluded by identity, not by value comparison.
        self.play(*[FadeOut(m) for m in self.mobjects if m is not analogy_title])
        self.play(FadeOut(analogy_title))

        # --- Tool use diagram ---
        tool_title = Text("Today: LLMs Use External Tools", font_size=34, color=ACCENT, weight=BOLD)
        tool_title.to_edge(UP, buff=0.5)
        self.play(Write(tool_title))

        llm_box = RoundedRectangle(corner_radius=0.2, width=2.5, height=1.4, color=ACCENT)
        llm_label = Text("LLM", font_size=30, color=ACCENT, weight=BOLD)
        llm_grp = VGroup(llm_box, llm_label).move_to(LEFT * 3.5)

        interp_box = RoundedRectangle(corner_radius=0.2, width=3, height=1.4, color=ACCENT3)
        interp_label = Text("Interpreter", font_size=26, color=ACCENT3)
        interp_grp = VGroup(interp_box, interp_label).move_to(RIGHT * 3)

        # The request and reply arrows are nudged apart vertically so they
        # do not lie on top of each other.
        a1 = Arrow(
            llm_box.get_right(), interp_box.get_left(), color=GOLD, buff=0.2
        ).shift(UP * 0.2)
        a1_label = Text("sends code", font_size=18, color=GOLD).next_to(a1, UP, buff=0.1)
        a2 = Arrow(
            interp_box.get_left(), llm_box.get_right(), color=ACCENT3, buff=0.2
        ).shift(DOWN * 0.2)
        a2_label = Text("returns result", font_size=18, color=ACCENT3).next_to(a2, DOWN, buff=0.1)

        note = Text(
            "The LLM describes computation\nbut never executes it.",
            font_size=22,
            color=WARN,
            line_spacing=1.3,
        ).next_to(VGroup(llm_grp, interp_grp), DOWN, buff=0.9)

        self.play(FadeIn(llm_grp), FadeIn(interp_grp))
        self.play(GrowArrow(a1), FadeIn(a1_label))
        self.play(GrowArrow(a2), FadeIn(a2_label))
        self.wait(0.5)
        self.play(Write(note))
        self.wait(2.5)

        self.play(*[FadeOut(m) for m in self.mobjects])


class Scene3_KeyIdea(Scene):
    """The breakthrough: building a computer INSIDE the transformer."""

    def construct(self) -> None:
        """Draw a transformer box containing a virtual CPU and memory."""
        self.camera.background_color = BG

        title = Text("The Breakthrough", font_size=40, color=GOLD, weight=BOLD)
        title.to_edge(UP, buff=0.5)
        self.play(Write(title))

        idea = Text(
            "Build a computer INSIDE the transformer.",
            font_size=30,
            color=TEXT_COL,
        ).next_to(title, DOWN, buff=0.6)
        self.play(Write(idea))
        self.wait(1)

        # Big transformer box
        tf_box = RoundedRectangle(
            corner_radius=0.25, width=9, height=4.5, color=ACCENT, stroke_width=3
        ).shift(DOWN * 0.5)
        tf_label = Text("Transformer", font_size=22, color=ACCENT).next_to(tf_box, UP, buff=0.15)

        # CPU inside
        cpu_box = RoundedRectangle(
            corner_radius=0.15, width=3, height=2, color=GOLD, stroke_width=2
        ).move_to(tf_box.get_center() + LEFT * 2.2)
        cpu_label = Text(
            "Virtual CPU\n(WebAssembly)", font_size=20, color=GOLD, line_spacing=1.2
        ).move_to(cpu_box)

        # Memory inside
        mem_box = RoundedRectangle(
            corner_radius=0.15, width=2.5, height=2, color=ACCENT3, stroke_width=2
        ).move_to(tf_box.get_center() + RIGHT * 2)
        mem_label = Text(
            "Memory\n& Stack", font_size=20, color=ACCENT3, line_spacing=1.2
        ).move_to(mem_box)

        conn = Arrow(cpu_box.get_right(), mem_box.get_left(), color=DIM, buff=0.15)

        self.play(Create(tf_box), Write(tf_label))
        self.play(Create(cpu_box), Write(cpu_label))
        self.play(Create(mem_box), Write(mem_label))
        self.play(GrowArrow(conn))

        result = Text(
            "Arbitrary C programs → tokens → executed by the model itself",
            font_size=22,
            color=ACCENT,
        ).next_to(tf_box, DOWN, buff=0.5)
        self.play(Write(result))
        self.wait(2.5)

        self.play(*[FadeOut(m) for m in self.mobjects])


class Scene4_ToolVsInModel(Scene):
    """Side-by-side: tool use vs in-model execution for 3+5."""

    def construct(self) -> None:
        """Build the left (tool use) and right (in-model) columns and reveal them."""
        self.camera.background_color = BG

        title = Text("Tool Use vs In-Model Execution", font_size=34, color=ACCENT, weight=BOLD)
        title.to_edge(UP, buff=0.4)
        self.play(Write(title))

        divider = DashedLine(UP * 2.5, DOWN * 2.5, color=DIM, dash_length=0.15).shift(DOWN * 0.3)
        self.play(Create(divider))

        # LEFT: tool use
        left_title = Text("Tool Use", font_size=26, color=WARN, weight=BOLD).move_to(
            LEFT * 3.5 + UP * 2
        )
        self.play(Write(left_title))

        # Each entry: (label text, optional code text, label colour, code colour).
        left_steps = [
            ("LLM:", 'print(3+5)', ACCENT, GOLD),
            ("→ send to interpreter", "", DIM, DIM),
            ("← result: 8", "", ACCENT3, ACCENT3),
        ]
        left_grp = VGroup()
        y = 1.0
        for label_text, code_text, lc, cc in left_steps:
            row = VGroup()
            lab = Text(label_text, font_size=20, color=lc).move_to(LEFT * 5 + UP * y)
            # Left-align every row on the same x coordinate.
            lab.align_to(LEFT * 5.5, LEFT)
            row.add(lab)
            if code_text:
                cd = Text(code_text, font_size=18, color=cc, font="Monospace").next_to(
                    lab, RIGHT, buff=0.2
                )
                row.add(cd)
            left_grp.add(row)
            y -= 0.7

        opaque = Text("Execution is opaque\n(black box)", font_size=18, color=WARN, line_spacing=1.2)
        opaque.move_to(LEFT * 3.5 + DOWN * 1.5)

        # RIGHT: in-model
        right_title = Text("In-Model", font_size=26, color=ACCENT3, weight=BOLD).move_to(
            RIGHT * 3.5 + UP * 2
        )
        self.play(Write(right_title))

        right_lines = [
            "i32.const 03",
            "i32.const 05",
            "i32.add → 08",
            "output(08)",
            "halt",
        ]
        right_grp = VGroup()
        y = 1.0
        for line in right_lines:
            t = Text(line, font_size=18, color=ACCENT3, font="Monospace").move_to(
                RIGHT * 3.5 + UP * y
            )
            right_grp.add(t)
            y -= 0.55

        transparent = Text(
            "Every step visible\nin the token stream",
            font_size=18,
            color=ACCENT3,
            line_spacing=1.2,
        )
        transparent.move_to(RIGHT * 3.5 + DOWN * 2)

        for row in left_grp:
            self.play(FadeIn(row, shift=RIGHT * 0.2), run_time=0.5)
        self.play(Write(opaque))
        self.wait(0.5)

        for t in right_grp:
            self.play(FadeIn(t, shift=LEFT * 0.2), run_time=0.4)
        self.play(Write(transparent))
        self.wait(2.5)

        self.play(*[FadeOut(m) for m in self.mobjects])


class Scene5_AppendOnlyTrace(Scene):
    """Computation as an append-only trace — the notebook analogy."""

    def construct(self) -> None:
        """Show prompt words, then a trace row with arrows to what each token reads."""
        self.camera.background_color = BG

        title = Text(
            "How It Works: The Append-Only Notebook", font_size=32, color=ACCENT, weight=BOLD
        )
        title.to_edge(UP, buff=0.4)
        self.play(Write(title))

        # Notebook visual
        nb_rect = Rectangle(width=7, height=4.5, color=DIM, stroke_width=1.5).shift(DOWN * 0.4)
        nb_label = Text("Notebook (token stream)", font_size=18, color=DIM).next_to(
            nb_rect, UP, buff=0.1
        )
        self.play(Create(nb_rect), Write(nb_label))

        # Lines
        prompt_words = ["the", "cat", "runs", "and", "dog", "jumps", "over"]
        # One colour per prompt word; the ACCENT3 entries are the words at
        # which the "odd" value in the trace below flips.
        colors = [DIM, DIM, ACCENT3, DIM, DIM, ACCENT3, DIM]

        prompt_label = Text("PROMPT (input words):", font_size=18, color=ACCENT).move_to(
            nb_rect.get_top() + DOWN * 0.4
        )
        self.play(Write(prompt_label))

        word_mobs = VGroup()
        for i, (w, c) in enumerate(zip(prompt_words, colors)):
            t = Text(w, font_size=22, color=c, font="Monospace")
            # (i - 3) centres the seven columns on the notebook's midline.
            t.move_to(nb_rect.get_top() + DOWN * 0.8 + RIGHT * (i - 3) * 1.0)
            word_mobs.add(t)

        self.play(*[FadeIn(w) for w in word_mobs], run_time=0.8)
        self.wait(0.5)

        # Trace section
        trace_label = Text(
            "EXECUTION TRACE (generated tokens):", font_size=18, color=GOLD
        ).move_to(nb_rect.get_center() + UP * 0.3)
        self.play(Write(trace_label))

        # Show parity counting step by step
        trace_vals = ["odd=0", "odd=0", "odd=1", "odd=1", "odd=1", "odd=0", "odd=0"]
        trace_mobs = VGroup()
        for i, val in enumerate(trace_vals):
            t = Text(val, font_size=20, color=GOLD, font="Monospace")
            t.move_to(nb_rect.get_center() + DOWN * 0.3 + RIGHT * (i - 3) * 1.0)
            trace_mobs.add(t)

        rule = Text(
            "Each new token looks back at (1) the input word and (2) the previous trace token",
            font_size=17,
            color=TEXT_COL,
        ).move_to(nb_rect.get_bottom() + DOWN * 0.05)

        for i, tm in enumerate(trace_mobs):
            anims = [FadeIn(tm)]
            # Draw arrows from word and previous trace
            arr1 = Arrow(
                word_mobs[i].get_bottom(),
                tm.get_top(),
                color=ACCENT,
                stroke_width=1.5,
                buff=0.08,
                max_tip_length_to_length_ratio=0.15,
            )
            anims.append(GrowArrow(arr1))
            if i > 0:  # the first trace token has no predecessor
                arr2 = Arrow(
                    trace_mobs[i - 1].get_right(),
                    tm.get_left(),
                    color=GOLD,
                    stroke_width=1.5,
                    buff=0.08,
                    max_tip_length_to_length_ratio=0.15,
                )
                anims.append(GrowArrow(arr2))
            self.play(*anims, run_time=0.6)

        self.play(Write(rule))

        key_insight = Text(
            "Key: only 2 lookbacks per step — cost doesn't grow with length!",
            font_size=20,
            color=ACCENT3,
            weight=BOLD,
        ).next_to(nb_rect, DOWN, buff=0.6)
        self.play(Write(key_insight))
        self.wait(2.5)

        self.play(*[FadeOut(m) for m in self.mobjects])


class Scene6_QuadraticProblem(Scene):
    """The quadratic cost problem of standard attention."""

    def construct(self) -> None:
        """Plot a linear per-step cost curve against a logarithmic one."""
        self.camera.background_color = BG

        title = Text("The Problem: Attention Cost Grows", font_size=34, color=ACCENT, weight=BOLD)
        title.to_edge(UP, buff=0.5)
        self.play(Write(title))

        # Axes
        ax = Axes(
            x_range=[0, 10, 2],
            y_range=[0, 100, 20],
            x_length=5,
            y_length=3.5,
            axis_config={"color": DIM, "include_numbers": False},
        ).shift(DOWN * 0.5)
        x_lab = Text("tokens generated (t)", font_size=18, color=DIM).next_to(
            ax.x_axis, DOWN, buff=0.3
        )
        y_lab = (
            Text("work per step", font_size=18, color=DIM)
            .next_to(ax.y_axis, LEFT, buff=0.3)
            .rotate(PI / 2)
        )

        self.play(Create(ax), Write(x_lab), Write(y_lab))

        # Standard: linear cost per step → quadratic total
        std_graph = ax.plot(lambda x: x * 10, x_range=[0.1, 10], color=WARN, stroke_width=3)
        std_label = Text("Standard attention: O(t) per step", font_size=18, color=WARN)
        std_label.next_to(ax, RIGHT, buff=0.3).shift(UP * 1)
        self.play(Create(std_graph), Write(std_label), run_time=1.5)
        self.wait(1)

        # Log cost
        log_graph = ax.plot(
            lambda x: np.log2(x + 1) * 8, x_range=[0.1, 10], color=ACCENT3, stroke_width=3
        )
        log_label = Text("Hull attention: O(log t) per step", font_size=18, color=ACCENT3)
        log_label.next_to(std_label, DOWN, buff=0.4)
        self.play(Create(log_graph), Write(log_label), run_time=1.5)

        gap_text = Text(
            "At 1 million steps:\nstandard = 1,000,000 ops/step\nhull = ~20 ops/step",
            font_size=18,
            color=GOLD,
            line_spacing=1.3,
        ).next_to(ax, DOWN, buff=0.7)
        self.play(Write(gap_text))
        self.wait(3)

        self.play(*[FadeOut(m) for m in self.mobjects])


class Scene7_2DHeads(Scene):
    """The key unlock: 2D attention heads and convex hull queries."""

    def construct(self) -> None:
        """Scatter random 2D keys, outline their convex hull and add a query point."""
        self.camera.background_color = BG

        title = Text("The Key Unlock: 2D Attention Heads", font_size=34, color=ACCENT, weight=BOLD)
        title.to_edge(UP, buff=0.4)
        self.play(Write(title))

        # Explain head dimension
        expl = Text(
            "Restrict each attention head to dimension 2\n→ keys & queries become 2D points",
            font_size=22,
            color=TEXT_COL,
            line_spacing=1.3,
        ).next_to(title, DOWN, buff=0.5)
        self.play(Write(expl))
        self.wait(1)

        # 2D scatter of keys
        np.random.seed(42)  # fixed seed so every render shows the same points
        n_pts = 20
        pts = np.random.randn(n_pts, 2) * 1.2

        plane = NumberPlane(
            x_range=[-3, 3, 1],
            y_range=[-3, 3, 1],
            x_length=5,
            y_length=4,
            background_line_style={"stroke_color": DIM, "stroke_width": 0.5},
            axis_config={"stroke_color": DIM},
        ).shift(DOWN * 0.7)

        dots = VGroup()
        for pt in pts:
            d = Dot(plane.c2p(pt[0], pt[1]), radius=0.06, color=ACCENT)
            dots.add(d)

        key_label = Text("Keys (past tokens)", font_size=16, color=ACCENT).next_to(
            plane, LEFT, buff=0.3
        )

        self.play(Create(plane), Write(key_label))
        self.play(*[FadeIn(d, scale=0.5) for d in dots], run_time=1)
        self.wait(0.5)

        # Convex hull
        # Imported here rather than at module level so that only this scene
        # needs SciPy to be installed.
        from scipy.spatial import ConvexHull  # noqa: E402

        hull = ConvexHull(pts)
        # Polygon closes its own outline, so the first vertex must not be
        # appended again; doing so adds a zero-length closing edge.
        hull_pts = [plane.c2p(pts[i][0], pts[i][1]) for i in hull.vertices]
        hull_poly = Polygon(
            *hull_pts, color=GOLD, stroke_width=2, fill_opacity=0.08, fill_color=GOLD
        )
        hull_label = (
            Text("Convex Hull", font_size=18, color=GOLD)
            .next_to(plane, RIGHT, buff=0.3)
            .shift(UP)
        )

        self.play(Create(hull_poly), Write(hull_label))
        self.wait(0.5)

        # Query point
        q_pt = plane.c2p(1.5, 0.5)
        q_dot = Dot(q_pt, radius=0.1, color=WARN)
        q_label = Text("Query", font_size=16, color=WARN).next_to(q_dot, UR, buff=0.1)
        self.play(FadeIn(q_dot, scale=2), Write(q_label))

        insight = Text(
            "Finding the best-matching key = a geometric query on the hull\n"
            "→ binary search in O(log t) instead of scanning all t keys",
            font_size=18,
            color=ACCENT3,
            line_spacing=1.3,
        ).next_to(plane, DOWN, buff=0.5)
        self.play(Write(insight))
        self.wait(3)

        self.play(*[FadeOut(m) for m in self.mobjects])


class Scene8_Results(Scene):
    """Performance results and what this means."""

    def construct(self) -> None:
        """Reveal three result cards in a row, followed by the Sudoku callout."""
        self.camera.background_color = BG

        title = Text("Results: What This Enables", font_size=36, color=ACCENT, weight=BOLD)
        title.to_edge(UP, buff=0.5)
        self.play(Write(title))

        results = VGroup(
            self._result_card("30,000+ tok/s", "Execution speed on CPU", ACCENT3),
            self._result_card("Millions of steps", "Correct execution length", GOLD),
            self._result_card(
                "100% accuracy", "On Sudoku benchmarks\n(incl. world's hardest)", ACCENT
            ),
        ).arrange(RIGHT, buff=0.6).next_to(title, DOWN, buff=0.8)

        for card in results:
            self.play(FadeIn(card, shift=UP * 0.3), run_time=0.7)
        self.wait(0.5)

        # Sudoku callout
        sudoku_note = Text(
            "Solves Arto Inkala's Sudoku (\"world's hardest\")\nin under 3 minutes — fully inside the transformer",
            font_size=20,
            color=TEXT_COL,
            line_spacing=1.3,
        ).next_to(results, DOWN, buff=0.8)
        self.play(Write(sudoku_note))
        self.wait(2.5)

        self.play(*[FadeOut(m) for m in self.mobjects])

    def _result_card(self, big_text: str, small_text: str, color: str) -> VGroup:
        """Build one result card.

        Args:
            big_text: Headline shown in bold in ``color``.
            small_text: Caption shown beneath the headline in ``TEXT_COL``.
            color: Colour used for the box outline and the headline.

        Returns:
            A ``VGroup`` of ``(box, text_group)`` where ``text_group`` holds
            the headline stacked above the caption.
        """
        box = RoundedRectangle(
            corner_radius=0.15, width=3.5, height=2.5, color=color, stroke_width=2
        )
        big = Text(big_text, font_size=28, color=color, weight=BOLD)
        small = Text(small_text, font_size=16, color=TEXT_COL, line_spacing=1.2)
        grp = VGroup(big, small).arrange(DOWN, buff=0.3)
        return VGroup(box, grp)


class Scene9_Recap(Scene):
    """Final recap and takeaway."""

    def construct(self) -> None:
        """List the six numbered recap points, then write the closing takeaway."""
        self.camera.background_color = BG

        title = Text("Recap", font_size=40, color=ACCENT, weight=BOLD)
        title.to_edge(UP, buff=0.5)
        self.play(Write(title))

        # Each entry: (number, sentence, sentence colour).
        steps = [
            ("1", "LLMs struggle with long, exact computation", TEXT_COL),
            ("2", "Today we bolt on external tools (like airplanes for humans)", DIM),
            ("3", "This team built a real computer inside a transformer", ACCENT3),
            ("4", "C code → WebAssembly tokens → executed by the model itself", GOLD),
            ("5", "2D attention heads enable O(log t) lookups (exponentially faster)", ACCENT),
            ("6", "Result: millions of correct steps, 30k+ tokens/sec, 100% Sudoku", ACCENT3),
        ]

        grp = VGroup()
        for num, text, color in steps:
            num_mob = Text(num, font_size=24, color=ACCENT2, weight=BOLD)
            text_mob = Text(text, font_size=21, color=color)
            row = VGroup(num_mob, text_mob).arrange(RIGHT, buff=0.3)
            grp.add(row)

        grp.arrange(DOWN, aligned_edge=LEFT, buff=0.35).next_to(title, DOWN, buff=0.6)

        for row in grp:
            self.play(FadeIn(row, shift=RIGHT * 0.3), run_time=0.6)
            self.wait(0.3)

        self.wait(1)

        takeaway = Text(
            "The model stops being a coordinator of computation\nand becomes a computer itself.",
            font_size=24,
            color=GOLD,
            weight=BOLD,
            line_spacing=1.3,
        ).next_to(grp, DOWN, buff=0.7)
        self.play(Write(takeaway, run_time=2))
        self.wait(3)

        self.play(*[FadeOut(m) for m in self.mobjects])