from manim import *
import numpy as np

# Color palette
# Hex colour strings shared by every scene in this file.
BG = "#0f0f1a"        # camera background, set at the start of each construct()
ACCENT = "#4fc3f7"    # primary accent: scene titles, LLM box, key dots
ACCENT2 = "#ab47bc"   # secondary accent: step numerals in the recap scene
ACCENT3 = "#66bb6a"   # "positive" accent: strengths, in-model execution, hull-attention curve
WARN = "#ff7043"      # "negative" accent: weaknesses, tool-use caveats, query dot
TEXT_COL = "#e0e0e0"  # default body text
DIM = "#666677"       # de-emphasised elements: subtitles, axes, dividers, connector arrows
# NOTE(review): `from manim import *` presumably already exports a GOLD colour;
# this assignment rebinds the name to the palette's own hex value — confirm that
# the shadowing is intended.
GOLD = "#ffd54f"      # highlight colour: breakthrough title, trace tokens, convex hull


class Scene1_Intro(Scene):
    """Opening scene: title card, then what LLMs do well versus what they do badly."""

    def construct(self):
        """Play the title card followed by the strengths and weaknesses lists."""
        self.camera.background_color = BG

        # Title card: headline with a dimmed tagline underneath.
        headline = Text("Can LLMs Be Computers?", font_size=56, color=ACCENT, weight=BOLD)
        tagline = Text(
            "Executing programs inside transformers",
            font_size=28,
            color=DIM,
        )
        tagline.next_to(headline, DOWN, buff=0.4)
        self.play(Write(headline, run_time=1.5))
        self.play(FadeIn(tagline, shift=UP * 0.2))
        self.wait(1.5)

        # Clear the title card before the lists appear.
        self.play(FadeOut(headline), FadeOut(tagline))

        # --- Strengths, pinned to the top edge ---
        strengths_header = Text("LLMs are great at…", font_size=32, color=ACCENT3)
        strengths_header.to_edge(UP, buff=0.6)
        strength_lines = (
            "✓ Solving hard math (IMO gold-medal level)",
            "✓ Writing code & reasoning about algorithms",
            "✓ Understanding natural language",
        )
        strengths = VGroup(
            *(Text(line, font_size=24, color=TEXT_COL) for line in strength_lines)
        )
        strengths.arrange(DOWN, aligned_edge=LEFT, buff=0.25)
        strengths.next_to(strengths_header, DOWN, buff=0.4)

        self.play(Write(strengths_header))
        for bullet in strengths:
            self.play(FadeIn(bullet, shift=RIGHT * 0.3), run_time=0.5)
        self.wait(1)

        # --- Weaknesses, stacked directly below the strengths ---
        weaknesses_header = Text("…but terrible at simple computation", font_size=32, color=WARN)
        weaknesses_header.next_to(strengths, DOWN, buff=0.6)
        weakness_lines = (
            "✗ Multiplying two numbers reliably",
            "✗ Solving even easy Sudoku puzzles",
            "✗ Any task needing many exact steps",
        )
        weaknesses = VGroup(
            *(Text(line, font_size=24, color=WARN) for line in weakness_lines)
        )
        weaknesses.arrange(DOWN, aligned_edge=LEFT, buff=0.25)
        weaknesses.next_to(weaknesses_header, DOWN, buff=0.4)

        self.play(Write(weaknesses_header))
        for bullet in weaknesses:
            self.play(FadeIn(bullet, shift=RIGHT * 0.3), run_time=0.5)
        self.wait(2)

        # Fade out everything still on screen.
        self.play(*(FadeOut(mob) for mob in self.mobjects))


class Scene2_ToolUse(Scene):
    """How LLMs currently handle computation: tool use (the airplane analogy).

    The scene has two parts: a human/airplane diagram that introduces the
    analogy, then an LLM/interpreter diagram with a round-trip pair of arrows.
    """

    @staticmethod
    def _boxed(label, *, width, height, corner_radius, color, center):
        """Frame ``label`` in a rounded rectangle and move the pair to ``center``.

        Args:
            label: Mobject to place inside the frame (left at its current centre
                before the group is moved).
            width: Frame width.
            height: Frame height.
            corner_radius: Corner radius of the frame.
            color: Stroke colour of the frame.
            center: Point the frame/label group is moved to.

        Returns:
            A ``(box, group)`` tuple: the bare frame (used as an arrow anchor)
            and the ``VGroup`` holding frame and label.
        """
        box = RoundedRectangle(
            corner_radius=corner_radius, width=width, height=height, color=color
        )
        return box, VGroup(box, label).move_to(center)

    def construct(self):
        """Play the airplane analogy, then the LLM-to-interpreter tool-use diagram."""
        self.camera.background_color = BG

        # --- Airplane analogy ---
        analogy_title = Text("The Airplane Analogy", font_size=36, color=ACCENT, weight=BOLD)
        analogy_title.to_edge(UP, buff=0.5)
        self.play(Write(analogy_title))

        human_box, human_grp = self._boxed(
            Text("Human", font_size=28, color=TEXT_COL),
            width=2.2, height=1.2, corner_radius=0.15, color=ACCENT, center=LEFT * 3,
        )
        plane_box, plane_grp = self._boxed(
            Text("Airplane", font_size=28, color=TEXT_COL),
            width=2.2, height=1.2, corner_radius=0.15, color=ACCENT3, center=RIGHT * 3,
        )

        arrow = Arrow(human_box.get_right(), plane_box.get_left(), color=DIM, buff=0.2)
        arrow_label = Text("delegates flying", font_size=20, color=DIM).next_to(arrow, UP, buff=0.15)

        cant = Text("Humans can't fly.", font_size=24, color=WARN).next_to(
            VGroup(human_grp, plane_grp), DOWN, buff=0.8
        )
        but = Text("Building airplanes doesn't change that.", font_size=24, color=WARN).next_to(
            cant, DOWN, buff=0.25
        )

        self.play(FadeIn(human_grp), FadeIn(plane_grp))
        self.play(GrowArrow(arrow), FadeIn(arrow_label))
        self.wait(0.5)
        self.play(Write(cant))
        self.play(Write(but))
        self.wait(2)
        # Clear the diagram first and the title afterwards. The filter is an
        # identity test: only the title object itself is held back.
        self.play(*[FadeOut(m) for m in self.mobjects if m is not analogy_title])
        self.play(FadeOut(analogy_title))

        # --- Tool use diagram ---
        tool_title = Text("Today: LLMs Use External Tools", font_size=34, color=ACCENT, weight=BOLD)
        tool_title.to_edge(UP, buff=0.5)
        self.play(Write(tool_title))

        llm_box, llm_grp = self._boxed(
            Text("LLM", font_size=30, color=ACCENT, weight=BOLD),
            width=2.5, height=1.4, corner_radius=0.2, color=ACCENT, center=LEFT * 3.5,
        )
        interp_box, interp_grp = self._boxed(
            Text("Interpreter", font_size=26, color=ACCENT3),
            width=3, height=1.4, corner_radius=0.2, color=ACCENT3, center=RIGHT * 3,
        )

        # Request and response arrows are nudged apart vertically so they do
        # not lie on top of each other.
        a1 = Arrow(llm_box.get_right(), interp_box.get_left(), color=GOLD, buff=0.2).shift(UP * 0.2)
        a1_label = Text("sends code", font_size=18, color=GOLD).next_to(a1, UP, buff=0.1)
        a2 = Arrow(interp_box.get_left(), llm_box.get_right(), color=ACCENT3, buff=0.2).shift(DOWN * 0.2)
        a2_label = Text("returns result", font_size=18, color=ACCENT3).next_to(a2, DOWN, buff=0.1)

        note = Text(
            "The LLM describes computation\nbut never executes it.",
            font_size=22, color=WARN, line_spacing=1.3,
        ).next_to(VGroup(llm_grp, interp_grp), DOWN, buff=0.9)

        self.play(FadeIn(llm_grp), FadeIn(interp_grp))
        self.play(GrowArrow(a1), FadeIn(a1_label))
        self.play(GrowArrow(a2), FadeIn(a2_label))
        self.wait(0.5)
        self.play(Write(note))
        self.wait(2.5)
        self.play(*[FadeOut(m) for m in self.mobjects])


class Scene3_KeyIdea(Scene):
    """Reveal of the core idea: a virtual CPU and memory drawn inside a transformer box."""

    def construct(self):
        """Show the headline, then build the nested transformer / CPU / memory diagram."""
        self.camera.background_color = BG

        heading = Text("The Breakthrough", font_size=40, color=GOLD, weight=BOLD)
        heading.to_edge(UP, buff=0.5)
        self.play(Write(heading))

        pitch = Text(
            "Build a computer INSIDE the transformer.",
            font_size=30,
            color=TEXT_COL,
        )
        pitch.next_to(heading, DOWN, buff=0.6)
        self.play(Write(pitch))
        self.wait(1)

        # Outer frame standing for the whole transformer.
        outer = RoundedRectangle(
            corner_radius=0.25, width=9, height=4.5, color=ACCENT, stroke_width=3
        )
        outer.shift(DOWN * 0.5)
        outer_caption = Text("Transformer", font_size=22, color=ACCENT)
        outer_caption.next_to(outer, UP, buff=0.15)

        # Virtual CPU, left of the frame's centre.
        cpu_frame = RoundedRectangle(
            corner_radius=0.15, width=3, height=2, color=GOLD, stroke_width=2
        )
        cpu_frame.move_to(outer.get_center() + LEFT * 2.2)
        cpu_caption = Text(
            "Virtual CPU\n(WebAssembly)", font_size=20, color=GOLD, line_spacing=1.2
        )
        cpu_caption.move_to(cpu_frame)

        # Memory and stack, right of the frame's centre.
        mem_frame = RoundedRectangle(
            corner_radius=0.15, width=2.5, height=2, color=ACCENT3, stroke_width=2
        )
        mem_frame.move_to(outer.get_center() + RIGHT * 2)
        mem_caption = Text(
            "Memory\n& Stack", font_size=20, color=ACCENT3, line_spacing=1.2
        )
        mem_caption.move_to(mem_frame)

        link = Arrow(cpu_frame.get_right(), mem_frame.get_left(), color=DIM, buff=0.15)

        # Draw each frame together with its caption, outermost first.
        framed_pairs = (
            (outer, outer_caption),
            (cpu_frame, cpu_caption),
            (mem_frame, mem_caption),
        )
        for frame, caption in framed_pairs:
            self.play(Create(frame), Write(caption))
        self.play(GrowArrow(link))

        summary = Text(
            "Arbitrary C programs → tokens → executed by the model itself",
            font_size=22,
            color=ACCENT,
        )
        summary.next_to(outer, DOWN, buff=0.5)
        self.play(Write(summary))
        self.wait(2.5)
        self.play(*(FadeOut(mob) for mob in self.mobjects))


class Scene4_ToolVsInModel(Scene):
    """Two-column comparison of computing 3+5 via a tool call versus inside the model."""

    def construct(self):
        """Lay out both columns, then reveal the left rows followed by the right rows."""
        self.camera.background_color = BG

        heading = Text("Tool Use  vs  In-Model Execution", font_size=34, color=ACCENT, weight=BOLD)
        heading.to_edge(UP, buff=0.4)
        self.play(Write(heading))

        separator = DashedLine(UP * 2.5, DOWN * 2.5, color=DIM, dash_length=0.15)
        separator.shift(DOWN * 0.3)
        self.play(Create(separator))

        # ---- Left column: tool use ----
        tool_header = Text("Tool Use", font_size=26, color=WARN, weight=BOLD)
        tool_header.move_to(LEFT * 3.5 + UP * 2)
        self.play(Write(tool_header))

        # Each entry: (caption, optional code snippet, caption colour, snippet colour).
        tool_specs = (
            ("LLM:", 'print(3+5)', ACCENT, GOLD),
            ("→ send to interpreter", "", DIM, DIM),
            ("← result: 8", "", ACCENT3, ACCENT3),
        )
        tool_rows = VGroup()
        row_y = 1.0
        for caption, snippet, caption_color, snippet_color in tool_specs:
            caption_mob = Text(caption, font_size=20, color=caption_color)
            caption_mob.move_to(LEFT * 5 + UP * row_y)
            # Left-justify every caption on the same vertical line.
            caption_mob.align_to(LEFT * 5.5, LEFT)
            parts = [caption_mob]
            if snippet:
                snippet_mob = Text(snippet, font_size=18, color=snippet_color, font="Monospace")
                snippet_mob.next_to(caption_mob, RIGHT, buff=0.2)
                parts.append(snippet_mob)
            tool_rows.add(VGroup(*parts))
            row_y -= 0.7

        tool_verdict = Text(
            "Execution is opaque\n(black box)", font_size=18, color=WARN, line_spacing=1.2
        )
        tool_verdict.move_to(LEFT * 3.5 + DOWN * 1.5)

        # ---- Right column: in-model execution ----
        model_header = Text("In-Model", font_size=26, color=ACCENT3, weight=BOLD)
        model_header.move_to(RIGHT * 3.5 + UP * 2)
        self.play(Write(model_header))

        model_rows = VGroup()
        row_y = 1.0
        for instruction in (
            "i32.const 03",
            "i32.const 05",
            "i32.add → 08",
            "output(08)",
            "halt",
        ):
            instruction_mob = Text(instruction, font_size=18, color=ACCENT3, font="Monospace")
            instruction_mob.move_to(RIGHT * 3.5 + UP * row_y)
            model_rows.add(instruction_mob)
            row_y -= 0.55

        model_verdict = Text(
            "Every step visible\nin the token stream", font_size=18, color=ACCENT3, line_spacing=1.2
        )
        model_verdict.move_to(RIGHT * 3.5 + DOWN * 2)

        # Reveal the left column completely before starting the right one.
        for entry in tool_rows:
            self.play(FadeIn(entry, shift=RIGHT * 0.2), run_time=0.5)
        self.play(Write(tool_verdict))
        self.wait(0.5)

        for entry in model_rows:
            self.play(FadeIn(entry, shift=LEFT * 0.2), run_time=0.4)
        self.play(Write(model_verdict))
        self.wait(2.5)
        self.play(*(FadeOut(mob) for mob in self.mobjects))


class Scene5_AppendOnlyTrace(Scene):
    """Execution shown as an append-only token trace, drawn as a notebook page."""

    def construct(self):
        """Draw the notebook, the prompt words, then the trace tokens one at a time."""
        self.camera.background_color = BG

        heading = Text("How It Works: The Append-Only Notebook", font_size=32, color=ACCENT, weight=BOLD)
        heading.to_edge(UP, buff=0.4)
        self.play(Write(heading))

        # Notebook page
        nb_rect = Rectangle(width=7, height=4.5, color=DIM, stroke_width=1.5)
        nb_rect.shift(DOWN * 0.4)
        nb_caption = Text("Notebook (token stream)", font_size=18, color=DIM)
        nb_caption.next_to(nb_rect, UP, buff=0.1)
        self.play(Create(nb_rect), Write(nb_caption))

        # Prompt row: seven words, one per column, centred on the page.
        prompt_header = Text("PROMPT (input words):", font_size=18, color=ACCENT)
        prompt_header.move_to(nb_rect.get_top() + DOWN * 0.4)
        self.play(Write(prompt_header))

        words = ["the", "cat", "runs", "and", "dog", "jumps", "over"]
        word_colors = [DIM, DIM, ACCENT3, DIM, DIM, ACCENT3, DIM]
        word_mobs = VGroup()
        for col, (word, shade) in enumerate(zip(words, word_colors)):
            word_mob = Text(word, font_size=22, color=shade, font="Monospace")
            word_mob.move_to(nb_rect.get_top() + DOWN * 0.8 + RIGHT * (col - 3) * 1.0)
            word_mobs.add(word_mob)
        self.play(*(FadeIn(word_mob) for word_mob in word_mobs), run_time=0.8)
        self.wait(0.5)

        # Trace row header
        trace_header = Text("EXECUTION TRACE (generated tokens):", font_size=18, color=GOLD)
        trace_header.move_to(nb_rect.get_center() + UP * 0.3)
        self.play(Write(trace_header))

        # One trace token per prompt word, in the same columns as the words.
        trace_mobs = VGroup()
        for col, state in enumerate(
            ["odd=0", "odd=0", "odd=1", "odd=1", "odd=1", "odd=0", "odd=0"]
        ):
            state_mob = Text(state, font_size=20, color=GOLD, font="Monospace")
            state_mob.move_to(nb_rect.get_center() + DOWN * 0.3 + RIGHT * (col - 3) * 1.0)
            trace_mobs.add(state_mob)

        # Shared styling for the thin look-back arrows.
        thin_arrow = dict(stroke_width=1.5, buff=0.08, max_tip_length_to_length_ratio=0.15)

        previous = None
        for word_mob, state_mob in zip(word_mobs, trace_mobs):
            # Every token gets an arrow from the word above it ...
            step = [
                FadeIn(state_mob),
                GrowArrow(
                    Arrow(word_mob.get_bottom(), state_mob.get_top(), color=ACCENT, **thin_arrow)
                ),
            ]
            # ... and, except for the first, one from the token to its left.
            if previous is not None:
                step.append(
                    GrowArrow(
                        Arrow(previous.get_right(), state_mob.get_left(), color=GOLD, **thin_arrow)
                    )
                )
            self.play(*step, run_time=0.6)
            previous = state_mob

        rule = Text(
            "Each new token looks back at (1) the input word and (2) the previous trace token",
            font_size=17,
            color=TEXT_COL,
        )
        rule.move_to(nb_rect.get_bottom() + DOWN * 0.05)
        self.play(Write(rule))

        takeaway = Text(
            "Key: only 2 lookbacks per step — cost doesn't grow with length!",
            font_size=20,
            color=ACCENT3,
            weight=BOLD,
        )
        takeaway.next_to(nb_rect, DOWN, buff=0.6)
        self.play(Write(takeaway))
        self.wait(2.5)
        self.play(*(FadeOut(mob) for mob in self.mobjects))


class Scene6_QuadraticProblem(Scene):
    """The quadratic cost problem of standard attention.

    Plots per-step work against the number of generated tokens for a linear
    curve (standard attention) and a logarithmic curve (hull attention), then
    shows a numeric comparison at one million steps.
    """

    def construct(self):
        """Draw the axes, both cost curves with their labels, and the comparison text."""
        self.camera.background_color = BG

        title = Text("The Problem: Attention Cost Grows", font_size=34, color=ACCENT, weight=BOLD)
        title.to_edge(UP, buff=0.5)
        self.play(Write(title))

        # Axes
        ax = Axes(
            x_range=[0, 10, 2], y_range=[0, 100, 20],
            x_length=5, y_length=3.5,
            axis_config={"color": DIM, "include_numbers": False},
        ).shift(DOWN * 0.5)
        x_lab = Text("tokens generated (t)", font_size=18, color=DIM).next_to(ax.x_axis, DOWN, buff=0.3)
        # Rotate BEFORE positioning: next_to() measures the gap from the
        # label's bounding box, so the box must already be vertical for the
        # 0.3 buff to be the real distance to the axis. Rotating afterwards
        # spins the label about its centre and leaves it far from the axis.
        y_lab = Text("work per step", font_size=18, color=DIM)
        y_lab.rotate(PI / 2).next_to(ax.y_axis, LEFT, buff=0.3)

        self.play(Create(ax), Write(x_lab), Write(y_lab))

        # Standard: linear cost per step → quadratic total
        std_graph = ax.plot(lambda x: x * 10, x_range=[0.1, 10], color=WARN, stroke_width=3)
        std_label = Text("Standard attention: O(t) per step", font_size=18, color=WARN)
        std_label.next_to(ax, RIGHT, buff=0.3).shift(UP * 1)

        self.play(Create(std_graph), Write(std_label), run_time=1.5)
        self.wait(1)

        # Log cost (scaled by 8 so the curve is visible on the 0-100 y-range)
        log_graph = ax.plot(lambda x: np.log2(x + 1) * 8, x_range=[0.1, 10], color=ACCENT3, stroke_width=3)
        log_label = Text("Hull attention: O(log t) per step", font_size=18, color=ACCENT3)
        log_label.next_to(std_label, DOWN, buff=0.4)

        self.play(Create(log_graph), Write(log_label), run_time=1.5)

        gap_text = Text(
            "At 1 million steps:\nstandard = 1,000,000 ops/step\nhull = ~20 ops/step",
            font_size=18, color=GOLD, line_spacing=1.3,
        ).next_to(ax, DOWN, buff=0.7)
        self.play(Write(gap_text))
        self.wait(3)
        self.play(*[FadeOut(m) for m in self.mobjects])


class Scene7_2DHeads(Scene):
    """The key unlock: 2D attention heads and convex hull queries.

    Scatters 20 pseudo-random 2D "key" points on a number plane, outlines
    their convex hull, and places a single "query" point on top.
    """

    def construct(self):
        """Show the explanation text, the key scatter, its convex hull and a query point."""
        # Imported here rather than at module level so SciPy is only required
        # when this particular scene is rendered.
        from scipy.spatial import ConvexHull

        self.camera.background_color = BG

        title = Text("The Key Unlock: 2D Attention Heads", font_size=34, color=ACCENT, weight=BOLD)
        title.to_edge(UP, buff=0.4)
        self.play(Write(title))

        # Explain head dimension
        expl = Text(
            "Restrict each attention head to dimension 2\n→ keys & queries become 2D points",
            font_size=22, color=TEXT_COL, line_spacing=1.3,
        ).next_to(title, DOWN, buff=0.5)
        self.play(Write(expl))
        self.wait(1)

        # 2D scatter of keys. A private, seeded generator gives the same points
        # as seeding the global NumPy RNG with 42, without touching global
        # random state as a side effect of rendering.
        n_pts = 20
        rng = np.random.RandomState(42)
        pts = rng.randn(n_pts, 2) * 1.2
        plane = NumberPlane(
            x_range=[-3, 3, 1], y_range=[-3, 3, 1],
            x_length=5, y_length=4,
            background_line_style={"stroke_color": DIM, "stroke_width": 0.5},
            axis_config={"stroke_color": DIM},
        ).shift(DOWN * 0.7)

        dots = VGroup(
            *[Dot(plane.c2p(x, y), radius=0.06, color=ACCENT) for x, y in pts]
        )

        key_label = Text("Keys (past tokens)", font_size=16, color=ACCENT).next_to(plane, LEFT, buff=0.3)
        self.play(Create(plane), Write(key_label))
        self.play(*[FadeIn(d, scale=0.5) for d in dots], run_time=1)
        self.wait(0.5)

        # Convex hull. hull.vertices indexes the points on the hull boundary.
        # Polygon closes the outline itself, so the first vertex is not
        # appended again (that would add a zero-length closing segment).
        hull = ConvexHull(pts)
        hull_pts = [plane.c2p(x, y) for x, y in pts[hull.vertices]]
        hull_poly = Polygon(*hull_pts, color=GOLD, stroke_width=2, fill_opacity=0.08, fill_color=GOLD)

        hull_label = Text("Convex Hull", font_size=18, color=GOLD).next_to(plane, RIGHT, buff=0.3).shift(UP)
        self.play(Create(hull_poly), Write(hull_label))
        self.wait(0.5)

        # Query point
        q_pt = plane.c2p(1.5, 0.5)
        q_dot = Dot(q_pt, radius=0.1, color=WARN)
        q_label = Text("Query", font_size=16, color=WARN).next_to(q_dot, UR, buff=0.1)
        self.play(FadeIn(q_dot, scale=2), Write(q_label))

        insight = Text(
            "Finding the best-matching key = a geometric query on the hull\n"
            "→ binary search in O(log t) instead of scanning all t keys",
            font_size=18, color=ACCENT3, line_spacing=1.3,
        ).next_to(plane, DOWN, buff=0.5)
        self.play(Write(insight))
        self.wait(3)
        self.play(*[FadeOut(m) for m in self.mobjects])


class Scene8_Results(Scene):
    """Headline numbers shown as three cards, followed by a Sudoku call-out."""

    def construct(self):
        """Reveal the result cards one by one, then write the Sudoku note."""
        self.camera.background_color = BG

        heading = Text("Results: What This Enables", font_size=36, color=ACCENT, weight=BOLD)
        heading.to_edge(UP, buff=0.5)
        self.play(Write(heading))

        # Each entry: (headline figure, caption, card colour).
        card_specs = (
            ("30,000+ tok/s", "Execution speed on CPU", ACCENT3),
            ("Millions of steps", "Correct execution length", GOLD),
            ("100% accuracy", "On Sudoku benchmarks\n(incl. world's hardest)", ACCENT),
        )
        cards = VGroup(*(self._result_card(*spec) for spec in card_specs))
        cards.arrange(RIGHT, buff=0.6)
        cards.next_to(heading, DOWN, buff=0.8)

        for card in cards:
            self.play(FadeIn(card, shift=UP * 0.3), run_time=0.7)
            self.wait(0.5)

        # Sudoku call-out below the row of cards.
        callout = Text(
            "Solves Arto Inkala's Sudoku (\"world's hardest\")\nin under 3 minutes — fully inside the transformer",
            font_size=20,
            color=TEXT_COL,
            line_spacing=1.3,
        )
        callout.next_to(cards, DOWN, buff=0.8)
        self.play(Write(callout))
        self.wait(2.5)
        self.play(*(FadeOut(mob) for mob in self.mobjects))

    def _result_card(self, big_text, small_text, color):
        """Build one card: a rounded frame with a bold figure above a small caption.

        Args:
            big_text: Headline string, drawn bold in ``color``.
            small_text: Caption string, drawn in the body-text colour.
            color: Colour of the frame stroke and the headline.

        Returns:
            A ``VGroup`` of the frame and the stacked headline/caption group.
        """
        headline = Text(big_text, font_size=28, color=color, weight=BOLD)
        caption = Text(small_text, font_size=16, color=TEXT_COL, line_spacing=1.2)
        content = VGroup(headline, caption)
        content.arrange(DOWN, buff=0.3)
        frame = RoundedRectangle(
            corner_radius=0.15, width=3.5, height=2.5, color=color, stroke_width=2
        )
        return VGroup(frame, content)


class Scene9_Recap(Scene):
    """Closing scene: six numbered recap points and a final takeaway line."""

    def construct(self):
        """List the recap points one at a time, then write the takeaway."""
        self.camera.background_color = BG

        heading = Text("Recap", font_size=40, color=ACCENT, weight=BOLD)
        heading.to_edge(UP, buff=0.5)
        self.play(Write(heading))

        # Each entry: (numeral, sentence, sentence colour).
        recap_points = (
            ("1", "LLMs struggle with long, exact computation", TEXT_COL),
            ("2", "Today we bolt on external tools (like airplanes for humans)", DIM),
            ("3", "This team built a real computer inside a transformer", ACCENT3),
            ("4", "C code → WebAssembly tokens → executed by the model itself", GOLD),
            ("5", "2D attention heads enable O(log t) lookups (exponentially faster)", ACCENT),
            ("6", "Result: millions of correct steps, 30k+ tokens/sec, 100% Sudoku", ACCENT3),
        )

        rows = VGroup()
        for numeral, sentence, shade in recap_points:
            numeral_mob = Text(numeral, font_size=24, color=ACCENT2, weight=BOLD)
            sentence_mob = Text(sentence, font_size=21, color=shade)
            entry = VGroup(numeral_mob, sentence_mob)
            entry.arrange(RIGHT, buff=0.3)
            rows.add(entry)
        # Stack the rows left-aligned underneath the heading.
        rows.arrange(DOWN, aligned_edge=LEFT, buff=0.35)
        rows.next_to(heading, DOWN, buff=0.6)

        for entry in rows:
            self.play(FadeIn(entry, shift=RIGHT * 0.3), run_time=0.6)
            self.wait(0.3)

        self.wait(1)

        closing = Text(
            "The model stops being a coordinator of computation\nand becomes a computer itself.",
            font_size=24,
            color=GOLD,
            weight=BOLD,
            line_spacing=1.3,
        )
        closing.next_to(rows, DOWN, buff=0.7)
        self.play(Write(closing, run_time=2))
        self.wait(3)
        self.play(*(FadeOut(mob) for mob in self.mobjects))