// markdown treesitter playground: https://ikatyang.github.io/tree-sitter-markdown/ use std::sync::{Arc, LazyLock}; use gpui::{AppContext, FontWeight, ParentElement, Styled, div, relative, rems}; use crate::{ app, component::rich_text::{RichText, RichTextContent, RichTextContentBuilder, rich_text}, theme, }; const MARKDOWN_KIND_ID_SETEXT_H1_UNDERLINE: u16 = 8; const MARKDOWN_KIND_ID_SETEXT_H2_UNDERLINE: u16 = 9; const MARKDOWN_KIND_ID_ATX_H1_MARKER: u16 = 11; const MARKDOWN_KIND_ID_ATX_H2_MARKER: u16 = 12; const MARKDOWN_KIND_ID_ATX_H3_MARKER: u16 = 13; const MARKDOWN_KIND_ID_ATX_H4_MARKER: u16 = 14; const MARKDOWN_KIND_ID_ATX_H5_MARKER: u16 = 15; const MARKDOWN_KIND_ID_ATX_H6_MARKER: u16 = 16; const MARKDOWN_KIND_ID_LIST_MARKER: u16 = 48; const MARKDOWN_KIND_ID_BACKSLASH_ESCAPE: u16 = 56; const MARKDOWN_KIND_ID_CHARACTER_REFERENCE: u16 = 57; const MARKDOWN_KIND_ID_TABLE_COLUMN_ALIGNMENT: u16 = 107; const MARKDOWN_KIND_ID_HARD_LINE_BREAK: u16 = 110; const MARKDOWN_KIND_ID_SOFT_LINE_BREAK: u16 = 111; const MARKDOWN_KIND_ID_HTML_TAG_NAME: u16 = 117; const MARKDOWN_KIND_ID_VIRTUAL_SPACE: u16 = 118; const MARKDOWN_KIND_ID_DOCUMENT: u16 = 119; const MARKDOWN_KIND_ID_THEMATIC_BREAK: u16 = 122; const MARKDOWN_KIND_ID_PARAGRAPH: u16 = 124; const MARKDOWN_KIND_ID_LINK_REFERENCE_DEFINITION: u16 = 126; const MARKDOWN_KIND_ID_SETEXT_HEADING: u16 = 129; const MARKDOWN_KIND_ID_ATX_HEADING: u16 = 132; const MARKDOWN_KIND_ID_INDENTED_CODE_BLOCK: u16 = 134; const MARKDOWN_KIND_ID_FENCED_CODE_BLOCK: u16 = 136; const MARKDOWN_KIND_ID_CODE_FENCE_CONTENT: u16 = 138; const MARKDOWN_KIND_ID_HTML_BLOCK_SCRIPT: u16 = 140; const MARKDOWN_KIND_ID_HTML_BLOCK_COMMENT: u16 = 142; const MARKDOWN_KIND_ID_HTML_BLOCK_PROCESSING: u16 = 144; const MARKDOWN_KIND_ID_HTML_BLOCK_DECLARATION: u16 = 146; const MARKDOWN_KIND_ID_HTML_BLOCK_CDATA: u16 = 148; const MARKDOWN_KIND_ID_HTML_BLOCK_DIV: u16 = 150; const MARKDOWN_KIND_ID_HTML_BLOCK_CMP: u16 = 152; const MARKDOWN_KIND_ID_BLOCK_QUOTE: u16 = 154; const MARKDOWN_KIND_ID_TIGHT_LIST: u16 = 156; const MARKDOWN_KIND_ID_LOOSE_LIST: u16 = 158; const MARKDOWN_KIND_ID_LIST_ITEM_TIGHT: u16 = 160; const MARKDOWN_KIND_ID_TASK_LIST_ITEM_TIGHT: u16 = 161; const MARKDOWN_KIND_ID_LIST_ITEM_LOOSE: u16 = 163; const MARKDOWN_KIND_ID_TASK_LIST_ITEM_LOOSE: u16 = 164; const MARKDOWN_KIND_ID_PARAGRAPH_TASK_LIST: u16 = 166; const MARKDOWN_KIND_ID_SETEXT_HEADING_TASK_LIST: u16 = 168; const MARKDOWN_KIND_ID_HEADING_CONTENT: u16 = 169; const MARKDOWN_KIND_ID_TABLE: u16 = 170; const MARKDOWN_KIND_ID_TABLE_HEADER_ROW: u16 = 172; const MARKDOWN_KIND_ID_TABLE_DELIMITER_ROW: u16 = 174; const MARKDOWN_KIND_ID_TABLE_DATA_ROW: u16 = 176; const MARKDOWN_KIND_ID_EMPHASIS: u16 = 181; const MARKDOWN_KIND_ID_STRONG_EMPHASIS: u16 = 182; const MARKDOWN_KIND_ID_STRIKETHROUGH: u16 = 183; const MARKDOWN_KIND_ID_LINK: u16 = 184; const MARKDOWN_KIND_ID_IMAGE: u16 = 185; const MARKDOWN_KIND_ID_LINK_DESTINATION: u16 = 190; const MARKDOWN_KIND_ID_LINK_TITLE: u16 = 191; const MARKDOWN_KIND_ID_WWW_AUTOLINK: u16 = 192; const MARKDOWN_KIND_ID_URI_AUTOLINK_EXTENDED: u16 = 194; const MARKDOWN_KIND_ID_EMAIL_AUTOLINK_EXTENDED: u16 = 196; const MARKDOWN_KIND_ID_URI_AUTOLINK_ANGLE: u16 = 198; const MARKDOWN_KIND_ID_EMAIL_AUTOLINK_ANGLE: u16 = 199; const MARKDOWN_KIND_ID_CODE_SPAN: u16 = 200; const MARKDOWN_KIND_ID_HTML_OPEN_TAG: u16 = 201; const MARKDOWN_KIND_ID_HTML_SELF_CLOSING_TAG: u16 = 202; const MARKDOWN_KIND_ID_HTML_CLOSE_TAG: u16 = 204; const MARKDOWN_KIND_ID_HTML_COMMENT: u16 = 205; const MARKDOWN_KIND_ID_HTML_PROCESSING_INSTRUCTION: u16 = 206; const MARKDOWN_KIND_ID_HTML_DECLARATION: u16 = 207; const MARKDOWN_KIND_ID_HTML_CDATA_SECTION: u16 = 208; const MARKDOWN_KIND_ID_HTML_ATTRRIBUTE: u16 = 209; const MARKDOWN_KIND_ID_HTML_ATTRIBUTE_VALUE: u16 = 210; const MARKDOWN_KIND_ID_TEXT: u16 = 211; const MARKDOWN_KIND_ID_HTML_ATTRIBUTE_KEY: u16 = 228; const MARKDOWN_KIND_ID_HTML_DECLARATION_NAME: u16 = 229; const MARKDOWN_KIND_ID_IMAGE_DESCRIPTION: u16 = 230; const MARKDOWN_KIND_ID_INFO_STRING: u16 = 231; const MARKDOWN_KIND_ID_LINE_BREAK: u16 = 232; const MARKDOWN_KIND_ID_LINK_LABEL: u16 = 233; const MARKDOWN_KIND_ID_LINK_TEXT: u16 = 234; const MARKDOWN_KIND_ID_TABLE_CELL: u16 = 235; const MARKDOWN_KIND_ID_TASK_LIST_ITEM_MARKER: u16 = 236; pub(crate) struct MarkdownText { content: Arc, blocks: Vec, } enum ContentBlock { Heading { font_size: gpui::Rems, font_weight: gpui::FontWeight, mt: gpui::Rems, mb: gpui::Rems, content: RichTextContent, }, Code { content: gpui::SharedString, }, Paragraph { decoration: Option, content: RichTextContent, }, Empty, Table { row_count: usize, col_count: usize, cells: Vec, }, } pub(crate) fn new(content: Arc, cx: &mut gpui::Context) -> MarkdownText { let mut view = MarkdownText { content, blocks: Vec::new(), }; view.on_create(cx); view } impl MarkdownText { fn on_create(&mut self, cx: &gpui::Context) { let content = Arc::clone(&self.content); let t = cx.background_spawn(async move { let mut parser = tree_sitter::Parser::new(); parser .set_language(tree_sitter_markdown::language()) .expect("tree-sitter-markdown language should load"); parser.parse(content.as_bytes(), None) }); cx.spawn(async |weak, cx| { if let Some(tree) = t.await { _ = weak.update(cx, |this, cx| { let theme = app::current_theme(cx); this.render_tree(&tree, &theme); cx.notify(); }); }; }) .detach(); } fn on_open_link(&self, _link: &str, _cx: &gpui::Context) {} fn render_tree(&mut self, tree: &tree_sitter::Tree, theme: &theme::Theme) { static ORDERED_LIST_MARKER_REGEX: LazyLock = LazyLock::new(|| regex::Regex::new(r"^\d+\.$").unwrap()); let mut cursor = tree.walk(); cursor.goto_first_child(); let mut is_first_heading = true; fn build_rich_text_for_node( cursor: &mut tree_sitter::TreeCursor, builder: &mut RichTextContentBuilder, content: &str, // byte_offset is the number of bytes to offset the content start byte by byte_offset: usize, theme: &theme::Theme, parent_style: Option, ) { let node_start_byte = cursor.node().start_byte(); let style = parent_style.unwrap_or_default(); cursor.goto_first_child(); loop { let node = cursor.node(); macro_rules! node_range { () => { (node.start_byte() - node_start_byte - byte_offset) ..(node.end_byte() - node_start_byte - byte_offset) }; } match node.kind_id() { | MARKDOWN_KIND_ID_TEXT => { println!( "current node start byte {} parent node start byte {}", node.start_byte(), node_start_byte ); if let Some(t) = node.utf8_text(content.as_ref()).ok() { builder.push_text(t, style); } } | MARKDOWN_KIND_ID_EMPHASIS => { build_rich_text_for_node( cursor, builder, content, byte_offset, theme, Some(gpui::HighlightStyle { font_style: Some(gpui::FontStyle::Italic), ..style }), ); } | MARKDOWN_KIND_ID_STRONG_EMPHASIS => { build_rich_text_for_node( cursor, builder, content, byte_offset, theme, Some(gpui::HighlightStyle { font_weight: Some(gpui::FontWeight::BOLD), ..style }), ); } | MARKDOWN_KIND_ID_LINK => { cursor.goto_first_child(); let (description, src) = if cursor.node().kind_id() == MARKDOWN_KIND_ID_LINK_DESTINATION { let node = cursor.node(); let src = &content[node_range!()]; (src, src) } else { let node = cursor.node(); let description = &content[node_range!()]; if cursor.goto_next_sibling() { debug_assert!( cursor.node().kind_id() == MARKDOWN_KIND_ID_LINK_DESTINATION ); let node = cursor.node(); (description, &content[node_range!()]) } else { // no src for this link node (description, "") } }; builder.push_link(description, src.to_owned()); cursor.goto_parent(); } | _ => { // extend here to support more styles } }; if !cursor.goto_next_sibling() { break; } } cursor.goto_parent(); } loop { let current_node = cursor.node(); fn render_list_node( cursor: &mut tree_sitter::TreeCursor, content: &str, blocks: &mut Vec, theme: &theme::Theme, indentation: usize, ) -> bool { // expected tree shape for node pointed to by cursor: // tight_list // list_item // list_marker // paragraph // tight_list <-- recursive point // go to list_item node cursor.goto_first_child(); let mut list_index: Option = None; loop { if cursor.node().kind_id() != MARKDOWN_KIND_ID_LIST_ITEM_TIGHT // if is list_item node, dive into list_marker node || !cursor.goto_first_child() { // encountered non lists item node under tight list node // dont know what to do, so skipping this node if !cursor.goto_next_sibling() { return false; } continue; } let marker_node = cursor.node(); let marker_content = &content[marker_node.byte_range()]; let list_marker_char = match marker_content { // unordered list item | "-" | "+" | "*" => "•".to_string(), | marker_content if ORDERED_LIST_MARKER_REGEX.is_match(marker_content) => { let i = list_index.get_or_insert_with(|| { marker_content .strip_suffix('.') .unwrap() .parse::() .unwrap() }); let j = *i; *i = j + 1; format!("{j}.") } | _ => "•".to_string(), }; // go to paragraph sibling node let block = if cursor.goto_next_sibling() { let mut builder = RichTextContentBuilder::new(); build_rich_text_for_node(cursor, &mut builder, content, 0, theme, None); ContentBlock::Paragraph { decoration: Some(list_marker_char.clone()), content: builder.build(), } } else { // empty block ContentBlock::Empty }; blocks.push(block); // if there is a nested tight_light after paragraph // render it recursively if cursor.goto_next_sibling() && cursor.node().kind_id() == MARKDOWN_KIND_ID_TIGHT_LIST { render_list_node(cursor, content, blocks, theme, indentation + 1); } // go back to list_item node cursor.goto_parent(); if !cursor.goto_next_sibling() { // no more list_item in tight_list node // go back up to tight_list node cursor.goto_parent(); return true; } } } match current_node.kind_id() { | MARKDOWN_KIND_ID_ATX_HEADING => { cursor.goto_first_child(); let marker_node_kind = cursor.node().kind_id(); let Some(content) = (if cursor.goto_next_sibling() && cursor.node().kind_id() == MARKDOWN_KIND_ID_HEADING_CONTENT { let mut builder = RichTextContentBuilder::new(); // because HEADING_CONTENT node includes the space after the heading marker // offset by 1 to exclude the space build_rich_text_for_node( &mut cursor, &mut builder, &self.content, 1, theme, None, ); Some(builder.build()) } else { None }) else { continue; }; let mut block = match marker_node_kind { | MARKDOWN_KIND_ID_ATX_H1_MARKER => ContentBlock::Heading { font_size: rems(2.25), font_weight: gpui::FontWeight::EXTRA_BOLD, mt: rems(0.), mb: rems(1.5), content, }, | MARKDOWN_KIND_ID_ATX_H2_MARKER => ContentBlock::Heading { font_size: rems(1.5), font_weight: gpui::FontWeight::BOLD, mt: rems(1.5), mb: rems(1.), content, }, | MARKDOWN_KIND_ID_ATX_H3_MARKER => ContentBlock::Heading { font_size: rems(1.25), font_weight: gpui::FontWeight::SEMIBOLD, mt: rems(2.), mb: rems(0.75), content, }, | MARKDOWN_KIND_ID_ATX_H4_MARKER => ContentBlock::Heading { font_size: rems(1.), font_weight: FontWeight::SEMIBOLD, mt: rems(1.5), mb: rems(0.5), content, }, | MARKDOWN_KIND_ID_ATX_H5_MARKER | MARKDOWN_KIND_ID_ATX_H6_MARKER | _ => { ContentBlock::Heading { font_size: rems(1.), font_weight: FontWeight::NORMAL, mt: rems(1.5), mb: rems(0.5), content, } } }; if is_first_heading { is_first_heading = false; } cursor.goto_parent(); self.blocks.push(block); } | MARKDOWN_KIND_ID_PARAGRAPH => { let mut builder = RichTextContentBuilder::new(); // because HEADING_CONTENT node includes the space after the heading marker // offset by 1 to exclude the space build_rich_text_for_node(&mut cursor, &mut builder, &self.content, 0, theme, None); self.blocks.push(ContentBlock::Paragraph { decoration: None, content: builder.build(), }); } | MARKDOWN_KIND_ID_TIGHT_LIST => { let is_rendered = render_list_node(&mut cursor, &self.content, &mut self.blocks, theme, 0); if !is_rendered { continue; } } | MARKDOWN_KIND_ID_FENCED_CODE_BLOCK => { // expected tree shape: // fenced_code_block // ├── info_string? (present if there is a language annotation) // └── code_fence_content? (present if there is some content between the backticks) if !cursor.goto_first_child() { self.blocks.push(ContentBlock::Empty); continue; } let content = if cursor.node().kind_id() == MARKDOWN_KIND_ID_INFO_STRING { // skipping info string (which annotates the code block) if cursor.goto_next_sibling() { // this is code_fence_content node gpui::SharedString::new( cursor .node() .utf8_text(self.content.as_bytes()) .unwrap_or_default(), ) } else { gpui::SharedString::default() } } else { // assuming the current node is already code_fence_content gpui::SharedString::new( cursor .node() .utf8_text(self.content.as_bytes()) .unwrap_or_default(), ) }; cursor.goto_parent(); self.blocks.push(ContentBlock::Code { content }); } // | MARKDOWN_KIND_ID_TABLE => { // cursor.goto_first_child(); // debug_assert!(cursor.node().kind_id() == MARKDOWN_KIND_ID_TABLE_HEADER_ROW); // let col_count = cursor.node().child_count(); // // markdown tables aren't usually that big // // lets assume the average markdown table has 10 rows (inc header) // // preallocate the vec with capacity row * col, should be big enough to avoid realloc // let min_row_count = 10; // // cell text blocks are stored in row-major order // let cell_blocks: Vec = Vec::with_capacity(col_count * min_row_count); // cursor.goto_first_child(); // debug_assert!(cursor.node().kind_id() == MARKDOWN_KIND_ID_TABLE_CELL); // loop { // let cell_node = cursor.node(); // let cell_text_block = rich_text_for_node(&mut cursor, &self.content, 1, theme); // cell_blocks.push(ContentBlock::Paragraph(cell_text_block)); // if !cursor.goto_next_sibling() { // break; // } // } // } | _ => { println!( "[WARN] formatting not implemenetd for node type {:?}", current_node.kind() ); let mut builder = RichTextContentBuilder::new(); build_rich_text_for_node(&mut cursor, &mut builder, &self.content, 0, theme, None); self.blocks.push(ContentBlock::Paragraph { decoration: None, content: builder.build(), }); } } if !cursor.goto_next_sibling() { break; } } } } impl gpui::Render for MarkdownText { fn render( &mut self, _window: &mut gpui::Window, cx: &mut gpui::prelude::Context, ) -> impl gpui::prelude::IntoElement { let theme = app::current_theme(cx); let children = self .blocks .iter() .enumerate() .map(|(i, block)| match block { | ContentBlock::Heading { font_size, font_weight, mt, mb, content, } => div() .min_w_0() .mt(gpui::Length::from(*mt)) .mb(gpui::Length::from(*mb)) .text_size(gpui::AbsoluteLength::from(*font_size)) .font_weight(*font_weight) .child(rich_text(content.clone())), | ContentBlock::Paragraph { decoration, content, } => match decoration { | None => div().min_w_0().child(rich_text(content.clone())), | Some(decoration) => div() .w_full() .flex() .flex_row() .gap_2() .items_start() .text_color(theme.colors.text) .child(decoration.clone()) .child(div().flex_1().min_w_0().child(rich_text(content.clone()))), }, | ContentBlock::Code { content } => div() .min_w_0() .w_full() .text_sm() .text_color(theme.colors.text) .line_height(relative(1.2)) .font_family("Menlo") .px_3() .py_2() .rounded_sm() .bg(theme.colors.code_bg) .border_1() .my_4() .border_color(theme.colors.code_border) .child(content.clone()), | ContentBlock::Table { row_count, col_count, cells, } => div(), | ContentBlock::Empty => div(), }); div().w_full().children(children) } }