Files
novem/src/component/markdown.rs

591 lines
22 KiB
Rust
Raw Normal View History

2026-05-11 00:32:12 +08:00
// markdown treesitter playground: https://ikatyang.github.io/tree-sitter-markdown/
2026-05-23 18:45:44 +01:00
use std::{
ops::Range,
sync::{Arc, LazyLock},
};
2026-05-11 00:32:12 +08:00
2026-05-13 00:52:13 +08:00
use gpui::{AppContext, ParentElement, Refineable, Styled, div, px, relative, rems};
2026-05-11 00:32:12 +08:00
use crate::{app, theme};
// Numeric node-kind ids that `render_tree` compares against `tree_sitter::Node::kind_id()`
// while walking the parsed markdown tree.
//
// NOTE(review): presumably these mirror the symbol table generated for the
// tree-sitter-markdown grammar loaded in `on_create`; if so they are only valid for that
// exact grammar version and must be re-checked whenever the dependency is bumped.
// NOTE(review): `MARKDOWN_KIND_ID_HTML_ATTRRIBUTE` is misspelt ("ATTRRIBUTE"); rename only
// after confirming nothing else in the crate refers to it.
const MARKDOWN_KIND_ID_SETEXT_H1_UNDERLINE: u16 = 8;
const MARKDOWN_KIND_ID_SETEXT_H2_UNDERLINE: u16 = 9;
const MARKDOWN_KIND_ID_ATX_H1_MARKER: u16 = 11;
const MARKDOWN_KIND_ID_ATX_H2_MARKER: u16 = 12;
const MARKDOWN_KIND_ID_ATX_H3_MARKER: u16 = 13;
const MARKDOWN_KIND_ID_ATX_H4_MARKER: u16 = 14;
const MARKDOWN_KIND_ID_ATX_H5_MARKER: u16 = 15;
const MARKDOWN_KIND_ID_ATX_H6_MARKER: u16 = 16;
const MARKDOWN_KIND_ID_LIST_MARKER: u16 = 48;
const MARKDOWN_KIND_ID_BACKSLASH_ESCAPE: u16 = 56;
const MARKDOWN_KIND_ID_CHARACTER_REFERENCE: u16 = 57;
const MARKDOWN_KIND_ID_TABLE_COLUMN_ALIGNMENT: u16 = 107;
const MARKDOWN_KIND_ID_HARD_LINE_BREAK: u16 = 110;
const MARKDOWN_KIND_ID_SOFT_LINE_BREAK: u16 = 111;
const MARKDOWN_KIND_ID_HTML_TAG_NAME: u16 = 117;
const MARKDOWN_KIND_ID_VIRTUAL_SPACE: u16 = 118;
const MARKDOWN_KIND_ID_DOCUMENT: u16 = 119;
const MARKDOWN_KIND_ID_THEMATIC_BREAK: u16 = 122;
const MARKDOWN_KIND_ID_PARAGRAPH: u16 = 124;
const MARKDOWN_KIND_ID_LINK_REFERENCE_DEFINITION: u16 = 126;
const MARKDOWN_KIND_ID_SETEXT_HEADING: u16 = 129;
const MARKDOWN_KIND_ID_ATX_HEADING: u16 = 132;
const MARKDOWN_KIND_ID_INDENTED_CODE_BLOCK: u16 = 134;
const MARKDOWN_KIND_ID_FENCED_CODE_BLOCK: u16 = 136;
const MARKDOWN_KIND_ID_CODE_FENCE_CONTENT: u16 = 138;
const MARKDOWN_KIND_ID_HTML_BLOCK_SCRIPT: u16 = 140;
const MARKDOWN_KIND_ID_HTML_BLOCK_COMMENT: u16 = 142;
const MARKDOWN_KIND_ID_HTML_BLOCK_PROCESSING: u16 = 144;
const MARKDOWN_KIND_ID_HTML_BLOCK_DECLARATION: u16 = 146;
const MARKDOWN_KIND_ID_HTML_BLOCK_CDATA: u16 = 148;
const MARKDOWN_KIND_ID_HTML_BLOCK_DIV: u16 = 150;
const MARKDOWN_KIND_ID_HTML_BLOCK_CMP: u16 = 152;
const MARKDOWN_KIND_ID_BLOCK_QUOTE: u16 = 154;
const MARKDOWN_KIND_ID_TIGHT_LIST: u16 = 156;
const MARKDOWN_KIND_ID_LOOSE_LIST: u16 = 158;
const MARKDOWN_KIND_ID_LIST_ITEM_TIGHT: u16 = 160;
const MARKDOWN_KIND_ID_TASK_LIST_ITEM_TIGHT: u16 = 161;
const MARKDOWN_KIND_ID_LIST_ITEM_LOOSE: u16 = 163;
const MARKDOWN_KIND_ID_TASK_LIST_ITEM_LOOSE: u16 = 164;
const MARKDOWN_KIND_ID_PARAGRAPH_TASK_LIST: u16 = 166;
const MARKDOWN_KIND_ID_SETEXT_HEADING_TASK_LIST: u16 = 168;
const MARKDOWN_KIND_ID_HEADING_CONTENT: u16 = 169;
const MARKDOWN_KIND_ID_TABLE: u16 = 170;
const MARKDOWN_KIND_ID_TABLE_HEADER_ROW: u16 = 172;
const MARKDOWN_KIND_ID_TABLE_DELIMITER_ROW: u16 = 174;
const MARKDOWN_KIND_ID_TABLE_DATA_ROW: u16 = 176;
const MARKDOWN_KIND_ID_EMPHASIS: u16 = 181;
const MARKDOWN_KIND_ID_STRONG_EMPHASIS: u16 = 182;
const MARKDOWN_KIND_ID_STRIKETHROUGH: u16 = 183;
const MARKDOWN_KIND_ID_LINK: u16 = 184;
const MARKDOWN_KIND_ID_IMAGE: u16 = 185;
const MARKDOWN_KIND_ID_LINK_DESTINATION: u16 = 190;
const MARKDOWN_KIND_ID_LINK_TITLE: u16 = 191;
const MARKDOWN_KIND_ID_WWW_AUTOLINK: u16 = 192;
const MARKDOWN_KIND_ID_URI_AUTOLINK_EXTENDED: u16 = 194;
const MARKDOWN_KIND_ID_EMAIL_AUTOLINK_EXTENDED: u16 = 196;
const MARKDOWN_KIND_ID_URI_AUTOLINK_ANGLE: u16 = 198;
const MARKDOWN_KIND_ID_EMAIL_AUTOLINK_ANGLE: u16 = 199;
const MARKDOWN_KIND_ID_CODE_SPAN: u16 = 200;
const MARKDOWN_KIND_ID_HTML_OPEN_TAG: u16 = 201;
const MARKDOWN_KIND_ID_HTML_SELF_CLOSING_TAG: u16 = 202;
const MARKDOWN_KIND_ID_HTML_CLOSE_TAG: u16 = 204;
const MARKDOWN_KIND_ID_HTML_COMMENT: u16 = 205;
const MARKDOWN_KIND_ID_HTML_PROCESSING_INSTRUCTION: u16 = 206;
const MARKDOWN_KIND_ID_HTML_DECLARATION: u16 = 207;
const MARKDOWN_KIND_ID_HTML_CDATA_SECTION: u16 = 208;
const MARKDOWN_KIND_ID_HTML_ATTRRIBUTE: u16 = 209;
const MARKDOWN_KIND_ID_HTML_ATTRIBUTE_VALUE: u16 = 210;
const MARKDOWN_KIND_ID_TEXT: u16 = 211;
const MARKDOWN_KIND_ID_HTML_ATTRIBUTE_KEY: u16 = 228;
const MARKDOWN_KIND_ID_HTML_DECLARATION_NAME: u16 = 229;
const MARKDOWN_KIND_ID_IMAGE_DESCRIPTION: u16 = 230;
const MARKDOWN_KIND_ID_INFO_STRING: u16 = 231;
const MARKDOWN_KIND_ID_LINE_BREAK: u16 = 232;
const MARKDOWN_KIND_ID_LINK_LABEL: u16 = 233;
const MARKDOWN_KIND_ID_LINK_TEXT: u16 = 234;
const MARKDOWN_KIND_ID_TABLE_CELL: u16 = 235;
const MARKDOWN_KIND_ID_TASK_LIST_ITEM_MARKER: u16 = 236;
pub(crate) struct MarkdownText {
2026-05-23 18:45:44 +01:00
content: Arc<str>,
2026-05-11 00:32:12 +08:00
blocks: Vec<ContentBlock>,
}
enum ContentBlock {
Text {
2026-05-13 00:52:13 +08:00
decoration: Option<gpui::SharedString>,
2026-05-11 00:32:12 +08:00
text: gpui::SharedString,
highlights: Vec<(Range<usize>, gpui::HighlightStyle)>,
links: Vec<(Range<usize>, gpui::SharedString)>,
style: gpui::StyleRefinement,
},
}
2026-05-23 18:45:44 +01:00
pub(crate) fn new(content: Arc<str>, cx: &mut gpui::Context<MarkdownText>) -> MarkdownText {
2026-05-11 00:32:12 +08:00
let mut view = MarkdownText {
content,
blocks: Vec::new(),
};
view.on_create(cx);
view
}
impl Styled for ContentBlock {
fn style(&mut self) -> &mut gpui::StyleRefinement {
match self {
2026-05-23 18:45:44 +01:00
| ContentBlock::Text { style, .. } => style,
2026-05-11 00:32:12 +08:00
}
}
}
impl MarkdownText {
fn on_create(&mut self, cx: &gpui::Context<Self>) {
2026-05-23 18:45:44 +01:00
let content = Arc::clone(&self.content);
2026-05-11 00:32:12 +08:00
let t = cx.background_spawn(async move {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(tree_sitter_markdown::language())
.expect("tree-sitter-markdown language should load");
2026-05-23 18:45:44 +01:00
parser.parse(content.as_bytes(), None)
2026-05-11 00:32:12 +08:00
});
cx.spawn(async |weak, cx| {
if let Some(tree) = t.await {
_ = weak.update(cx, |this, cx| {
let theme = app::current_theme(cx);
this.render_tree(&tree, &theme);
cx.notify();
});
};
})
.detach();
}
/// Hook called by the click handler installed in `render` when a link range is clicked.
///
/// `_link` is the link source recorded for that range. Currently a no-op stub.
fn on_open_link(&self, _link: &str, _cx: &gpui::Context<Self>) {}
fn render_tree(&mut self, tree: &tree_sitter::Tree, theme: &theme::Theme) {
2026-05-13 00:52:13 +08:00
static ORDERED_LIST_MARKER_REGEX: LazyLock<regex::Regex> =
LazyLock::new(|| regex::Regex::new(r"^\d+\.$").unwrap());
2026-05-11 00:32:12 +08:00
let mut cursor = tree.walk();
cursor.goto_first_child();
2026-05-13 02:30:26 +08:00
let mut is_first_heading = true;
2026-05-11 00:32:12 +08:00
fn block_for_node(
cursor: &mut tree_sitter::TreeCursor,
content: &str,
// byte_offset is the number of bytes to offset the content start byte by
byte_offset: usize,
theme: &theme::Theme,
) -> ContentBlock {
let node_start_byte = cursor.node().start_byte();
let mut highlights: Vec<(Range<usize>, gpui::HighlightStyle)> = Vec::new();
let mut links: Vec<(Range<usize>, gpui::SharedString)> = Vec::new();
cursor.goto_first_child();
loop {
let node = cursor.node();
macro_rules! node_range {
() => {
(node.start_byte() - node_start_byte - byte_offset)
..(node.end_byte() - node_start_byte - byte_offset)
};
}
match node.kind_id() {
2026-05-23 18:45:44 +01:00
| MARKDOWN_KIND_ID_EMPHASIS => {
highlights.push((
node_range!(),
gpui::HighlightStyle {
font_style: Some(gpui::FontStyle::Italic),
..Default::default()
},
));
}
| MARKDOWN_KIND_ID_STRONG_EMPHASIS => highlights.push((
node_range!(),
gpui::HighlightStyle {
font_weight: Some(gpui::FontWeight::BOLD),
..Default::default()
},
)),
| MARKDOWN_KIND_ID_LINK => {
if cursor.goto_first_child() {
2026-05-11 00:32:12 +08:00
highlights.push((
node_range!(),
gpui::HighlightStyle {
2026-05-23 18:45:44 +01:00
color: Some(theme.colors.link.into()),
underline: Some(gpui::UnderlineStyle {
color: Some(theme.colors.link.into()),
thickness: px(1.),
wavy: false,
}),
2026-05-11 00:32:12 +08:00
..Default::default()
},
));
2026-05-23 18:45:44 +01:00
if cursor.goto_next_sibling()
&& let Ok(src) = cursor.node().utf8_text(content.as_bytes())
{
links
.push((node_range!(), gpui::SharedString::from(String::from(src))));
} else {
// the link src is invalid, use an empty string as a fallback
// link on click handler will ignore empty string
links.push((node_range!(), "".into()))
2026-05-11 00:32:12 +08:00
}
}
2026-05-23 18:45:44 +01:00
}
2026-05-11 00:32:12 +08:00
2026-05-23 18:45:44 +01:00
| _ => {
// extend here to support more markdown node stylings
}
2026-05-11 00:32:12 +08:00
};
if !cursor.goto_next_sibling() {
break;
}
}
cursor.goto_parent();
ContentBlock::Text {
decoration: None,
text: gpui::SharedString::new(
&content[(node_start_byte + byte_offset)..cursor.node().end_byte()],
),
highlights: highlights,
links: links,
style: gpui::StyleRefinement::default(),
}
}
loop {
let current_node = cursor.node();
fn render_fallback_content(
cursor: &tree_sitter::TreeCursor,
content: &str,
blocks: &mut Vec<ContentBlock>,
) {
blocks.push(ContentBlock::Text {
decoration: None,
text: gpui::SharedString::new(&content[cursor.node().byte_range()]),
highlights: Vec::new(),
links: Vec::new(),
style: gpui::StyleRefinement::default(),
});
}
fn render_list_node(
cursor: &mut tree_sitter::TreeCursor,
content: &str,
blocks: &mut Vec<ContentBlock>,
theme: &theme::Theme,
indentation: usize,
) -> bool {
// expected tree shape for node pointed to by cursor:
// tight_list
// list_item
// list_marker
// paragraph
// tight_list <-- recursive point
// go to list_item node
if !cursor.goto_first_child() {
render_fallback_content(&cursor, content, blocks);
return false;
}
2026-05-13 00:52:13 +08:00
let mut list_index: Option<usize> = None;
2026-05-11 00:32:12 +08:00
loop {
if cursor.node().kind_id() != MARKDOWN_KIND_ID_LIST_ITEM_TIGHT
// if is list_item node, dive into list_marker node
|| !cursor.goto_first_child()
{
// encountered non lists item node under tight list node
// dont know what to do, so skipping this node
if !cursor.goto_next_sibling() {
return false;
}
continue;
}
let marker_node = cursor.node();
let marker_content = &content[marker_node.byte_range()];
2026-05-13 00:52:13 +08:00
let list_marker_char = match marker_content {
2026-05-23 18:45:44 +01:00
// unordered list item
| "-" | "+" | "*" => Some("".to_string()),
| marker_content if ORDERED_LIST_MARKER_REGEX.is_match(marker_content) => {
let i = list_index.get_or_insert_with(|| {
marker_content
.strip_suffix('.')
.unwrap()
.parse::<usize>()
.unwrap()
});
let j = *i;
*i = j + 1;
Some(format!("{j}."))
}
2026-05-11 00:32:12 +08:00
2026-05-23 18:45:44 +01:00
| _ => None,
2026-05-13 00:52:13 +08:00
};
let Some(list_marker_char) = list_marker_char else {
2026-05-11 00:32:12 +08:00
render_fallback_content(&cursor, content, blocks);
return false;
2026-05-13 00:52:13 +08:00
};
// go to paragraph sibling node
let block = if cursor.goto_next_sibling() {
let mut b = block_for_node(cursor, content, 0, theme);
match b {
2026-05-23 18:45:44 +01:00
| ContentBlock::Text {
ref mut decoration, ..
} => *decoration = Some(list_marker_char.into()),
2026-05-13 00:52:13 +08:00
}
b
} else {
ContentBlock::Text {
decoration: Some(list_marker_char.into()),
text: gpui::SharedString::default(),
highlights: Vec::new(),
links: Vec::new(),
style: gpui::StyleRefinement::default(),
}
2026-05-11 00:32:12 +08:00
}
2026-05-13 00:52:13 +08:00
.text_sm()
.text_color(theme.colors.text)
.p(rems(indentation as f32));
blocks.push(block);
// if there is a nested tight_light after paragraph
// render it recursively
if cursor.goto_next_sibling()
&& cursor.node().kind_id() == MARKDOWN_KIND_ID_TIGHT_LIST
{
render_list_node(cursor, content, blocks, theme, indentation + 1);
2026-05-11 00:32:12 +08:00
}
// go back to list_item node
cursor.goto_parent();
if !cursor.goto_next_sibling() {
// no more list_item in tight_list node
// go back up to tight_list node
cursor.goto_parent();
return true;
}
}
}
match current_node.kind_id() {
2026-05-23 18:45:44 +01:00
| MARKDOWN_KIND_ID_ATX_HEADING => {
if !cursor.goto_first_child() {
render_fallback_content(&cursor, &self.content, &mut self.blocks);
continue;
}
2026-05-11 00:32:12 +08:00
2026-05-23 18:45:44 +01:00
let marker_node_kind = cursor.node().kind_id();
2026-05-11 00:32:12 +08:00
2026-05-23 18:45:44 +01:00
let block = if cursor.goto_next_sibling()
&& cursor.node().kind_id() == MARKDOWN_KIND_ID_HEADING_CONTENT
{
// because HEADING_CONTENT node includes the space after the heading marker
// offset by 1 to exclude the space
block_for_node(&mut cursor, &self.content, 1, theme)
} else {
ContentBlock::Text {
decoration: None,
text: gpui::SharedString::new(&self.content[current_node.byte_range()]),
highlights: Vec::new(),
links: Vec::new(),
style: gpui::StyleRefinement::default(),
}
2026-05-23 18:45:44 +01:00
};
2026-05-13 02:30:26 +08:00
2026-05-23 18:45:44 +01:00
let mut block = match marker_node_kind {
| MARKDOWN_KIND_ID_ATX_H1_MARKER => block
.text_size(rems(2.25))
.font_weight(gpui::FontWeight::EXTRA_BOLD)
.mb_6(),
| MARKDOWN_KIND_ID_ATX_H2_MARKER => block
.text_2xl()
.font_weight(gpui::FontWeight::BOLD)
.mt_12()
.mb_4(),
| MARKDOWN_KIND_ID_ATX_H3_MARKER => block
.text_xl()
.font_weight(gpui::FontWeight::SEMIBOLD)
.mt_8()
.mb_3(),
| MARKDOWN_KIND_ID_ATX_H4_MARKER => block
.text_base()
.font_weight(gpui::FontWeight::SEMIBOLD)
.mt_6()
.mb_2(),
| _ => block,
}
.text_color(theme.colors.text);
2026-05-11 00:32:12 +08:00
2026-05-23 18:45:44 +01:00
if is_first_heading {
is_first_heading = false;
block = block.mt_0();
}
2026-05-11 00:32:12 +08:00
2026-05-23 18:45:44 +01:00
cursor.goto_parent();
2026-05-11 00:32:12 +08:00
2026-05-23 18:45:44 +01:00
self.blocks.push(block);
}
2026-05-11 00:32:12 +08:00
2026-05-23 18:45:44 +01:00
| MARKDOWN_KIND_ID_PARAGRAPH => {
let block = block_for_node(&mut cursor, &self.content, 0, theme)
.text_color(theme.colors.text)
.text_sm();
self.blocks.push(block);
}
| MARKDOWN_KIND_ID_TIGHT_LIST => {
let is_rendered =
render_list_node(&mut cursor, &self.content, &mut self.blocks, theme, 0);
if !is_rendered {
continue;
2026-05-11 00:32:12 +08:00
}
2026-05-23 18:45:44 +01:00
}
2026-05-11 00:32:12 +08:00
2026-05-23 18:45:44 +01:00
| MARKDOWN_KIND_ID_FENCED_CODE_BLOCK => {
// expected tree shape:
// fenced_code_block
// ├── info_string? (present if there is a language annotation)
// └── code_fence_content? (present if there is some content between the backticks)
2026-05-11 01:22:19 +08:00
2026-05-23 18:45:44 +01:00
if !cursor.goto_first_child() {
render_fallback_content(&cursor, &self.content, &mut self.blocks);
continue;
}
2026-05-11 01:22:19 +08:00
2026-05-23 18:45:44 +01:00
let content = if cursor.node().kind_id() == MARKDOWN_KIND_ID_INFO_STRING {
// skipping info string (which annotates the code block)
if cursor.goto_next_sibling() {
// this is code_fence_content node
2026-05-11 01:22:19 +08:00
gpui::SharedString::new(
cursor
.node()
.utf8_text(self.content.as_bytes())
.unwrap_or_default(),
)
2026-05-23 18:45:44 +01:00
} else {
gpui::SharedString::default()
}
} else {
// assuming the current node is already code_fence_content
gpui::SharedString::new(
cursor
.node()
.utf8_text(self.content.as_bytes())
.unwrap_or_default(),
)
};
2026-05-11 01:22:19 +08:00
2026-05-23 18:45:44 +01:00
cursor.goto_parent();
2026-05-11 01:22:19 +08:00
2026-05-23 18:45:44 +01:00
let block = ContentBlock::Text {
decoration: None,
text: content,
highlights: Vec::new(),
links: Vec::new(),
style: gpui::StyleRefinement::default(),
2026-05-11 01:22:19 +08:00
}
2026-05-23 18:45:44 +01:00
.text_sm()
.text_color(theme.colors.text)
.line_height(relative(1.2))
.font_family("Menlo")
.px_3()
.py_2()
.rounded_sm()
.bg(theme.colors.code_bg)
.border_1()
.my_4()
.border_color(theme.colors.code_border);
self.blocks.push(block);
}
2026-05-11 01:22:19 +08:00
2026-05-23 18:45:44 +01:00
| _ => {
println!(
"[WARN] formatting not implemenetd for node type {:?}",
current_node.kind()
);
2026-05-11 00:32:12 +08:00
2026-05-23 18:45:44 +01:00
let block = block_for_node(&mut cursor, &self.content, 0, theme)
.text_color(theme.colors.text)
.text_sm();
2026-05-11 00:32:12 +08:00
2026-05-23 18:45:44 +01:00
self.blocks.push(block);
}
2026-05-11 00:32:12 +08:00
}
if !cursor.goto_next_sibling() {
break;
}
}
}
}
impl gpui::Render for MarkdownText {
fn render(
&mut self,
_window: &mut gpui::Window,
cx: &mut gpui::prelude::Context<Self>,
) -> impl gpui::prelude::IntoElement {
let children = self.blocks.iter().enumerate().map(|(i, block)| {
match block {
2026-05-23 18:45:44 +01:00
| ContentBlock::Text {
decoration,
text,
highlights,
links,
style,
} => {
let styled_text =
gpui::StyledText::new(text.clone()).with_highlights(highlights.clone());
let content = if links.is_empty() {
div().w_full().child(styled_text)
} else {
// if link in block, interactive text is needed
// to handle link clicks
let (link_ranges, srcs): (Vec<_>, Vec<_>) = links.iter().cloned().unzip();
let weak = cx.entity();
let t = gpui::InteractiveText::new(i, styled_text).on_click(
link_ranges,
move |i, _, cx| {
if let Some(src) = srcs.get(i) {
weak.update(cx, |this, cx| {
this.on_open_link(src, cx);
cx.notify();
})
}
},
);
2026-05-11 00:32:12 +08:00
2026-05-23 18:45:44 +01:00
div().w_full().child(t)
};
2026-05-11 02:14:05 +08:00
2026-05-23 18:45:44 +01:00
let mut div = match decoration {
| Some(d) => div()
.w_full()
.flex()
.flex_row()
.gap_2()
.items_start()
.child(d.clone())
.child(div().flex_1().min_w_0().child(content)),
| None => div().w_full().child(content),
};
2026-05-11 00:32:12 +08:00
2026-05-23 18:45:44 +01:00
div.style().refine(&style);
2026-05-11 00:32:12 +08:00
2026-05-23 18:45:44 +01:00
div
}
2026-05-11 00:32:12 +08:00
}
});
2026-05-11 02:14:05 +08:00
div().w_full().children(children)
2026-05-11 00:32:12 +08:00
}
}