comrak/nodes.rs
1//! The CommonMark AST.
2
3use crate::arena_tree::Node;
4use std::cell::RefCell;
5use std::convert::TryFrom;
6
7#[cfg(feature = "shortcodes")]
8pub use crate::parser::shortcodes::NodeShortCode;
9
10pub use crate::parser::alert::{AlertType, NodeAlert};
11pub use crate::parser::math::NodeMath;
12pub use crate::parser::multiline_block_quote::NodeMultilineBlockQuote;
13
14/// The core AST node enum.
15#[derive(Debug, Clone, PartialEq, Eq)]
16pub enum NodeValue {
17 /// The root of every CommonMark document. Contains **blocks**.
18 Document,
19
20 /// Non-Markdown front matter. Treated as an opaque blob.
21 FrontMatter(String),
22
23 /// **Block**. A [block quote](https://github.github.com/gfm/#block-quotes). Contains other
24 /// **blocks**.
25 ///
26 /// ``` md
27 /// > A block quote.
28 /// ```
29 BlockQuote,
30
31 /// **Block**. A [list](https://github.github.com/gfm/#lists). Contains
32 /// [list items](https://github.github.com/gfm/#list-items).
33 ///
34 /// ``` md
35 /// * An unordered list
36 /// * Another item
37 ///
38 /// 1. An ordered list
39 /// 2. Another item
40 /// ```
41 List(NodeList),
42
43 /// **Block**. A [list item](https://github.github.com/gfm/#list-items). Contains other
44 /// **blocks**.
45 Item(NodeList),
46
47 /// **Block**. A description list, enabled with `ext_description_lists` option. Contains
48 /// description items.
49 ///
50 /// It is required to put a blank line between terms and details.
51 ///
52 /// ``` md
53 /// Term 1
54 ///
55 /// : Details 1
56 ///
57 /// Term 2
58 ///
59 /// : Details 2
60 /// ```
61 DescriptionList,
62
63 /// *Block**. An item of a description list. Contains a term and one details block.
64 DescriptionItem(NodeDescriptionItem),
65
66 /// **Block**. Term of an item in a definition list.
67 DescriptionTerm,
68
69 /// **Block**. Details of an item in a definition list.
70 DescriptionDetails,
71
72 /// **Block**. A code block; may be [fenced](https://github.github.com/gfm/#fenced-code-blocks)
73 /// or [indented](https://github.github.com/gfm/#indented-code-blocks). Contains raw text
74 /// which is not parsed as Markdown, although is HTML escaped.
75 CodeBlock(NodeCodeBlock),
76
77 /// **Block**. A [HTML block](https://github.github.com/gfm/#html-blocks). Contains raw text
78 /// which is neither parsed as Markdown nor HTML escaped.
79 HtmlBlock(NodeHtmlBlock),
80
81 /// **Block**. A [paragraph](https://github.github.com/gfm/#paragraphs). Contains **inlines**.
82 Paragraph,
83
84 /// **Block**. A heading; may be an [ATX heading](https://github.github.com/gfm/#atx-headings)
85 /// or a [setext heading](https://github.github.com/gfm/#setext-headings). Contains
86 /// **inlines**.
87 Heading(NodeHeading),
88
89 /// **Block**. A [thematic break](https://github.github.com/gfm/#thematic-breaks). Has no
90 /// children.
91 ThematicBreak,
92
93 /// **Block**. A footnote definition. The `String` is the footnote's name.
94 /// Contains other **blocks**.
95 FootnoteDefinition(NodeFootnoteDefinition),
96
97 /// **Block**. A [table](https://github.github.com/gfm/#tables-extension-) per the GFM spec.
98 /// Contains table rows.
99 Table(NodeTable),
100
101 /// **Block**. A table row. The `bool` represents whether the row is the header row or not.
102 /// Contains table cells.
103 TableRow(bool),
104
105 /// **Block**. A table cell. Contains **inlines**.
106 TableCell,
107
108 /// **Inline**. [Textual content](https://github.github.com/gfm/#textual-content). All text
109 /// in a document will be contained in a `Text` node.
110 Text(String),
111
112 /// **Block**. [Task list item](https://github.github.com/gfm/#task-list-items-extension-).
113 /// The value is the symbol that was used in the brackets to mark a task item as checked, or
114 /// None if the item is unchecked.
115 TaskItem(Option<char>),
116
117 /// **Inline**. A [soft line break](https://github.github.com/gfm/#soft-line-breaks). If
118 /// the `hardbreaks` option is set in `Options` during formatting, it will be formatted
119 /// as a `LineBreak`.
120 SoftBreak,
121
122 /// **Inline**. A [hard line break](https://github.github.com/gfm/#hard-line-breaks).
123 LineBreak,
124
125 /// **Inline**. A [code span](https://github.github.com/gfm/#code-spans).
126 Code(NodeCode),
127
128 /// **Inline**. [Raw HTML](https://github.github.com/gfm/#raw-html) contained inline.
129 HtmlInline(String),
130
131 /// **Block/Inline**. A Raw output node. This will be inserted verbatim into CommonMark and
132 /// HTML output. It can only be created programmatically, and is never parsed from input.
133 Raw(String),
134
135 /// **Inline**. [Emphasized](https://github.github.com/gfm/#emphasis-and-strong-emphasis)
136 /// text.
137 Emph,
138
139 /// **Inline**. [Strong](https://github.github.com/gfm/#emphasis-and-strong-emphasis) text.
140 Strong,
141
142 /// **Inline**. [Strikethrough](https://github.github.com/gfm/#strikethrough-extension-) text
143 /// per the GFM spec.
144 Strikethrough,
145
146 /// **Inline**. Superscript. Enabled with `ext_superscript` option.
147 Superscript,
148
149 /// **Inline**. A [link](https://github.github.com/gfm/#links) to some URL, with possible
150 /// title.
151 Link(NodeLink),
152
153 /// **Inline**. An [image](https://github.github.com/gfm/#images).
154 Image(NodeLink),
155
156 /// **Inline**. A footnote reference.
157 FootnoteReference(NodeFootnoteReference),
158
159 #[cfg(feature = "shortcodes")]
160 /// **Inline**. An Emoji character generated from a shortcode. Enable with feature "shortcodes".
161 ShortCode(NodeShortCode),
162
163 /// **Inline**. A math span. Contains raw text which is not parsed as Markdown.
164 /// Dollar math or code math
165 ///
166 /// Inline math $1 + 2$ and $`1 + 2`$
167 ///
168 /// Display math $$1 + 2$$ and
169 /// $$
170 /// 1 + 2
171 /// $$
172 ///
173 Math(NodeMath),
174
175 /// **Block**. A [multiline block quote](https://github.github.com/gfm/#block-quotes). Spans multiple
176 /// lines and contains other **blocks**.
177 ///
178 /// ``` md
179 /// >>>
180 /// A paragraph.
181 ///
182 /// - item one
183 /// - item two
184 /// >>>
185 /// ```
186 MultilineBlockQuote(NodeMultilineBlockQuote),
187
188 /// **Inline**. A character that has been [escaped](https://github.github.com/gfm/#backslash-escapes)
189 ///
190 /// Enabled with [`escaped_char_spans`](crate::RenderOptionsBuilder::escaped_char_spans).
191 Escaped,
192
193 /// **Inline**. A wikilink to some URL.
194 WikiLink(NodeWikiLink),
195
196 /// **Inline**. Underline. Enabled with `underline` option.
197 Underline,
198
199 /// **Inline**. Subscript. Enabled with `subscript` options.
200 Subscript,
201
202 /// **Inline**. Spoilered text. Enabled with `spoiler` option.
203 SpoileredText,
204
205 /// **Inline**. Text surrounded by escaped markup. Enabled with `spoiler` option.
206 /// The `String` is the tag to be escaped.
207 EscapedTag(String),
208
209 /// **Block**. GitHub style alert boxes which uses a modified blockquote syntax.
210 /// Enabled with the `alerts` option.
211 Alert(NodeAlert),
212}
213
/// How the content of one table cell is aligned.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TableAlignment {
    /// The cell content carries no alignment.
    None,

    /// The cell content is left-aligned.
    Left,

    /// The cell content is centered.
    Center,

    /// The cell content is right-aligned.
    Right,
}

impl TableAlignment {
    /// Returns the lowercase name of this alignment, or `None` for
    /// [`TableAlignment::None`].
    pub(crate) fn xml_name(&self) -> Option<&'static str> {
        let name = match self {
            Self::Left => "left",
            Self::Center => "center",
            Self::Right => "right",
            // Unaligned cells have no name at all.
            Self::None => return None,
        };
        Some(name)
    }
}
240
241/// The metadata of a table
242#[derive(Debug, Default, Clone, PartialEq, Eq)]
243pub struct NodeTable {
244 /// The table alignments
245 pub alignments: Vec<TableAlignment>,
246
247 /// Number of columns of the table
248 pub num_columns: usize,
249
250 /// Number of rows of the table
251 pub num_rows: usize,
252
253 /// Number of non-empty, non-autocompleted cells
254 pub num_nonempty_cells: usize,
255}
256
/// The payload of an inline [code span](https://github.github.com/gfm/#code-spans).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct NodeCode {
    /// How many backticks there are.
    pub num_backticks: usize,

    /// The text inside the code span.
    ///
    /// Because this text is never interpreted as Markdown, it is stored
    /// directly in this structure instead of in a child inline node.
    pub literal: String,
}
269
/// Destination details shared by links and images.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct NodeLink {
    /// The URL of the link destination, or of the image source.
    pub url: String,

    /// The title of the link or image.
    ///
    /// The HTML formatter uses this field for the `title` attribute, even for
    /// images; the `alt` text comes from the image's inline text instead.
    pub title: String,
}
282
/// Destination details of a wikilink.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct NodeWikiLink {
    /// The URL the wikilink points to.
    pub url: String,
}
289
290/// The metadata of a list; the kind of list, the delimiter used and so on.
291#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
292pub struct NodeList {
293 /// The kind of list (bullet (unordered) or ordered).
294 pub list_type: ListType,
295
296 /// Number of spaces before the list marker.
297 pub marker_offset: usize,
298
299 /// Number of characters between the start of the list marker and the item text (including the list marker(s)).
300 pub padding: usize,
301
302 /// For ordered lists, the ordinal the list starts at.
303 pub start: usize,
304
305 /// For ordered lists, the delimiter after each number.
306 pub delimiter: ListDelimType,
307
308 /// For bullet lists, the character used for each bullet.
309 pub bullet_char: u8,
310
311 /// Whether the list is [tight](https://github.github.com/gfm/#tight), i.e. whether the
312 /// paragraphs are wrapped in `<p>` tags when formatted as HTML.
313 pub tight: bool,
314
315 /// Whether the list contains tasks (checkbox items)
316 pub is_task_list: bool,
317}
318
/// Metadata carried by an item of a description list.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct NodeDescriptionItem {
    /// How many spaces precede the list marker.
    pub marker_offset: usize,

    /// How many characters lie between the start of the list marker and the
    /// item text; the list marker(s) themselves are counted.
    pub padding: usize,

    /// Whether the list is [tight](https://github.github.com/gfm/#tight), i.e. whether the
    /// paragraphs are wrapped in `<p>` tags when formatted as HTML.
    pub tight: bool,
}
332
/// The kind of a list.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ListType {
    /// An unordered (bullet) list. This is the default.
    #[default]
    Bullet,

    /// An ordered list.
    Ordered,
}
343
/// The character that follows each number of an ordered list.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ListDelimType {
    /// The period character `.`. This is the default.
    #[default]
    Period,

    /// The paren character `)`.
    Paren,
}

impl ListDelimType {
    /// Returns the lowercase name of this delimiter.
    pub(crate) fn xml_name(&self) -> &'static str {
        match self {
            Self::Paren => "paren",
            Self::Period => "period",
        }
    }
}
363
/// Metadata and contents of a code block, whether fenced or indented.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct NodeCodeBlock {
    /// `true` when the code block is fenced.
    pub fenced: bool,

    /// The fence character (`` ` `` or `~`) of a fenced code block.
    pub fence_char: u8,

    /// The length of the fence of a fenced code block.
    pub fence_length: usize,

    /// The indentation level of the code within a fenced code block.
    pub fence_offset: usize,

    /// The [info string](https://github.github.com/gfm/#info-string) following the opening
    /// fence of a fenced code block, if any.
    pub info: String,

    /// The literal contents of the code block.
    ///
    /// Because the contents are never interpreted as Markdown, they are stored
    /// directly in this structure instead of in a child inline node.
    pub literal: String,
}
388
/// Metadata attached to a heading.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct NodeHeading {
    /// The heading level: 1 to 6 for ATX headings, 1 or 2 for setext headings.
    pub level: u8,

    /// `true` for a setext heading, `false` for an ATX heading.
    pub setext: bool,
}
398
/// Metadata and contents of an included HTML block.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct NodeHtmlBlock {
    /// The type of the HTML block.
    pub block_type: u8,

    /// The literal contents of the HTML block. As with `NodeCodeBlock`, the
    /// content lives here and not in any inline node.
    pub literal: String,
}
409
/// Metadata attached to a footnote definition.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct NodeFootnoteDefinition {
    /// The footnote's name.
    pub name: String,

    /// How many references to this footnote exist in total.
    pub total_references: u32,
}
419
/// Metadata attached to a footnote reference.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct NodeFootnoteReference {
    /// The footnote's name.
    pub name: String,

    /// The index of this reference among the references to the same footnote.
    pub ref_num: u32,

    /// The index of the footnote within the document.
    pub ix: u32,
}
432
433impl NodeValue {
434 /// Indicates whether this node is a block node or inline node.
435 pub fn block(&self) -> bool {
436 matches!(
437 *self,
438 NodeValue::Document
439 | NodeValue::BlockQuote
440 | NodeValue::FootnoteDefinition(_)
441 | NodeValue::List(..)
442 | NodeValue::DescriptionList
443 | NodeValue::DescriptionItem(_)
444 | NodeValue::DescriptionTerm
445 | NodeValue::DescriptionDetails
446 | NodeValue::Item(..)
447 | NodeValue::CodeBlock(..)
448 | NodeValue::HtmlBlock(..)
449 | NodeValue::Paragraph
450 | NodeValue::Heading(..)
451 | NodeValue::ThematicBreak
452 | NodeValue::Table(..)
453 | NodeValue::TableRow(..)
454 | NodeValue::TableCell
455 | NodeValue::TaskItem(..)
456 | NodeValue::MultilineBlockQuote(_)
457 | NodeValue::Alert(_)
458 )
459 }
460
461 /// Whether the type the node is of can contain inline nodes.
462 pub fn contains_inlines(&self) -> bool {
463 matches!(
464 *self,
465 NodeValue::Paragraph | NodeValue::Heading(..) | NodeValue::TableCell
466 )
467 }
468
469 /// Return a reference to the text of a `Text` inline, if this node is one.
470 ///
471 /// Convenience method.
472 pub fn text(&self) -> Option<&String> {
473 match *self {
474 NodeValue::Text(ref t) => Some(t),
475 _ => None,
476 }
477 }
478
479 /// Return a mutable reference to the text of a `Text` inline, if this node is one.
480 ///
481 /// Convenience method.
482 pub fn text_mut(&mut self) -> Option<&mut String> {
483 match *self {
484 NodeValue::Text(ref mut t) => Some(t),
485 _ => None,
486 }
487 }
488
489 pub(crate) fn accepts_lines(&self) -> bool {
490 matches!(
491 *self,
492 NodeValue::Paragraph | NodeValue::Heading(..) | NodeValue::CodeBlock(..)
493 )
494 }
495
496 pub(crate) fn xml_node_name(&self) -> &'static str {
497 match *self {
498 NodeValue::Document => "document",
499 NodeValue::BlockQuote => "block_quote",
500 NodeValue::FootnoteDefinition(_) => "footnote_definition",
501 NodeValue::List(..) => "list",
502 NodeValue::DescriptionList => "description_list",
503 NodeValue::DescriptionItem(_) => "description_item",
504 NodeValue::DescriptionTerm => "description_term",
505 NodeValue::DescriptionDetails => "description_details",
506 NodeValue::Item(..) => "item",
507 NodeValue::CodeBlock(..) => "code_block",
508 NodeValue::HtmlBlock(..) => "html_block",
509 NodeValue::Paragraph => "paragraph",
510 NodeValue::Heading(..) => "heading",
511 NodeValue::ThematicBreak => "thematic_break",
512 NodeValue::Table(..) => "table",
513 NodeValue::TableRow(..) => "table_row",
514 NodeValue::TableCell => "table_cell",
515 NodeValue::Text(..) => "text",
516 NodeValue::SoftBreak => "softbreak",
517 NodeValue::LineBreak => "linebreak",
518 NodeValue::Image(..) => "image",
519 NodeValue::Link(..) => "link",
520 NodeValue::Emph => "emph",
521 NodeValue::Strong => "strong",
522 NodeValue::Code(..) => "code",
523 NodeValue::HtmlInline(..) => "html_inline",
524 NodeValue::Raw(..) => "raw",
525 NodeValue::Strikethrough => "strikethrough",
526 NodeValue::FrontMatter(_) => "frontmatter",
527 NodeValue::TaskItem { .. } => "taskitem",
528 NodeValue::Superscript => "superscript",
529 NodeValue::FootnoteReference(..) => "footnote_reference",
530 #[cfg(feature = "shortcodes")]
531 NodeValue::ShortCode(_) => "shortcode",
532 NodeValue::MultilineBlockQuote(_) => "multiline_block_quote",
533 NodeValue::Escaped => "escaped",
534 NodeValue::Math(..) => "math",
535 NodeValue::WikiLink(..) => "wikilink",
536 NodeValue::Underline => "underline",
537 NodeValue::Subscript => "subscript",
538 NodeValue::SpoileredText => "spoiler",
539 NodeValue::EscapedTag(_) => "escaped_tag",
540 NodeValue::Alert(_) => "alert",
541 }
542 }
543}
544
545/// A single node in the CommonMark AST.
546///
547/// The struct contains metadata about the node's position in the original document, and the core
548/// enum, `NodeValue`.
549#[derive(Debug, Clone, PartialEq, Eq)]
550pub struct Ast {
551 /// The node value itself.
552 pub value: NodeValue,
553
554 /// The positions in the source document this node comes from.
555 pub sourcepos: Sourcepos,
556 pub(crate) internal_offset: usize,
557
558 pub(crate) content: String,
559 pub(crate) open: bool,
560 pub(crate) last_line_blank: bool,
561 pub(crate) table_visited: bool,
562 pub(crate) line_offsets: Vec<usize>,
563}
564
565/// Represents the position in the source Markdown this node was rendered from.
566#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
567pub struct Sourcepos {
568 /// The line and column of the first character of this node.
569 pub start: LineColumn,
570 /// The line and column of the last character of this node.
571 pub end: LineColumn,
572}
573
574impl std::fmt::Display for Sourcepos {
575 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
576 write!(
577 f,
578 "{}:{}-{}:{}",
579 self.start.line, self.start.column, self.end.line, self.end.column,
580 )
581 }
582}
583
584impl From<(usize, usize, usize, usize)> for Sourcepos {
585 fn from(sp: (usize, usize, usize, usize)) -> Sourcepos {
586 Sourcepos {
587 start: LineColumn {
588 line: sp.0,
589 column: sp.1,
590 },
591 end: LineColumn {
592 line: sp.2,
593 column: sp.3,
594 },
595 }
596 }
597}
598
/// The 1-based line and column position of a single character.
#[derive(Clone, Copy, Debug, Default, Eq, Ord, PartialEq, PartialOrd)]
pub struct LineColumn {
    /// 1-based line number of the character.
    pub line: usize,
    /// 1-based column number of the character.
    pub column: usize,
}

impl From<(usize, usize)> for LineColumn {
    /// Builds a position from a `(line, column)` pair.
    fn from(lc: (usize, usize)) -> LineColumn {
        let (line, column) = lc;
        LineColumn { line, column }
    }
}

impl LineColumn {
    /// Produces a copy of this position whose column is shifted by `offset`;
    /// the line is left as it is.
    ///
    /// # Panics
    ///
    /// Panics if the shifted column would be negative.
    pub fn column_add(&self, offset: isize) -> LineColumn {
        let shifted = (self.column as isize) + offset;
        let column = usize::try_from(shifted).unwrap();
        LineColumn {
            line: self.line,
            column,
        }
    }
}
626
627impl Ast {
628 /// Create a new AST node with the given value.
629 pub fn new(value: NodeValue, start: LineColumn) -> Self {
630 Ast {
631 value,
632 content: String::new(),
633 sourcepos: (start.line, start.column, start.line, 0).into(),
634 internal_offset: 0,
635 open: true,
636 last_line_blank: false,
637 table_visited: false,
638 line_offsets: Vec::with_capacity(0),
639 }
640 }
641}
642
643/// The type of a node within the document.
644///
645/// It is bound by the lifetime `'a`, which corresponds to the `Arena` nodes are
646/// allocated in. Child `Ast`s are wrapped in `RefCell` for interior mutability.
647///
648/// You can construct a new `AstNode` from a `NodeValue` using the `From` trait:
649///
650/// ```no_run
651/// # use comrak::nodes::{AstNode, NodeValue};
652/// let root = AstNode::from(NodeValue::Document);
653/// ```
654///
655/// Note that no sourcepos information is given to the created node. If you wish
656/// to assign sourcepos information, use the `From` trait to create an `AstNode`
657/// from an `Ast`:
658///
659/// ```no_run
660/// # use comrak::nodes::{Ast, AstNode, NodeValue};
661/// let root = AstNode::from(Ast::new(
662/// NodeValue::Paragraph,
663/// (4, 1).into(), // start_line, start_col
664/// ));
665/// ```
666///
667/// Adjust the `end` position manually.
668///
669/// For practical use, you'll probably need it allocated in an `Arena`, in which
670/// case you can use `.into()` to simplify creation:
671///
672/// ```no_run
673/// # use comrak::{nodes::{AstNode, NodeValue}, Arena};
674/// # let arena = Arena::<AstNode>::new();
675/// let node_in_arena = arena.alloc(NodeValue::Document.into());
676/// ```
677pub type AstNode<'a> = Node<'a, RefCell<Ast>>;
678
679impl<'a> From<NodeValue> for AstNode<'a> {
680 /// Create a new AST node with the given value. The sourcepos is set to (0,0)-(0,0).
681 fn from(value: NodeValue) -> Self {
682 Node::new(RefCell::new(Ast::new(value, LineColumn::default())))
683 }
684}
685
686impl<'a> From<Ast> for AstNode<'a> {
687 /// Create a new AST node with the given Ast.
688 fn from(ast: Ast) -> Self {
689 Node::new(RefCell::new(ast))
690 }
691}
692
693/// Validation errors produced by [Node::validate].
694#[derive(Debug, Clone)]
695pub enum ValidationError<'a> {
696 /// The type of a child node is not allowed in the parent node. This can happen when an inline
697 /// node is found in a block container, a block is found in an inline node, etc.
698 InvalidChildType {
699 /// The parent node.
700 parent: &'a AstNode<'a>,
701 /// The child node.
702 child: &'a AstNode<'a>,
703 },
704}
705
706impl<'a> Node<'a, RefCell<Ast>> {
707 /// The comrak representation of a markdown node in Rust isn't strict enough to rule out
708 /// invalid trees according to the CommonMark specification. One simple example is that block
709 /// containers, such as lists, should only contain blocks, but it's possible to put naked
710 /// inline text in a list item. Such invalid trees can lead comrak to generate incorrect output
711 /// if rendered.
712 ///
713 /// This method performs additional structural checks to ensure that a markdown AST is valid
714 /// according to the CommonMark specification.
715 ///
716 /// Note that those invalid trees can only be generated programmatically. Parsing markdown with
717 /// comrak, on the other hand, should always produce a valid tree.
718 pub fn validate(&'a self) -> Result<(), ValidationError<'a>> {
719 let mut stack = vec![self];
720
721 while let Some(node) = stack.pop() {
722 // Check that this node type is valid wrt to the type of its parent.
723 if let Some(parent) = node.parent() {
724 if !can_contain_type(parent, &node.data.borrow().value) {
725 return Err(ValidationError::InvalidChildType {
726 parent,
727 child: node,
728 });
729 }
730 }
731
732 stack.extend(node.children());
733 }
734
735 Ok(())
736 }
737}
738
739pub(crate) fn last_child_is_open<'a>(node: &'a AstNode<'a>) -> bool {
740 node.last_child().map_or(false, |n| n.data.borrow().open)
741}
742
743/// Returns true if the given node can contain a node with the given value.
744pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool {
745 match *child {
746 NodeValue::Document => {
747 return false;
748 }
749 NodeValue::FrontMatter(_) => {
750 return matches!(node.data.borrow().value, NodeValue::Document);
751 }
752 _ => {}
753 }
754
755 match node.data.borrow().value {
756 NodeValue::Document
757 | NodeValue::BlockQuote
758 | NodeValue::FootnoteDefinition(_)
759 | NodeValue::DescriptionTerm
760 | NodeValue::DescriptionDetails
761 | NodeValue::Item(..)
762 | NodeValue::TaskItem(..) => {
763 child.block() && !matches!(*child, NodeValue::Item(..) | NodeValue::TaskItem(..))
764 }
765
766 NodeValue::List(..) => matches!(*child, NodeValue::Item(..) | NodeValue::TaskItem(..)),
767
768 NodeValue::DescriptionList => matches!(*child, NodeValue::DescriptionItem(_)),
769
770 NodeValue::DescriptionItem(_) => matches!(
771 *child,
772 NodeValue::DescriptionTerm | NodeValue::DescriptionDetails
773 ),
774
775 #[cfg(feature = "shortcodes")]
776 NodeValue::ShortCode(..) => !child.block(),
777
778 NodeValue::Paragraph
779 | NodeValue::Heading(..)
780 | NodeValue::Emph
781 | NodeValue::Strong
782 | NodeValue::Link(..)
783 | NodeValue::Image(..)
784 | NodeValue::WikiLink(..)
785 | NodeValue::Strikethrough
786 | NodeValue::Superscript
787 | NodeValue::SpoileredText
788 | NodeValue::Underline
789 | NodeValue::Subscript
790 // XXX: this is quite a hack: the EscapedTag _contains_ whatever was
791 // possibly going to fall into the spoiler. This should be fixed in
792 // inlines.
793 | NodeValue::EscapedTag(_)
794 => !child.block(),
795
796 NodeValue::Table(..) => matches!(*child, NodeValue::TableRow(..)),
797
798 NodeValue::TableRow(..) => matches!(*child, NodeValue::TableCell),
799
800 #[cfg(not(feature = "shortcodes"))]
801 NodeValue::TableCell => matches!(
802 *child,
803 NodeValue::Text(..)
804 | NodeValue::Code(..)
805 | NodeValue::Emph
806 | NodeValue::Strong
807 | NodeValue::Link(..)
808 | NodeValue::Image(..)
809 | NodeValue::Strikethrough
810 | NodeValue::HtmlInline(..)
811 | NodeValue::Math(..)
812 | NodeValue::WikiLink(..)
813 | NodeValue::FootnoteReference(..)
814 | NodeValue::Superscript
815 | NodeValue::SpoileredText
816 | NodeValue::Underline
817 | NodeValue::Subscript
818 ),
819
820 #[cfg(feature = "shortcodes")]
821 NodeValue::TableCell => matches!(
822 *child,
823 NodeValue::Text(..)
824 | NodeValue::Code(..)
825 | NodeValue::Emph
826 | NodeValue::Strong
827 | NodeValue::Link(..)
828 | NodeValue::Image(..)
829 | NodeValue::Strikethrough
830 | NodeValue::HtmlInline(..)
831 | NodeValue::Math(..)
832 | NodeValue::WikiLink(..)
833 | NodeValue::FootnoteReference(..)
834 | NodeValue::Superscript
835 | NodeValue::SpoileredText
836 | NodeValue::Underline
837 | NodeValue::Subscript
838 | NodeValue::ShortCode(..)
839 ),
840
841 NodeValue::MultilineBlockQuote(_) => {
842 child.block() && !matches!(*child, NodeValue::Item(..) | NodeValue::TaskItem(..))
843 }
844
845 NodeValue::Alert(_) => {
846 child.block() && !matches!(*child, NodeValue::Item(..) | NodeValue::TaskItem(..))
847 }
848 _ => false,
849 }
850}
851
852pub(crate) fn ends_with_blank_line<'a>(node: &'a AstNode<'a>) -> bool {
853 let mut it = Some(node);
854 while let Some(cur) = it {
855 if cur.data.borrow().last_line_blank {
856 return true;
857 }
858 match cur.data.borrow().value {
859 NodeValue::List(..) | NodeValue::Item(..) | NodeValue::TaskItem(..) => {
860 it = cur.last_child()
861 }
862 _ => it = None,
863 };
864 }
865 false
866}
867
868pub(crate) fn containing_block<'a>(node: &'a AstNode<'a>) -> Option<&'a AstNode<'a>> {
869 let mut ch = Some(node);
870 while let Some(n) = ch {
871 if n.data.borrow().value.block() {
872 return Some(n);
873 }
874 ch = n.parent();
875 }
876 None
877}