comrak/
xml.rs

1use crate::character_set::character_set;
2use crate::nodes::{AstNode, ListType, NodeCode, NodeMath, NodeTable, NodeValue};
3use crate::parser::{Options, Plugins};
4use std::cmp;
5use std::io::{self, Write};
6
7use crate::nodes::NodeHtmlBlock;
8
9const MAX_INDENT: u32 = 40;
10
11/// Formats an AST as HTML, modified by the given options.
12pub fn format_document<'a>(
13    root: &'a AstNode<'a>,
14    options: &Options,
15    output: &mut dyn Write,
16) -> io::Result<()> {
17    format_document_with_plugins(root, options, output, &Plugins::default())
18}
19
20/// Formats an AST as HTML, modified by the given options. Accepts custom plugins.
21pub fn format_document_with_plugins<'a>(
22    root: &'a AstNode<'a>,
23    options: &Options,
24    output: &mut dyn Write,
25    plugins: &Plugins,
26) -> io::Result<()> {
27    output.write_all(b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")?;
28    output.write_all(b"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n")?;
29
30    XmlFormatter::new(options, output, plugins).format(root, false)
31}
32
33struct XmlFormatter<'o, 'c> {
34    output: &'o mut dyn Write,
35    options: &'o Options<'c>,
36    _plugins: &'o Plugins<'o>,
37    indent: u32,
38}
39
40impl<'o, 'c> XmlFormatter<'o, 'c> {
41    fn new(options: &'o Options<'c>, output: &'o mut dyn Write, plugins: &'o Plugins) -> Self {
42        XmlFormatter {
43            options,
44            output,
45            _plugins: plugins,
46            indent: 0,
47        }
48    }
49
50    fn escape(&mut self, buffer: &[u8]) -> io::Result<()> {
51        const XML_UNSAFE: [bool; 256] = character_set!(b"&<>\"");
52
53        let mut offset = 0;
54        for (i, &byte) in buffer.iter().enumerate() {
55            if XML_UNSAFE[byte as usize] {
56                let esc: &[u8] = match byte {
57                    b'"' => b"&quot;",
58                    b'&' => b"&amp;",
59                    b'<' => b"&lt;",
60                    b'>' => b"&gt;",
61                    _ => unreachable!(),
62                };
63                self.output.write_all(&buffer[offset..i])?;
64                self.output.write_all(esc)?;
65                offset = i + 1;
66            }
67        }
68        self.output.write_all(&buffer[offset..])?;
69        Ok(())
70    }
71
72    fn format<'a>(&mut self, node: &'a AstNode<'a>, plain: bool) -> io::Result<()> {
73        // Traverse the AST iteratively using a work stack, with pre- and
74        // post-child-traversal phases. During pre-order traversal render the
75        // opening tags, then push the node back onto the stack for the
76        // post-order traversal phase, then push the children in reverse order
77        // onto the stack and begin rendering first child.
78
79        enum Phase {
80            Pre,
81            Post,
82        }
83        let mut stack = vec![(node, plain, Phase::Pre)];
84
85        while let Some((node, plain, phase)) = stack.pop() {
86            match phase {
87                Phase::Pre => {
88                    let new_plain = if plain {
89                        match node.data.borrow().value {
90                            NodeValue::Text(ref literal)
91                            | NodeValue::Code(NodeCode { ref literal, .. })
92                            | NodeValue::HtmlInline(ref literal)
93                            | NodeValue::Raw(ref literal) => {
94                                self.escape(literal.as_bytes())?;
95                            }
96                            NodeValue::LineBreak | NodeValue::SoftBreak => {
97                                self.output.write_all(b" ")?;
98                            }
99                            NodeValue::Math(NodeMath { ref literal, .. }) => {
100                                self.escape(literal.as_bytes())?;
101                            }
102                            _ => (),
103                        }
104                        plain
105                    } else {
106                        stack.push((node, false, Phase::Post));
107                        self.format_node(node, true)?
108                    };
109
110                    for ch in node.reverse_children() {
111                        stack.push((ch, new_plain, Phase::Pre));
112                    }
113                }
114                Phase::Post => {
115                    debug_assert!(!plain);
116                    self.format_node(node, false)?;
117                }
118            }
119        }
120
121        Ok(())
122    }
123
124    fn indent(&mut self) -> io::Result<()> {
125        for _ in 0..(cmp::min(self.indent, MAX_INDENT)) {
126            self.output.write_all(b" ")?;
127        }
128        Ok(())
129    }
130
131    fn format_node<'a>(&mut self, node: &'a AstNode<'a>, entering: bool) -> io::Result<bool> {
132        if entering {
133            self.indent()?;
134
135            let ast = node.data.borrow();
136
137            write!(self.output, "<{}", ast.value.xml_node_name())?;
138
139            if self.options.render.sourcepos && ast.sourcepos.start.line != 0 {
140                write!(self.output, " sourcepos=\"{}\"", ast.sourcepos)?;
141            }
142
143            let mut was_literal = false;
144
145            match ast.value {
146                NodeValue::Document => self
147                    .output
148                    .write_all(b" xmlns=\"http://commonmark.org/xml/1.0\"")?,
149                NodeValue::Text(ref literal)
150                | NodeValue::Code(NodeCode { ref literal, .. })
151                | NodeValue::HtmlBlock(NodeHtmlBlock { ref literal, .. })
152                | NodeValue::HtmlInline(ref literal)
153                | NodeValue::Raw(ref literal) => {
154                    self.output.write_all(b" xml:space=\"preserve\">")?;
155                    self.escape(literal.as_bytes())?;
156                    write!(self.output, "</{}", ast.value.xml_node_name())?;
157                    was_literal = true;
158                }
159                NodeValue::List(ref nl) => {
160                    match nl.list_type {
161                        ListType::Bullet => {
162                            self.output.write_all(b" type=\"bullet\"")?;
163                        }
164                        ListType::Ordered => {
165                            write!(
166                                self.output,
167                                " type=\"ordered\" start=\"{}\" delim=\"{}\"",
168                                nl.start,
169                                nl.delimiter.xml_name()
170                            )?;
171                        }
172                    }
173                    if nl.is_task_list {
174                        self.output.write_all(b" tasklist=\"true\"")?;
175                    }
176                    write!(self.output, " tight=\"{}\"", nl.tight)?;
177                }
178                NodeValue::FrontMatter(_) => (),
179                NodeValue::BlockQuote => {}
180                NodeValue::MultilineBlockQuote(..) => {}
181                NodeValue::Item(..) => {}
182                NodeValue::DescriptionList => {}
183                NodeValue::DescriptionItem(..) => (),
184                NodeValue::DescriptionTerm => {}
185                NodeValue::DescriptionDetails => {}
186                NodeValue::Heading(ref nch) => {
187                    write!(self.output, " level=\"{}\"", nch.level)?;
188                }
189                NodeValue::CodeBlock(ref ncb) => {
190                    if !ncb.info.is_empty() {
191                        self.output.write_all(b" info=\"")?;
192                        self.output.write_all(ncb.info.as_bytes())?;
193                        self.output.write_all(b"\"")?;
194
195                        if ncb.info.eq("math") {
196                            self.output.write_all(b" math_style=\"display\"")?;
197                        }
198                    }
199                    self.output.write_all(b" xml:space=\"preserve\">")?;
200                    self.escape(ncb.literal.as_bytes())?;
201                    write!(self.output, "</{}", ast.value.xml_node_name())?;
202                    was_literal = true;
203                }
204                NodeValue::ThematicBreak => {}
205                NodeValue::Paragraph => {}
206                NodeValue::LineBreak => {}
207                NodeValue::SoftBreak => {}
208                NodeValue::Strong => {}
209                NodeValue::Emph => {}
210                NodeValue::Strikethrough => {}
211                NodeValue::Superscript => {}
212                NodeValue::Link(ref nl) | NodeValue::Image(ref nl) => {
213                    self.output.write_all(b" destination=\"")?;
214                    self.escape(nl.url.as_bytes())?;
215                    self.output.write_all(b"\" title=\"")?;
216                    self.escape(nl.title.as_bytes())?;
217                    self.output.write_all(b"\"")?;
218                }
219                NodeValue::Table(..) => {
220                    // noop
221                }
222                NodeValue::TableRow(..) => {
223                    // noop
224                }
225                NodeValue::TableCell => {
226                    let mut ancestors = node.ancestors().skip(1);
227
228                    let header_row = &ancestors.next().unwrap().data.borrow().value;
229                    let table = &ancestors.next().unwrap().data.borrow().value;
230
231                    if let (
232                        NodeValue::TableRow(true),
233                        NodeValue::Table(NodeTable { alignments, .. }),
234                    ) = (header_row, table)
235                    {
236                        let ix = node.preceding_siblings().count() - 1;
237                        if let Some(xml_align) = alignments[ix].xml_name() {
238                            write!(self.output, " align=\"{}\"", xml_align)?;
239                        }
240                    }
241                }
242                NodeValue::FootnoteDefinition(ref fd) => {
243                    self.output.write_all(b" label=\"")?;
244                    self.escape(fd.name.as_bytes())?;
245                    self.output.write_all(b"\"")?;
246                }
247                NodeValue::FootnoteReference(ref nfr) => {
248                    self.output.write_all(b" label=\"")?;
249                    self.escape(nfr.name.as_bytes())?;
250                    self.output.write_all(b"\"")?;
251                }
252                NodeValue::TaskItem(Some(_)) => {
253                    self.output.write_all(b" completed=\"true\"")?;
254                }
255                NodeValue::TaskItem(None) => {
256                    self.output.write_all(b" completed=\"false\"")?;
257                }
258                #[cfg(feature = "shortcodes")]
259                NodeValue::ShortCode(ref nsc) => {
260                    self.output.write_all(b" id=\"")?;
261                    self.escape(nsc.code.as_bytes())?;
262                    self.output.write_all(b"\"")?;
263                }
264                NodeValue::Escaped => {
265                    // noop
266                }
267                NodeValue::Math(ref math, ..) => {
268                    if math.display_math {
269                        self.output.write_all(b" math_style=\"display\"")?;
270                    } else {
271                        self.output.write_all(b" math_style=\"inline\"")?;
272                    }
273                    self.output.write_all(b" xml:space=\"preserve\">")?;
274                    self.escape(math.literal.as_bytes())?;
275                    write!(self.output, "</{}", ast.value.xml_node_name())?;
276                    was_literal = true;
277                }
278                NodeValue::WikiLink(ref nl) => {
279                    self.output.write_all(b" destination=\"")?;
280                    self.escape(nl.url.as_bytes())?;
281                    self.output.write_all(b"\"")?;
282                }
283                NodeValue::Underline => {}
284                NodeValue::Subscript => {}
285                NodeValue::SpoileredText => {}
286                NodeValue::EscapedTag(ref data) => {
287                    self.output.write_all(data.as_bytes())?;
288                }
289                NodeValue::Alert(ref alert) => {
290                    self.output.write_all(b" type=\"")?;
291                    self.output
292                        .write_all(alert.alert_type.default_title().to_lowercase().as_bytes())?;
293                    self.output.write_all(b"\"")?;
294                    if alert.title.is_some() {
295                        let title = alert.title.as_ref().unwrap();
296
297                        self.output.write_all(b" title=\"")?;
298                        self.escape(title.as_bytes())?;
299                        self.output.write_all(b"\"")?;
300                    }
301
302                    if alert.multiline {
303                        self.output.write_all(b" multiline=\"true\"")?;
304                    }
305                }
306            }
307
308            if node.first_child().is_some() {
309                self.indent += 2;
310            } else if !was_literal {
311                self.output.write_all(b" /")?;
312            }
313            self.output.write_all(b">\n")?;
314        } else if node.first_child().is_some() {
315            self.indent -= 2;
316            self.indent()?;
317            writeln!(
318                self.output,
319                "</{}>",
320                node.data.borrow().value.xml_node_name()
321            )?;
322        }
323        Ok(false)
324    }
325}