toml_parser/
source.rs

1use crate::decoder::Encoding;
2use crate::decoder::StringBuilder;
3use crate::lexer::Lexer;
4use crate::ErrorSink;
5use crate::Expected;
6
/// TOML-encoded text, borrowed for the lifetime `'i`
///
/// A thin, `Copy` wrapper around the `&str` being processed. It is the entry
/// point for lexing ([`Source::lex`]) and for slicing out parts of the
/// document ([`Source::get`]).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Source<'i> {
    /// The complete, unmodified document text
    input: &'i str,
}
12
13impl<'i> Source<'i> {
14    pub fn new(input: &'i str) -> Self {
15        Self { input }
16    }
17
18    /// Start lexing the TOML encoded data
19    pub fn lex(&self) -> Lexer<'i> {
20        Lexer::new(self.input)
21    }
22
23    /// Access the TOML encoded `&str`
24    pub fn input(&self) -> &'i str {
25        self.input
26    }
27
28    /// Return a subslice of the input
29    pub fn get(&self, span: impl SourceIndex) -> Option<Raw<'i>> {
30        span.get(self)
31    }
32
33    /// Return an unchecked subslice of the input
34    ///
35    /// ## Safety
36    ///
37    /// Callers of this function are responsible that these preconditions are satisfied:
38    /// - The starting index must not exceed the ending index;
39    /// - Indexes must be within bounds of the original slice;
40    /// - Indexes must lie on UTF-8 sequence boundaries.
41    ///
42    /// Or one of:
43    /// - `span` came from [`Source::lex`]
44    ///
45    /// Failing any of those, the returned string slice may reference invalid memory or violate the invariants communicated by `str` type.
46    #[cfg(feature = "unsafe")]
47    pub unsafe fn get_unchecked(&self, span: impl SourceIndex) -> Raw<'i> {
48        // SAFETY: Same safety guarantees are required
49        unsafe { span.get_unchecked(self) }
50    }
51
52    /// Return a subslice of the input
53    fn get_raw_str(&self, span: Span) -> Option<&'i str> {
54        let index = span.start()..span.end();
55        self.input.get(index)
56    }
57
58    /// Return an unchecked subslice of the input
59    ///
60    /// ## Safety
61    ///
62    /// Callers of this function are responsible that these preconditions are satisfied:
63    /// - The starting index must not exceed the ending index;
64    /// - Indexes must be within bounds of the original slice;
65    /// - Indexes must lie on UTF-8 sequence boundaries.
66    ///
67    /// Or one of:
68    /// - `span` came from [`Source::lex`]
69    ///
70    /// Failing any of those, the returned string slice may reference invalid memory or violate the invariants communicated by `str` type.
71    #[cfg(feature = "unsafe")]
72    unsafe fn get_raw_str_unchecked(&self, span: Span) -> &'i str {
73        let index = span.start()..span.end();
74        // SAFETY: Same safety guarantees are required
75        unsafe { self.input.get_unchecked(index) }
76    }
77}
78
79/// A slice of [`Source`]
80#[derive(Copy, Clone, Debug)]
81pub struct Raw<'i> {
82    raw: &'i str,
83    encoding: Option<Encoding>,
84    span: Span,
85}
86
87impl<'i> Raw<'i> {
88    pub fn new_unchecked(raw: &'i str, encoding: Option<Encoding>, span: Span) -> Self {
89        Self {
90            raw,
91            encoding,
92            span,
93        }
94    }
95
96    pub fn decode_key(&self, output: &mut dyn StringBuilder<'i>, error: &mut dyn ErrorSink) {
97        let mut error = |err: crate::ParseError| {
98            error.report_error(err.rebase_spans(self.span.start));
99        };
100        match self.encoding {
101            Some(Encoding::LiteralString) => {
102                crate::decoder::string::decode_literal_string(*self, output, &mut error);
103            }
104            Some(Encoding::BasicString) => {
105                crate::decoder::string::decode_basic_string(*self, output, &mut error);
106            }
107            Some(Encoding::MlLiteralString) => {
108                error.report_error(
109                    crate::ParseError::new("keys cannot be multi-line literal strings")
110                        .with_expected(&[
111                            Expected::Description("basic string"),
112                            Expected::Description("literal string"),
113                        ])
114                        .with_unexpected(Span::new_unchecked(0, self.len())),
115                );
116                crate::decoder::string::decode_ml_literal_string(*self, output, &mut error);
117            }
118            Some(Encoding::MlBasicString) => {
119                error.report_error(
120                    crate::ParseError::new("keys cannot be multi-line basic strings")
121                        .with_expected(&[
122                            Expected::Description("basic string"),
123                            Expected::Description("literal string"),
124                        ])
125                        .with_unexpected(Span::new_unchecked(0, self.len())),
126                );
127                crate::decoder::string::decode_ml_basic_string(*self, output, &mut error);
128            }
129            None => crate::decoder::string::decode_unquoted_key(*self, output, &mut error),
130        }
131    }
132
133    #[must_use]
134    pub fn decode_scalar(
135        &self,
136        output: &mut dyn StringBuilder<'i>,
137        error: &mut dyn ErrorSink,
138    ) -> crate::decoder::scalar::ScalarKind {
139        let mut error = |err: crate::ParseError| {
140            error.report_error(err.rebase_spans(self.span.start));
141        };
142        match self.encoding {
143            Some(Encoding::LiteralString) => {
144                crate::decoder::string::decode_literal_string(*self, output, &mut error);
145                crate::decoder::scalar::ScalarKind::String
146            }
147            Some(Encoding::BasicString) => {
148                crate::decoder::string::decode_basic_string(*self, output, &mut error);
149                crate::decoder::scalar::ScalarKind::String
150            }
151            Some(Encoding::MlLiteralString) => {
152                crate::decoder::string::decode_ml_literal_string(*self, output, &mut error);
153                crate::decoder::scalar::ScalarKind::String
154            }
155            Some(Encoding::MlBasicString) => {
156                crate::decoder::string::decode_ml_basic_string(*self, output, &mut error);
157                crate::decoder::scalar::ScalarKind::String
158            }
159            None => crate::decoder::scalar::decode_unquoted_scalar(*self, output, &mut error),
160        }
161    }
162
163    pub fn decode_whitespace(&self, _error: &mut dyn ErrorSink) {
164        // whitespace is always valid
165    }
166
167    pub fn decode_comment(&self, error: &mut dyn ErrorSink) {
168        let mut error = |err: crate::ParseError| {
169            error.report_error(err.rebase_spans(self.span.start));
170        };
171        crate::decoder::ws::decode_comment(*self, &mut error);
172    }
173
174    pub fn decode_newline(&self, error: &mut dyn ErrorSink) {
175        let mut error = |err: crate::ParseError| {
176            error.report_error(err.rebase_spans(self.span.start));
177        };
178        crate::decoder::ws::decode_newline(*self, &mut error);
179    }
180
181    pub fn as_str(&self) -> &'i str {
182        self.raw
183    }
184
185    pub fn as_bytes(&self) -> &'i [u8] {
186        self.raw.as_bytes()
187    }
188
189    pub fn len(&self) -> usize {
190        self.raw.len()
191    }
192
193    pub fn is_empty(&self) -> bool {
194        self.raw.is_empty()
195    }
196}
197
/// Location within the [`Source`]
///
/// A half-open range `start..end` of byte offsets into the source text.
/// Construction is unchecked, so a span may be inverted (`end < start`);
/// such a span is treated as empty.
#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Span {
    /// Offset of the first byte covered
    start: usize,
    /// Offset one past the last byte covered
    end: usize,
}

impl Span {
    /// Create a span covering `start..end` without validating the bounds
    pub fn new_unchecked(start: usize, end: usize) -> Self {
        Self { start, end }
    }

    /// Whether the span covers nothing, i.e. `end` is at or before `start`
    pub fn is_empty(&self) -> bool {
        self.end <= self.start
    }

    /// Number of bytes covered
    ///
    /// An inverted span (`end < start`) has a length of `0`, in agreement
    /// with [`Span::is_empty`], rather than overflowing the subtraction.
    pub fn len(&self) -> usize {
        self.end.saturating_sub(self.start)
    }

    /// Offset where the span begins
    pub fn start(&self) -> usize {
        self.start
    }

    /// Offset where the span ends (exclusive)
    pub fn end(&self) -> usize {
        self.end
    }

    /// Empty span positioned at the start of this one
    pub fn before(&self) -> Self {
        Self::new_unchecked(self.start, self.start)
    }

    /// Empty span positioned at the end of this one
    pub fn after(&self) -> Self {
        Self::new_unchecked(self.end, self.end)
    }

    /// Extend this `Span` to the end of `after`
    ///
    /// The start of `self` is kept; only the end of `after` is used.
    #[must_use]
    pub fn append(&self, after: Self) -> Self {
        Self::new_unchecked(self.start, after.end)
    }
}
240
241impl core::fmt::Debug for Span {
242    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
243        (self.start..self.end).fmt(f)
244    }
245}
246
247impl core::ops::Add<usize> for Span {
248    type Output = Self;
249
250    fn add(self, offset: usize) -> Self::Output {
251        Self::Output {
252            start: self.start + offset,
253            end: self.end + offset,
254        }
255    }
256}
257
258impl core::ops::Add<Span> for usize {
259    type Output = Span;
260
261    fn add(self, span: Span) -> Self::Output {
262        Self::Output {
263            start: span.start + self,
264            end: span.end + self,
265        }
266    }
267}
268
269impl core::ops::AddAssign<usize> for Span {
270    fn add_assign(&mut self, rhs: usize) {
271        self.start += rhs;
272        self.end += rhs;
273    }
274}
275
276/// A helper trait used for indexing operations on [`Source`]
277pub trait SourceIndex: sealed::Sealed {
278    /// Return a subslice of the input
279    fn get<'i>(self, source: &Source<'i>) -> Option<Raw<'i>>;
280
281    /// Return an unchecked subslice of the input
282    ///
283    /// ## Safety
284    ///
285    /// Callers of this function are responsible that these preconditions are satisfied:
286    /// - The starting index must not exceed the ending index;
287    /// - Indexes must be within bounds of the original slice;
288    /// - Indexes must lie on UTF-8 sequence boundaries.
289    ///
290    /// Or one of:
291    /// - `span` came from [`Source::lex`]
292    ///
293    /// Failing any of those, the returned string slice may reference invalid memory or violate the invariants communicated by `str` type.
294    #[cfg(feature = "unsafe")]
295    unsafe fn get_unchecked<'i>(self, source: &Source<'i>) -> Raw<'i>;
296}
297
298impl SourceIndex for Span {
299    fn get<'i>(self, source: &Source<'i>) -> Option<Raw<'i>> {
300        (&self).get(source)
301    }
302
303    #[cfg(feature = "unsafe")]
304    unsafe fn get_unchecked<'i>(self, source: &Source<'i>) -> Raw<'i> {
305        // SAFETY: Same safety guarantees are required
306        unsafe { (&self).get_unchecked(source) }
307    }
308}
309
310impl SourceIndex for &Span {
311    fn get<'i>(self, source: &Source<'i>) -> Option<Raw<'i>> {
312        let encoding = None;
313        source
314            .get_raw_str(*self)
315            .map(|s| Raw::new_unchecked(s, encoding, *self))
316    }
317
318    #[cfg(feature = "unsafe")]
319    unsafe fn get_unchecked<'i>(self, source: &Source<'i>) -> Raw<'i> {
320        let encoding = None;
321        // SAFETY: Same safety guarantees are required
322        let raw = unsafe { source.get_raw_str_unchecked(*self) };
323        Raw::new_unchecked(raw, encoding, *self)
324    }
325}
326
327impl SourceIndex for crate::lexer::Token {
328    fn get<'i>(self, source: &Source<'i>) -> Option<Raw<'i>> {
329        (&self).get(source)
330    }
331
332    #[cfg(feature = "unsafe")]
333    unsafe fn get_unchecked<'i>(self, source: &Source<'i>) -> Raw<'i> {
334        // SAFETY: Same safety guarantees are required
335        unsafe { (&self).get_unchecked(source) }
336    }
337}
338
339impl SourceIndex for &crate::lexer::Token {
340    fn get<'i>(self, source: &Source<'i>) -> Option<Raw<'i>> {
341        let encoding = self.kind().encoding();
342        source
343            .get_raw_str(self.span())
344            .map(|s| Raw::new_unchecked(s, encoding, self.span()))
345    }
346
347    #[cfg(feature = "unsafe")]
348    unsafe fn get_unchecked<'i>(self, source: &Source<'i>) -> Raw<'i> {
349        let encoding = self.kind().encoding();
350        // SAFETY: Same safety guarantees are required
351        let raw = unsafe { source.get_raw_str_unchecked(self.span()) };
352        Raw::new_unchecked(raw, encoding, self.span())
353    }
354}
355
356impl SourceIndex for crate::parser::Event {
357    fn get<'i>(self, source: &Source<'i>) -> Option<Raw<'i>> {
358        (&self).get(source)
359    }
360
361    #[cfg(feature = "unsafe")]
362    unsafe fn get_unchecked<'i>(self, source: &Source<'i>) -> Raw<'i> {
363        // SAFETY: Same safety guarantees are required
364        unsafe { (&self).get_unchecked(source) }
365    }
366}
367
368impl SourceIndex for &crate::parser::Event {
369    fn get<'i>(self, source: &Source<'i>) -> Option<Raw<'i>> {
370        let encoding = self.encoding();
371        source
372            .get_raw_str(self.span())
373            .map(|s| Raw::new_unchecked(s, encoding, self.span()))
374    }
375
376    #[cfg(feature = "unsafe")]
377    unsafe fn get_unchecked<'i>(self, source: &Source<'i>) -> Raw<'i> {
378        let encoding = self.encoding();
379        // SAFETY: Same safety guarantees are required
380        let raw = unsafe { source.get_raw_str_unchecked(self.span()) };
381        Raw::new_unchecked(raw, encoding, self.span())
382    }
383}
384
385mod sealed {
386    pub trait Sealed {}
387
388    impl Sealed for crate::Span {}
389    impl Sealed for &crate::Span {}
390    impl Sealed for crate::lexer::Token {}
391    impl Sealed for &crate::lexer::Token {}
392    impl Sealed for crate::parser::Event {}
393    impl Sealed for &crate::parser::Event {}
394}