toml_parser/decoder/
scalar.rs

1use winnow::stream::ContainsToken as _;
2use winnow::stream::FindSlice as _;
3use winnow::stream::Offset as _;
4use winnow::stream::Stream as _;
5
6use crate::decoder::StringBuilder;
7use crate::ErrorSink;
8use crate::Expected;
9use crate::ParseError;
10use crate::Raw;
11use crate::Span;
12
/// Error message reported when the output `StringBuilder` returns `false` from `push_str`.
const ALLOCATION_ERROR: &str = "could not allocate for string";
14
15#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
16pub enum ScalarKind {
17    String,
18    Boolean(bool),
19    DateTime,
20    Float,
21    Integer(IntegerRadix),
22}
23
24impl ScalarKind {
25    pub fn description(&self) -> &'static str {
26        match self {
27            Self::String => "string",
28            Self::Boolean(_) => "boolean",
29            Self::DateTime => "date-time",
30            Self::Float => "float",
31            Self::Integer(radix) => radix.description(),
32        }
33    }
34
35    pub fn invalid_description(&self) -> &'static str {
36        match self {
37            Self::String => "invalid string",
38            Self::Boolean(_) => "invalid boolean",
39            Self::DateTime => "invalid date-time",
40            Self::Float => "invalid float",
41            Self::Integer(radix) => radix.invalid_description(),
42        }
43    }
44}
45
/// Base an integer literal is written in; decimal is the default.
#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub enum IntegerRadix {
    /// Base 10.
    #[default]
    Dec,
    /// Base 16.
    Hex,
    /// Base 8.
    Oct,
    /// Base 2.
    Bin,
}

impl IntegerRadix {
    /// Short name of the radix.
    pub fn description(&self) -> &'static str {
        match *self {
            Self::Bin => "binary",
            Self::Oct => "octal",
            Self::Hex => "hexadecimal",
            Self::Dec => "integer",
        }
    }

    /// Numeric base (2, 8, 10 or 16).
    pub fn value(&self) -> u32 {
        let base = match *self {
            Self::Bin => 2,
            Self::Oct => 8,
            Self::Dec => 10,
            Self::Hex => 16,
        };
        base
    }

    /// Error text for a malformed number in this radix.
    pub fn invalid_description(&self) -> &'static str {
        match *self {
            Self::Bin => "invalid binary number",
            Self::Oct => "invalid octal number",
            Self::Hex => "invalid hexadecimal number",
            Self::Dec => "invalid integer number",
        }
    }

    /// Predicate accepting exactly the digit characters of this radix.
    fn validator(&self) -> fn(char) -> bool {
        fn is_dec(c: char) -> bool {
            c.is_ascii_digit()
        }
        fn is_hex(c: char) -> bool {
            c.is_ascii_hexdigit()
        }
        fn is_oct(c: char) -> bool {
            ('0'..='7').contains(&c)
        }
        fn is_bin(c: char) -> bool {
            c == '0' || c == '1'
        }

        match *self {
            Self::Dec => is_dec,
            Self::Hex => is_hex,
            Self::Oct => is_oct,
            Self::Bin => is_bin,
        }
    }
}
92
93pub(crate) fn decode_unquoted_scalar<'i>(
94    raw: Raw<'i>,
95    output: &mut dyn StringBuilder<'i>,
96    error: &mut dyn ErrorSink,
97) -> ScalarKind {
98    let s = raw.as_str();
99    let Some(first) = s.as_bytes().first() else {
100        return decode_invalid(raw, output, error);
101    };
102    match first {
103        // number starts
104        b'+' | b'-' => {
105            let value = &raw.as_str()[1..];
106            decode_sign_prefix(raw, value, output, error)
107        }
108        // Report as if they were numbers because its most likely a typo
109        b'_' => decode_datetime_or_float_or_integer(raw.as_str(), raw, output, error),
110        // Date/number starts
111        b'0' => decode_zero_prefix(raw.as_str(), false, raw, output, error),
112        b'1'..=b'9' => decode_datetime_or_float_or_integer(raw.as_str(), raw, output, error),
113        // Report as if they were numbers because its most likely a typo
114        b'.' => {
115            let kind = ScalarKind::Float;
116            let stream = raw.as_str();
117            ensure_float(stream, raw, error);
118            decode_float_or_integer(stream, raw, kind, output, error)
119        }
120        b't' | b'T' => {
121            const SYMBOL: &str = "true";
122            let kind = ScalarKind::Boolean(true);
123            let expected = &[Expected::Literal(SYMBOL)];
124            decode_symbol(raw, SYMBOL, kind, expected, output, error)
125        }
126        b'f' | b'F' => {
127            const SYMBOL: &str = "false";
128            let kind = ScalarKind::Boolean(false);
129            let expected = &[Expected::Literal(SYMBOL)];
130            decode_symbol(raw, SYMBOL, kind, expected, output, error)
131        }
132        b'i' | b'I' => {
133            const SYMBOL: &str = "inf";
134            let kind = ScalarKind::Float;
135            let expected = &[Expected::Literal(SYMBOL)];
136            decode_symbol(raw, SYMBOL, kind, expected, output, error)
137        }
138        b'n' | b'N' => {
139            const SYMBOL: &str = "nan";
140            let kind = ScalarKind::Float;
141            let expected = &[Expected::Literal(SYMBOL)];
142            decode_symbol(raw, SYMBOL, kind, expected, output, error)
143        }
144        _ => decode_invalid(raw, output, error),
145    }
146}
147
148pub(crate) fn decode_sign_prefix<'i>(
149    raw: Raw<'i>,
150    value: &'i str,
151    output: &mut dyn StringBuilder<'i>,
152    error: &mut dyn ErrorSink,
153) -> ScalarKind {
154    let Some(first) = value.as_bytes().first() else {
155        return decode_invalid(raw, output, error);
156    };
157    match first {
158        // number starts
159        b'+' | b'-' => {
160            let start = value.offset_from(&raw.as_str());
161            let end = start + 1;
162            error.report_error(
163                ParseError::new("redundant numeric sign")
164                    .with_context(Span::new_unchecked(0, raw.len()))
165                    .with_expected(&[])
166                    .with_unexpected(Span::new_unchecked(start, end)),
167            );
168
169            let value = &value[1..];
170            decode_sign_prefix(raw, value, output, error)
171        }
172        // Report as if they were numbers because its most likely a typo
173        b'_' => decode_datetime_or_float_or_integer(value, raw, output, error),
174        // Date/number starts
175        b'0' => decode_zero_prefix(value, true, raw, output, error),
176        b'1'..=b'9' => decode_datetime_or_float_or_integer(value, raw, output, error),
177        // Report as if they were numbers because its most likely a typo
178        b'.' => {
179            let kind = ScalarKind::Float;
180            let stream = raw.as_str();
181            ensure_float(stream, raw, error);
182            decode_float_or_integer(stream, raw, kind, output, error)
183        }
184        b'i' | b'I' => {
185            const SYMBOL: &str = "inf";
186            let kind = ScalarKind::Float;
187            if value != SYMBOL {
188                let expected = &[Expected::Literal(SYMBOL)];
189                let start = value.offset_from(&raw.as_str());
190                let end = start + value.len();
191                error.report_error(
192                    ParseError::new(kind.invalid_description())
193                        .with_context(Span::new_unchecked(0, raw.len()))
194                        .with_expected(expected)
195                        .with_unexpected(Span::new_unchecked(start, end)),
196                );
197                decode_as(raw, SYMBOL, kind, output, error)
198            } else {
199                decode_as_is(raw, kind, output, error)
200            }
201        }
202        b'n' | b'N' => {
203            const SYMBOL: &str = "nan";
204            let kind = ScalarKind::Float;
205            if value != SYMBOL {
206                let expected = &[Expected::Literal(SYMBOL)];
207                let start = value.offset_from(&raw.as_str());
208                let end = start + value.len();
209                error.report_error(
210                    ParseError::new(kind.invalid_description())
211                        .with_context(Span::new_unchecked(0, raw.len()))
212                        .with_expected(expected)
213                        .with_unexpected(Span::new_unchecked(start, end)),
214                );
215                decode_as(raw, SYMBOL, kind, output, error)
216            } else {
217                decode_as_is(raw, kind, output, error)
218            }
219        }
220        _ => decode_invalid(raw, output, error),
221    }
222}
223
224pub(crate) fn decode_zero_prefix<'i>(
225    value: &'i str,
226    signed: bool,
227    raw: Raw<'i>,
228    output: &mut dyn StringBuilder<'i>,
229    error: &mut dyn ErrorSink,
230) -> ScalarKind {
231    debug_assert_eq!(value.as_bytes()[0], b'0');
232    if value.len() == 1 {
233        let kind = ScalarKind::Integer(IntegerRadix::Dec);
234        // No extra validation needed
235        decode_float_or_integer(raw.as_str(), raw, kind, output, error)
236    } else {
237        let radix = value.as_bytes()[1];
238        match radix {
239            b'x' | b'X' => {
240                if signed {
241                    error.report_error(
242                        ParseError::new("integers with a radix cannot be signed")
243                            .with_context(Span::new_unchecked(0, raw.len()))
244                            .with_expected(&[])
245                            .with_unexpected(Span::new_unchecked(0, 1)),
246                    );
247                }
248                if radix == b'X' {
249                    let start = value.offset_from(&raw.as_str());
250                    let end = start + 2;
251                    error.report_error(
252                        ParseError::new("radix must be lowercase")
253                            .with_context(Span::new_unchecked(0, raw.len()))
254                            .with_expected(&[Expected::Literal("0x")])
255                            .with_unexpected(Span::new_unchecked(start, end)),
256                    );
257                }
258                let radix = IntegerRadix::Hex;
259                let kind = ScalarKind::Integer(radix);
260                let stream = &value[2..];
261                ensure_radixed_value(stream, raw, radix, error);
262                decode_float_or_integer(stream, raw, kind, output, error)
263            }
264            b'o' | b'O' => {
265                if signed {
266                    error.report_error(
267                        ParseError::new("integers with a radix cannot be signed")
268                            .with_context(Span::new_unchecked(0, raw.len()))
269                            .with_expected(&[])
270                            .with_unexpected(Span::new_unchecked(0, 1)),
271                    );
272                }
273                if radix == b'O' {
274                    let start = value.offset_from(&raw.as_str());
275                    let end = start + 2;
276                    error.report_error(
277                        ParseError::new("radix must be lowercase")
278                            .with_context(Span::new_unchecked(0, raw.len()))
279                            .with_expected(&[Expected::Literal("0o")])
280                            .with_unexpected(Span::new_unchecked(start, end)),
281                    );
282                }
283                let radix = IntegerRadix::Oct;
284                let kind = ScalarKind::Integer(radix);
285                let stream = &value[2..];
286                ensure_radixed_value(stream, raw, radix, error);
287                decode_float_or_integer(stream, raw, kind, output, error)
288            }
289            b'b' | b'B' => {
290                if signed {
291                    error.report_error(
292                        ParseError::new("integers with a radix cannot be signed")
293                            .with_context(Span::new_unchecked(0, raw.len()))
294                            .with_expected(&[])
295                            .with_unexpected(Span::new_unchecked(0, 1)),
296                    );
297                }
298                if radix == b'B' {
299                    let start = value.offset_from(&raw.as_str());
300                    let end = start + 2;
301                    error.report_error(
302                        ParseError::new("radix must be lowercase")
303                            .with_context(Span::new_unchecked(0, raw.len()))
304                            .with_expected(&[Expected::Literal("0b")])
305                            .with_unexpected(Span::new_unchecked(start, end)),
306                    );
307                }
308                let radix = IntegerRadix::Bin;
309                let kind = ScalarKind::Integer(radix);
310                let stream = &value[2..];
311                ensure_radixed_value(stream, raw, radix, error);
312                decode_float_or_integer(stream, raw, kind, output, error)
313            }
314            b'd' | b'D' => {
315                if signed {
316                    error.report_error(
317                        ParseError::new("integers with a radix cannot be signed")
318                            .with_context(Span::new_unchecked(0, raw.len()))
319                            .with_expected(&[])
320                            .with_unexpected(Span::new_unchecked(0, 1)),
321                    );
322                }
323                let radix = IntegerRadix::Dec;
324                let kind = ScalarKind::Integer(radix);
325                let stream = &value[2..];
326                error.report_error(
327                    ParseError::new("redundant integer number prefix")
328                        .with_context(Span::new_unchecked(0, raw.len()))
329                        .with_expected(&[])
330                        .with_unexpected(Span::new_unchecked(0, 2)),
331                );
332                ensure_radixed_value(stream, raw, radix, error);
333                decode_float_or_integer(stream, raw, kind, output, error)
334            }
335            _ => decode_datetime_or_float_or_integer(value, raw, output, error),
336        }
337    }
338}
339
340pub(crate) fn decode_datetime_or_float_or_integer<'i>(
341    value: &'i str,
342    raw: Raw<'i>,
343    output: &mut dyn StringBuilder<'i>,
344    error: &mut dyn ErrorSink,
345) -> ScalarKind {
346    let Some(digit_end) = value
347        .as_bytes()
348        .offset_for(|b| !(b'0'..=b'9').contains_token(b))
349    else {
350        let kind = ScalarKind::Integer(IntegerRadix::Dec);
351        let stream = raw.as_str();
352        ensure_no_leading_zero(value, raw, error);
353        return decode_float_or_integer(stream, raw, kind, output, error);
354    };
355
356    #[cfg(feature = "unsafe")] // SAFETY: ascii digits ensures UTF-8 boundary
357    let rest = unsafe { &value.get_unchecked(digit_end..) };
358    #[cfg(not(feature = "unsafe"))]
359    let rest = &value[digit_end..];
360
361    if rest.starts_with("-") || rest.starts_with(":") {
362        decode_as_is(raw, ScalarKind::DateTime, output, error)
363    } else if is_float(rest) {
364        let kind = ScalarKind::Float;
365        let stream = raw.as_str();
366        ensure_float(value, raw, error);
367        decode_float_or_integer(stream, raw, kind, output, error)
368    } else if rest.starts_with("_") {
369        let kind = ScalarKind::Integer(IntegerRadix::Dec);
370        let stream = raw.as_str();
371        ensure_no_leading_zero(value, raw, error);
372        decode_float_or_integer(stream, raw, kind, output, error)
373    } else {
374        decode_invalid(raw, output, error)
375    }
376}
377
378/// ```abnf
379/// float = float-int-part ( exp / frac [ exp ] )
380///
381/// float-int-part = dec-int
382/// frac = decimal-point zero-prefixable-int
383/// decimal-point = %x2E               ; .
384/// zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT )
385///
386/// exp = "e" float-exp-part
387/// float-exp-part = [ minus / plus ] zero-prefixable-int
388/// ```
389pub(crate) fn ensure_float<'i>(mut value: &'i str, raw: Raw<'i>, error: &mut dyn ErrorSink) {
390    ensure_dec_uint(&mut value, raw, false, "invalid mantissa", error);
391
392    if value.starts_with(".") {
393        let _ = value.next_token();
394        ensure_dec_uint(&mut value, raw, true, "invalid fraction", error);
395    }
396
397    if value.starts_with(['e', 'E']) {
398        let _ = value.next_token();
399        if value.starts_with(['+', '-']) {
400            let _ = value.next_token();
401        }
402        ensure_dec_uint(&mut value, raw, true, "invalid exponent", error);
403    }
404
405    if !value.is_empty() {
406        let start = value.offset_from(&raw.as_str());
407        let end = raw.len();
408        error.report_error(
409            ParseError::new(ScalarKind::Float.invalid_description())
410                .with_context(Span::new_unchecked(0, raw.len()))
411                .with_expected(&[])
412                .with_unexpected(Span::new_unchecked(start, end)),
413        );
414    }
415}
416
417pub(crate) fn ensure_dec_uint<'i>(
418    value: &mut &'i str,
419    raw: Raw<'i>,
420    zero_prefix: bool,
421    invalid_description: &'static str,
422    error: &mut dyn ErrorSink,
423) {
424    let start = *value;
425    let mut digit_count = 0;
426    while let Some(current) = value.chars().next() {
427        if current.is_ascii_digit() {
428            digit_count += 1;
429        } else if current == '_' {
430        } else {
431            break;
432        }
433        let _ = value.next_token();
434    }
435
436    match digit_count {
437        0 => {
438            let start = start.offset_from(&raw.as_str());
439            let end = start;
440            error.report_error(
441                ParseError::new(invalid_description)
442                    .with_context(Span::new_unchecked(0, raw.len()))
443                    .with_expected(&[Expected::Description("digits")])
444                    .with_unexpected(Span::new_unchecked(start, end)),
445            );
446        }
447        1 => {}
448        _ if start.starts_with("0") && !zero_prefix => {
449            let start = start.offset_from(&raw.as_str());
450            let end = start + 1;
451            error.report_error(
452                ParseError::new("unexpected leading zero")
453                    .with_context(Span::new_unchecked(0, raw.len()))
454                    .with_expected(&[])
455                    .with_unexpected(Span::new_unchecked(start, end)),
456            );
457        }
458        _ => {}
459    }
460}
461
462pub(crate) fn ensure_no_leading_zero<'i>(value: &'i str, raw: Raw<'i>, error: &mut dyn ErrorSink) {
463    if value.starts_with("0") {
464        let start = value.offset_from(&raw.as_str());
465        let end = start + 1;
466        error.report_error(
467            ParseError::new("unexpected leading zero")
468                .with_context(Span::new_unchecked(0, raw.len()))
469                .with_expected(&[])
470                .with_unexpected(Span::new_unchecked(start, end)),
471        );
472    }
473}
474
475pub(crate) fn ensure_radixed_value(
476    value: &str,
477    raw: Raw<'_>,
478    radix: IntegerRadix,
479    error: &mut dyn ErrorSink,
480) {
481    let invalid = ['+', '-'];
482    let value = if let Some(value) = value.strip_prefix(invalid) {
483        let pos = raw.as_str().find(invalid).unwrap();
484        error.report_error(
485            ParseError::new("unexpected sign")
486                .with_context(Span::new_unchecked(0, raw.len()))
487                .with_expected(&[])
488                .with_unexpected(Span::new_unchecked(pos, pos + 1)),
489        );
490        value
491    } else {
492        value
493    };
494
495    let valid = radix.validator();
496    for (index, c) in value.char_indices() {
497        if !valid(c) && c != '_' {
498            let pos = value.offset_from(&raw.as_str()) + index;
499            error.report_error(
500                ParseError::new(radix.invalid_description())
501                    .with_context(Span::new_unchecked(0, raw.len()))
502                    .with_unexpected(Span::new_unchecked(pos, pos)),
503            );
504        }
505    }
506}
507
508pub(crate) fn decode_float_or_integer<'i>(
509    stream: &'i str,
510    raw: Raw<'i>,
511    kind: ScalarKind,
512    output: &mut dyn StringBuilder<'i>,
513    error: &mut dyn ErrorSink,
514) -> ScalarKind {
515    output.clear();
516
517    let underscore = "_";
518
519    if has_underscore(stream) {
520        if stream.starts_with(underscore) {
521            error.report_error(
522                ParseError::new("`_` may only go between digits")
523                    .with_context(Span::new_unchecked(0, raw.len()))
524                    .with_expected(&[])
525                    .with_unexpected(Span::new_unchecked(0, underscore.len())),
526            );
527        }
528        if 1 < stream.len() && stream.ends_with(underscore) {
529            let start = stream.offset_from(&raw.as_str());
530            let end = start + stream.len();
531            error.report_error(
532                ParseError::new("`_` may only go between digits")
533                    .with_context(Span::new_unchecked(0, raw.len()))
534                    .with_expected(&[])
535                    .with_unexpected(Span::new_unchecked(end - underscore.len(), end)),
536            );
537        }
538
539        for part in stream.split(underscore) {
540            let part_start = part.offset_from(&raw.as_str());
541            let part_end = part_start + part.len();
542
543            if 0 < part_start {
544                let first = part.as_bytes().first().copied().unwrap_or(b'0');
545                if !is_any_digit(first, kind) {
546                    let start = part_start - 1;
547                    let end = part_start;
548                    debug_assert_eq!(&raw.as_str()[start..end], underscore);
549                    error.report_error(
550                        ParseError::new("`_` may only go between digits")
551                            .with_context(Span::new_unchecked(0, raw.len()))
552                            .with_unexpected(Span::new_unchecked(start, end)),
553                    );
554                }
555            }
556            if 1 < part.len() && part_end < raw.len() {
557                let last = part.as_bytes().last().copied().unwrap_or(b'0');
558                if !is_any_digit(last, kind) {
559                    let start = part_end;
560                    let end = start + underscore.len();
561                    debug_assert_eq!(&raw.as_str()[start..end], underscore);
562                    error.report_error(
563                        ParseError::new("`_` may only go between digits")
564                            .with_context(Span::new_unchecked(0, raw.len()))
565                            .with_unexpected(Span::new_unchecked(start, end)),
566                    );
567                }
568            }
569
570            if part.is_empty() && part_start != 0 && part_end != raw.len() {
571                let start = part_start;
572                let end = start + 1;
573                error.report_error(
574                    ParseError::new("`_` may only go between digits")
575                        .with_context(Span::new_unchecked(0, raw.len()))
576                        .with_unexpected(Span::new_unchecked(start, end)),
577                );
578            }
579
580            if !part.is_empty() && !output.push_str(part) {
581                error.report_error(
582                    ParseError::new(ALLOCATION_ERROR)
583                        .with_unexpected(Span::new_unchecked(part_start, part_end)),
584                );
585            }
586        }
587    } else {
588        if !output.push_str(stream) {
589            error.report_error(
590                ParseError::new(ALLOCATION_ERROR)
591                    .with_unexpected(Span::new_unchecked(0, raw.len())),
592            );
593        }
594    }
595
596    kind
597}
598
599fn is_any_digit(b: u8, kind: ScalarKind) -> bool {
600    if kind == ScalarKind::Float {
601        is_dec_integer_digit(b)
602    } else {
603        is_any_integer_digit(b)
604    }
605}
606
/// Whether `b` is a digit in any supported integer radix, i.e. `0-9`, `a-f` or `A-F`.
///
/// Uses the standard-library check, matching `IntegerRadix::validator`.
fn is_any_integer_digit(b: u8) -> bool {
    b.is_ascii_hexdigit()
}
610
/// Whether `b` is an ASCII decimal digit (`0-9`).
///
/// Uses the standard-library check, matching `IntegerRadix::validator`.
fn is_dec_integer_digit(b: u8) -> bool {
    b.is_ascii_digit()
}
614
615fn has_underscore(raw: &str) -> bool {
616    raw.as_bytes().find_slice(b'_').is_some()
617}
618
619fn is_float(raw: &str) -> bool {
620    raw.as_bytes().find_slice((b'.', b'e', b'E')).is_some()
621}
622
623pub(crate) fn decode_as_is<'i>(
624    raw: Raw<'i>,
625    kind: ScalarKind,
626    output: &mut dyn StringBuilder<'i>,
627    error: &mut dyn ErrorSink,
628) -> ScalarKind {
629    let kind = decode_as(raw, raw.as_str(), kind, output, error);
630    kind
631}
632
633pub(crate) fn decode_as<'i>(
634    raw: Raw<'i>,
635    symbol: &'i str,
636    kind: ScalarKind,
637    output: &mut dyn StringBuilder<'i>,
638    error: &mut dyn ErrorSink,
639) -> ScalarKind {
640    output.clear();
641    if !output.push_str(symbol) {
642        error.report_error(
643            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
644        );
645    }
646    kind
647}
648
649pub(crate) fn decode_symbol<'i>(
650    raw: Raw<'i>,
651    symbol: &'static str,
652    kind: ScalarKind,
653    expected: &'static [Expected],
654    output: &mut dyn StringBuilder<'i>,
655    error: &mut dyn ErrorSink,
656) -> ScalarKind {
657    if raw.as_str() != symbol {
658        error.report_error(
659            ParseError::new(kind.invalid_description())
660                .with_context(Span::new_unchecked(0, raw.len()))
661                .with_expected(expected)
662                .with_unexpected(Span::new_unchecked(0, raw.len())),
663        );
664    }
665
666    decode_as(raw, symbol, kind, output, error)
667}
668
669pub(crate) fn decode_invalid<'i>(
670    raw: Raw<'i>,
671    output: &mut dyn StringBuilder<'i>,
672    error: &mut dyn ErrorSink,
673) -> ScalarKind {
674    error.report_error(
675        ParseError::new("string values must be quoted")
676            .with_context(Span::new_unchecked(0, raw.len()))
677            .with_expected(&[Expected::Description("literal string")])
678            .with_unexpected(Span::new_unchecked(0, raw.len())),
679    );
680
681    output.clear();
682    if !output.push_str(raw.as_str()) {
683        error.report_error(
684            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
685        );
686    }
687    ScalarKind::String
688}