1use winnow::stream::ContainsToken as _;
2use winnow::stream::FindSlice as _;
3use winnow::stream::Offset as _;
4use winnow::stream::Stream as _;
5
6use crate::decoder::StringBuilder;
7use crate::ErrorSink;
8use crate::Expected;
9use crate::ParseError;
10use crate::Raw;
11use crate::Span;
12
/// Error message reported when a `StringBuilder::push_str` call returns `false`.
const ALLOCATION_ERROR: &str = "could not allocate for string";
14
15#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
16pub enum ScalarKind {
17 String,
18 Boolean(bool),
19 DateTime,
20 Float,
21 Integer(IntegerRadix),
22}
23
24impl ScalarKind {
25 pub fn description(&self) -> &'static str {
26 match self {
27 Self::String => "string",
28 Self::Boolean(_) => "boolean",
29 Self::DateTime => "date-time",
30 Self::Float => "float",
31 Self::Integer(radix) => radix.description(),
32 }
33 }
34
35 pub fn invalid_description(&self) -> &'static str {
36 match self {
37 Self::String => "invalid string",
38 Self::Boolean(_) => "invalid boolean",
39 Self::DateTime => "invalid date-time",
40 Self::Float => "invalid float",
41 Self::Integer(radix) => radix.invalid_description(),
42 }
43 }
44}
45
/// Radix an integer is written in; decimal is the default.
#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub enum IntegerRadix {
    /// Base 10.
    #[default]
    Dec,
    /// Base 16.
    Hex,
    /// Base 8.
    Oct,
    /// Base 2.
    Bin,
}

impl IntegerRadix {
    /// Short human-readable name of an integer written in this radix.
    pub fn description(&self) -> &'static str {
        match *self {
            IntegerRadix::Bin => "binary",
            IntegerRadix::Oct => "octal",
            IntegerRadix::Hex => "hexadecimal",
            IntegerRadix::Dec => "integer",
        }
    }

    /// Numeric base (2, 8, 10 or 16).
    pub fn value(&self) -> u32 {
        match *self {
            IntegerRadix::Bin => 2,
            IntegerRadix::Oct => 8,
            IntegerRadix::Dec => 10,
            IntegerRadix::Hex => 16,
        }
    }

    /// Message used when an integer in this radix turns out to be malformed.
    pub fn invalid_description(&self) -> &'static str {
        match *self {
            IntegerRadix::Bin => "invalid binary number",
            IntegerRadix::Oct => "invalid octal number",
            IntegerRadix::Hex => "invalid hexadecimal number",
            IntegerRadix::Dec => "invalid integer number",
        }
    }

    /// Predicate that accepts exactly the digit characters of this radix.
    fn validator(&self) -> fn(char) -> bool {
        fn is_bin_digit(c: char) -> bool {
            c == '0' || c == '1'
        }
        fn is_oct_digit(c: char) -> bool {
            ('0'..='7').contains(&c)
        }
        fn is_dec_digit(c: char) -> bool {
            c.is_ascii_digit()
        }
        fn is_hex_digit(c: char) -> bool {
            c.is_ascii_hexdigit()
        }

        match *self {
            IntegerRadix::Bin => is_bin_digit,
            IntegerRadix::Oct => is_oct_digit,
            IntegerRadix::Dec => is_dec_digit,
            IntegerRadix::Hex => is_hex_digit,
        }
    }
}
92
93pub(crate) fn decode_unquoted_scalar<'i>(
94 raw: Raw<'i>,
95 output: &mut dyn StringBuilder<'i>,
96 error: &mut dyn ErrorSink,
97) -> ScalarKind {
98 let s = raw.as_str();
99 let Some(first) = s.as_bytes().first() else {
100 return decode_invalid(raw, output, error);
101 };
102 match first {
103 b'+' | b'-' => {
105 let value = &raw.as_str()[1..];
106 decode_sign_prefix(raw, value, output, error)
107 }
108 b'_' => decode_datetime_or_float_or_integer(raw.as_str(), raw, output, error),
110 b'0' => decode_zero_prefix(raw.as_str(), false, raw, output, error),
112 b'1'..=b'9' => decode_datetime_or_float_or_integer(raw.as_str(), raw, output, error),
113 b'.' => {
115 let kind = ScalarKind::Float;
116 let stream = raw.as_str();
117 ensure_float(stream, raw, error);
118 decode_float_or_integer(stream, raw, kind, output, error)
119 }
120 b't' | b'T' => {
121 const SYMBOL: &str = "true";
122 let kind = ScalarKind::Boolean(true);
123 let expected = &[Expected::Literal(SYMBOL)];
124 decode_symbol(raw, SYMBOL, kind, expected, output, error)
125 }
126 b'f' | b'F' => {
127 const SYMBOL: &str = "false";
128 let kind = ScalarKind::Boolean(false);
129 let expected = &[Expected::Literal(SYMBOL)];
130 decode_symbol(raw, SYMBOL, kind, expected, output, error)
131 }
132 b'i' | b'I' => {
133 const SYMBOL: &str = "inf";
134 let kind = ScalarKind::Float;
135 let expected = &[Expected::Literal(SYMBOL)];
136 decode_symbol(raw, SYMBOL, kind, expected, output, error)
137 }
138 b'n' | b'N' => {
139 const SYMBOL: &str = "nan";
140 let kind = ScalarKind::Float;
141 let expected = &[Expected::Literal(SYMBOL)];
142 decode_symbol(raw, SYMBOL, kind, expected, output, error)
143 }
144 _ => decode_invalid(raw, output, error),
145 }
146}
147
148pub(crate) fn decode_sign_prefix<'i>(
149 raw: Raw<'i>,
150 value: &'i str,
151 output: &mut dyn StringBuilder<'i>,
152 error: &mut dyn ErrorSink,
153) -> ScalarKind {
154 let Some(first) = value.as_bytes().first() else {
155 return decode_invalid(raw, output, error);
156 };
157 match first {
158 b'+' | b'-' => {
160 let start = value.offset_from(&raw.as_str());
161 let end = start + 1;
162 error.report_error(
163 ParseError::new("redundant numeric sign")
164 .with_context(Span::new_unchecked(0, raw.len()))
165 .with_expected(&[])
166 .with_unexpected(Span::new_unchecked(start, end)),
167 );
168
169 let value = &value[1..];
170 decode_sign_prefix(raw, value, output, error)
171 }
172 b'_' => decode_datetime_or_float_or_integer(value, raw, output, error),
174 b'0' => decode_zero_prefix(value, true, raw, output, error),
176 b'1'..=b'9' => decode_datetime_or_float_or_integer(value, raw, output, error),
177 b'.' => {
179 let kind = ScalarKind::Float;
180 let stream = raw.as_str();
181 ensure_float(stream, raw, error);
182 decode_float_or_integer(stream, raw, kind, output, error)
183 }
184 b'i' | b'I' => {
185 const SYMBOL: &str = "inf";
186 let kind = ScalarKind::Float;
187 if value != SYMBOL {
188 let expected = &[Expected::Literal(SYMBOL)];
189 let start = value.offset_from(&raw.as_str());
190 let end = start + value.len();
191 error.report_error(
192 ParseError::new(kind.invalid_description())
193 .with_context(Span::new_unchecked(0, raw.len()))
194 .with_expected(expected)
195 .with_unexpected(Span::new_unchecked(start, end)),
196 );
197 decode_as(raw, SYMBOL, kind, output, error)
198 } else {
199 decode_as_is(raw, kind, output, error)
200 }
201 }
202 b'n' | b'N' => {
203 const SYMBOL: &str = "nan";
204 let kind = ScalarKind::Float;
205 if value != SYMBOL {
206 let expected = &[Expected::Literal(SYMBOL)];
207 let start = value.offset_from(&raw.as_str());
208 let end = start + value.len();
209 error.report_error(
210 ParseError::new(kind.invalid_description())
211 .with_context(Span::new_unchecked(0, raw.len()))
212 .with_expected(expected)
213 .with_unexpected(Span::new_unchecked(start, end)),
214 );
215 decode_as(raw, SYMBOL, kind, output, error)
216 } else {
217 decode_as_is(raw, kind, output, error)
218 }
219 }
220 _ => decode_invalid(raw, output, error),
221 }
222}
223
224pub(crate) fn decode_zero_prefix<'i>(
225 value: &'i str,
226 signed: bool,
227 raw: Raw<'i>,
228 output: &mut dyn StringBuilder<'i>,
229 error: &mut dyn ErrorSink,
230) -> ScalarKind {
231 debug_assert_eq!(value.as_bytes()[0], b'0');
232 if value.len() == 1 {
233 let kind = ScalarKind::Integer(IntegerRadix::Dec);
234 decode_float_or_integer(raw.as_str(), raw, kind, output, error)
236 } else {
237 let radix = value.as_bytes()[1];
238 match radix {
239 b'x' | b'X' => {
240 if signed {
241 error.report_error(
242 ParseError::new("integers with a radix cannot be signed")
243 .with_context(Span::new_unchecked(0, raw.len()))
244 .with_expected(&[])
245 .with_unexpected(Span::new_unchecked(0, 1)),
246 );
247 }
248 if radix == b'X' {
249 let start = value.offset_from(&raw.as_str());
250 let end = start + 2;
251 error.report_error(
252 ParseError::new("radix must be lowercase")
253 .with_context(Span::new_unchecked(0, raw.len()))
254 .with_expected(&[Expected::Literal("0x")])
255 .with_unexpected(Span::new_unchecked(start, end)),
256 );
257 }
258 let radix = IntegerRadix::Hex;
259 let kind = ScalarKind::Integer(radix);
260 let stream = &value[2..];
261 ensure_radixed_value(stream, raw, radix, error);
262 decode_float_or_integer(stream, raw, kind, output, error)
263 }
264 b'o' | b'O' => {
265 if signed {
266 error.report_error(
267 ParseError::new("integers with a radix cannot be signed")
268 .with_context(Span::new_unchecked(0, raw.len()))
269 .with_expected(&[])
270 .with_unexpected(Span::new_unchecked(0, 1)),
271 );
272 }
273 if radix == b'O' {
274 let start = value.offset_from(&raw.as_str());
275 let end = start + 2;
276 error.report_error(
277 ParseError::new("radix must be lowercase")
278 .with_context(Span::new_unchecked(0, raw.len()))
279 .with_expected(&[Expected::Literal("0o")])
280 .with_unexpected(Span::new_unchecked(start, end)),
281 );
282 }
283 let radix = IntegerRadix::Oct;
284 let kind = ScalarKind::Integer(radix);
285 let stream = &value[2..];
286 ensure_radixed_value(stream, raw, radix, error);
287 decode_float_or_integer(stream, raw, kind, output, error)
288 }
289 b'b' | b'B' => {
290 if signed {
291 error.report_error(
292 ParseError::new("integers with a radix cannot be signed")
293 .with_context(Span::new_unchecked(0, raw.len()))
294 .with_expected(&[])
295 .with_unexpected(Span::new_unchecked(0, 1)),
296 );
297 }
298 if radix == b'B' {
299 let start = value.offset_from(&raw.as_str());
300 let end = start + 2;
301 error.report_error(
302 ParseError::new("radix must be lowercase")
303 .with_context(Span::new_unchecked(0, raw.len()))
304 .with_expected(&[Expected::Literal("0b")])
305 .with_unexpected(Span::new_unchecked(start, end)),
306 );
307 }
308 let radix = IntegerRadix::Bin;
309 let kind = ScalarKind::Integer(radix);
310 let stream = &value[2..];
311 ensure_radixed_value(stream, raw, radix, error);
312 decode_float_or_integer(stream, raw, kind, output, error)
313 }
314 b'd' | b'D' => {
315 if signed {
316 error.report_error(
317 ParseError::new("integers with a radix cannot be signed")
318 .with_context(Span::new_unchecked(0, raw.len()))
319 .with_expected(&[])
320 .with_unexpected(Span::new_unchecked(0, 1)),
321 );
322 }
323 let radix = IntegerRadix::Dec;
324 let kind = ScalarKind::Integer(radix);
325 let stream = &value[2..];
326 error.report_error(
327 ParseError::new("redundant integer number prefix")
328 .with_context(Span::new_unchecked(0, raw.len()))
329 .with_expected(&[])
330 .with_unexpected(Span::new_unchecked(0, 2)),
331 );
332 ensure_radixed_value(stream, raw, radix, error);
333 decode_float_or_integer(stream, raw, kind, output, error)
334 }
335 _ => decode_datetime_or_float_or_integer(value, raw, output, error),
336 }
337 }
338}
339
340pub(crate) fn decode_datetime_or_float_or_integer<'i>(
341 value: &'i str,
342 raw: Raw<'i>,
343 output: &mut dyn StringBuilder<'i>,
344 error: &mut dyn ErrorSink,
345) -> ScalarKind {
346 let Some(digit_end) = value
347 .as_bytes()
348 .offset_for(|b| !(b'0'..=b'9').contains_token(b))
349 else {
350 let kind = ScalarKind::Integer(IntegerRadix::Dec);
351 let stream = raw.as_str();
352 ensure_no_leading_zero(value, raw, error);
353 return decode_float_or_integer(stream, raw, kind, output, error);
354 };
355
356 #[cfg(feature = "unsafe")] let rest = unsafe { &value.get_unchecked(digit_end..) };
358 #[cfg(not(feature = "unsafe"))]
359 let rest = &value[digit_end..];
360
361 if rest.starts_with("-") || rest.starts_with(":") {
362 decode_as_is(raw, ScalarKind::DateTime, output, error)
363 } else if is_float(rest) {
364 let kind = ScalarKind::Float;
365 let stream = raw.as_str();
366 ensure_float(value, raw, error);
367 decode_float_or_integer(stream, raw, kind, output, error)
368 } else if rest.starts_with("_") {
369 let kind = ScalarKind::Integer(IntegerRadix::Dec);
370 let stream = raw.as_str();
371 ensure_no_leading_zero(value, raw, error);
372 decode_float_or_integer(stream, raw, kind, output, error)
373 } else {
374 decode_invalid(raw, output, error)
375 }
376}
377
378pub(crate) fn ensure_float<'i>(mut value: &'i str, raw: Raw<'i>, error: &mut dyn ErrorSink) {
390 ensure_dec_uint(&mut value, raw, false, "invalid mantissa", error);
391
392 if value.starts_with(".") {
393 let _ = value.next_token();
394 ensure_dec_uint(&mut value, raw, true, "invalid fraction", error);
395 }
396
397 if value.starts_with(['e', 'E']) {
398 let _ = value.next_token();
399 if value.starts_with(['+', '-']) {
400 let _ = value.next_token();
401 }
402 ensure_dec_uint(&mut value, raw, true, "invalid exponent", error);
403 }
404
405 if !value.is_empty() {
406 let start = value.offset_from(&raw.as_str());
407 let end = raw.len();
408 error.report_error(
409 ParseError::new(ScalarKind::Float.invalid_description())
410 .with_context(Span::new_unchecked(0, raw.len()))
411 .with_expected(&[])
412 .with_unexpected(Span::new_unchecked(start, end)),
413 );
414 }
415}
416
417pub(crate) fn ensure_dec_uint<'i>(
418 value: &mut &'i str,
419 raw: Raw<'i>,
420 zero_prefix: bool,
421 invalid_description: &'static str,
422 error: &mut dyn ErrorSink,
423) {
424 let start = *value;
425 let mut digit_count = 0;
426 while let Some(current) = value.chars().next() {
427 if current.is_ascii_digit() {
428 digit_count += 1;
429 } else if current == '_' {
430 } else {
431 break;
432 }
433 let _ = value.next_token();
434 }
435
436 match digit_count {
437 0 => {
438 let start = start.offset_from(&raw.as_str());
439 let end = start;
440 error.report_error(
441 ParseError::new(invalid_description)
442 .with_context(Span::new_unchecked(0, raw.len()))
443 .with_expected(&[Expected::Description("digits")])
444 .with_unexpected(Span::new_unchecked(start, end)),
445 );
446 }
447 1 => {}
448 _ if start.starts_with("0") && !zero_prefix => {
449 let start = start.offset_from(&raw.as_str());
450 let end = start + 1;
451 error.report_error(
452 ParseError::new("unexpected leading zero")
453 .with_context(Span::new_unchecked(0, raw.len()))
454 .with_expected(&[])
455 .with_unexpected(Span::new_unchecked(start, end)),
456 );
457 }
458 _ => {}
459 }
460}
461
462pub(crate) fn ensure_no_leading_zero<'i>(value: &'i str, raw: Raw<'i>, error: &mut dyn ErrorSink) {
463 if value.starts_with("0") {
464 let start = value.offset_from(&raw.as_str());
465 let end = start + 1;
466 error.report_error(
467 ParseError::new("unexpected leading zero")
468 .with_context(Span::new_unchecked(0, raw.len()))
469 .with_expected(&[])
470 .with_unexpected(Span::new_unchecked(start, end)),
471 );
472 }
473}
474
475pub(crate) fn ensure_radixed_value(
476 value: &str,
477 raw: Raw<'_>,
478 radix: IntegerRadix,
479 error: &mut dyn ErrorSink,
480) {
481 let invalid = ['+', '-'];
482 let value = if let Some(value) = value.strip_prefix(invalid) {
483 let pos = raw.as_str().find(invalid).unwrap();
484 error.report_error(
485 ParseError::new("unexpected sign")
486 .with_context(Span::new_unchecked(0, raw.len()))
487 .with_expected(&[])
488 .with_unexpected(Span::new_unchecked(pos, pos + 1)),
489 );
490 value
491 } else {
492 value
493 };
494
495 let valid = radix.validator();
496 for (index, c) in value.char_indices() {
497 if !valid(c) && c != '_' {
498 let pos = value.offset_from(&raw.as_str()) + index;
499 error.report_error(
500 ParseError::new(radix.invalid_description())
501 .with_context(Span::new_unchecked(0, raw.len()))
502 .with_unexpected(Span::new_unchecked(pos, pos)),
503 );
504 }
505 }
506}
507
508pub(crate) fn decode_float_or_integer<'i>(
509 stream: &'i str,
510 raw: Raw<'i>,
511 kind: ScalarKind,
512 output: &mut dyn StringBuilder<'i>,
513 error: &mut dyn ErrorSink,
514) -> ScalarKind {
515 output.clear();
516
517 let underscore = "_";
518
519 if has_underscore(stream) {
520 if stream.starts_with(underscore) {
521 error.report_error(
522 ParseError::new("`_` may only go between digits")
523 .with_context(Span::new_unchecked(0, raw.len()))
524 .with_expected(&[])
525 .with_unexpected(Span::new_unchecked(0, underscore.len())),
526 );
527 }
528 if 1 < stream.len() && stream.ends_with(underscore) {
529 let start = stream.offset_from(&raw.as_str());
530 let end = start + stream.len();
531 error.report_error(
532 ParseError::new("`_` may only go between digits")
533 .with_context(Span::new_unchecked(0, raw.len()))
534 .with_expected(&[])
535 .with_unexpected(Span::new_unchecked(end - underscore.len(), end)),
536 );
537 }
538
539 for part in stream.split(underscore) {
540 let part_start = part.offset_from(&raw.as_str());
541 let part_end = part_start + part.len();
542
543 if 0 < part_start {
544 let first = part.as_bytes().first().copied().unwrap_or(b'0');
545 if !is_any_digit(first, kind) {
546 let start = part_start - 1;
547 let end = part_start;
548 debug_assert_eq!(&raw.as_str()[start..end], underscore);
549 error.report_error(
550 ParseError::new("`_` may only go between digits")
551 .with_context(Span::new_unchecked(0, raw.len()))
552 .with_unexpected(Span::new_unchecked(start, end)),
553 );
554 }
555 }
556 if 1 < part.len() && part_end < raw.len() {
557 let last = part.as_bytes().last().copied().unwrap_or(b'0');
558 if !is_any_digit(last, kind) {
559 let start = part_end;
560 let end = start + underscore.len();
561 debug_assert_eq!(&raw.as_str()[start..end], underscore);
562 error.report_error(
563 ParseError::new("`_` may only go between digits")
564 .with_context(Span::new_unchecked(0, raw.len()))
565 .with_unexpected(Span::new_unchecked(start, end)),
566 );
567 }
568 }
569
570 if part.is_empty() && part_start != 0 && part_end != raw.len() {
571 let start = part_start;
572 let end = start + 1;
573 error.report_error(
574 ParseError::new("`_` may only go between digits")
575 .with_context(Span::new_unchecked(0, raw.len()))
576 .with_unexpected(Span::new_unchecked(start, end)),
577 );
578 }
579
580 if !part.is_empty() && !output.push_str(part) {
581 error.report_error(
582 ParseError::new(ALLOCATION_ERROR)
583 .with_unexpected(Span::new_unchecked(part_start, part_end)),
584 );
585 }
586 }
587 } else {
588 if !output.push_str(stream) {
589 error.report_error(
590 ParseError::new(ALLOCATION_ERROR)
591 .with_unexpected(Span::new_unchecked(0, raw.len())),
592 );
593 }
594 }
595
596 kind
597}
598
599fn is_any_digit(b: u8, kind: ScalarKind) -> bool {
600 if kind == ScalarKind::Float {
601 is_dec_integer_digit(b)
602 } else {
603 is_any_integer_digit(b)
604 }
605}
606
/// Whether `b` is an ASCII digit of any supported radix (`0-9`, `a-f`, `A-F`).
fn is_any_integer_digit(b: u8) -> bool {
    b.is_ascii_hexdigit()
}
610
/// Whether `b` is an ASCII decimal digit (`0-9`).
fn is_dec_integer_digit(b: u8) -> bool {
    b.is_ascii_digit()
}
614
/// Whether `raw` contains at least one `_`.
fn has_underscore(raw: &str) -> bool {
    raw.as_bytes().contains(&b'_')
}
618
619fn is_float(raw: &str) -> bool {
620 raw.as_bytes().find_slice((b'.', b'e', b'E')).is_some()
621}
622
623pub(crate) fn decode_as_is<'i>(
624 raw: Raw<'i>,
625 kind: ScalarKind,
626 output: &mut dyn StringBuilder<'i>,
627 error: &mut dyn ErrorSink,
628) -> ScalarKind {
629 let kind = decode_as(raw, raw.as_str(), kind, output, error);
630 kind
631}
632
633pub(crate) fn decode_as<'i>(
634 raw: Raw<'i>,
635 symbol: &'i str,
636 kind: ScalarKind,
637 output: &mut dyn StringBuilder<'i>,
638 error: &mut dyn ErrorSink,
639) -> ScalarKind {
640 output.clear();
641 if !output.push_str(symbol) {
642 error.report_error(
643 ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
644 );
645 }
646 kind
647}
648
649pub(crate) fn decode_symbol<'i>(
650 raw: Raw<'i>,
651 symbol: &'static str,
652 kind: ScalarKind,
653 expected: &'static [Expected],
654 output: &mut dyn StringBuilder<'i>,
655 error: &mut dyn ErrorSink,
656) -> ScalarKind {
657 if raw.as_str() != symbol {
658 error.report_error(
659 ParseError::new(kind.invalid_description())
660 .with_context(Span::new_unchecked(0, raw.len()))
661 .with_expected(expected)
662 .with_unexpected(Span::new_unchecked(0, raw.len())),
663 );
664 }
665
666 decode_as(raw, symbol, kind, output, error)
667}
668
669pub(crate) fn decode_invalid<'i>(
670 raw: Raw<'i>,
671 output: &mut dyn StringBuilder<'i>,
672 error: &mut dyn ErrorSink,
673) -> ScalarKind {
674 error.report_error(
675 ParseError::new("string values must be quoted")
676 .with_context(Span::new_unchecked(0, raw.len()))
677 .with_expected(&[Expected::Description("literal string")])
678 .with_unexpected(Span::new_unchecked(0, raw.len())),
679 );
680
681 output.clear();
682 if !output.push_str(raw.as_str()) {
683 error.report_error(
684 ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
685 );
686 }
687 ScalarKind::String
688}