1use core::fmt::{self, Write as _};
4use core::marker::PhantomData;
5use core::num::NonZeroU8;
6use core::ops::ControlFlow;
7
8use crate::parser::str::find_split;
9use crate::parser::trusted::hexdigits_to_byte;
10
/// One fragment of a string that may contain percent-encoded triplets,
/// including broken ones.
///
/// The fragments are produced in input order by
/// `process_percent_encoded_best_effort`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum PctEncodedFragments<'a> {
    /// Text that is passed through as-is (not treated as a percent-encoded
    /// triplet).
    NoPctStr(&'a str),
    /// A `%` character that does not start a complete percent-encoded triplet.
    StrayPercent,
    /// Percent-encoded triplets (first field) together with the character
    /// they decode to (second field).
    Char(&'a str, char),
    /// Complete percent-encoded triplets whose decoded bytes do not form
    /// valid UTF-8.
    InvalidUtf8PctTriplets(&'a str),
}
23
24pub(crate) fn process_percent_encoded_best_effort<T, F, B>(
26 v: T,
27 mut f: F,
28) -> Result<ControlFlow<B>, fmt::Error>
29where
30 T: fmt::Display,
31 F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>,
32{
33 let mut buf = [0_u8; 12];
34 let mut writer = DecomposeWriter {
35 f: &mut f,
36 decoder: Default::default(),
37 buf: &mut buf,
38 result: ControlFlow::Continue(()),
39 _r: PhantomData,
40 };
41
42 if write!(writer, "{v}").is_err() {
43 match writer.result {
44 ControlFlow::Continue(_) => return Err(fmt::Error),
45 ControlFlow::Break(v) => return Ok(ControlFlow::Break(v)),
46 }
47 }
48
49 if let Some(len) = writer.decoder.flush(&mut buf).map(|v| usize::from(v.get())) {
51 let len_suffix = len % 3;
52 let triplets_end = len - len_suffix;
53 let triplets = core::str::from_utf8(&buf[..triplets_end])
54 .expect("[validity] percent-encoded triplets consist of ASCII characters");
55 if let ControlFlow::Break(v) = f(PctEncodedFragments::InvalidUtf8PctTriplets(triplets)) {
56 return Ok(ControlFlow::Break(v));
57 }
58
59 if len_suffix > 0 {
60 if let ControlFlow::Break(v) = f(PctEncodedFragments::StrayPercent) {
61 return Ok(ControlFlow::Break(v));
62 }
63 }
64 if len_suffix > 1 {
65 let after_percent = core::str::from_utf8(
66 &buf[(triplets_end + 1)..(triplets_end + len_suffix)],
67 )
68 .expect("[consistency] percent-encoded triplets contains only ASCII characters");
69 if let ControlFlow::Break(v) = f(PctEncodedFragments::NoPctStr(after_percent)) {
70 return Ok(ControlFlow::Break(v));
71 }
72 }
73 }
74
75 Ok(ControlFlow::Continue(()))
76}
77
78struct DecomposeWriter<'a, F, B> {
80 f: &'a mut F,
82 decoder: DecoderBuffer,
84 buf: &'a mut [u8],
86 result: ControlFlow<B>,
88 _r: PhantomData<fn() -> B>,
90}
91impl<F, B> DecomposeWriter<'_, F, B>
92where
93 F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>,
94{
95 #[inline(always)]
97 fn result_continue_or_err(&self) -> fmt::Result {
98 if self.result.is_break() {
99 return Err(fmt::Error);
100 }
101 Ok(())
102 }
103
104 fn output_as_undecodable(&mut self, len_undecodable: u8) -> fmt::Result {
106 let len_written = usize::from(len_undecodable);
107 let frag = core::str::from_utf8(&self.buf[..len_written])
108 .expect("[validity] `DecoderBuffer` writes a valid ASCII string");
109 let len_incomplete = len_written % 3;
110 let len_complete = len_written - len_incomplete;
111 self.result = (self.f)(PctEncodedFragments::InvalidUtf8PctTriplets(
112 &frag[..len_complete],
113 ));
114 self.result_continue_or_err()?;
115 if len_incomplete > 0 {
116 self.result = (self.f)(PctEncodedFragments::StrayPercent);
118 if self.result.is_break() {
119 return Err(fmt::Error);
120 }
121 if len_incomplete > 1 {
122 debug_assert_eq!(
124 len_incomplete, 2,
125 "[consistency] the length of incomplete percent-encoded \
126 triplet must be less than 2 bytes"
127 );
128 self.result = (self.f)(PctEncodedFragments::NoPctStr(
129 &frag[(len_complete + 1)..len_written],
130 ));
131 self.result_continue_or_err()?;
132 }
133 }
134 Ok(())
135 }
136}
137
138impl<F, B> fmt::Write for DecomposeWriter<'_, F, B>
139where
140 F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>,
141{
142 fn write_str(&mut self, s: &str) -> fmt::Result {
143 self.result_continue_or_err()?;
144 let mut rest = s;
145 while !rest.is_empty() {
146 let (len_consumed, result) = self.decoder.push_encoded(self.buf, rest);
147 if len_consumed == 0 {
148 if let Some(len_written) = self.decoder.flush(self.buf).map(NonZeroU8::get) {
151 self.output_as_undecodable(len_written)?;
152 rest = &rest[usize::from(len_written)..];
153 }
154
155 let (plain_prefix, suffix) = find_split(rest, b'%').unwrap_or((rest, ""));
157 debug_assert!(
158 !plain_prefix.is_empty(),
159 "[consistency] `len_consumed == 0` indicates non-empty \
160 `rest` not starting with `%`"
161 );
162 self.result = (self.f)(PctEncodedFragments::NoPctStr(plain_prefix));
163 self.result_continue_or_err()?;
164 rest = suffix;
165 continue;
166 }
167
168 match result {
170 PushResult::Decoded(len_written, c) => {
171 let len_written = usize::from(len_written.get());
172 let frag = core::str::from_utf8(&self.buf[..len_written])
173 .expect("[validity] `DecoderBuffer` writes a valid ASCII string");
174 self.result = (self.f)(PctEncodedFragments::Char(frag, c));
175 self.result_continue_or_err()?;
176 }
177 PushResult::Undecodable(len_written) => {
178 self.output_as_undecodable(len_written)?;
179 }
180 PushResult::NeedMoreBytes => {
181 }
183 }
184 rest = &rest[len_consumed..];
185 }
186 Ok(())
187 }
188}
189
/// Outcome of `DecoderBuffer::push_encoded`.
#[derive(Debug, Clone, Copy)]
enum PushResult {
    /// All the given input was buffered; more input is needed before anything
    /// can be reported.
    NeedMoreBytes,
    /// A character was decoded.
    ///
    /// The first field is the number of bytes of percent-encoded text written
    /// to the destination buffer, the second is the decoded character.
    Decoded(NonZeroU8, char),
    /// The given number of bytes written to the destination buffer cannot be
    /// decoded to a character. The length may be zero.
    Undecodable(u8),
}
203
/// Buffer that accumulates percent-encoded triplets until they can be decoded
/// to a character or are known to be undecodable.
#[derive(Default, Debug, Clone, Copy)]
struct DecoderBuffer {
    /// Buffered percent-encoded text (`%` and hexdigits); only the first
    /// `len_encoded` bytes are meaningful.
    encoded: [u8; 12],
    /// Byte values of the complete triplets in `encoded`, in the same order.
    decoded: [u8; 4],
    /// Number of bytes currently held in `encoded`.
    len_encoded: u8,
}
219
220impl DecoderBuffer {
221 fn write_and_pop(&mut self, dest: &mut [u8], remove_len: u8) {
223 let new_len = self.len_encoded - remove_len;
224 let remove_len = usize::from(remove_len);
225 let src_range = remove_len..usize::from(self.len_encoded);
226 dest[..remove_len].copy_from_slice(&self.encoded[..remove_len]);
227
228 if new_len == 0 {
229 *self = Self::default();
230 return;
231 }
232 self.encoded.copy_within(src_range, 0);
233 self.decoded
234 .copy_within((remove_len / 3)..usize::from(self.len_encoded / 3), 0);
235 self.len_encoded = new_len;
236 }
237
238 fn push_single_encoded_byte(&mut self, byte: u8) {
240 debug_assert!(
241 self.len_encoded < 12,
242 "[consistency] four percent-encoded triplets are enough for a unicode code point"
243 );
244 let pos_enc = usize::from(self.len_encoded);
245 self.len_encoded += 1;
246 self.encoded[pos_enc] = byte;
247 if self.len_encoded % 3 == 0 {
248 let pos_dec = usize::from(self.len_encoded / 3 - 1);
250 let upper = self.encoded[pos_enc - 1];
251 let lower = byte;
252 debug_assert!(
253 upper.is_ascii_hexdigit() && lower.is_ascii_hexdigit(),
254 "[consistency] the `encoded` buffer should contain valid percent-encoded triplets"
255 );
256 self.decoded[pos_dec] = hexdigits_to_byte([upper, lower]);
257 }
258 }
259
260 #[must_use]
269 pub(crate) fn push_encoded(&mut self, buf: &mut [u8], s: &str) -> (usize, PushResult) {
270 debug_assert!(
271 buf.len() >= 12,
272 "[internal precondition] destination buffer should be at least 12 bytes"
273 );
274 let mut chars = s.chars();
275 let mut len_triplet_incomplete = self.len_encoded % 3;
276 for c in &mut chars {
277 if len_triplet_incomplete == 0 {
278 if c != '%' {
280 let len_consumed = s.len() - chars.as_str().len() - 1;
283 let len_result = self.len_encoded;
284 self.write_and_pop(buf, len_result);
285 return (len_consumed, PushResult::Undecodable(len_result));
286 }
287 self.push_single_encoded_byte(b'%');
288 len_triplet_incomplete = 1;
289 continue;
290 }
291
292 if !c.is_ascii_hexdigit() {
294 let len_consumed = s.len() - chars.as_str().len() - 1;
297 let len_result = self.len_encoded;
298 self.write_and_pop(buf, len_result);
299 return (len_consumed, PushResult::Undecodable(len_result));
300 }
301 self.push_single_encoded_byte(c as u8);
302 if len_triplet_incomplete == 1 {
303 len_triplet_incomplete = 2;
304 continue;
305 } else {
306 debug_assert_eq!(len_triplet_incomplete, 2);
308 len_triplet_incomplete = 0;
309 }
310
311 let len_decoded = usize::from(self.len_encoded) / 3;
314 match core::str::from_utf8(&self.decoded[..len_decoded]) {
315 Ok(decoded_str) => {
316 let len_consumed = s.len() - chars.as_str().len();
318 let c = decoded_str
319 .chars()
320 .next()
321 .expect("[validity] `decoded` buffer is nonempty");
322 let len_result = NonZeroU8::new(self.len_encoded).expect(
323 "[consistency] `encoded` buffer is nonempty since \
324 `push_single_encoded_byte()` was called",
325 );
326 self.write_and_pop(buf, len_result.get());
327 return (len_consumed, PushResult::Decoded(len_result, c));
328 }
329 Err(e) => {
330 assert_eq!(
332 e.valid_up_to(),
333 0,
334 "[consistency] `decoded` buffer contains at most one character"
335 );
336 let skip_len_decoded = match e.error_len() {
337 None => continue,
339 Some(v) => v,
341 };
342 let len_consumed = s.len() - chars.as_str().len();
343 let len_result = skip_len_decoded as u8 * 3;
344 assert_ne!(
345 skip_len_decoded, 0,
346 "[consistency] empty bytes cannot be invalid"
347 );
348 self.write_and_pop(buf, len_result);
349 return (len_consumed, PushResult::Undecodable(len_result));
350 }
351 };
352 }
353 let len_consumed = s.len() - chars.as_str().len();
354 (len_consumed, PushResult::NeedMoreBytes)
355 }
356
357 #[must_use]
359 pub(crate) fn flush(&mut self, buf: &mut [u8]) -> Option<NonZeroU8> {
360 let len_result = NonZeroU8::new(self.len_encoded)?;
361 self.write_and_pop(buf, len_result.get());
363 debug_assert_eq!(
364 self.len_encoded, 0,
365 "[consistency] the buffer should be cleared after flushed"
366 );
367 Some(len_result)
368 }
369}