aoc/util/
parse.rs

1//! Extracts and parses signed and unsigned integers from surrounding text and whitespace.
2//!
3//! A common pattern in Advent of Code is to parse and return `123`, `456` and `789` from input
4//! resembling:
5//!
6//! ```none
7//!   Lorem ipsum 123 dolor 456 sit 789 amet
8//! ```
9//!
//! This module provides two [`&str`] extension methods [`iter_signed`] and [`iter_unsigned`]. The
//! reason for the separate methods is that some Advent of Code inputs contain the `-` character
//! as a delimiter and this would cause numbers to be incorrectly parsed as negative.
13//!
14//! [`iter_unsigned`]: ParseOps::iter_unsigned
15//! [`iter_signed`]: ParseOps::iter_signed
16use crate::util::integer::*;
17use std::marker::PhantomData;
18use std::str::Bytes;
19
/// Conversion from a raw text byte to the value of the decimal digit it may encode.
pub trait ParseByte {
    /// Returns the distance of this byte from ASCII `'0'`.
    ///
    /// The `u8` implementation in this module uses wrapping subtraction, so the ASCII digits
    /// `'0'..='9'` map to `0..=9` and every other byte maps to a value of 10 or more. Callers
    /// can therefore test for "is a digit" with a single `< 10` comparison.
    fn to_decimal(self) -> u8;
}
23
24impl ParseByte for u8 {
25    #[inline]
26    fn to_decimal(self) -> u8 {
27        self.wrapping_sub(b'0')
28    }
29}
30
/// Iterator over the unsigned integers embedded in a string.
///
/// Created by [`ParseOps::iter_unsigned`]. Each call to `next` skips any non-digit bytes and
/// then yields the following run of ASCII digits as a `T`.
///
/// `Clone` lets callers scan the same remaining input more than once, and `Debug` keeps the
/// type usable inside other `#[derive(Debug)]` types and in assertion messages.
#[derive(Clone, Debug)]
pub struct ParseUnsigned<'a, T> {
    /// Remaining, not yet consumed bytes of the input string.
    bytes: Bytes<'a>,
    /// Records the integer type to produce without storing a value of it.
    phantom: PhantomData<T>,
}
35
/// Iterator over the signed integers embedded in a string.
///
/// Created by [`ParseOps::iter_signed`]. Behaves like the unsigned variant but additionally
/// recognises a leading `-` sign.
///
/// `Clone` lets callers scan the same remaining input more than once, and `Debug` keeps the
/// type usable inside other `#[derive(Debug)]` types and in assertion messages.
#[derive(Clone, Debug)]
pub struct ParseSigned<'a, T> {
    /// Remaining, not yet consumed bytes of the input string.
    bytes: Bytes<'a>,
    /// Records the integer type to produce without storing a value of it.
    phantom: PhantomData<T>,
}
40
41pub trait ParseOps {
42    fn unsigned<T: Unsigned<T>>(&self) -> T;
43    fn signed<T: Signed<T>>(&self) -> T;
44    fn iter_unsigned<T: Unsigned<T>>(&self) -> ParseUnsigned<'_, T>;
45    fn iter_signed<T: Signed<T>>(&self) -> ParseSigned<'_, T>;
46}
47
48impl ParseOps for &str {
49    fn unsigned<T: Unsigned<T>>(&self) -> T {
50        match try_unsigned(&mut self.bytes()) {
51            Some(t) => t,
52            None => panic!("Unable to parse \"{self}\""),
53        }
54    }
55
56    fn signed<T: Signed<T>>(&self) -> T {
57        match try_signed(&mut self.bytes()) {
58            Some(t) => t,
59            None => panic!("Unable to parse \"{self}\""),
60        }
61    }
62
63    fn iter_unsigned<T: Unsigned<T>>(&self) -> ParseUnsigned<'_, T> {
64        ParseUnsigned { bytes: self.bytes(), phantom: PhantomData }
65    }
66
67    fn iter_signed<T: Signed<T>>(&self) -> ParseSigned<'_, T> {
68        ParseSigned { bytes: self.bytes(), phantom: PhantomData }
69    }
70}
71
72impl<T: Unsigned<T>> Iterator for ParseUnsigned<'_, T> {
73    type Item = T;
74
75    fn size_hint(&self) -> (usize, Option<usize>) {
76        let (lower, upper) = self.bytes.size_hint();
77        (lower / 3, upper.map(|u| u / 3))
78    }
79
80    fn next(&mut self) -> Option<Self::Item> {
81        try_unsigned(&mut self.bytes)
82    }
83}
84
85impl<T: Signed<T>> Iterator for ParseSigned<'_, T> {
86    type Item = T;
87
88    fn size_hint(&self) -> (usize, Option<usize>) {
89        let (lower, upper) = self.bytes.size_hint();
90        (lower / 3, upper.map(|u| u / 3))
91    }
92
93    fn next(&mut self) -> Option<Self::Item> {
94        try_signed(&mut self.bytes)
95    }
96}
97
98fn try_unsigned<T: Unsigned<T>>(bytes: &mut Bytes<'_>) -> Option<T> {
99    let mut n = loop {
100        let byte = bytes.next()?;
101        let digit = byte.to_decimal();
102
103        if digit < 10 {
104            break T::from(digit);
105        }
106    };
107
108    loop {
109        let Some(byte) = bytes.next() else { break Some(n) };
110        let digit = byte.to_decimal();
111
112        if digit < 10 {
113            n = T::TEN * n + T::from(digit);
114        } else {
115            break Some(n);
116        }
117    }
118}
119
120fn try_signed<T: Signed<T>>(bytes: &mut Bytes<'_>) -> Option<T> {
121    let (mut n, negative) = loop {
122        let byte = bytes.next()?;
123        let digit = byte.to_decimal();
124
125        if digit == 253 {
126            break (T::ZERO, true);
127        }
128        if digit < 10 {
129            break (T::from(digit), false);
130        }
131    };
132
133    loop {
134        let Some(byte) = bytes.next() else {
135            break Some(if negative { -n } else { n });
136        };
137        let digit = byte.to_decimal();
138
139        if digit < 10 {
140            n = T::TEN * n + T::from(digit);
141        } else {
142            break Some(if negative { -n } else { n });
143        }
144    }
145}