aoc/year2019/
day16.rs

1//! # Flawed Frequency Transmission
2//!
3//! ## Part One
4//!
5//! For each phase we split the input into two halves. The first half is processed using the
6//! pattern "stretched" for the current phase. For the second half we notice that
7//! the coefficients before are always zero and after always one, so the result can only depend
8//! on later digits. Using the first example:
9//!
10//! ```none
11//!     5*1  + 6*1  + 7*1  + 8*1
12//!     5*0  + 6*1  + 7*1  + 8*1
13//!     5*0  + 6*0  + 7*1  + 8*1
14//!     5*0  + 6*0  + 7*0  + 8*1
15//! ```
16//!
17//! Each digit is the sum of itself and subsequent digits and can be computed using a reverse
18//! rolling [prefix sum].
19//!
20//! ## Part Two
21//!
22//! If the index from the first 7 digits lies in the second half of the input then we only need to
23//! consider coefficients that form an [upper triangular matrix], for example:
24//!
25//! ```none
26//!   1  1  1  1
27//!   0  1  1  1
28//!   0  0  1  1
29//!   0  0  0  1
30//! ```
31//!
32//! After the first phase:
33//!
34//! ```none
35//!   1  2  3  4
36//!   0  1  2  3
37//!   0  0  1  2
38//!   0  0  0  1
39//! ```
40//!
41//! After the second phase:
42//!
43//! ```none
44//!   1  3  6 10
45//!   0  1  3  6
46//!   0  0  1  3
47//!   0  0  0  1
48//! ```
49//!
50//! After the third phase:
51//!
52//! ```none
53//!   1  4 10 20
54//!   0  1  4 10
55//!   0  0  1  6
56//!   0  0  0  1
57//! ```
58//!
59//! We can see that the third phase is the [triangular number] sequence and that the fourth phase
60//! is the [tetrahedral number] sequence. More generally the `i`th coefficient of the 100th phase
61//! is the [binomial coefficient] `(i + 99, i)`.
62//!
63//! We could compute the coefficient using the formula `nᵏ/k!` however this [grows rather large]
64//! and quickly will overflow even a `u128`.
65//!
66//! However we only need to coefficient modulo 10. [Lucas's theorem] allow us to computer binomial
67//! coefficients modulo some prime number. If we compute the coefficients modulo 2 and modulo 5
68//! then we can use the [Chinese remainder theorem] to find the result modulo 10.
69//!
70//! [prefix sum]: https://en.wikipedia.org/wiki/Prefix_sum
71//! [upper triangular matrix]: https://en.wikipedia.org/wiki/Triangular_matrix
72//! [triangular number]: https://en.wikipedia.org/wiki/Triangular_number
73//! [tetrahedral number]: https://en.wikipedia.org/wiki/Tetrahedral_number
74//! [binomial coefficient]: https://en.wikipedia.org/wiki/Binomial_coefficient
75//! [grows rather large]: https://oeis.org/A017763/b017763.txt
76//! [Lucas's theorem]: https://en.wikipedia.org/wiki/Lucas%27s_theorem
77//! [Chinese remainder theorem]: https://en.wikipedia.org/wiki/Chinese_remainder_theorem
78use crate::util::parse::*;
79use crate::util::slice::*;
80
81/// Lookup table for first five rows of
82/// [Pascal's triangle](https://en.wikipedia.org/wiki/Pascal%27s_triangle).
83/// A convention specifically for Lukas's theorem is that if `n` < `k` then the value is 0.
84const PASCALS_TRIANGLE: [[usize; 5]; 5] =
85    [[1, 0, 0, 0, 0], [1, 1, 0, 0, 0], [1, 2, 1, 0, 0], [1, 3, 3, 1, 0], [1, 4, 6, 4, 1]];
86
87pub fn parse(input: &str) -> Vec<u8> {
88    input.trim().bytes().map(u8::to_decimal).collect()
89}
90
91pub fn part1(input: &[u8]) -> i32 {
92    let size = input.len();
93    let mid = size / 2;
94    let end = size - 1;
95
96    let mut current = &mut input.iter().copied().map(i32::from).collect::<Vec<_>>();
97    let mut next = &mut vec![0; size];
98
99    for _ in 0..100 {
100        // Brute force the first half of the input.
101        for i in 0..mid {
102            let phase = i + 1;
103            let skip = 2 * phase;
104            let mut remaining = &current[i..];
105            let mut total: i32 = 0;
106
107            while !remaining.is_empty() {
108                let take = phase.min(remaining.len());
109                total += &remaining[..take].iter().sum();
110
111                if remaining.len() <= skip {
112                    break;
113                }
114                remaining = &remaining[skip..];
115
116                let take = phase.min(remaining.len());
117                total -= &remaining[..take].iter().sum();
118
119                if remaining.len() <= skip {
120                    break;
121                }
122                remaining = &remaining[skip..];
123            }
124
125            next[i] = total.abs() % 10;
126        }
127
128        // Use a faster reverse prefix sum approach for the second half of the input.
129        next[end] = current[end];
130
131        for i in (mid..end).rev() {
132            next[i] = (current[i] + next[i + 1]) % 10;
133        }
134
135        (current, next) = (next, current);
136    }
137
138    current[..8].fold_decimal()
139}
140
141pub fn part2(input: &[u8]) -> usize {
142    let digits: Vec<_> = input.iter().copied().map(usize::from).collect();
143    let start = digits[..7].fold_decimal();
144
145    // This approach will only work if the index is in the second half of the input.
146    let size = digits.len();
147    let lower = size * 5_000;
148    let upper = size * 10_000;
149    assert!(lower <= start && start < upper);
150
151    compute(&digits, size, start, upper)
152}
153
154#[cfg(not(feature = "simd"))]
155fn compute(digits: &[usize], size: usize, start: usize, upper: usize) -> usize {
156    let mut coefficients = [0; 8];
157    let mut result = [0; 8];
158
159    for (k, index) in (start..upper).enumerate() {
160        coefficients.rotate_right(1);
161        coefficients[0] = binomial_mod_10(k + 99, k);
162
163        let next = digits[index % size];
164        result.iter_mut().zip(coefficients).for_each(|(r, c)| *r += next * c);
165    }
166
167    result.iter_mut().for_each(|r| *r %= 10);
168    result.fold_decimal()
169}
170
171#[cfg(feature = "simd")]
172fn compute(digits: &[usize], size: usize, start: usize, upper: usize) -> usize {
173    use std::simd::Mask;
174    use std::simd::Simd;
175
176    let mask: Mask<i32, 8> = Mask::from_bitmask(1);
177    let tens: Simd<u32, 8> = Simd::splat(10);
178
179    let mut coefficients: Simd<u32, 8> = Simd::splat(0);
180    let mut result: Simd<u32, 8> = Simd::splat(0);
181
182    for (k, index) in (start..upper).enumerate() {
183        coefficients = mask.select(
184            Simd::splat(binomial_mod_10(k + 99, k) as u32),
185            coefficients.rotate_elements_right::<1>(),
186        );
187
188        let next = Simd::splat(digits[index % size] as u32);
189        result += next * coefficients;
190    }
191
192    (result % tens).to_array().fold_decimal() as usize
193}
194
195/// Computes C(n, k) % 2
196///
197/// This collapses to a special case of a product of only 4 possible values:
198///
199/// * `C(0, 0) = 1`
200/// * `C(1, 0) = 1`
201/// * `C(1, 1) = 1`
202/// * `C(0, 1) = 0`
203///
204/// So the final value will always be one or zero. The fourth zero case happens when `k` has a
205/// bit not present in `n` so we can compute the final value using bitwise logic.
206#[inline]
207fn binomial_mod_2(n: usize, k: usize) -> usize {
208    (k & !n == 0) as usize
209}
210
211/// Computes C(n, k) % 5
212///
213/// If `k` is zero then the remaining coefficients are 1 so we can exit early.
214/// If `r` is zero then the total result is also zero so we can exit early.
215/// To save some time we only take the result modulo 5 at the end.
216#[inline]
217fn bimonial_mod_5(mut n: usize, mut k: usize) -> usize {
218    let mut r = 1;
219
220    while k > 0 && r > 0 {
221        r *= PASCALS_TRIANGLE[n % 5][k % 5];
222        n /= 5;
223        k /= 5;
224    }
225
226    r % 5
227}
228
229/// Computes C(n, k) % 10
230///
231/// Solving the Chinese remainder theorem for the special case of two congruences:
232///
233/// ```none
234///     x ​≡ a₁ (mod n₁) ​≡ a₁ (mod 2)
235///     x ​≡ a₂ (mod n₂) ≡ a₂ (mod 5)
236///     N = n₁n₂ = 10
237///     y₁ = N / n₁ = 5
238///     y₂ = N / n₂ = 2
239///     z₁ = y₁⁻¹ mod n₁ = 5⁻¹ mod 2 = 1
240///     z₂ = y₂⁻¹ mod n₂ = 2⁻¹ mod 5 = 3
241///     x ≡ a₁y₁z₁ + a₂y₂z₂ (mod 10) ≡ 5a₁ + 6a₂ (mod 10)
242/// ```
243#[inline]
244fn binomial_mod_10(n: usize, k: usize) -> usize {
245    5 * binomial_mod_2(n, k) + 6 * bimonial_mod_5(n, k)
246}