aoc/year2019/day16.rs
1//! # Flawed Frequency Transmission
2//!
3//! ## Part One
4//!
5//! For each phase we split the input into two halves. The first half is processed using the
6//! pattern "stretched" for the current phase. For the second half we notice that
7//! the coefficients before are always zero and after always one, so the result can only depend
8//! on later digits. Using the first example:
9//!
10//! ```none
11//! 5*1 + 6*1 + 7*1 + 8*1
12//! 5*0 + 6*1 + 7*1 + 8*1
13//! 5*0 + 6*0 + 7*1 + 8*1
14//! 5*0 + 6*0 + 7*0 + 8*1
15//! ```
16//!
17//! Each digit is the sum of itself and subsequent digits and can be computed using a reverse
18//! rolling [prefix sum].
19//!
20//! ## Part Two
21//!
22//! If the index from the first 7 digits lies in the second half of the input then we only need to
23//! consider coefficients that form an [upper triangular matrix], for example:
24//!
25//! ```none
26//! 1 1 1 1
27//! 0 1 1 1
28//! 0 0 1 1
29//! 0 0 0 1
30//! ```
31//!
32//! After the first phase:
33//!
34//! ```none
35//! 1 2 3 4
36//! 0 1 2 3
37//! 0 0 1 2
38//! 0 0 0 1
39//! ```
40//!
41//! After the second phase:
42//!
43//! ```none
44//! 1 3 6 10
45//! 0 1 3 6
46//! 0 0 1 3
47//! 0 0 0 1
48//! ```
49//!
50//! After the third phase:
51//!
52//! ```none
53//! 1 4 10 20
54//! 0 1 4 10
55//! 0 0 1 6
56//! 0 0 0 1
57//! ```
58//!
59//! We can see that the third phase is the [triangular number] sequence and that the fourth phase
60//! is the [tetrahedral number] sequence. More generally the `i`th coefficient of the 100th phase
61//! is the [binomial coefficient] `(i + 99, i)`.
62//!
63//! We could compute the coefficient using the formula `nᵏ/k!` however this [grows rather large]
64//! and quickly will overflow even a `u128`.
65//!
66//! However we only need to coefficient modulo 10. [Lucas's theorem] allow us to computer binomial
67//! coefficients modulo some prime number. If we compute the coefficients modulo 2 and modulo 5
68//! then we can use the [Chinese remainder theorem] to find the result modulo 10.
69//!
70//! [prefix sum]: https://en.wikipedia.org/wiki/Prefix_sum
71//! [upper triangular matrix]: https://en.wikipedia.org/wiki/Triangular_matrix
72//! [triangular number]: https://en.wikipedia.org/wiki/Triangular_number
73//! [tetrahedral number]: https://en.wikipedia.org/wiki/Tetrahedral_number
74//! [binomial coefficient]: https://en.wikipedia.org/wiki/Binomial_coefficient
75//! [grows rather large]: https://oeis.org/A017763/b017763.txt
76//! [Lucas's theorem]: https://en.wikipedia.org/wiki/Lucas%27s_theorem
77//! [Chinese remainder theorem]: https://en.wikipedia.org/wiki/Chinese_remainder_theorem
78use crate::util::parse::*;
79use crate::util::slice::*;
80
81/// Lookup table for first five rows of
82/// [Pascal's triangle](https://en.wikipedia.org/wiki/Pascal%27s_triangle).
83/// A convention specifically for Lukas's theorem is that if `n` < `k` then the value is 0.
84const PASCALS_TRIANGLE: [[usize; 5]; 5] =
85 [[1, 0, 0, 0, 0], [1, 1, 0, 0, 0], [1, 2, 1, 0, 0], [1, 3, 3, 1, 0], [1, 4, 6, 4, 1]];
86
87pub fn parse(input: &str) -> Vec<u8> {
88 input.trim().bytes().map(u8::to_decimal).collect()
89}
90
91pub fn part1(input: &[u8]) -> i32 {
92 let size = input.len();
93 let mid = size / 2;
94 let end = size - 1;
95
96 let mut current = &mut input.iter().copied().map(i32::from).collect::<Vec<_>>();
97 let mut next = &mut vec![0; size];
98
99 for _ in 0..100 {
100 // Brute force the first half of the input.
101 for i in 0..mid {
102 let phase = i + 1;
103 let skip = 2 * phase;
104 let mut remaining = ¤t[i..];
105 let mut total: i32 = 0;
106
107 while !remaining.is_empty() {
108 let take = phase.min(remaining.len());
109 total += &remaining[..take].iter().sum();
110
111 if remaining.len() <= skip {
112 break;
113 }
114 remaining = &remaining[skip..];
115
116 let take = phase.min(remaining.len());
117 total -= &remaining[..take].iter().sum();
118
119 if remaining.len() <= skip {
120 break;
121 }
122 remaining = &remaining[skip..];
123 }
124
125 next[i] = total.abs() % 10;
126 }
127
128 // Use a faster reverse prefix sum approach for the second half of the input.
129 next[end] = current[end];
130
131 for i in (mid..end).rev() {
132 next[i] = (current[i] + next[i + 1]) % 10;
133 }
134
135 (current, next) = (next, current);
136 }
137
138 current[..8].fold_decimal()
139}
140
141pub fn part2(input: &[u8]) -> usize {
142 let digits: Vec<_> = input.iter().copied().map(usize::from).collect();
143 let start = digits[..7].fold_decimal();
144
145 // This approach will only work if the index is in the second half of the input.
146 let size = digits.len();
147 let lower = size * 5_000;
148 let upper = size * 10_000;
149 assert!(lower <= start && start < upper);
150
151 compute(&digits, size, start, upper)
152}
153
154#[cfg(not(feature = "simd"))]
155fn compute(digits: &[usize], size: usize, start: usize, upper: usize) -> usize {
156 let mut coefficients = [0; 8];
157 let mut result = [0; 8];
158
159 for (k, index) in (start..upper).enumerate() {
160 coefficients.rotate_right(1);
161 coefficients[0] = binomial_mod_10(k + 99, k);
162
163 let next = digits[index % size];
164 result.iter_mut().zip(coefficients).for_each(|(r, c)| *r += next * c);
165 }
166
167 result.iter_mut().for_each(|r| *r %= 10);
168 result.fold_decimal()
169}
170
171#[cfg(feature = "simd")]
172fn compute(digits: &[usize], size: usize, start: usize, upper: usize) -> usize {
173 use std::simd::Mask;
174 use std::simd::Simd;
175
176 let mask: Mask<i32, 8> = Mask::from_bitmask(1);
177 let tens: Simd<u32, 8> = Simd::splat(10);
178
179 let mut coefficients: Simd<u32, 8> = Simd::splat(0);
180 let mut result: Simd<u32, 8> = Simd::splat(0);
181
182 for (k, index) in (start..upper).enumerate() {
183 coefficients = mask.select(
184 Simd::splat(binomial_mod_10(k + 99, k) as u32),
185 coefficients.rotate_elements_right::<1>(),
186 );
187
188 let next = Simd::splat(digits[index % size] as u32);
189 result += next * coefficients;
190 }
191
192 (result % tens).to_array().fold_decimal() as usize
193}
194
195/// Computes C(n, k) % 2
196///
197/// This collapses to a special case of a product of only 4 possible values:
198///
199/// * `C(0, 0) = 1`
200/// * `C(1, 0) = 1`
201/// * `C(1, 1) = 1`
202/// * `C(0, 1) = 0`
203///
204/// So the final value will always be one or zero. The fourth zero case happens when `k` has a
205/// bit not present in `n` so we can compute the final value using bitwise logic.
206#[inline]
207fn binomial_mod_2(n: usize, k: usize) -> usize {
208 (k & !n == 0) as usize
209}
210
211/// Computes C(n, k) % 5
212///
213/// If `k` is zero then the remaining coefficients are 1 so we can exit early.
214/// If `r` is zero then the total result is also zero so we can exit early.
215/// To save some time we only take the result modulo 5 at the end.
216#[inline]
217fn bimonial_mod_5(mut n: usize, mut k: usize) -> usize {
218 let mut r = 1;
219
220 while k > 0 && r > 0 {
221 r *= PASCALS_TRIANGLE[n % 5][k % 5];
222 n /= 5;
223 k /= 5;
224 }
225
226 r % 5
227}
228
229/// Computes C(n, k) % 10
230///
231/// Solving the Chinese remainder theorem for the special case of two congruences:
232///
233/// ```none
234/// x ≡ a₁ (mod n₁) ≡ a₁ (mod 2)
235/// x ≡ a₂ (mod n₂) ≡ a₂ (mod 5)
236/// N = n₁n₂ = 10
237/// y₁ = N / n₁ = 5
238/// y₂ = N / n₂ = 2
239/// z₁ = y₁⁻¹ mod n₁ = 5⁻¹ mod 2 = 1
240/// z₂ = y₂⁻¹ mod n₂ = 2⁻¹ mod 5 = 3
241/// x ≡ a₁y₁z₁ + a₂y₂z₂ (mod 10) ≡ 5a₁ + 6a₂ (mod 10)
242/// ```
243#[inline]
244fn binomial_mod_10(n: usize, k: usize) -> usize {
245 5 * binomial_mod_2(n, k) + 6 * bimonial_mod_5(n, k)
246}