aoc/year2022/
day20.rs

1//! # Grove Positioning System
2//!
3//! We store the numbers in an array of `vec`s. The initial size of each vector is 20
4//! so that numbers are spread as evenly as possible.
5//!
6//! Using multiple leaf `vec`s greatly reduces the time to insert, remove and find
7//! numbers, compared to storing all numbers in a single flat `vec`. Some further optimizations:
8//! * The first and second level indices of a number change only when it moves, so these can be
9//!   stored in a lookup array for fast access.
10//! * The size of each first level `vec` is the sum of the second level `vec`s contained
11//!   inside. This is stored in the `skip` array to prevent recomputing on each move.
12//!
13//! This implementation is both faster and simpler than the previous version (preserved in the
14//! commit history) that used an [order statistic tree](https://en.wikipedia.org/wiki/Order_statistic_tree),
15//! although perhaps adding [balancing rotations](https://en.wikipedia.org/wiki/Tree_rotation)
16//! to the tree would make it faster.
17//!
18//! Leaf `vec`s are padded to a size modulo 64 to speed up searching for numbers. A SIMD variant
19//! can search for 64 numbers simultaneously.
20use crate::util::parse::*;
21use std::array::from_fn;
22use std::iter::repeat_n;
23
24struct PaddedVec {
25    size: usize,
26    vec: Vec<u16>,
27}
28
29pub fn parse(input: &str) -> Vec<i64> {
30    input.iter_signed().collect()
31}
32
33pub fn part1(input: &[i64]) -> i64 {
34    decrypt(input, 1, 1)
35}
36
37pub fn part2(input: &[i64]) -> i64 {
38    decrypt(input, 811589153, 10)
39}
40
41fn decrypt(input: &[i64], key: i64, rounds: usize) -> i64 {
42    // Important nuance, size is one less because we don't consider the moving number.
43    let size = input.len() - 1;
44    // Another nuance, input contain duplicate numbers, so use index to refer to each number uniquely.
45    let indices: Vec<_> = (0..input.len() as u16).collect();
46    // Pre-process the numbers, coverting any negative indices to positive indices that will wrap.
47    // For example, -1 becomes 4998.
48    let numbers: Vec<_> =
49        input.iter().map(|&n| (n * key).rem_euclid(size as i64) as usize).collect();
50    // Store location of each number within `mixed` for faster lookup.
51    let mut lookup = Vec::with_capacity(input.len());
52    // Size of each block of 16 elements for faster lookup.
53    let mut skip = [0; 16];
54    // Break 5000 numbers into roughly equals chunks.
55    let mut mixed: [_; 256] = from_fn(|_| PaddedVec { size: 0, vec: Vec::with_capacity(128) });
56
57    for (second, slice) in indices.chunks(input.len().div_ceil(256)).enumerate() {
58        let size = slice.len();
59
60        mixed[second].size = size;
61        mixed[second].vec.resize(size.next_multiple_of(64), 0);
62        mixed[second].vec[..size].copy_from_slice(slice);
63
64        lookup.extend(repeat_n(second, size));
65        skip[second / 16] += size;
66    }
67
68    for _ in 0..rounds {
69        'mix: for index in 0..input.len() {
70            // Quickly find the leaf vector storing the number.
71            let number = numbers[index];
72            let second = lookup[index];
73            let first = second / 16;
74
75            // Third level changes as other numbers are added and removed,
76            // so needs to be checked each time.
77            let third = position(&mixed[second], index as u16);
78
79            // Find the offset of the number by adding the size of all previous `vec`s.
80            let position = third
81                + skip[..first].iter().sum::<usize>()
82                + mixed[16 * first..second].iter().map(|v| v.size).sum::<usize>();
83            // Update our position, wrapping around if necessary.
84            let mut next = (position + number) % size;
85
86            // Remove number from current leaf vector, also updating the first level size.
87            mixed[second].size -= 1;
88            mixed[second].vec.remove(third);
89            mixed[second].vec.push(0);
90            skip[first] -= 1;
91
92            // Find our new destination, by checking `vec`s in order until the total elements
93            // are greater than our new index.
94            for (first, outer) in mixed.chunks_exact_mut(16).enumerate() {
95                if next > skip[first] {
96                    next -= skip[first];
97                } else {
98                    for (second, inner) in outer.iter_mut().enumerate() {
99                        if next > inner.size {
100                            next -= inner.size;
101                        } else {
102                            // Insert number into its new home.
103                            inner.size += 1;
104                            inner.vec.insert(next, index as u16);
105                            inner.vec.resize(inner.size.next_multiple_of(64), 0);
106                            // Update location.
107                            skip[first] += 1;
108                            lookup[index] = 16 * first + second;
109                            continue 'mix;
110                        }
111                    }
112                }
113            }
114        }
115    }
116
117    let indices: Vec<_> =
118        mixed.into_iter().flat_map(|pv| pv.vec.into_iter().take(pv.size)).collect();
119    let zeroth = indices.iter().position(|&i| input[i as usize] == 0).unwrap();
120
121    [1000, 2000, 3000]
122        .iter()
123        .map(|offset| (zeroth + offset) % indices.len())
124        .map(|index| input[indices[index] as usize] * key)
125        .sum()
126}
127
128/// The compiler optimizes the position search when the size of the chunk is known.
129#[cfg(not(feature = "simd"))]
130#[inline]
131fn position(haystack: &PaddedVec, needle: u16) -> usize {
132    for (base, slice) in haystack.vec.chunks_exact(64).enumerate() {
133        if let Some(offset) = slice.iter().position(|&i| i == needle) {
134            return 64 * base + offset;
135        }
136    }
137
138    unreachable!()
139}
140
141/// Search 64 lanes simultaneously.
142#[cfg(feature = "simd")]
143#[inline]
144fn position(haystack: &PaddedVec, needle: u16) -> usize {
145    use std::simd::cmp::SimdPartialEq as _;
146    use std::simd::*;
147
148    for (base, slice) in haystack.vec.chunks_exact(64).enumerate() {
149        if let Some(offset) =
150            Simd::<u16, 64>::from_slice(slice).simd_eq(Simd::splat(needle)).first_set()
151        {
152            return 64 * base + offset;
153        }
154    }
155
156    unreachable!()
157}