// aoc/year2022/day20.rs
//! # Grove Positioning System
//!
//! We store the numbers in an array of `vec`s. The initial size of each vector is 20
//! so that numbers are spread as evenly as possible.
//!
//! Using multiple leaf `vec`s greatly reduces the time to insert, remove and find
//! numbers, compared to storing all numbers in a single flat `vec`. Some further optimizations:
//! * The first and second level indices of a number change only when it moves, so these can be
//!   stored in a lookup array for fast access.
//! * The size of each first level `vec` is the sum of the second level `vec`s contained
//!   inside. This is stored in the `skip` array to prevent recomputing on each move.
//!
//! This implementation is both faster and simpler than the previous version (preserved in the
//! commit history) that used an [order statistic tree](https://en.wikipedia.org/wiki/Order_statistic_tree),
//! although perhaps adding [balancing rotations](https://en.wikipedia.org/wiki/Tree_rotation)
//! to the tree would make it faster.
//!
//! Leaf `vec`s are padded to a size modulo 64 to speed up searching for numbers. A SIMD variant
//! can search for 64 numbers simultaneously.
use crate::util::parse::*;
use std::array::from_fn;
use std::iter::repeat_n;
23
/// A leaf vector of number indices whose backing storage is padded with zeroes to a
/// multiple of 64 elements, so that `position` can search in fixed-size 64-wide chunks.
struct PaddedVec {
    // Number of real elements; `vec` is usually longer due to zero padding.
    size: usize,
    // Indices identifying each number uniquely (`u16` fits the puzzle input size),
    // followed by zero padding up to the next multiple of 64.
    vec: Vec<u16>,
}
28
29pub fn parse(input: &str) -> Vec<i64> {
30 input.iter_signed().collect()
31}
32
33pub fn part1(input: &[i64]) -> i64 {
34 decrypt(input, 1, 1)
35}
36
37pub fn part2(input: &[i64]) -> i64 {
38 decrypt(input, 811589153, 10)
39}
40
/// Mixes the numbers `rounds` times, treating each value as `value * key`, then returns the
/// grove coordinates: the sum of the values 1000, 2000 and 3000 positions after zero.
fn decrypt(input: &[i64], key: i64, rounds: usize) -> i64 {
    // Important nuance: size is one less because we don't consider the moving number.
    let size = input.len() - 1;
    // Another nuance: input contains duplicate numbers, so use index to refer to each number uniquely.
    let indices: Vec<_> = (0..input.len() as u16).collect();
    // Pre-process the numbers, converting any negative offsets to positive offsets that will wrap.
    // For example, -1 becomes 4998.
    let numbers: Vec<_> =
        input.iter().map(|&n| (n * key).rem_euclid(size as i64) as usize).collect();
    // Store location (second-level index) of each number within `mixed` for faster lookup.
    let mut lookup = Vec::with_capacity(input.len());
    // First-level sizes: element count of each block of 16 leaf `vec`s, for faster lookup.
    let mut skip = [0; 16];
    // Break 5000 numbers into roughly equal chunks across 256 leaf `vec`s.
    let mut mixed: [_; 256] = from_fn(|_| PaddedVec { size: 0, vec: Vec::with_capacity(128) });

    // Distribute the indices evenly over the leaves, padding each leaf to a multiple of 64.
    for (second, slice) in indices.chunks(input.len().div_ceil(256)).enumerate() {
        let size = slice.len();

        mixed[second].size = size;
        mixed[second].vec.resize(size.next_multiple_of(64), 0);
        mixed[second].vec[..size].copy_from_slice(slice);

        lookup.extend(repeat_n(second, size));
        skip[second / 16] += size;
    }

    for _ in 0..rounds {
        'mix: for index in 0..input.len() {
            // Quickly find the leaf vector storing the number.
            let number = numbers[index];
            let second = lookup[index];
            let first = second / 16;

            // Third level (offset inside the leaf) changes as other numbers are added and
            // removed, so needs to be checked each time.
            let third = position(&mixed[second], index as u16);

            // Find the absolute offset of the number by adding the size of all previous `vec`s:
            // whole first-level blocks via `skip`, then the leaves before ours in this block.
            let position = third
                + skip[..first].iter().sum::<usize>()
                + mixed[16 * first..second].iter().map(|v| v.size).sum::<usize>();
            // Update our position, wrapping around if necessary (modulo `size`, the count
            // of numbers excluding the one that is moving).
            let mut next = (position + number) % size;

            // Remove number from current leaf vector, also updating the first level size.
            // Pushing a 0 keeps the padded length a multiple of 64.
            mixed[second].size -= 1;
            mixed[second].vec.remove(third);
            mixed[second].vec.push(0);
            skip[first] -= 1;

            // Find our new destination, by checking `vec`s in order until the total elements
            // are greater than or equal to our new index (`next == size` means "insert at the
            // end", so the comparisons use strict `>`).
            for (first, outer) in mixed.chunks_exact_mut(16).enumerate() {
                if next > skip[first] {
                    next -= skip[first];
                } else {
                    for (second, inner) in outer.iter_mut().enumerate() {
                        if next > inner.size {
                            next -= inner.size;
                        } else {
                            // Insert number into its new home, re-padding to a multiple of 64.
                            inner.size += 1;
                            inner.vec.insert(next, index as u16);
                            inner.vec.resize(inner.size.next_multiple_of(64), 0);
                            // Update location.
                            skip[first] += 1;
                            lookup[index] = 16 * first + second;
                            continue 'mix;
                        }
                    }
                }
            }
        }
    }

    // Flatten the leaves (dropping padding) back into a single ordered list of indices.
    let indices: Vec<_> =
        mixed.into_iter().flat_map(|pv| pv.vec.into_iter().take(pv.size)).collect();
    let zeroth = indices.iter().position(|&i| input[i as usize] == 0).unwrap();

    [1000, 2000, 3000]
        .iter()
        .map(|offset| (zeroth + offset) % indices.len())
        .map(|index| input[indices[index] as usize] * key)
        .sum()
}
127
128/// The compiler optimizes the position search when the size of the chunk is known.
129#[cfg(not(feature = "simd"))]
130#[inline]
131fn position(haystack: &PaddedVec, needle: u16) -> usize {
132 for (base, slice) in haystack.vec.chunks_exact(64).enumerate() {
133 if let Some(offset) = slice.iter().position(|&i| i == needle) {
134 return 64 * base + offset;
135 }
136 }
137
138 unreachable!()
139}
140
/// Returns the offset of `needle` within the leaf vector, comparing 64 lanes simultaneously.
///
/// The caller guarantees the needle is present and that the vector length is a multiple of 64.
#[cfg(feature = "simd")]
#[inline]
fn position(haystack: &PaddedVec, needle: u16) -> usize {
    use std::simd::cmp::SimdPartialEq as _;
    use std::simd::*;

    // Broadcast the needle once; each chunk is compared against it lane-by-lane.
    let target: Simd<u16, 64> = Simd::splat(needle);

    for (base, slice) in haystack.vec.chunks_exact(64).enumerate() {
        let lanes = Simd::from_slice(slice);
        let matches = lanes.simd_eq(target);
        if let Some(offset) = matches.first_set() {
            return 64 * base + offset;
        }
    }

    unreachable!()
}
157}