aoc/year2018/
day11.rs

1//! # Chronal Charge
2//!
3//! Building a [summed-area table](https://en.wikipedia.org/wiki/Summed-area_table) allows us
4//! to compute the power of any rectangle with only 4 array lookups.
5//!
6//! This makes the total complexity `O(n³)`, however the calculation for each size is independent
7//! so we can parallelize over multiple threads.
8use crate::util::parse::*;
9use crate::util::thread::*;
10
11pub struct Result {
12    size: usize,
13    x: usize,
14    y: usize,
15    power: i32,
16}
17
18pub fn parse(input: &str) -> Vec<Result> {
19    let grid_serial_number: i32 = input.signed();
20
21    // Build Summed-area table. Add a little extra buffer to the end for the SIMD variant.
22    let mut sat = vec![0; 301 * 301 + 32];
23
24    for y in 1..301 {
25        for x in 1..301 {
26            let rack_id = x + 10;
27
28            let mut power_level = rack_id * y;
29            power_level += grid_serial_number;
30            power_level *= rack_id;
31            power_level = (power_level / 100) % 10;
32            power_level -= 5;
33
34            let index = (301 * y + x) as usize;
35            sat[index] = power_level + sat[index - 1] + sat[index - 301] - sat[index - 302];
36        }
37    }
38
39    // Use as many cores as possible to parallelize the search.
40    // Smaller sizes take more time so use work stealing to keep all cores busy.
41    let items: Vec<_> = (1..301).collect();
42    let result = spawn_parallel_iterator(&items, |iter| {
43        iter.map(|&size| square(&sat, size)).collect::<Vec<_>>()
44    });
45    result.into_iter().flatten().collect()
46}
47
48pub fn part1(input: &[Result]) -> String {
49    let Result { x, y, .. } = input.iter().find(|r| r.size == 3).unwrap();
50    format!("{x},{y}")
51}
52
53pub fn part2(input: &[Result]) -> String {
54    let Result { size, x, y, .. } = input.iter().max_by_key(|r| r.power).unwrap();
55    format!("{x},{y},{size}")
56}
57
58/// Find the (x,y) coordinates and max power for a square of the specified size.
59#[cfg(not(feature = "simd"))]
60fn square(sat: &[i32], size: usize) -> Result {
61    let mut max_power = i32::MIN;
62    let mut max_x = 0;
63    let mut max_y = 0;
64
65    for y in size..301 {
66        for x in size..301 {
67            let index = 301 * y + x;
68
69            let power =
70                sat[index] - sat[index - size] - sat[index - 301 * size] + sat[index - 302 * size];
71
72            if power > max_power {
73                max_power = power;
74                max_x = x - size + 1;
75                max_y = y - size + 1;
76            }
77        }
78    }
79
80    Result { size, x: max_x, y: max_y, power: max_power }
81}
82
83/// Same as the scalar version but processing 16 lanes simultaneously.
84#[cfg(feature = "simd")]
85fn square(sat: &[i32], size: usize) -> Result {
86    use std::simd::cmp::SimdPartialOrd as _;
87    use std::simd::*;
88
89    const LANE_WIDTH: usize = 16;
90    type Vector = Simd<i32, LANE_WIDTH>;
91
92    let mut max_power = i32::MIN;
93    let mut max_x = 0;
94    let mut max_y = 0;
95
96    for y in size..301 {
97        for x in (size..301).step_by(LANE_WIDTH) {
98            let index = 301 * y + x;
99
100            let power: Vector = Simd::from_slice(&sat[index..])
101                - Simd::from_slice(&sat[index - size..])
102                - Simd::from_slice(&sat[index - 301 * size..])
103                + Simd::from_slice(&sat[index - 302 * size..]);
104
105            if power.simd_gt(Simd::splat(max_power)).any() {
106                let limit = 301 - x;
107                for (offset, power) in power.to_array().into_iter().enumerate().take(limit) {
108                    if power > max_power {
109                        max_power = power;
110                        max_x = x - size + 1 + offset;
111                        max_y = y - size + 1;
112                    }
113                }
114            }
115        }
116    }
117
118    Result { size, x: max_x, y: max_y, power: max_power }
119}