#!/usr/bin/env python
# coding: utf-8

# # Off to a flying start?
#
# - https://adventofcode.com/2023/day/1
#
# Not sure if this is a good idea, but we are to help the elves launch us into the sky. Time to whip out the regexes to extract digits from text!
#
# To get the regex engine to do most of the work I used the following pattern:
#
# - `^\D*` to match any number of non-digits at the start of a line.
# - `(?P<first>\d)` to capture _one_ digit as a named group ("first"); this is the first digit on the line.
# - `.*?` to match any number of intervening characters. Note the `?` in the pattern; this makes the `*` repetition _non-greedy_, which is important for the _next_ pattern. More below.
# - `(?P<last>\d)?` to capture _one_ digit as a named group ("last"); this is going to be the last digit on the line. Note that this is optional, see below!
# - `\D*$` to match any number of non-digits at the end of the line.
#
# I also switched on multi-line mode with `re.MULTILINE`, so `^` and `$` match the start and end of each line, respectively.
#
# Because the pattern uses the start and end of line anchors together with the `\D` non-digit character class, the only places that the `\d` groups can match are the first and last digits on each line.
#
# Now all we have to do is to use `pattern.finditer()` to extract all the first and last digits and we can then turn these into 2-digit numbers.
#
# As is common, there is a small detail in the puzzle description that is really important: the last digit and first digit can be _one and the same_. To account for this I made the "last" group optional and, if the group is empty, fall back to the "first" digit. Because the group is optional, you need to make sure that the `.*` pattern between the first and last digit groups doesn't include the last digit itself; you do this by making that repetition group _non-greedy_ with the `?` modifier.
# That way the regex engine will use the minimum number of characters from the input text to match against, leaving the last digit for the "last" group.

# In[1]:


import re
import typing as t

# One match per line that contains a digit: "first" is the first digit on the
# line, "last" the final one (unset when the line holds only a single digit).
digits = re.compile(r"^\D*(?P<first>\d).*?(?P<last>\d)?\D*$", flags=re.MULTILINE)


def calibration_values(text: str) -> t.Iterator[int]:
    """Yield the two-digit calibration value found by each pattern match in *text*.

    The value is formed from the first and last digit of a match; when the
    "last" group did not participate, the first digit is used twice. Text in
    which the pattern finds no digit produces no values.
    """
    for match in digits.finditer(text):
        # `match["last"]` is None when there is only one digit to work with.
        yield int(match["first"] + (match["last"] or match["first"]))


test = """\
1abc2
pqr3stu8vwx
a1b2c3d4e5f
treb7uchet
"""

assert list(calibration_values(test)) == [12, 38, 15, 77]
assert sum(calibration_values(test)) == 142


# In[2]:


if __name__ == "__main__":
    # Only fetch the real puzzle input when executed directly, so importing
    # this module has no I/O side effects and does not require `aocd`.
    import aocd

    print("Part 1:", sum(calibration_values(aocd.get_data(day=1, year=2023))))


# # Try not to fall down flat
#
# Part two is specifically designed to trip up people like me that wanted to find the digits using a single regex pattern. That's because the digit _words_ can't be excluded from the patterns matching the start and end of each line.
#
# The simplest solution would be to just split the text into lines, then use a regex to match _just_ a digit word or digit, and take the first and last matches for each line. `re.findall()` would do the job just fine.
#
# But, since Python 3.11 the `re` module supports [atomic grouping](https://www.regular-expressions.info/atomic.html), and these let us steer the regex engine to not back-track when matching digits. This is the pattern to use:
#
# - `^.*?` to match the minimal number of characters at the start. This starts a back-tracking process until the next pattern matches.
# - `(?>(?P<first>\d|one|two|...))` (with all the digit words). Here `(?>...)` is the atomic group. As the regex engine scans the text, the first time a digit word or digit is encountered, the engine will _no longer backtrack_ and match the rest of the text using the remaining patterns.
# - `(?>.*(?P<last>\d|one|two|...))?` to match the last digit word or digit.
This is an optional atomic group that starts with `.*`, a _greedy_ pattern that makes the engine use backtracking from the end of the line. If this pattern matches, it will have found the last digit in the group and thus stops backtracking at that point. # - `.*$` to match the tail of the line. # # To map from digit words and digits to integer values, I switched to using a dictionary. The keys are the digit words and single digits. The words are also helpful in automating the creation of the regex pattern. # # In[3]: words = { "one": 1, "two": 2, "three": 3, "four": 4, "five": 5, "six": 6, "seven": 7, "eight": 8, "nine": 9, } digit_pattern = "|".join([r"\d", *words]) full_digits = re.compile( rf"^.*?(?>(?P{digit_pattern}))(?>.*(?P{digit_pattern}))?.*$", flags=re.MULTILINE, ) digits = {d: int(d) for d in "123456789"} | words def full_calibration_values(text: str) -> t.Iterator[int]: for match in full_digits.finditer(text): yield 10 * digits[match["first"]] + digits[match["last"] or match["first"]] test = """\ two1nine eightwothree abcone2threexyz xtwone3four 4nineeightseven2 zoneight234 7pqrstsixteen """ assert list(full_calibration_values(test)) == [29, 83, 13, 24, 42, 14, 76] assert sum(full_calibration_values(test)) == 281 # In[4]: print("Part 2:", sum(full_calibration_values(aocd.get_data(day=1, year=2023))))