ftmemsim-valgrind/cachegrind/cg_annotate.in

#! /usr/bin/env python3
# pyright: strict

# --------------------------------------------------------------------
# --- Cachegrind's annotator.                       cg_annotate.in ---
# --------------------------------------------------------------------

# This file is part of Cachegrind, a Valgrind tool for cache
# profiling programs.
#
# Copyright (C) 2002-2023 Nicholas Nethercote
#    njn@valgrind.org
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
# The GNU General Public License is contained in the file COPYING.

"""
This script reads Cachegrind output files and produces human-readable reports.
"""

# Use `make ann` to "build" this script every time it is changed. This runs the
# formatters, type-checkers, and linters on `cg_annotate.in` and then generates
# `cg_annotate`.
#
# Python versions: Currently this script targets Python 3.9 and later versions.
# Consequences of this:
# - No use of `TypeAlias` for explicit type aliases, which requires 3.10.
#
# The following Python tools are used. All can be installed with `pip3 install
# $NAME`, except `cProfile` which is built into Python.
#
# - Formatters:
#   - `black`, for general formatting. This avoids the need for style checkers
#     like `flake8`. Note that `black` allows a max line length of 88, which is
#     a mild but common PEP-8 violation.
#   - `isort`, for import sorting.
#
# - Type-checkers:
#   - `mypy --strict`. This is the most commonly used Python type checker.
#   - `pyright`. This is another good type checker. The `pyright: strict`
#     comment above forces strict checking.
#   - Sometimes one type-checker will complain about something the other does
#     not. The goal is to keep both type checkers happy.
#
# - Linters:
#   - `ruff`. Sometimes useful, and very fast to run.
#   - `pylint`. Sometimes annoying, sometimes useful. The `pylintrc`
#     modifies/disables the more annoying lints.
#
# - Profilers:
#   - `cProfile` + `snakeviz`: Typically run with
#     `python3 -m cProfile -o cg.prof cg_annotate $INPUT && snakeviz cg.prof`.
#   - `scalene`. Typically run with `scalene ./cg_annotate $INPUT`.
#
# - Packager:
#   - `cp` is used for distribution. This is possible because this program is a
#     single file and only uses the Python Standard Library. This avoids the
#     need for any of the million different Python package management tools.


from __future__ import annotations

import os
import re
import sys
from argparse import ArgumentParser, BooleanOptionalAction, Namespace
from collections import defaultdict
from typing import Callable, DefaultDict, NewType, NoReturn, TextIO


class Args(Namespace):
    """
    Typed view of the parsed command line.

    Every annotated field below corresponds to one option or positional
    argument registered in `parse`. None of these fields are modified after
    arg parsing finishes.
    """

    show: list[str]
    sort: list[str]
    threshold: float  # a percentage
    show_percs: bool
    auto: bool
    context: int
    include: list[str]
    cgout_filename: list[str]
    src_filenames: list[str]

    @staticmethod
    def parse() -> Args:
        """Build the argument parser and parse the command line into `Args`."""

        # Note: argparse uses the `__name__` of a `type=` callable in its
        # "invalid ... value" messages, so the names of the two converters
        # below are user-visible.
        def comma_separated_list(values: str) -> list[str]:
            return values.split(",")

        def threshold(n: str) -> float:
            # A `ValueError` (from `float` or raised here) makes argparse
            # reject the value.
            value = float(n)
            if not 0 <= value <= 20:
                raise ValueError
            return value

        def add_bool_argument(
            parser: ArgumentParser, name: str, help_text: str
        ) -> None:
            """
            Register a boolean option whose default is true.

            Four spellings are accepted: `--foo`, `--no-foo`, `--foo=yes` and
            `--foo=no`. The `=yes`/`=no` spellings are what the old Perl
            version of `cg_annotate` supported; they are deprecated.
            """
            option = f"--{name}"
            attr = name.replace("-", "_")

            # This must be registered first: it is the action that supplies
            # the `True` default for `attr`.
            #
            # Note: the default value is always printed with
            # `BooleanOptionalAction`, due to an argparse bug:
            # https://github.com/python/cpython/issues/83137.
            parser.add_argument(
                option,
                default=True,
                action=BooleanOptionalAction,
                help=help_text,
            )
            parser.add_argument(
                f"{option}=yes",
                dest=attr,
                action="store_true",
                help=f"(deprecated) same as --{name}",
            )
            parser.add_argument(
                f"{option}=no",
                dest=attr,
                action="store_false",
                help=f"(deprecated) same as --no-{name}",
            )

        parser = ArgumentParser(description="Process Cachegrind output files.")

        parser.add_argument(
            "--version", action="version", version="%(prog)s-@VERSION@"
        )

        # Event selection.
        parser.add_argument(
            "--show",
            type=comma_separated_list,
            metavar="A,B,C",
            help="only show figures for events A,B,C (default: all events)",
        )
        parser.add_argument(
            "--sort",
            type=comma_separated_list,
            metavar="A,B,C",
            help="sort functions by events A,B,C (default: event column order)",
        )

        # Output tuning.
        parser.add_argument(
            "--threshold",
            type=threshold,
            default=0.1,
            metavar="N:[0,20]",
            help="only show functions with more than N%% of primary sort event "
            "counts (default: %(default)s)",
        )
        add_bool_argument(
            parser,
            "show-percs",
            "show a percentage for each non-zero count",
        )
        add_bool_argument(
            parser,
            "auto",
            "annotate all source files containing functions that reached the "
            "event count threshold",
        )
        parser.add_argument(
            "--context",
            type=int,
            default=8,
            metavar="N",
            help="print N lines of context before and after annotated lines "
            "(default: %(default)s)",
        )
        parser.add_argument(
            "-I",
            "--include",
            action="append",
            default=[],
            metavar="D",
            help="add D to the list of searched source file directories",
        )

        # Positional arguments.
        parser.add_argument(
            "cgout_filename",
            nargs=1,
            metavar="cachegrind-out-file",
            help="file produced by Cachegrind",
        )
        parser.add_argument(
            "src_filenames",
            nargs="*",
            metavar="source-files",
            help="source files to annotate (usually not needed due to --auto)",
        )

        return parser.parse_args(namespace=Args())


# Args are stored in a global for easy access. Note that this statement is at
# module level, so the command line is parsed as soon as this file is executed.
args = Args.parse()


# A single instance of this class is constructed, from `args` and the `events:`
# line in the cgout file.
class Events:
    """
    The events named on the `events:` line of the cgout file, together with
    the subsets of them selected by --show and --sort.
    """

    # The event names.
    events: list[str]

    # The number of event names, i.e. `len(events)`.
    num_events: int

    # The order in which we must traverse events for --show. Can be shorter
    # than `events`.
    show_events: list[str]

    # Like `show_events`, but indices into `events`, rather than names.
    show_indices: list[int]

    # The order in which we must traverse events for --sort. Can be shorter
    # than `events`.
    sort_events: list[str]

    # Like `sort_events`, but indices into `events`, rather than names.
    sort_indices: list[int]

    def __init__(self, text: str) -> None:
        """
        Split `text` (the payload of the `events:` line) into event names and
        resolve --show/--sort against them. An unknown event name in either
        option is reported via `die`.
        """
        names = text.split()
        self.events = names
        self.num_events = len(names)

        # A temporary dict mapping each event name to its index, [0, n-1].
        position = {name: idx for idx, name in enumerate(names)}

        # Without --show, every event is shown in the standard order.
        # Otherwise each requested event must be a known one.
        if not args.show:
            self.show_events = names
        else:
            for event in args.show:
                if event not in position:
                    die(f"--show event `{event}` did not appear in `events:` line")
            self.show_events = args.show

        # Likewise for --sort.
        if not args.sort:
            self.sort_events = names
        else:
            for event in args.sort:
                if event not in position:
                    die(f"--sort event `{event}` did not appear in `events:` line")
            self.sort_events = args.sort

        self.show_indices = [position[event] for event in self.show_events]
        self.sort_indices = [position[event] for event in self.sort_events]

    def mk_cc(self, text: str) -> Cc:
        """
        Build a `Cc` from whitespace-separated integer counts.

        Fewer counts than events is allowed; the missing trailing counts are
        treated as zero. Raises a `ValueError` exception on syntax error, or
        if there are more counts than events.
        """
        # This is slightly faster than a list comprehension.
        counts = list(map(int, text.split()))

        missing = self.num_events - len(counts)
        if missing < 0:
            raise ValueError

        # Add zeroes at the end for any missing numbers.
        if missing:
            counts.extend([0] * missing)

        return Cc(counts)

    def mk_empty_cc(self) -> Cc:
        """Build a `Cc` holding a zero count for every event."""
        # This is much faster than a list comprehension.
        zeroes = [0] * self.num_events
        return Cc(zeroes)


class Cc:
    """
    A plain holder for a list of counts.

    It has no notion of events, i.e. of what each count stands for. Only the
    basic operations `__iadd__` and `__eq__` are provided; anything more must
    be done elsewhere. Instances are constructed with `Events.mk_cc` and
    `Events.mk_empty_cc`.
    """

    # Always the same length as `Events.events`.
    counts: list[int]

    def __init__(self, counts: list[int]) -> None:
        # The list is stored as given, not copied.
        self.counts = counts

    def __repr__(self) -> str:
        return str(self.counts)

    def __eq__(self, other: object) -> bool:
        # Two CCs are equal when their counts are equal; anything that is not
        # a `Cc` is left for Python's fallback comparison to handle.
        if isinstance(other, Cc):
            return self.counts == other.counts
        return NotImplemented

    def __iadd__(self, other: Cc) -> Cc:
        # Element-wise, in-place addition of `other`'s counts.
        mine = self.counts
        for idx, delta in enumerate(other.counts):
            mine[idx] = mine[idx] + delta
        return self


# A paired filename and function name, i.e. `(filename, function_name)`.
Flfn = NewType("Flfn", tuple[str, str])

# Per-function CCs, keyed by `Flfn`.
# Note: not using `TypeAlias`. See "Python versions" comment above.
DictFlfnCc = DefaultDict[Flfn, Cc]

# Per-line CCs, organised by filename and line number: the outer dict is keyed
# by filename, and each inner dict is keyed by line number.
# Note: not using `TypeAlias`. See "Python versions" comment above.
DictLineCc = DefaultDict[int, Cc]
DictFlDictLineCc = DefaultDict[str, DictLineCc]


def die(msg: str) -> NoReturn:
    """Report `msg` on stderr as a `cg_annotate` error, then exit with status 1."""
    print("cg_annotate: error:", msg, file=sys.stderr)
    raise SystemExit(1)


def read_cgout_file() -> tuple[str, str, Events, DictFlfnCc, DictFlDictLineCc, Cc]:
    """
    Read and validate the cgout file named on the command line.

    Returns `(desc, cmd, events, dict_flfn_cc, dict_fl_dict_line_cc,
    summary_cc)`:
    - `desc`: the text of all `desc:` lines, each terminated by a newline.
    - `cmd`: the text of the `cmd:` line.
    - `events`: the `Events` built from the `events:` line.
    - `dict_flfn_cc`: counts accumulated per (filename, function name) pair.
    - `dict_fl_dict_line_cc`: counts accumulated per filename and line number.
    - `summary_cc`: the counts from the `summary:` line.

    Any problem (a file that cannot be opened, a missing or malformed line, or
    a `summary:` line that doesn't match the computed total) is reported via
    `die`, which exits.
    """
    # The file format is described in Cachegrind's manual.
    try:
        cgout_file = open(args.cgout_filename[0], "r", encoding="utf-8")
    except OSError as err:
        die(f"{err}")

    with cgout_file:
        cgout_line_num = 0

        # Like `die`, but prefixes the message with the current position in
        # the cgout file.
        def parse_die(msg: str) -> NoReturn:
            die(f"{cgout_file.name}:{cgout_line_num}: {msg}")

        # Read one line, keeping `cgout_line_num` in sync. Returns "" at EOF.
        def readline() -> str:
            nonlocal cgout_line_num
            cgout_line_num += 1
            return cgout_file.readline()

        # Read "desc:" lines. The loop stops at the first line that is not a
        # "desc:" line (or at EOF), leaving it in `line`.
        desc = ""
        while line := readline():
            if m := re.match(r"desc:\s+(.*)", line):
                desc += m.group(1) + "\n"
            else:
                break

        # Read "cmd:" line. (`line` is already set from the "desc:" loop.)
        if m := re.match(r"cmd:\s+(.*)", line):
            cmd = m.group(1)
        else:
            # Name the line by the key that is actually expected in the file.
            parse_die("missing a `cmd:` line")

        # Read "events:" line.
        line = readline()
        if m := re.match(r"events:\s+(.*)", line):
            events = Events(m.group(1))
        else:
            parse_die("missing an `events:` line")

        def mk_empty_dict_line_cc() -> DictLineCc:
            return defaultdict(events.mk_empty_cc)

        # The file and file/function that subsequent count lines belong to.
        curr_fl = ""
        curr_flfn = Flfn(("", ""))

        # Three different places where we accumulate CC data.
        dict_flfn_cc: DictFlfnCc = defaultdict(events.mk_empty_cc)
        dict_fl_dict_line_cc: DictFlDictLineCc = defaultdict(mk_empty_dict_line_cc)
        summary_cc = None

        # Compile the one hot regex.
        count_pat = re.compile(r"(\d+)\s+(.*)")

        # Line matching is done in order of pattern frequency, for speed.
        while True:
            line = readline()

            if m := count_pat.match(line):
                line_num = int(m.group(1))
                try:
                    cc = events.mk_cc(m.group(2))
                except ValueError:
                    parse_die("malformed or too many event counts")

                # Record this CC at the function level.
                flfn_cc = dict_flfn_cc[curr_flfn]
                flfn_cc += cc

                # Record this CC at the file/line level.
                line_cc = dict_fl_dict_line_cc[curr_fl][line_num]
                line_cc += cc

            elif line.startswith("fn="):
                # Drop the "fn=" prefix and the trailing newline.
                curr_flfn = Flfn((curr_fl, line[3:-1]))

            elif line.startswith("fl="):
                # Drop the "fl=" prefix and the trailing newline.
                curr_fl = line[3:-1]
                # A `fn=` line should follow, overwriting the "???".
                curr_flfn = Flfn((curr_fl, "???"))

            elif m := re.match(r"summary:\s+(.*)", line):
                try:
                    summary_cc = events.mk_cc(m.group(1))
                except ValueError:
                    # `mk_cc` raises `ValueError` both for non-integer tokens
                    # and for too many counts, so report both possibilities,
                    # as is done for count lines above.
                    parse_die("malformed or too many event counts")

            elif line == "":
                break  # EOF

            elif line == "\n" or line.startswith("#"):
                # Skip empty lines and comment lines.
                pass

            else:
                parse_die(f"malformed line: {line[:-1]}")

    # Check if summary line was present.
    if summary_cc is None:
        parse_die("missing `summary:` line, aborting")

    # Check summary is correct.
    total_cc = events.mk_empty_cc()
    for flfn_cc in dict_flfn_cc.values():
        total_cc += flfn_cc
    if summary_cc != total_cc:
        msg = (
            "`summary:` line doesn't match computed total\n"
            f"- summary: {summary_cc}\n"
            f"- total:   {total_cc}"
        )
        parse_die(msg)

    return (desc, cmd, events, dict_flfn_cc, dict_fl_dict_line_cc, summary_cc)


class CcPrinter:
    """
    Prints CCs as aligned columns, one count column (plus an optional
    percentage column) per shown event.

    The column widths are computed once, at construction, from the CCs that
    are going to be printed.
    """

    # Note: every `CcPrinter` gets the same `Events` object.
    events: Events

    # Note: every `CcPrinter` gets the same summary CC.
    summary_cc: Cc

    # The width of each event count column. (This column is also used for event
    # names.) For simplicity, its length matches `events.events`, even though
    # not all events are necessarily shown.
    count_widths: list[int]

    # The width of each percentage column. Zero if --show-percs is disabled.
    # Its length matches `count_widths`.
    perc_widths: list[int]

    def __init__(self, events: Events, ccs: list[Cc], summary_cc: Cc) -> None:
        self.events = events
        self.summary_cc = summary_cc

        # For each event, find the smallest and largest count among `ccs`
        # (with zero as the starting point for both). One of the two will be
        # the widest value in that column.
        min_cc = events.mk_empty_cc()
        max_cc = events.mk_empty_cc()
        for i in range(len(events.events)):
            column = [0, *(cc.counts[i] for cc in ccs)]
            min_cc.counts[i] = min(column)
            max_cc.counts[i] = max(column)

        # Find maximum width for each column.
        self.count_widths = []
        self.perc_widths = []
        for i, event in enumerate(events.events):
            lo_count, lo_perc = self.count_and_perc(min_cc, i)
            hi_count, hi_perc = self.count_and_perc(max_cc, i)

            # The event name goes in the count column, so it must fit too.
            self.count_widths.append(max(len(lo_count), len(hi_count), len(event)))
            self.perc_widths.append(max(len(lo_perc), len(hi_perc)))

    def print_events(self, suffix: str) -> None:
        """Print the header row of shown event names, followed by `suffix`."""
        names = self.events.events
        empty_perc = ""
        for i in self.events.show_indices:
            # The event name goes in the count column; the percentage column
            # is left blank.
            event = names[i]
            nwidth = self.count_widths[i]
            pwidth = self.perc_widths[i]
            print(f"{event:<{nwidth}}{empty_perc:>{pwidth}} ", end="")

        print(suffix)

    def print_count_and_perc(self, i: int, count: str, perc: str) -> None:
        """Print one right-aligned count/percentage cell for event `i`."""
        nwidth, pwidth = self.count_widths[i], self.perc_widths[i]
        print(f"{count:>{nwidth}}{perc:>{pwidth}} ", end="")

    def count_and_perc(self, cc: Cc, i: int) -> tuple[str, str]:
        """
        Format count `i` of `cc` as a `(count, percentage)` pair of strings.

        The percentage is relative to the summary CC. It is empty if
        --show-percs is disabled or the count is zero.
        """
        n = cc.counts[i]
        count = f"{n:,d}"  # commify

        # Don't show percentages for "0" entries, it's just clutter.
        if not args.show_percs or n == 0:
            return (count, "")

        summary_count = self.summary_cc.counts[i]
        if summary_count == 0:
            return (count, " (n/a)")

        p = n * 100 / summary_count
        return (count, f" ({p:.1f}%)")

    def print_cc(self, cc: Cc, suffix: str) -> None:
        """Print one row: the shown counts of `cc`, followed by `suffix`."""
        for i in self.events.show_indices:
            count, perc = self.count_and_perc(cc, i)
            self.print_count_and_perc(i, count, perc)

        print("", suffix)

    def print_missing_cc(self, suffix: str) -> None:
        """Print one row with "." in every shown column, followed by `suffix`."""
        # Don't show percentages for "." entries, it's just clutter.
        for i in self.events.show_indices:
            self.print_count_and_perc(i, ".", "")

        print("", suffix)


# Used in various places in the output.
def print_fancy(text: str) -> None:
    """Print `text` as a section heading between two 80-column rules."""
    rule = "-" * 80
    banner = (rule, " ".join(("--", text)), rule)
    print("\n".join(banner))


def print_cachegrind_profile(desc: str, cmd: str, events: Events) -> None:
    """
    Print the "Cachegrind profile" section: the `desc:` text, the profiled
    command, and the settings this run of the annotator is using.
    """
    print_fancy("Cachegrind profile")
    print(desc, end="")
    print("Command:         ", cmd)
    print("Data file:       ", args.cgout_filename[0])
    print("Events recorded: ", *events.events)
    print("Events shown:    ", *events.show_events)
    print("Event sort order:", *events.sort_events)
    print("Threshold:       ", args.threshold)

    # Multi-valued settings: the first value goes on the labelled line, the
    # remaining ones are aligned beneath it.
    include_dirnames = args.include
    if not include_dirnames:
        print("Include dirs:     ")
    else:
        print(f"Include dirs:     {include_dirnames[0]}")
        for include_dirname in include_dirnames[1:]:
            print(f"                  {include_dirname}")

    src_filenames = args.src_filenames
    if not src_filenames:
        print("User annotated:   ")
    else:
        print(f"User annotated:   {src_filenames[0]}")
        for src_filename in src_filenames[1:]:
            print(f"                  {src_filename}")

    auto_state = "on" if args.auto else "off"
    print("Auto-annotation: ", auto_state)
    print()


def print_summary(events: Events, summary_cc: Cc) -> None:
    """Print the "Summary" section: the event names and the program totals."""
    # The summary CC is the only row, so it alone determines column widths.
    totals_printer = CcPrinter(events, [summary_cc], summary_cc)

    print_fancy("Summary")
    totals_printer.print_events("")
    print()
    totals_printer.print_cc(summary_cc, "PROGRAM TOTALS")
    print()


def print_function_summary(
    events: Events, dict_flfn_cc: DictFlfnCc, summary_cc: Cc
) -> set[str]:
    """
    Print the "Function summary" section: one row per function whose primary
    sort event count reaches the threshold, largest first.

    Returns the names of the files containing at least one such function.
    """
    # Only the first sort event is compared against the threshold.
    primary = events.sort_indices[0]

    # Convert the threshold from a percentage to an event count.
    min_count = args.threshold * abs(summary_cc.counts[primary]) / 100

    # The sort key is the list of counts in sort order, so that left-to-right
    # list comparison does the right thing, followed by the `Flfn` itself for
    # deterministic output when all the event counts are identical in two CCs.
    def sort_key(item: tuple[Flfn, Cc]) -> tuple[list[int], Flfn]:
        flfn, cc = item
        return ([abs(cc.counts[i]) for i in events.sort_indices], flfn)

    # Drop functions whose primary sort event count is below the threshold,
    # and sort the remainder.
    kept = [
        item for item in dict_flfn_cc.items() if abs(item[1].counts[primary]) >= min_count
    ]
    kept.sort(key=sort_key, reverse=True)

    printer = CcPrinter(events, [cc for _, cc in kept], summary_cc)
    print_fancy("Function summary")
    printer.print_events(" file:function")
    print()

    # Print per-function counts.
    for flfn, flfn_cc in kept:
        printer.print_cc(flfn_cc, f"{flfn[0]}:{flfn[1]}")

    print()

    # Files containing a function that met the threshold.
    return {flfn[0] for flfn, _ in kept}


class AnnotatedCcs:
    """
    Running totals of event counts, split by how the annotator treated the
    counts. `labels` holds the description printed for each category, in the
    same order as the list returned by `ccs`.
    """

    line_nums_known_cc: Cc
    line_nums_unknown_cc: Cc
    unreadable_cc: Cc
    below_threshold_cc: Cc
    files_unknown_cc: Cc

    labels = [
        "  annotated: files known & above threshold & readable, line numbers known",
        "  annotated: files known & above threshold & readable, line numbers unknown",
        "unannotated: files known & above threshold & unreadable ",
        "unannotated: files known & below threshold",
        "unannotated: files unknown",
    ]

    def __init__(self, events: Events) -> None:
        # Every category starts out as an all-zero CC.
        new_cc = events.mk_empty_cc
        self.line_nums_known_cc = new_cc()
        self.line_nums_unknown_cc = new_cc()
        self.unreadable_cc = new_cc()
        self.below_threshold_cc = new_cc()
        self.files_unknown_cc = new_cc()

    def ccs(self) -> list[Cc]:
        """Return the category CCs, in the same order as `labels`."""
        in_label_order = [
            self.line_nums_known_cc,
            self.line_nums_unknown_cc,
            self.unreadable_cc,
            self.below_threshold_cc,
            self.files_unknown_cc,
        ]
        return in_label_order


def mk_warning(msg: str) -> str:
    """
    Wrap `msg` in a warning banner and return the resulting text.

    `msg` is inserted as-is between the banner header and the closing rule,
    so it should end with a newline.
    """
    header = (
        "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
        "@@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@\n"
        "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
    )
    footer = (
        "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
    )
    return header + msg + footer


def warn_src_file_is_newer(src_filename: str, cgout_filename: str) -> None:
    """Print a banner warning that `src_filename` is newer than the data file."""
    msg = (
        f"@ Source file '{src_filename}' is newer than data file '{cgout_filename}'.\n"
        "@ Annotations may not be correct.\n"
    )
    # Printed with the default line ending, so a blank line follows the banner.
    print(mk_warning(msg))


def warn_bogus_lines(src_filename: str) -> None:
    """Print a banner warning about counts for lines past the end of the file."""
    msg = f"@@ Information recorded about lines past the end of '{src_filename}'.\n"
    # The banner already ends with a newline, so don't add another one.
    print(mk_warning(msg), end="")


def print_annotated_src_file(
    events: Events,
    dict_line_cc: DictLineCc,
    src_file: TextIO,
    annotated_ccs: AnnotatedCcs,
    summary_cc: Cc,
) -> None:
    """
    Print the annotation of one source file.

    Lines that have a CC, plus `args.context` lines around them, are printed
    with their counts. The counts printed are also added to `annotated_ccs`.
    The line 0 entry, if any, is removed from `dict_line_cc`.
    """
    # If the source file is more recent than the cgout file, issue warning.
    cgout_filename = args.cgout_filename[0]
    src_mtime = os.stat(src_file.name).st_mtime_ns
    cgout_mtime = os.stat(cgout_filename).st_mtime_ns
    if src_mtime > cgout_mtime:
        warn_src_file_is_newer(src_file.name, cgout_filename)

    printer = CcPrinter(events, list(dict_line_cc.values()), summary_cc)
    # The starting fancy has already been printed by the caller.
    printer.print_events("")
    print()

    # The CC for line 0 is special, holding counts attributed to the source
    # file but not to any particular line (due to incomplete debug info).
    # Annotate the start of the file with this info, if present.
    line0_cc = dict_line_cc.pop(0, None)
    if line0_cc:
        printer.print_cc(line0_cc, "<unknown (line 0)>")
        annotated_ccs.line_nums_unknown_cc += line0_cc
        print()

    # Find interesting line ranges: all lines with a CC, and all lines within
    # `args.context` lines of a line with a CC. A line whose context touches
    # or overlaps that of the previous line extends the current range;
    # otherwise it starts a new one.
    line_nums = sorted(dict_line_cc.keys())
    context = args.context
    pairs: list[tuple[int, int]] = []
    prev_num = 0
    for num in line_nums:
        if pairs and prev_num + 2 * context >= num:
            pairs[-1] = (pairs[-1][0], num + context)
        else:
            # `max` to prevent negatives.
            pairs.append((max(num - context, 1), num + context))
        prev_num = num

    # Annotate chosen lines, tracking total annotated counts. `line_num` is
    # the number of `readline` calls made on the source file so far.
    line_num = 0
    for lo, hi in pairs:
        # Skip the lines before this range.
        while line_num < lo - 1:
            skipped = src_file.readline()
            line_num += 1
            if not skipped:
                break  # EOF

        src_line = ""
        # Print line number, unless start of file.
        if lo != 1:
            print("-- line", lo, "-" * 40)

        while line_num < hi:
            src_line = src_file.readline()
            line_num += 1
            if not src_line:
                break
            if line_nums and line_num == line_nums[0]:
                line_cc = dict_line_cc[line_num]
                printer.print_cc(line_cc, src_line[:-1])
                annotated_ccs.line_nums_known_cc += line_cc
                del line_nums[0]
            else:
                printer.print_missing_cc(src_line[:-1])

        # An empty `src_line` means EOF was hit: stop annotating.
        if not src_line:
            break

        # Print line number, unless EOF.
        print("-- line", hi, "-" * 40)

    # Whatever remains in `line_nums` was never reached while reading the
    # file. If there was info on lines past the end of the file, warn.
    if line_nums:
        for line_num in line_nums:
            line_cc = dict_line_cc[line_num]
            printer.print_cc(line_cc, f"<bogus line {line_num}>")
            annotated_ccs.line_nums_known_cc += line_cc

        print()
        warn_bogus_lines(src_file.name)

    print()


# This (partially) consumes `dict_fl_dict_line_cc`.
def print_annotated_src_files(
    events: Events,
    threshold_src_filenames: set[str],
    dict_fl_dict_line_cc: DictFlDictLineCc,
    summary_cc: Cc,
) -> AnnotatedCcs:
    """
    Print one section per source file to annotate: the user-specified files
    plus, with --auto, the files in `threshold_src_filenames`.

    Entries are removed from `dict_fl_dict_line_cc` as files are handled.
    Returns the counts accumulated per annotation category.
    """
    annotated_ccs = AnnotatedCcs(events)

    def pair_with(label: str) -> Callable[[str], tuple[str, str]]:
        def pair(name: str) -> tuple[str, str]:
            return (name, label)

        return pair

    def add_dict_line_cc_to_cc(dict_line_cc: DictLineCc | None, accum_cc: Cc) -> None:
        # Nothing to add if the dict is missing or empty.
        if not dict_line_cc:
            return
        for line_cc in dict_line_cc.values():
            accum_cc += line_cc

    def print_ann_fancy(ann_type: str, src_filename: str) -> None:
        print_fancy(f"{ann_type}-annotated source file: {src_filename}")

    # If auto-annotating, add interesting files (excluding "???").
    all_src_filenames = set(map(pair_with("User"), args.src_filenames))
    if args.auto:
        threshold_src_filenames.discard("???")

        unknown_file_ccs = dict_fl_dict_line_cc.pop("???", None)
        add_dict_line_cc_to_cc(unknown_file_ccs, annotated_ccs.files_unknown_cc)

        all_src_filenames.update(map(pair_with("Auto"), threshold_src_filenames))

    # Prepend "" to the include dirnames so things work in the case where the
    # filename has the full path.
    include_dirnames = ["", *args.include]

    for src_filename, ann_type in sorted(all_src_filenames):
        # Try each include directory in turn; the first one in which the file
        # can be opened (and processed without an `OSError`) wins.
        for include_dirname in include_dirnames:
            if include_dirname:
                full_src_filename = os.path.join(include_dirname, src_filename)
            else:
                full_src_filename = src_filename

            try:
                with open(full_src_filename, "r", encoding="utf-8") as src_file:
                    dict_line_cc = dict_fl_dict_line_cc.pop(src_filename, None)
                    if dict_line_cc is None:
                        # This only happens for user-specified files that are
                        # readable but not mentioned in the cgout file.
                        print_ann_fancy(ann_type, src_filename)
                        print("This file was not mentioned by the data file")
                        print()
                    else:
                        print_ann_fancy(ann_type, src_file.name)  # includes full path
                        print_annotated_src_file(
                            events,
                            dict_line_cc,
                            src_file,
                            annotated_ccs,
                            summary_cc,
                        )

                break
            except OSError:
                continue
        else:
            # No include directory worked: the file is unreadable.
            dict_line_cc = dict_fl_dict_line_cc.pop(src_filename, None)
            add_dict_line_cc_to_cc(dict_line_cc, annotated_ccs.unreadable_cc)

            print_ann_fancy(ann_type, src_filename)
            print("This file was unreadable")
            print()

    # Sum the CCs remaining in `dict_fl_dict_line_cc`, which are all in files
    # below the threshold.
    for dict_line_cc in dict_fl_dict_line_cc.values():
        add_dict_line_cc_to_cc(dict_line_cc, annotated_ccs.below_threshold_cc)

    return annotated_ccs


def print_annotation_summary(
    events: Events,
    annotated_ccs: AnnotatedCcs,
    summary_cc: Cc,
) -> None:
    """
    Print the "Annotation summary" section: one row of counts per annotation
    category. Does nothing if no annotation was requested.
    """
    # Only if we did any annotating do we show how many events were covered
    # by annotated lines above.
    if not (args.auto or args.src_filenames):
        return

    category_ccs = annotated_ccs.ccs()
    printer = CcPrinter(events, category_ccs, summary_cc)
    print_fancy("Annotation summary")
    printer.print_events("")
    print()

    total_cc = events.mk_empty_cc()
    for cc, label in zip(category_ccs, AnnotatedCcs.labels):
        printer.print_cc(cc, label)
        total_cc += cc

    print()

    # Internal sanity check: the categories must add up to the summary.
    if summary_cc == total_cc:
        return
    die(
        "`summary:` line doesn't match computed annotated counts\n"
        f"- summary:   {summary_cc}\n"
        f"- annotated: {total_cc}"
    )


def main() -> None:
    """Read the cgout file, then print each section of the report in order."""
    parsed = read_cgout_file()
    desc, cmd, events, dict_flfn_cc, dict_fl_dict_line_cc, summary_cc = parsed

    # Each of the following calls prints a section of the output.

    print_cachegrind_profile(desc, cmd, events)

    print_summary(events, summary_cc)

    # The function summary also tells us which files are worth annotating.
    threshold_src_filenames = print_function_summary(events, dict_flfn_cc, summary_cc)

    annotated_ccs = print_annotated_src_files(
        events,
        threshold_src_filenames,
        dict_fl_dict_line_cc,
        summary_cc,
    )

    print_annotation_summary(events, annotated_ccs, summary_cc)


# Only call `main` when this file is executed as a script.
if __name__ == "__main__":
    main()