mirror of
https://github.com/Zenithsiz/ftmemsim-valgrind.git
synced 2026-02-04 02:18:37 +00:00
- Every section now has a heading with the long `----` lines above and below. - Event names are always shown below that heading, rather than within it. - Each Unreadable file now gets its own section, much like files that lack any data.
912 lines
30 KiB
Python
Executable File
912 lines
30 KiB
Python
Executable File
#! /usr/bin/env python3
|
|
# pyright: strict
|
|
|
|
# --------------------------------------------------------------------
|
|
# --- Cachegrind's annotator. cg_annotate.in ---
|
|
# --------------------------------------------------------------------
|
|
|
|
# This file is part of Cachegrind, a Valgrind tool for cache
|
|
# profiling programs.
|
|
#
|
|
# Copyright (C) 2002-2023 Nicholas Nethercote
|
|
# njn@valgrind.org
|
|
#
|
|
# This program is free software; you can redistribute it and/or
|
|
# modify it under the terms of the GNU General Public License as
|
|
# published by the Free Software Foundation; either version 2 of the
|
|
# License, or (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful, but
|
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
# General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
#
|
|
# The GNU General Public License is contained in the file COPYING.
|
|
|
|
"""
|
|
This script reads Cachegrind output files and produces human-readable reports.
|
|
"""
|
|
|
|
# Use `make ann` to "build" this script every time it is changed. This runs the
|
|
# formatters, type-checkers, and linters on `cg_annotate.in` and then generates
|
|
# `cg_annotate`.
|
|
#
|
|
# Python versions: Currently this script targets Python 3.9 and later versions.
|
|
# Consequences of this:
|
|
# - No use of `TypeAlias` for explicit type aliases, which requires 3.10.
|
|
#
|
|
# The following Python tools are used. All can be installed with `pip3 install
|
|
# $NAME`, except `cProfile` which is built into Python.
|
|
#
|
|
# - Formatters:
|
|
# - `black`, for general formatting. This avoids the need for style checkers
|
|
# like `flake8`. Note that `black` allows a max line length of 88, which is
|
|
# a mild but common PEP-8 violation.
|
|
# - `isort`, for import sorting.
|
|
#
|
|
# - Type-checkers:
|
|
# - `mypy --strict`. This is the most commonly used Python type checker.
|
|
# - `pyright`. This is another good type checker. The `pyright: strict`
|
|
# comment above forces strict checking.
|
|
# - Sometimes one type-checker will complain about something the other does
|
|
# not. The goal is to keep both type checkers happy.
|
|
#
|
|
# - Linters:
|
|
# - `ruff`. Sometimes useful, and very fast to run.
|
|
# - `pylint`. Sometimes annoying, sometimes useful. The `pylintrc`
|
|
# modifies/disables the more annoying lints.
|
|
#
|
|
# - Profilers:
|
|
# - `cProfile` + `snakeviz`: Typically run with
|
|
# `python3 -m cProfile -o cg.prof cg_annotate $INPUT && snakeviz cg.prof`.
|
|
# - `scalene`. Typically run with `scalene ./cg_annotate $INPUT`.
|
|
#
|
|
# - Packager:
|
|
# - `cp` is used for distribution. This is possible because this program is a
|
|
# single file and only uses the Python Standard Library. This avoids the
|
|
# need for any of the million different Python package management tools.
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
from argparse import ArgumentParser, BooleanOptionalAction, Namespace
|
|
from collections import defaultdict
|
|
from typing import Callable, DefaultDict, NewType, NoReturn, TextIO
|
|
|
|
|
|
class Args(Namespace):
    """
    The command-line options, held in a typed `Namespace`.

    Nothing writes to these fields once argument parsing has finished.
    """

    show: list[str]
    sort: list[str]
    threshold: float  # a percentage
    show_percs: bool
    auto: bool
    context: int
    include: list[str]
    cgout_filename: list[str]
    src_filenames: list[str]

    @staticmethod
    def parse() -> Args:
        """Build the argument parser and return the parsed arguments."""

        def comma_separated_list(values: str) -> list[str]:
            return values.split(",")

        def threshold(n: str) -> float:
            # Only percentages in the range [0, 20] are accepted.
            value = float(n)
            if not 0 <= value <= 20:
                raise ValueError
            return value

        def add_bool_argument(
            target: ArgumentParser, name: str, help_text: str
        ) -> None:
            """
            Register a boolean option whose default is true.

            Four spellings are accepted: `--foo`, `--no-foo`, `--foo=yes` and
            `--foo=no`. The last two come from the old Perl version of
            `cg_annotate` and are deprecated.
            """
            option = f"--{name}"
            attr = name.replace("-", "_")

            # Note: the default value is always printed with
            # `BooleanOptionalAction`, due to an argparse bug:
            # https://github.com/python/cpython/issues/83137.
            target.add_argument(
                option,
                default=True,
                action=BooleanOptionalAction,
                help=help_text,
            )

            # The deprecated `=yes`/`=no` spellings write to the same attribute.
            deprecated_forms = (
                ("yes", "store_true", f"(deprecated) same as --{name}"),
                ("no", "store_false", f"(deprecated) same as --no-{name}"),
            )
            for value, action, text in deprecated_forms:
                target.add_argument(
                    f"{option}={value}",
                    dest=attr,
                    action=action,
                    help=text,
                )

        parser = ArgumentParser(description="Process Cachegrind output files.")

        parser.add_argument(
            "--version", action="version", version="%(prog)s-@VERSION@"
        )

        parser.add_argument(
            "--show",
            type=comma_separated_list,
            metavar="A,B,C",
            help="only show figures for events A,B,C (default: all events)",
        )

        parser.add_argument(
            "--sort",
            type=comma_separated_list,
            metavar="A,B,C",
            help="sort functions by events A,B,C (default: event column order)",
        )

        parser.add_argument(
            "--threshold",
            type=threshold,
            default=0.1,
            metavar="N:[0,20]",
            help="only show functions with more than N%% of primary sort event "
            "counts (default: %(default)s)",
        )
        add_bool_argument(
            parser,
            "show-percs",
            "show a percentage for each non-zero count",
        )
        add_bool_argument(
            parser,
            "auto",
            "annotate all source files containing functions that reached the "
            "event count threshold",
        )
        parser.add_argument(
            "--context",
            type=int,
            default=8,
            metavar="N",
            help="print N lines of context before and after annotated lines "
            "(default: %(default)s)",
        )
        parser.add_argument(
            "-I",
            "--include",
            action="append",
            default=[],
            metavar="D",
            help="add D to the list of searched source file directories",
        )
        parser.add_argument(
            "cgout_filename",
            nargs=1,
            metavar="cachegrind-out-file",
            help="file produced by Cachegrind",
        )
        parser.add_argument(
            "src_filenames",
            nargs="*",
            metavar="source-files",
            help="source files to annotate (usually not needed due to --auto)",
        )

        return parser.parse_args(namespace=Args())
|
|
|
|
|
|
# The parsed arguments are kept in a module-level global so that any function
# can read them directly.
args: Args = Args.parse()
|
|
|
|
|
|
# Exactly one `Events` object is built per run, from `args` and the `events:`
# line of the cgout file.
class Events:
    # The event names.
    events: list[str]

    # The number of entries in `events`.
    num_events: int

    # The events to traverse for --show, in order. May be shorter than
    # `events`.
    show_events: list[str]

    # The same as `show_events`, but as indices into `events`.
    show_indices: list[int]

    # The events to traverse for --sort, in order. May be shorter than
    # `events`.
    sort_events: list[str]

    # The same as `sort_events`, but as indices into `events`.
    sort_indices: list[int]

    def __init__(self, text: str) -> None:
        self.events = text.split()
        self.num_events = len(self.events)

        # Scratch lookup table: event name -> position in `events`.
        index_of = {name: idx for idx, name in enumerate(self.events)}

        # Without --show, every event is shown in the standard order. With it,
        # each requested event must have appeared in the `events:` line.
        if not args.show:
            self.show_events = self.events
        else:
            for name in args.show:
                if name not in index_of:
                    die(f"--show event `{name}` did not appear in `events:` line")
            self.show_events = args.show

        self.show_indices = [index_of[name] for name in self.show_events]

        # The same again for --sort.
        if not args.sort:
            self.sort_events = self.events
        else:
            for name in args.sort:
                if name not in index_of:
                    die(f"--sort event `{name}` did not appear in `events:` line")
            self.sort_events = args.sort

        self.sort_indices = [index_of[name] for name in self.sort_events]

    def mk_cc(self, text: str) -> Cc:
        """Raises a `ValueError` exception on syntax error."""
        # This is slightly faster than a list comprehension.
        counts = list(map(int, text.split()))

        # More counts than events is an error; fewer means the trailing ones
        # were omitted, so they are filled in as zeroes.
        missing = self.num_events - len(counts)
        if missing < 0:
            raise ValueError
        if missing:
            counts.extend([0] * missing)

        return Cc(counts)

    def mk_empty_cc(self) -> Cc:
        # This is much faster than a list comprehension.
        return Cc([0] * self.num_events)
|
|
|
|
|
|
class Cc:
    """
    A plain holder for a list of counts.

    It has no notion of events, i.e. of what each count stands for. It supports
    `repr`, equality comparison and in-place addition; anything beyond that is
    done elsewhere. Instances are built with `Events.mk_cc` and
    `Events.mk_empty_cc`.
    """

    # Always the same length as `Events.events`.
    counts: list[int]

    def __init__(self, counts: list[int]) -> None:
        self.counts = counts

    def __repr__(self) -> str:
        return repr(self.counts)

    def __eq__(self, other: object) -> bool:
        if isinstance(other, Cc):
            return self.counts == other.counts
        return NotImplemented

    def __iadd__(self, other: Cc) -> Cc:
        # Add `other`'s counts into ours, position by position, in place.
        mine = self.counts
        for idx, delta in enumerate(other.counts):
            mine[idx] = mine[idx] + delta
        return self
|
|
|
|
|
|
# A filename paired with a function name.
Flfn = NewType("Flfn", tuple[str, str])

# CCs keyed by function (i.e. by `Flfn`).
# Note: not using `TypeAlias`. See "Python versions" comment above.
DictFlfnCc = DefaultDict[Flfn, Cc]

# CCs keyed by line number, and those dicts in turn keyed by filename.
# Note: not using `TypeAlias`. See "Python versions" comment above.
DictLineCc = DefaultDict[int, Cc]
DictFlDictLineCc = DefaultDict[str, DictLineCc]
|
|
|
|
|
|
def die(msg: str) -> NoReturn:
    """Print `msg` to stderr as a `cg_annotate` error, then exit with status 1."""
    print("cg_annotate: error:", msg, file=sys.stderr)
    raise SystemExit(1)
|
|
|
|
|
|
def read_cgout_file() -> tuple[str, str, Events, DictFlfnCc, DictFlDictLineCc, Cc]:
    """
    Parse the cgout file named on the command line.

    Returns a tuple holding, in order:
    - the text of the `desc:` lines, one per line;
    - the text of the `cmd:` line;
    - the `Events` built from the `events:` line;
    - the CCs accumulated per function;
    - the CCs accumulated per file and line;
    - the CC from the `summary:` line.

    Every failure (file cannot be opened, syntax error, missing `summary:`
    line, summary not equal to the computed total) is reported through `die`,
    which exits the program.
    """
    # The file format is described in Cachegrind's manual.
    try:
        cgout_file = open(args.cgout_filename[0], "r", encoding="utf-8")
    except OSError as err:
        die(f"{err}")

    with cgout_file:
        cgout_line_num = 0

        def parse_die(msg: str) -> NoReturn:
            die(f"{cgout_file.name}:{cgout_line_num}: {msg}")

        def readline() -> str:
            nonlocal cgout_line_num
            cgout_line_num += 1
            return cgout_file.readline()

        # Read "desc:" lines.
        desc = ""
        while line := readline():
            if m := re.match(r"desc:\s+(.*)", line):
                desc += m.group(1) + "\n"
            else:
                break

        # Read "cmd:" line. (`line` is already set from the "desc:" loop.)
        if m := re.match(r"cmd:\s+(.*)", line):
            cmd = m.group(1)
        else:
            # Name the keyword that is actually parsed, as the `events:`
            # message below does.
            parse_die("missing a `cmd:` line")

        # Read "events:" line.
        line = readline()
        if m := re.match(r"events:\s+(.*)", line):
            events = Events(m.group(1))
        else:
            parse_die("missing an `events:` line")

        def mk_empty_dict_line_cc() -> DictLineCc:
            return defaultdict(events.mk_empty_cc)

        curr_fl = ""
        curr_flfn = Flfn(("", ""))

        # Three different places where we accumulate CC data.
        dict_flfn_cc: DictFlfnCc = defaultdict(events.mk_empty_cc)
        dict_fl_dict_line_cc: DictFlDictLineCc = defaultdict(mk_empty_dict_line_cc)
        summary_cc: Cc | None = None

        # Compile the one hot regex.
        count_pat = re.compile(r"(\d+)\s+(.*)")

        # Line matching is done in order of pattern frequency, for speed.
        while True:
            line = readline()

            if m := count_pat.match(line):
                line_num = int(m.group(1))
                try:
                    cc = events.mk_cc(m.group(2))
                except ValueError:
                    parse_die("malformed or too many event counts")

                # Record this CC at the function level.
                flfn_cc = dict_flfn_cc[curr_flfn]
                flfn_cc += cc

                # Record this CC at the file/line level.
                line_cc = dict_fl_dict_line_cc[curr_fl][line_num]
                line_cc += cc

            elif line.startswith("fn="):
                # Strip only the newline, so a final line lacking one keeps its
                # last character.
                curr_flfn = Flfn((curr_fl, line[3:].removesuffix("\n")))

            elif line.startswith("fl="):
                curr_fl = line[3:].removesuffix("\n")
                # A `fn=` line should follow, overwriting the "???".
                curr_flfn = Flfn((curr_fl, "???"))

            elif m := re.match(r"summary:\s+(.*)", line):
                try:
                    summary_cc = events.mk_cc(m.group(1))
                except ValueError:
                    # `mk_cc` raises for non-numeric counts as well as for too
                    # many, so use the same wording as for count lines.
                    parse_die("malformed or too many event counts")

            elif line == "":
                break  # EOF

            elif line == "\n" or line.startswith("#"):
                # Skip empty lines and comment lines.
                pass

            else:
                malformed = line.removesuffix("\n")
                parse_die(f"malformed line: {malformed}")

    # Check if summary line was present.
    if summary_cc is None:
        parse_die("missing `summary:` line, aborting")

    # Check summary is correct.
    total_cc = events.mk_empty_cc()
    for flfn_cc in dict_flfn_cc.values():
        total_cc += flfn_cc
    if summary_cc != total_cc:
        msg = (
            "`summary:` line doesn't match computed total\n"
            f"- summary: {summary_cc}\n"
            f"- total: {total_cc}"
        )
        parse_die(msg)

    return (desc, cmd, events, dict_flfn_cc, dict_fl_dict_line_cc, summary_cc)
|
|
|
|
|
|
class CcPrinter:
    """
    Prints CCs, and the matching event-name header, in aligned columns.

    The column widths are worked out once, at construction, from the CCs that
    are going to be printed.
    """

    # Note: every `CcPrinter` gets the same `Events` object.
    events: Events

    # Note: every `CcPrinter` gets the same summary CC.
    summary_cc: Cc

    # The width of each event count column. (Event names are printed in this
    # column too.) For simplicity there is one entry per event in
    # `events.events`, even though not all events are necessarily shown.
    count_widths: list[int]

    # The width of each percentage column; zero if --show-percs is disabled.
    # Same length as `count_widths`.
    perc_widths: list[int]

    def __init__(self, events: Events, ccs: list[Cc], summary_cc: Cc) -> None:
        self.events = events
        self.summary_cc = summary_cc

        num_events = len(events.events)

        # Track the smallest and largest value seen for each event. One of the
        # two will be the widest when printed.
        lowest = events.mk_empty_cc()
        highest = events.mk_empty_cc()
        for cc in ccs:
            for idx in range(num_events):
                value = cc.counts[idx]
                if value > highest.counts[idx]:
                    highest.counts[idx] = value
                elif value < lowest.counts[idx]:
                    lowest.counts[idx] = value

        # Work out the width needed by each column.
        self.count_widths = []
        self.perc_widths = []
        for idx, name in enumerate(events.events):
            (lo_count, lo_perc) = self.count_and_perc(lowest, idx)
            (hi_count, hi_perc) = self.count_and_perc(highest, idx)

            # The count column must also be wide enough for the event name.
            self.count_widths.append(max(len(lo_count), len(hi_count), len(name)))
            self.perc_widths.append(max(len(lo_perc), len(hi_perc)))

    def print_events(self, suffix: str) -> None:
        """Print the header row of shown event names, followed by `suffix`."""
        empty_perc = ""
        for idx in self.events.show_indices:
            # The event name goes in the count column.
            event = self.events.events[idx]
            nwidth = self.count_widths[idx]
            pwidth = self.perc_widths[idx]
            print(f"{event:<{nwidth}}{empty_perc:>{pwidth}} ", end="")

        print(suffix)

    def print_count_and_perc(self, i: int, count: str, perc: str) -> None:
        """Print one count/percentage cell, right-aligned in column `i`."""
        nwidth, pwidth = self.count_widths[i], self.perc_widths[i]
        print(f"{count:>{nwidth}}{perc:>{pwidth}} ", end="")

    def count_and_perc(self, cc: Cc, i: int) -> tuple[str, str]:
        """Return the printable count and percentage for event `i` of `cc`."""
        value = cc.counts[i]
        count = f"{value:,d}"  # commify

        # No percentage when they are disabled, nor for "0" entries, where it
        # would just be clutter.
        if not args.show_percs or value == 0:
            return (count, "")

        summary_count = self.summary_cc.counts[i]
        if summary_count == 0:
            return (count, " (n/a)")

        p = value * 100 / summary_count
        return (count, f" ({p:.1f}%)")

    def print_cc(self, cc: Cc, suffix: str) -> None:
        """Print the shown counts of `cc` as one row, followed by `suffix`."""
        for idx in self.events.show_indices:
            (count, perc) = self.count_and_perc(cc, idx)
            self.print_count_and_perc(idx, count, perc)

        print("", suffix)

    def print_missing_cc(self, suffix: str) -> None:
        """Print a row with "." in every shown column, followed by `suffix`."""
        # Don't show percentages for "." entries, it's just clutter.
        for idx in self.events.show_indices:
            self.print_count_and_perc(idx, ".", "")

        print("", suffix)
|
|
|
|
|
|
# Prints a section heading: `text` between two 80-column rules. Used in various
# places in the output.
def print_fancy(text: str) -> None:
    rule = "-" * 80
    print(rule, f"-- {text}", rule, sep="\n")
|
|
|
|
|
|
def print_cachegrind_profile(desc: str, cmd: str, events: Events) -> None:
    """
    Print the "Cachegrind profile" section.

    This shows `desc` as-is, followed by one labelled row each for the
    command, the data file, the recorded/shown/sort events, the threshold, the
    include dirs, the user-specified source files, and whether auto-annotation
    is on.
    """
    # Every label is padded to the width of the longest one so that all values
    # start in the same column.
    label_width = len("Event sort order:")

    def print_field(label: str, *values: object) -> None:
        print(f"{label:<{label_width}}", *values)

    def print_list_field(label: str, values: list[str]) -> None:
        # The first value goes beside the label; any others go on continuation
        # lines directly below it.
        print_field(label, values[0] if values else "")
        for value in values[1:]:
            print_field("", value)

    print_fancy("Cachegrind profile")
    print(desc, end="")
    print_field("Command:", cmd)
    print_field("Data file:", args.cgout_filename[0])
    print_field("Events recorded:", *events.events)
    print_field("Events shown:", *events.show_events)
    print_field("Event sort order:", *events.sort_events)
    print_field("Threshold:", args.threshold)
    print_list_field("Include dirs:", args.include)
    print_list_field("User annotated:", args.src_filenames)
    print_field("Auto-annotation:", "on" if args.auto else "off")
    print()
|
|
|
|
|
|
def print_summary(events: Events, summary_cc: Cc) -> None:
    """Print the "Summary" section: the event names and the program totals."""
    print_fancy("Summary")

    # The summary CC is the only row, so it alone determines the column widths.
    totals_printer = CcPrinter(events, [summary_cc], summary_cc)
    totals_printer.print_events("")
    print()
    totals_printer.print_cc(summary_cc, "PROGRAM TOTALS")
    print()
|
|
|
|
|
|
def print_function_summary(
    events: Events, dict_flfn_cc: DictFlfnCc, summary_cc: Cc
) -> set[str]:
    """
    Print the "Function summary" section.

    A function is printed only if the magnitude of its count for the primary
    sort event is at least `args.threshold` percent of the summary's. Returns
    the set of filenames of the functions that were printed.
    """
    # Only the first sort event takes part in the threshold test.
    primary = events.sort_indices[0]

    # The threshold as an event count, rather than as a percentage.
    min_count = args.threshold * abs(summary_cc.counts[primary]) / 100

    # The sort key lists the count magnitudes in sort order, so that
    # left-to-right list comparison does the right thing. The `Flfn` comes last
    # so that output is deterministic when two CCs have identical counts.
    def sort_key(entry: tuple[Flfn, Cc]) -> tuple[list[int], Flfn]:
        flfn, cc = entry
        return ([abs(cc.counts[i]) for i in events.sort_indices], flfn)

    # Drop the functions below the threshold, then order the rest from largest
    # to smallest.
    shown = [
        entry
        for entry in dict_flfn_cc.items()
        if abs(entry[1].counts[primary]) >= min_count
    ]
    shown.sort(key=sort_key, reverse=True)

    printer = CcPrinter(events, [cc for _, cc in shown], summary_cc)
    print_fancy("Function summary")
    printer.print_events(" file:function")
    print()

    # One row per function.
    for flfn, flfn_cc in shown:
        printer.print_cc(flfn_cc, f"{flfn[0]}:{flfn[1]}")

    print()

    # Files containing a function that met the threshold.
    return {flfn[0] for flfn, _ in shown}
|
|
|
|
|
|
class AnnotatedCcs:
    """
    The five CCs reported in the annotation summary.

    Each starts out empty. `ccs()` returns them in the same order as `labels`,
    so the two can be zipped together.
    """

    line_nums_known_cc: Cc
    line_nums_unknown_cc: Cc
    unreadable_cc: Cc
    below_threshold_cc: Cc
    files_unknown_cc: Cc

    # One label per CC, in `ccs()` order. The "annotated" labels carry two
    # leading spaces so that their colons line up with the "unannotated" ones.
    labels = [
        "  annotated: files known & above threshold & readable, line numbers known",
        "  annotated: files known & above threshold & readable, line numbers unknown",
        "unannotated: files known & above threshold & unreadable ",
        "unannotated: files known & below threshold",
        "unannotated: files unknown",
    ]

    def __init__(self, events: Events) -> None:
        self.line_nums_known_cc = events.mk_empty_cc()
        self.line_nums_unknown_cc = events.mk_empty_cc()
        self.unreadable_cc = events.mk_empty_cc()
        self.below_threshold_cc = events.mk_empty_cc()
        self.files_unknown_cc = events.mk_empty_cc()

    def ccs(self) -> list[Cc]:
        """Return the five CCs, in the same order as `labels`."""
        return [
            self.line_nums_known_cc,
            self.line_nums_unknown_cc,
            self.unreadable_cc,
            self.below_threshold_cc,
            self.files_unknown_cc,
        ]
|
|
|
|
|
|
def mk_warning(msg: str) -> str:
    """
    Wrap `msg` in a banner of `@` characters.

    `msg` is inserted as-is between the banner's header and its closing line,
    so it is expected to supply its own trailing newline.
    """
    banner = (
        "@@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@"
    )
    # The border lines are as wide as the banner line.
    border = "@" * len(banner)
    return f"{border}\n{banner}\n{border}\n{msg}{border}\n"
|
|
|
|
|
|
def warn_src_file_is_newer(src_filename: str, cgout_filename: str) -> None:
    """Print a warning that `src_filename` is newer than `cgout_filename`."""
    lines = (
        f"@ Source file '{src_filename}' is newer than data file '{cgout_filename}'.\n",
        "@ Annotations may not be correct.\n",
    )
    print(mk_warning("".join(lines)))
|
|
|
|
|
|
def warn_bogus_lines(src_filename: str) -> None:
    """Print a warning that counts were recorded past the end of `src_filename`."""
    text = f"@@ Information recorded about lines past the end of '{src_filename}'.\n"
    print(mk_warning(text), end="")
|
|
|
|
|
|
def print_annotated_src_file(
    events: Events,
    dict_line_cc: DictLineCc,
    src_file: TextIO,
    annotated_ccs: AnnotatedCcs,
    summary_cc: Cc,
) -> None:
    """
    Print the annotated lines of one source file.

    Each line that has a CC in `dict_line_cc` is printed with its counts,
    together with up to `args.context` lines either side of it. The counts
    printed are added to `annotated_ccs`. The line 0 entry, if any, is removed
    from `dict_line_cc`.

    The caller has already printed the section heading.
    """
    # If the source file is more recent than the cgout file, issue warning.
    if os.stat(src_file.name).st_mtime_ns > os.stat(args.cgout_filename[0]).st_mtime_ns:
        warn_src_file_is_newer(src_file.name, args.cgout_filename[0])

    printer = CcPrinter(events, list(dict_line_cc.values()), summary_cc)
    # The starting fancy has already been printed by the caller.
    printer.print_events("")
    print()

    # The CC for line 0 is special, holding counts attributed to the source
    # file but not to any particular line (due to incomplete debug info).
    # Annotate the start of the file with this info, if present.
    line0_cc = dict_line_cc.pop(0, None)
    if line0_cc:
        printer.print_cc(line0_cc, "<unknown (line 0)>")
        annotated_ccs.line_nums_unknown_cc += line0_cc
        print()

    # Find interesting line ranges: all lines with a CC, and all lines within
    # `args.context` lines of a line with a CC.
    line_nums = sorted(dict_line_cc)
    pairs: list[tuple[int, int]] = []
    n = len(line_nums)
    i = 0
    context = args.context
    while i < n:
        lo = max(line_nums[i] - context, 1)  # `max` to prevent negatives
        while i < n - 1 and line_nums[i] + 2 * context >= line_nums[i + 1]:
            i += 1
        hi = line_nums[i] + context
        pairs.append((lo, hi))
        i += 1

    # Annotate chosen lines, tracking total annotated counts. `next_known` is
    # the index in `line_nums` of the next line with a CC still to be printed;
    # advancing an index avoids repeatedly deleting from the front of the list.
    line_num = 0
    next_known = 0
    for lo, hi in pairs:
        # Skip ahead to just before the start of the range.
        while line_num < lo - 1:
            skipped = src_file.readline()
            line_num += 1
            if not skipped:
                break  # EOF

        src_line = ""
        # Print line number, unless start of file.
        if lo != 1:
            print("-- line", lo, "-" * 40)

        while line_num < hi:
            src_line = src_file.readline()
            line_num += 1
            if not src_line:
                break

            # Strip only the newline: the last line of the file may lack one,
            # and must not lose a real character.
            text = src_line.removesuffix("\n")
            if next_known < n and line_num == line_nums[next_known]:
                printer.print_cc(dict_line_cc[line_num], text)
                annotated_ccs.line_nums_known_cc += dict_line_cc[line_num]
                next_known += 1
            else:
                printer.print_missing_cc(text)

        # Print line number, unless EOF.
        if src_line:
            print("-- line", hi, "-" * 40)
        else:
            break

    # If there was info on lines past the end of the file, warn.
    bogus_line_nums = line_nums[next_known:]
    if bogus_line_nums:
        for bogus_line_num in bogus_line_nums:
            printer.print_cc(
                dict_line_cc[bogus_line_num], f"<bogus line {bogus_line_num}>"
            )
            annotated_ccs.line_nums_known_cc += dict_line_cc[bogus_line_num]

        print()
        warn_bogus_lines(src_file.name)

    print()
|
|
|
|
|
|
# This (partially) consumes `dict_fl_dict_line_cc`.
def print_annotated_src_files(
    events: Events,
    threshold_src_filenames: set[str],
    dict_fl_dict_line_cc: DictFlDictLineCc,
    summary_cc: Cc,
) -> AnnotatedCcs:
    """
    Print one section per source file to be annotated.

    The files are those named on the command line plus, when `args.auto` is
    set, those in `threshold_src_filenames` (from which "???" is discarded).
    Each file gets exactly one section; a file that is both user-specified and
    auto-selected is labelled "User".

    Returns the CCs accumulated per annotation category.
    """
    annotated_ccs = AnnotatedCcs(events)

    def add_dict_line_cc_to_cc(dict_line_cc: DictLineCc | None, accum_cc: Cc) -> None:
        if dict_line_cc:
            for line_cc in dict_line_cc.values():
                accum_cc += line_cc

    # Map each file to annotate to its annotation type. A dict is used so that
    # a file is processed only once: its CCs are consumed the first time, so a
    # second pass would wrongly report it as not mentioned by the data file.
    ann_types: dict[str, str] = {}

    # If auto-annotating, add interesting files (excluding "???").
    if args.auto:
        threshold_src_filenames.discard("???")

        dict_line_cc = dict_fl_dict_line_cc.pop("???", None)
        add_dict_line_cc_to_cc(dict_line_cc, annotated_ccs.files_unknown_cc)

        ann_types.update(dict.fromkeys(threshold_src_filenames, "Auto"))

    # User-specified files are added last so that "User" wins over "Auto".
    ann_types.update(dict.fromkeys(args.src_filenames, "User"))

    # Prepend "" to the include dirnames so things work in the case where the
    # filename has the full path.
    include_dirnames = args.include.copy()
    include_dirnames.insert(0, "")

    def print_ann_fancy(ann_type: str, src_filename: str) -> None:
        print_fancy(f"{ann_type}-annotated source file: {src_filename}")

    for src_filename, ann_type in sorted(ann_types.items()):
        readable = False
        for include_dirname in include_dirnames:
            if include_dirname == "":
                full_src_filename = src_filename
            else:
                full_src_filename = os.path.join(include_dirname, src_filename)

            # Only a failure to open the file means "try the next directory".
            # The `try` is kept this narrow so that an error raised later,
            # after the file's CCs have been consumed, is not silently
            # swallowed.
            try:
                src_file = open(full_src_filename, "r", encoding="utf-8")
            except OSError:
                continue

            with src_file:
                dict_line_cc = dict_fl_dict_line_cc.pop(src_filename, None)
                if dict_line_cc is not None:
                    print_ann_fancy(ann_type, src_file.name)  # includes full path
                    print_annotated_src_file(
                        events,
                        dict_line_cc,
                        src_file,
                        annotated_ccs,
                        summary_cc,
                    )
                else:
                    # This only happens for user-specified files that are
                    # readable but not mentioned in the cgout file.
                    print_ann_fancy(ann_type, src_filename)
                    print("This file was not mentioned by the data file")
                    print()

            readable = True
            break

        if not readable:
            dict_line_cc = dict_fl_dict_line_cc.pop(src_filename, None)
            add_dict_line_cc_to_cc(dict_line_cc, annotated_ccs.unreadable_cc)

            print_ann_fancy(ann_type, src_filename)
            print("This file was unreadable")
            print()

    # Sum the CCs remaining in `dict_fl_dict_line_cc`, which are all in files
    # below the threshold.
    for dict_line_cc in dict_fl_dict_line_cc.values():
        add_dict_line_cc_to_cc(dict_line_cc, annotated_ccs.below_threshold_cc)

    return annotated_ccs
|
|
|
|
|
|
def print_annotation_summary(
    events: Events,
    annotated_ccs: AnnotatedCcs,
    summary_cc: Cc,
) -> None:
    """
    Print the "Annotation summary" section, if any annotating was done.

    It shows how many events were covered by each category of `annotated_ccs`.
    The program exits via `die` if the categories do not add up to
    `summary_cc`.
    """
    # Nothing to summarise unless annotation was requested in some way.
    if not (args.auto or args.src_filenames):
        return

    category_ccs = annotated_ccs.ccs()
    printer = CcPrinter(events, category_ccs, summary_cc)
    print_fancy("Annotation summary")
    printer.print_events("")
    print()

    total_cc = events.mk_empty_cc()
    for cc, label in zip(category_ccs, AnnotatedCcs.labels):
        printer.print_cc(cc, label)
        total_cc += cc

    print()

    # Internal sanity check.
    if summary_cc == total_cc:
        return
    die(
        "`summary:` line doesn't match computed annotated counts\n"
        f"- summary: {summary_cc}\n"
        f"- annotated: {total_cc}"
    )
|
|
|
|
|
|
def main() -> None:
    """Read the cgout file, then print each section of the report in turn."""
    parsed = read_cgout_file()
    desc, cmd, events = parsed[:3]
    dict_flfn_cc, dict_fl_dict_line_cc, summary_cc = parsed[3:]

    # Each of the following calls prints a section of the output.
    print_cachegrind_profile(desc, cmd, events)
    print_summary(events, summary_cc)

    # The function summary also determines which files meet the threshold.
    threshold_src_filenames = print_function_summary(events, dict_flfn_cc, summary_cc)

    annotated_ccs = print_annotated_src_files(
        events, threshold_src_filenames, dict_fl_dict_line_cc, summary_cc
    )
    print_annotation_summary(events, annotated_ccs, summary_cc)


if __name__ == "__main__":
    main()
|