Rewrite cg_merge in Python.

It's currently written in C, but `cg_annotate` and `cg_diff` are written in Python. It's better to have them all in the same language. The good news is that the Python code is 4.5x shorter than the C code. The bad news is that the Python code is roughly 3x slower than the C code. But `cg_merge` isn't used that often, so I think it's a reasonable trade-off.
2026-02-03 18:13:01 +00:00 · 2023-03-27 17:27:56 +11:00 · 2023-03-27 17:27:56 +11:00 · 551874920f
commit 551874920f
parent 8a75eecbad
12 changed files with 482 additions and 1601 deletions
--- a/cachegrind/Makefile.am
+++ b/cachegrind/Makefile.am
@ -10,32 +10,13 @@ EXTRA_DIST = \
 # Headers, etc
 #----------------------------------------------------------------------------

-bin_SCRIPTS = cg_annotate cg_diff
+bin_SCRIPTS = cg_annotate cg_diff cg_merge

 noinst_HEADERS = \
 	cg_arch.h \
 	cg_branchpred.c \
 	cg_sim.c

-#----------------------------------------------------------------------------
-# cg_merge (built for the primary target only)
-#----------------------------------------------------------------------------
-
-bin_PROGRAMS = cg_merge
-
-cg_merge_SOURCES = cg_merge.c
-cg_merge_CPPFLAGS  = $(AM_CPPFLAGS_PRI)
-cg_merge_CFLAGS    = $(AM_CFLAGS_PRI)
-cg_merge_CCASFLAGS = $(AM_CCASFLAGS_PRI)
-cg_merge_LDFLAGS   = $(AM_CFLAGS_PRI)
-# If there is no secondary platform, and the platforms include x86-darwin,
-# then the primary platform must be x86-darwin.  Hence:
-if ! VGCONF_HAVE_PLATFORM_SEC
-if VGCONF_PLATFORMS_INCLUDE_X86_DARWIN
-cg_merge_LDFLAGS   += -Wl,-read_only_relocs -Wl,suppress
-endif
-endif
-
 #----------------------------------------------------------------------------
 # cachegrind-<platform>
 #----------------------------------------------------------------------------
@ -101,4 +82,8 @@ pyann:
 pydiff:
 	+../auxprogs/pybuild.sh cg_diff.in cg_diff

-.PHONY: pyann pydiff
+# "Build" `cg_merge`. The `+` avoids warnings about the jobserver.
+pymerge:
+	+../auxprogs/pybuild.sh cg_merge.in cg_merge
+
+.PHONY: pyann pydiff pymerge
--- a/cachegrind/cg_merge.c
+++ b/cachegrind/cg_merge.c
--- a/cachegrind/cg_merge.in
+++ b/cachegrind/cg_merge.in
@ -0,0 +1,339 @@
+#! /usr/bin/env python3
+# pyright: strict
+
+# --------------------------------------------------------------------
+# --- Cachegrind's merger.                             cg_merge.in ---
+# --------------------------------------------------------------------
+
+#  This file is part of Cachegrind, a Valgrind tool for cache
+#  profiling programs.
+#
+#  Copyright (C) 2002-2023 Nicholas Nethercote
+#     njn@valgrind.org
+#
+#  This program is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU General Public License as
+#  published by the Free Software Foundation; either version 2 of the
+#  License, or (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful, but
+#  WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, see <http://www.gnu.org/licenses/>.
+#
+#  The GNU General Public License is contained in the file COPYING.
+
+"""
+This script merges Cachegrind output files.
+"""
+
+# Use `make pymerge` to "build" this script every time it is changed. This runs
+# the formatters, type-checkers, and linters on `cg_merge.in` and then
+# generates `cg_merge`.
+#
+# This is a cut-down version of `cg_annotate.in`.
+
+from __future__ import annotations
+
+import re
+import sys
+from argparse import ArgumentParser, Namespace
+from collections import defaultdict
+from typing import DefaultDict, NoReturn, TextIO
+
+
+class Args(Namespace):
+    """
+    A typed wrapper for parsed args.
+
+    None of these fields are modified after arg parsing finishes.
+    """
+
+    # Path given with `-o`. No default is passed to `add_argument`, so this is
+    # presumably `None` (argparse's default) when `-o` is absent, despite the
+    # `str` annotation -- TODO confirm. `main` only tests its truthiness.
+    output: str
+    # The input filenames; `nargs="+"` means there is at least one.
+    cgout_filename: list[str]
+
+    @staticmethod
+    def parse() -> Args:
+        """Parse the command line into a new `Args` instance."""
+        p = ArgumentParser(description="Merge multiple Cachegrind output files.")
+
+        p.add_argument("--version", action="version", version="%(prog)s-@VERSION@")
+
+        p.add_argument(
+            "-o",
+            dest="output",
+            type=str,
+            metavar="FILE",
+            help="output file (default: stdout)",
+        )
+
+        p.add_argument(
+            "cgout_filename",
+            nargs="+",
+            metavar="cachegrind-out-file",
+            help="file produced by Cachegrind",
+        )
+
+        # Passing an `Args` instance as the namespace makes argparse store the
+        # parsed values on it, so the result has the typed fields above.
+        return p.parse_args(namespace=Args())
+
+
+# Args are stored in a global for easy access. Note that this is a module-level
+# statement, so the command line is parsed as soon as the module is loaded.
+args = Args.parse()
+
+# One instance of this class is constructed per cgout file, from the text of
+# that file's `events:` line.
+class Events:
+    # The event names.
+    events: list[str]
+
+    def __init__(self, text: str) -> None:
+        self.events = text.split()
+        self.num_events = len(self.events)
+
+    def mk_cc(self, text: str) -> Cc:
+        """
+        Build a `Cc` from whitespace-separated counts, padding with zeroes if
+        there are fewer counts than events.
+
+        Raises a `ValueError` exception on syntax error, i.e. if a count is
+        not an integer or if there are more counts than events.
+        """
+        # This is slightly faster than a list comprehension.
+        counts = list(map(int, text.split()))
+
+        if len(counts) == self.num_events:
+            pass
+        elif len(counts) < self.num_events:
+            # Add zeroes at the end for any missing numbers.
+            counts.extend([0] * (self.num_events - len(counts)))
+        else:
+            raise ValueError
+
+        return Cc(counts)
+
+    def mk_empty_cc(self) -> Cc:
+        """Build a `Cc` holding one zero per event."""
+        # This is much faster than a list comprehension.
+        return Cc([0] * self.num_events)
+
+
+class Cc:
+    """
+    This is a dumb container for counts.
+
+    It doesn't know anything about events, i.e. what each count means. It can
+    do basic operations like `__iadd__` and `__eq__`, and anything more must be
+    done elsewhere. `Events.mk_cc` and `Events.mk_empty_cc` are used for
+    construction.
+    """
+
+    # Always the same length as `Events.events`.
+    counts: list[int]
+
+    def __init__(self, counts: list[int]) -> None:
+        # The list is stored as-is, not copied.
+        self.counts = counts
+
+    def __repr__(self) -> str:
+        return str(self.counts)
+
+    def __eq__(self, other: object) -> bool:
+        # Only another `Cc` is comparable; for anything else, defer to Python's
+        # default handling by returning `NotImplemented`.
+        if not isinstance(other, Cc):
+            return NotImplemented
+        return self.counts == other.counts
+
+    def __iadd__(self, other: Cc) -> Cc:
+        # Element-wise, in-place addition. Every index of `other.counts` is
+        # used to index `self.counts`, so `other` must not be the longer one.
+        for i, other_count in enumerate(other.counts):
+            self.counts[i] += other_count
+        return self
+
+
+# Per-line CCs, organised by filename, function name, and line number.
+DictLineCc = DefaultDict[int, Cc]
+DictFnDictLineCc = DefaultDict[str, DictLineCc]
+DictFlDictFnDictLineCc = DefaultDict[str, DictFnDictLineCc]
+
+
+def die(msg: str) -> NoReturn:
+    """Print `msg` to stderr with a `cg_merge: error:` prefix, then exit(1)."""
+    print("cg_merge: error:", msg, file=sys.stderr)
+    sys.exit(1)
+
+
+def read_cgout_file(
+    cgout_filename: str,
+    is_first_file: bool,
+    cumul_dict_fl_dict_fn_dict_line_cc: DictFlDictFnDictLineCc,
+    cumul_summary_cc: Cc,
+) -> tuple[list[str], str, Events]:
+    """
+    Parse one cgout file and add its counts to the `cumul_*` accumulators.
+
+    The `cumul_*` arguments are modified in place. When `is_first_file` is
+    true they are first (re)initialized for this file's number of events;
+    otherwise the file must have the same number of events as they hold.
+
+    Returns the file's `desc:` lines (possibly none), its `cmd:` text, and its
+    `Events`. Any I/O or syntax error is reported via `die`, which exits.
+    """
+    # The file format is described in Cachegrind's manual.
+    try:
+        cgout_file = open(cgout_filename, "r", encoding="utf-8")
+    except OSError as err:
+        die(f"{err}")
+
+    with cgout_file:
+        cgout_line_num = 0
+
+        def parse_die(msg: str) -> NoReturn:
+            die(f"{cgout_file.name}:{cgout_line_num}: {msg}")
+
+        def readline() -> str:
+            nonlocal cgout_line_num
+            cgout_line_num += 1
+            return cgout_file.readline()
+
+        # Read "desc:" lines.
+        desc: list[str] = []
+        while line := readline():
+            if m := re.match(r"desc:\s+(.*)", line):
+                desc.append(m.group(1))
+            else:
+                break
+
+        # Read "cmd:" line. (`line` is already set from the "desc:" loop.)
+        if m := re.match(r"cmd:\s+(.*)", line):
+            cmd = m.group(1)
+        else:
+            parse_die("missing a `cmd:` line")
+
+        # Read "events:" line.
+        line = readline()
+        if m := re.match(r"events:\s+(.*)", line):
+            events = Events(m.group(1))
+        else:
+            parse_die("missing an `events:` line")
+
+        def mk_empty_dict_line_cc() -> DictLineCc:
+            return defaultdict(events.mk_empty_cc)
+
+        def mk_empty_dict_fn_dict_line_cc() -> DictFnDictLineCc:
+            return defaultdict(mk_empty_dict_line_cc)
+
+        summary_cc_present = False
+
+        curr_fl = ""
+        curr_fn = ""
+
+        # The `cumul_*` values are passed in by reference and are modified by
+        # this function. But they can't be properly initialized until the
+        # `events:` line of the first file is read and the number of events is
+        # known. So we initialize them in an invalid state, and then
+        # reinitialize them properly here, before their first use.
+        if is_first_file:
+            cumul_dict_fl_dict_fn_dict_line_cc.default_factory = (
+                mk_empty_dict_fn_dict_line_cc
+            )
+            cumul_summary_cc.counts = events.mk_empty_cc().counts
+        elif events.num_events != len(cumul_summary_cc.counts):
+            # The accumulators are sized for the first file's events. Adding a
+            # longer CC to them would raise an `IndexError` in `Cc.__iadd__`,
+            # so reject the mismatch before any counts are read.
+            parse_die("events don't match")
+
+        # Compile the one hot regex.
+        count_pat = re.compile(r"(\d+)\s+(.*)")
+
+        # Line matching is done in order of pattern frequency, for speed.
+        while True:
+            line = readline()
+
+            if m := count_pat.match(line):
+                line_num = int(m.group(1))
+                try:
+                    cc = events.mk_cc(m.group(2))
+                except ValueError:
+                    parse_die("malformed or too many event counts")
+
+                # Record this CC at the file/func/line level.
+                line_cc = cumul_dict_fl_dict_fn_dict_line_cc[curr_fl][curr_fn][line_num]
+                line_cc += cc
+
+            elif line.startswith("fn="):
+                # Strip the `fn=` prefix and the trailing newline.
+                curr_fn = line[3:-1]
+
+            elif line.startswith("fl="):
+                curr_fl = line[3:-1]
+                # A `fn=` line should follow, overwriting the "???".
+                curr_fn = "???"
+
+            elif m := re.match(r"summary:\s+(.*)", line):
+                summary_cc_present = True
+                try:
+                    cumul_summary_cc += events.mk_cc(m.group(1))
+                except ValueError:
+                    # `mk_cc` raises for non-integer text as well as for too
+                    # many counts, so the message covers both.
+                    parse_die("malformed or too many event counts")
+
+            elif line == "":
+                break  # EOF
+
+            elif line == "\n" or line.startswith("#"):
+                # Skip empty lines and comment lines.
+                pass
+
+            else:
+                parse_die(f"malformed line: {line[:-1]}")
+
+    # Check if summary line was present.
+    if not summary_cc_present:
+        parse_die("missing `summary:` line, aborting")
+
+    # In `cg_annotate.in` and `cg_diff.in` we check that the file's summary CC
+    # matches the totals of the file's individual CCs, but not here. That's
+    # because in this script we don't collect the file's CCs in isolation,
+    # instead we just add them to the accumulated CCs, for speed. This makes it
+    # difficult to do the per-file checking.
+
+    return (desc, cmd, events)
+
+
+def main() -> None:
+    """
+    Merge every input file into one profile and write it out.
+
+    The description and command of the first file are reused for the output.
+    The output goes to the `-o` file if one was given, otherwise to stdout.
+    """
+    desc1: list[str] | None = None
+    cmd1 = None
+    events1 = None
+
+    # Different places where we accumulate CC data. Initialized to invalid
+    # states prior to the number of events being known.
+    cumul_dict_fl_dict_fn_dict_line_cc: DictFlDictFnDictLineCc = defaultdict(None)
+    cumul_summary_cc: Cc = Cc([])
+
+    for n, filename in enumerate(args.cgout_filename):
+        is_first_file = n == 0
+        (desc_n, cmd_n, events_n) = read_cgout_file(
+            filename,
+            is_first_file,
+            cumul_dict_fl_dict_fn_dict_line_cc,
+            cumul_summary_cc,
+        )
+        # We reuse the description and command from the first file, like the
+        # old C version of `cg_merge`.
+        if is_first_file:
+            desc1 = desc_n
+            cmd1 = cmd_n
+            events1 = events_n
+        else:
+            assert events1 is not None
+            # Compare the event names, not just how many there are: summing
+            # the counts of different events would give meaningless totals.
+            if events1.events != events_n.events:
+                die("events don't match")
+
+    def write_output(f: TextIO) -> None:
+        # These assertions hold because `cgout_filename` uses `nargs="+"`, so
+        # the loop above executes at least once. `desc1` is compared against
+        # `None` because a file with no `desc:` lines gives an empty list.
+        assert desc1 is not None
+        assert events1 is not None
+        assert cumul_dict_fl_dict_fn_dict_line_cc is not None
+        assert cumul_summary_cc
+
+        for desc_line in desc1:
+            print("desc:", desc_line, file=f)
+        print("cmd:", cmd1, file=f)
+        print("events:", *events1.events, sep=" ", file=f)
+
+        for fl, dict_fn_dict_line_cc in cumul_dict_fl_dict_fn_dict_line_cc.items():
+            print(f"fl={fl}", file=f)
+            for fn, dict_line_cc in dict_fn_dict_line_cc.items():
+                print(f"fn={fn}", file=f)
+                for line, cc in dict_line_cc.items():
+                    print(line, *cc.counts, file=f)
+
+        print("summary:", *cumul_summary_cc.counts, sep=" ", file=f)
+
+    if args.output:
+        try:
+            with open(args.output, "w", encoding="utf-8") as f:
+                write_output(f)
+        except OSError as err:
+            die(f"{err}")
+    else:
+        write_output(sys.stdout)
+
+
+if __name__ == "__main__":
+    main()
--- a/cachegrind/tests/Makefile.am
+++ b/cachegrind/tests/Makefile.am
@ -15,6 +15,8 @@ dist_noinst_SCRIPTS = filter_stderr filter_cachesim_discards
 EXTRA_DIST = \
 	ann-diff1.post.exp ann-diff1.stderr.exp ann-diff1.vgtest \
 		ann-diff2a.cgout ann-diff2b.cgout \
+	ann-merge1.post.exp ann-merge1.stderr.exp ann-merge1.vgtest \
+		ann-merge1a.cgout ann-merge1b.cgout \
 	ann1a.post.exp ann1a.stderr.exp ann1a.vgtest ann1.cgout \
 	ann1b.post.exp ann1b.stderr.exp ann1b.vgtest ann1b.cgout \
 	ann2.post.exp ann2.stderr.exp ann2.vgtest ann2.cgout \
--- a/cachegrind/tests/ann-merge-x.rs
+++ b/cachegrind/tests/ann-merge-x.rs
@ -0,0 +1,5 @@
+one
+two
+three
+four
+five
--- a/cachegrind/tests/ann-merge-y.rs
+++ b/cachegrind/tests/ann-merge-y.rs
@ -0,0 +1,6 @@
+one
+two
+three
+four
+five
+six
--- a/cachegrind/tests/ann-merge1.post.exp
+++ b/cachegrind/tests/ann-merge1.post.exp
@ -0,0 +1,66 @@
+--------------------------------------------------------------------------------
+-- Cachegrind profile
+--------------------------------------------------------------------------------
+Description 1a
+Description 1b
+Command:          Command 1
+Data file:        ann-merge1c.cgout
+Events recorded:  A B C
+Events shown:     A B C
+Event sort order: A B C
+Threshold:        0.1
+Include dirs:     
+User annotated:   
+Auto-annotation:  on
+
+--------------------------------------------------------------------------------
+-- Summary
+--------------------------------------------------------------------------------
+A           B            C            
+
+86 (100.0%) 113 (100.0%) 145 (100.0%)  PROGRAM TOTALS
+
+--------------------------------------------------------------------------------
+-- Function summary
+--------------------------------------------------------------------------------
+A          B          C            file:function
+
+40 (46.5%) 80 (70.8%) 120 (82.8%)  ann-merge-x.rs:x1
+20 (23.3%) 10  (8.8%)   5  (3.4%)  ann-merge-x.rs:x3
+16 (18.6%) 18 (15.9%)  20 (13.8%)  ann-merge-y.rs:y1
+10 (11.6%)  5  (4.4%)   0          ann-merge-x.rs:x2
+
+--------------------------------------------------------------------------------
+-- Auto-annotated source file: ann-merge-x.rs
+--------------------------------------------------------------------------------
+A          B          C          
+
+20 (23.3%) 40 (35.4%) 60 (41.4%)  one
+10 (11.6%) 20 (17.7%) 30 (20.7%)  two
+10 (11.6%) 20 (17.7%) 30 (20.7%)  three
+10 (11.6%)  5  (4.4%)  0          four
+20 (23.3%) 10  (8.8%)  5  (3.4%)  five
+
+--------------------------------------------------------------------------------
+-- Auto-annotated source file: ann-merge-y.rs
+--------------------------------------------------------------------------------
+A        B        C         
+
+8 (9.3%) 9 (8.0%) 10 (6.9%)  one
+8 (9.3%) 9 (8.0%) 10 (6.9%)  two
+.        .         .         three
+.        .         .         four
+.        .         .         five
+.        .         .         six
+
+--------------------------------------------------------------------------------
+-- Annotation summary
+--------------------------------------------------------------------------------
+A           B            C            
+
+86 (100.0%) 113 (100.0%) 145 (100.0%)    annotated: files known & above threshold & readable, line numbers known
+ 0            0            0             annotated: files known & above threshold & readable, line numbers unknown
+ 0            0            0           unannotated: files known & above threshold & unreadable 
+ 0            0            0           unannotated: files known & below threshold
+ 0            0            0           unannotated: files unknown
+
--- a/cachegrind/tests/ann-merge1.stderr.exp
+++ b/cachegrind/tests/ann-merge1.stderr.exp
@ -0,0 +1,17 @@
+
+
+I   refs:
+I1  misses:
+LLi misses:
+I1  miss rate:
+LLi miss rate:
+
+D   refs:
+D1  misses:
+LLd misses:
+D1  miss rate:
+LLd miss rate:
+
+LL refs:
+LL misses:
+LL miss rate:
--- a/cachegrind/tests/ann-merge1.vgtest
+++ b/cachegrind/tests/ann-merge1.vgtest
@ -0,0 +1,7 @@
+# The `prog` doesn't matter because we don't use its output. Instead we test
+# the post-processing of the `ann-merge1{a,b}.cgout` test files.
+prog: ../../tests/true
+vgopts: --cachegrind-out-file=cachegrind.out
+post: python ../../cachegrind/cg_merge ann-merge1a.cgout ann-merge1b.cgout > ann-merge1c.cgout && python ../../cachegrind/cg_annotate ann-merge1c.cgout
+cleanup: rm ann-merge1c.cgout
+
--- a/cachegrind/tests/ann-merge1a.cgout
+++ b/cachegrind/tests/ann-merge1a.cgout
@ -0,0 +1,19 @@
+desc: Description 1a
+desc: Description 1b
+cmd: Command 1
+events: A B C
+
+fl=ann-merge-x.rs
+fn=x1
+1 10 20 30
+2 10 20 30
+
+fn=x2
+4 10 5 0
+
+fl=ann-merge-y.rs
+fn=y1
+1 8 9 10
+2 8 9 10
+
+summary: 46 63 80
--- a/cachegrind/tests/ann-merge1b.cgout
+++ b/cachegrind/tests/ann-merge1b.cgout
@ -0,0 +1,14 @@
+desc: Description 2a
+desc: Description 2b
+cmd: Command 2
+events: A B C
+
+fl=ann-merge-x.rs
+fn=x1
+1 10 20 30
+3 10 20 30
+
+fn=x3
+5 20 10 5
+
+summary: 40 50 65
--- a/configure.ac
+++ b/configure.ac
@ -5406,6 +5406,7 @@ AC_CONFIG_FILES([
   cachegrind/tests/x86/Makefile
   cachegrind/cg_annotate
   cachegrind/cg_diff
+   cachegrind/cg_merge
   callgrind/Makefile
   callgrind/callgrind_annotate
   callgrind/callgrind_control