From 72a784f3b192a4cc4d9f8a55e7cab80dbbeb1aae Mon Sep 17 00:00:00 2001 From: Julian Seward Date: Fri, 22 Mar 2002 01:27:54 +0000 Subject: [PATCH] Initial revision git-svn-id: svn://svn.valgrind.org/valgrind/trunk@2 --- ACKNOWLEDGEMENTS | 26 + AUTHORS | 0 COPYING | 340 ++ ChangeLog | 0 INSTALL | 182 + Makefile.am | 80 + Makefile.in | 584 +++ NEWS | 0 PATCHES_APPLIED | 392 ++ README | 106 + README_KDE3_FOLKS | 101 + README_MISSING_SYSCALL_OR_IOCTL | 152 + TODO | 34 + acconfig.h | 5 + aclocal.m4 | 127 + addrcheck/Makefile.am | 80 + cachegrind/Makefile.am | 80 + cachegrind/docs/Makefile.am | 5 + cachegrind/docs/index.html | 26 + cachegrind/docs/manual.html | 1753 ++++++++ cachegrind/docs/nav.html | 68 + cachegrind/docs/techdocs.html | 2116 ++++++++++ config.guess | 1320 +++++++ config.h.in | 101 + config.sub | 1443 +++++++ configure | 2443 ++++++++++++ configure.in | 138 + corecheck/Makefile.am | 80 + coregrind/Makefile.am | 80 + coregrind/arch/x86-linux/vg_syscall.S | 179 + coregrind/demangle/Makefile.am | 23 + coregrind/demangle/ansidecl.h | 295 ++ coregrind/demangle/cp-demangle.c | 4170 ++++++++++++++++++++ coregrind/demangle/cplus-dem.c | 5264 +++++++++++++++++++++++++ coregrind/demangle/demangle.h | 177 + coregrind/demangle/dyn-string.c | 439 +++ coregrind/demangle/dyn-string.h | 96 + coregrind/demangle/safe-ctype.c | 163 + coregrind/demangle/safe-ctype.h | 103 + coregrind/docs/Makefile.am | 5 + coregrind/docs/index.html | 26 + coregrind/docs/manual.html | 1753 ++++++++ coregrind/docs/nav.html | 68 + coregrind/docs/techdocs.html | 2116 ++++++++++ coregrind/valgrind.in | 167 + coregrind/vg_clientmalloc.c | 937 +++++ coregrind/vg_constants.h | 105 + coregrind/vg_demangle.c | 70 + coregrind/vg_dispatch.S | 379 ++ coregrind/vg_errcontext.c | 1070 +++++ coregrind/vg_execontext.c | 259 ++ coregrind/vg_from_ucode.c | 2682 +++++++++++++ coregrind/vg_helpers.S | 625 +++ coregrind/vg_include.h | 1452 +++++++ coregrind/vg_kerneliface.h | 165 + coregrind/vg_main.c | 1440 
+++++++ coregrind/vg_malloc2.c | 1298 ++++++ coregrind/vg_memory.c | 2300 +++++++++++ coregrind/vg_messages.c | 105 + coregrind/vg_mylibc.c | 929 +++++ coregrind/vg_procselfmaps.c | 201 + coregrind/vg_signals.c | 823 ++++ coregrind/vg_startup.S | 221 ++ coregrind/vg_symtab2.c | 1435 +++++++ coregrind/vg_syscall.S | 179 + coregrind/vg_to_ucode.c | 4309 ++++++++++++++++++++ coregrind/vg_translate.c | 3096 +++++++++++++++ coregrind/vg_transtab.c | 693 ++++ coregrind/vg_unsafe.h | 86 + coregrind/vg_valgrinq_dummy.c | 44 + demangle/Makefile.am | 23 + demangle/Makefile.in | 291 ++ demangle/ansidecl.h | 295 ++ demangle/cp-demangle.c | 4170 ++++++++++++++++++++ demangle/cplus-dem.c | 5264 +++++++++++++++++++++++++ demangle/demangle.h | 177 + demangle/dyn-string.c | 439 +++ demangle/dyn-string.h | 96 + demangle/safe-ctype.c | 163 + demangle/safe-ctype.h | 103 + docs/Makefile.am | 5 + docs/Makefile.in | 200 + docs/index.html | 26 + docs/manual.html | 1753 ++++++++ docs/nav.html | 68 + docs/techdocs.html | 2116 ++++++++++ helgrind/Makefile.am | 80 + include/valgrind.h | 156 + include/vg_profile.c | 112 + install-sh | 251 ++ lackey/Makefile.am | 80 + linux22.supp | 270 ++ linux24.supp | 296 ++ ltmain.sh | 5029 +++++++++++++++++++++++ memcheck/Makefile.am | 80 + memcheck/docs/Makefile.am | 5 + memcheck/docs/index.html | 26 + memcheck/docs/manual.html | 1753 ++++++++ memcheck/docs/nav.html | 68 + memcheck/docs/techdocs.html | 2116 ++++++++++ missing | 190 + mkinstalldirs | 40 + none/Makefile.am | 80 + stamp-h.in | 1 + tests/Makefile.am | 26 + tests/Makefile.in | 175 + tests/badaddrvalue.c | 12 + tests/badjump.c | 6 + tests/badloop.c | 15 + tests/bitfield1.c | 18 + tests/blocked_syscall.c | 32 + tests/clientperm.c | 39 + tests/clientstackperm.c | 36 + tests/coolo_sigaction.cpp | 54 + tests/coolo_strlen.c | 13 + tests/coolo_strlen.s | 90 + tests/cpuid_c.c | 21 + tests/cpuid_s.s | 77 + tests/doublefree.c | 12 + tests/errs1.c | 17 + tests/exitprog.c | 15 + tests/floored.c | 17 + 
tests/fprw.c | 26 + tests/fwrite.c | 9 + tests/inline.c | 20 + tests/inlineh.c | 23 + tests/inlineh.h | 6 + tests/malloc1.c | 24 + tests/malloc2.c | 50 + tests/manuel1.c | 9 + tests/manuel2.c | 9 + tests/manuel3.c | 13 + tests/memalign_test.c | 19 + tests/memcmptest.c | 19 + tests/memtests.cpp | 29 + tests/mmaptest.c | 15 + tests/oneparam.c | 10 + tests/pushfpopf.s | 38 + tests/pushfpopf_c.c | 14 + tests/rcl_assert.s | 8 + tests/rcrl.c | 12 + tests/readline1.c | 25 + tests/realloc1.c | 14 + tests/sha1.test.c | 250 ++ tests/shortpush.c | 15 + tests/shorts.c | 36 + tests/signal1.c | 22 + tests/signal2.c | 18 + tests/signal3.c | 33 + tests/smc1.c | 72 + tests/suppfree.c | 30 + tests/tronical.c | 37 + tests/tronical.s | 58 + tests/twoparams.c | 7 + tests/twoparams.s | 17 + valgrind.h | 156 + valgrind.in | 167 + vg_clientmalloc.c | 937 +++++ vg_clientperms.c | 364 ++ vg_constants.h | 105 + vg_demangle.c | 70 + vg_dispatch.S | 379 ++ vg_errcontext.c | 1070 +++++ vg_execontext.c | 259 ++ vg_from_ucode.c | 2682 +++++++++++++ vg_helpers.S | 625 +++ vg_include.h | 1452 +++++++ vg_kerneliface.h | 165 + vg_main.c | 1440 +++++++ vg_malloc2.c | 1298 ++++++ vg_memory.c | 2300 +++++++++++ vg_messages.c | 105 + vg_mylibc.c | 929 +++++ vg_procselfmaps.c | 201 + vg_profile.c | 112 + vg_signals.c | 823 ++++ vg_startup.S | 221 ++ vg_symtab2.c | 1435 +++++++ vg_syscall.S | 179 + vg_syscall_mem.c | 2560 ++++++++++++ vg_to_ucode.c | 4309 ++++++++++++++++++++ vg_translate.c | 3096 +++++++++++++++ vg_transtab.c | 693 ++++ vg_unsafe.h | 86 + vg_valgrinq_dummy.c | 44 + vg_version.h | 1 + vg_vtagops.c | 96 + 187 files changed, 107652 insertions(+) create mode 100644 ACKNOWLEDGEMENTS create mode 100644 AUTHORS create mode 100644 COPYING create mode 100644 ChangeLog create mode 100644 INSTALL create mode 100644 Makefile.am create mode 100644 Makefile.in create mode 100644 NEWS create mode 100644 PATCHES_APPLIED create mode 100644 README create mode 100644 README_KDE3_FOLKS create mode 100644 
README_MISSING_SYSCALL_OR_IOCTL create mode 100644 TODO create mode 100644 acconfig.h create mode 100644 aclocal.m4 create mode 100644 addrcheck/Makefile.am create mode 100644 cachegrind/Makefile.am create mode 100644 cachegrind/docs/Makefile.am create mode 100644 cachegrind/docs/index.html create mode 100644 cachegrind/docs/manual.html create mode 100644 cachegrind/docs/nav.html create mode 100644 cachegrind/docs/techdocs.html create mode 100755 config.guess create mode 100644 config.h.in create mode 100755 config.sub create mode 100755 configure create mode 100644 configure.in create mode 100644 corecheck/Makefile.am create mode 100644 coregrind/Makefile.am create mode 100644 coregrind/arch/x86-linux/vg_syscall.S create mode 100644 coregrind/demangle/Makefile.am create mode 100644 coregrind/demangle/ansidecl.h create mode 100644 coregrind/demangle/cp-demangle.c create mode 100644 coregrind/demangle/cplus-dem.c create mode 100644 coregrind/demangle/demangle.h create mode 100644 coregrind/demangle/dyn-string.c create mode 100644 coregrind/demangle/dyn-string.h create mode 100644 coregrind/demangle/safe-ctype.c create mode 100644 coregrind/demangle/safe-ctype.h create mode 100644 coregrind/docs/Makefile.am create mode 100644 coregrind/docs/index.html create mode 100644 coregrind/docs/manual.html create mode 100644 coregrind/docs/nav.html create mode 100644 coregrind/docs/techdocs.html create mode 100755 coregrind/valgrind.in create mode 100644 coregrind/vg_clientmalloc.c create mode 100644 coregrind/vg_constants.h create mode 100644 coregrind/vg_demangle.c create mode 100644 coregrind/vg_dispatch.S create mode 100644 coregrind/vg_errcontext.c create mode 100644 coregrind/vg_execontext.c create mode 100644 coregrind/vg_from_ucode.c create mode 100644 coregrind/vg_helpers.S create mode 100644 coregrind/vg_include.h create mode 100644 coregrind/vg_kerneliface.h create mode 100644 coregrind/vg_main.c create mode 100644 coregrind/vg_malloc2.c create mode 100644 
coregrind/vg_memory.c create mode 100644 coregrind/vg_messages.c create mode 100644 coregrind/vg_mylibc.c create mode 100644 coregrind/vg_procselfmaps.c create mode 100644 coregrind/vg_signals.c create mode 100644 coregrind/vg_startup.S create mode 100644 coregrind/vg_symtab2.c create mode 100644 coregrind/vg_syscall.S create mode 100644 coregrind/vg_to_ucode.c create mode 100644 coregrind/vg_translate.c create mode 100644 coregrind/vg_transtab.c create mode 100644 coregrind/vg_unsafe.h create mode 100644 coregrind/vg_valgrinq_dummy.c create mode 100644 demangle/Makefile.am create mode 100644 demangle/Makefile.in create mode 100644 demangle/ansidecl.h create mode 100644 demangle/cp-demangle.c create mode 100644 demangle/cplus-dem.c create mode 100644 demangle/demangle.h create mode 100644 demangle/dyn-string.c create mode 100644 demangle/dyn-string.h create mode 100644 demangle/safe-ctype.c create mode 100644 demangle/safe-ctype.h create mode 100644 docs/Makefile.am create mode 100644 docs/Makefile.in create mode 100644 docs/index.html create mode 100644 docs/manual.html create mode 100644 docs/nav.html create mode 100644 docs/techdocs.html create mode 100644 helgrind/Makefile.am create mode 100644 include/valgrind.h create mode 100644 include/vg_profile.c create mode 100755 install-sh create mode 100644 lackey/Makefile.am create mode 100644 linux22.supp create mode 100644 linux24.supp create mode 100644 ltmain.sh create mode 100644 memcheck/Makefile.am create mode 100644 memcheck/docs/Makefile.am create mode 100644 memcheck/docs/index.html create mode 100644 memcheck/docs/manual.html create mode 100644 memcheck/docs/nav.html create mode 100644 memcheck/docs/techdocs.html create mode 100755 missing create mode 100755 mkinstalldirs create mode 100644 none/Makefile.am create mode 100644 stamp-h.in create mode 100644 tests/Makefile.am create mode 100644 tests/Makefile.in create mode 100644 tests/badaddrvalue.c create mode 100644 tests/badjump.c create mode 100644 
tests/badloop.c create mode 100644 tests/bitfield1.c create mode 100644 tests/blocked_syscall.c create mode 100644 tests/clientperm.c create mode 100644 tests/clientstackperm.c create mode 100644 tests/coolo_sigaction.cpp create mode 100644 tests/coolo_strlen.c create mode 100644 tests/coolo_strlen.s create mode 100644 tests/cpuid_c.c create mode 100644 tests/cpuid_s.s create mode 100644 tests/doublefree.c create mode 100644 tests/errs1.c create mode 100644 tests/exitprog.c create mode 100644 tests/floored.c create mode 100644 tests/fprw.c create mode 100644 tests/fwrite.c create mode 100644 tests/inline.c create mode 100644 tests/inlineh.c create mode 100644 tests/inlineh.h create mode 100644 tests/malloc1.c create mode 100644 tests/malloc2.c create mode 100644 tests/manuel1.c create mode 100644 tests/manuel2.c create mode 100644 tests/manuel3.c create mode 100644 tests/memalign_test.c create mode 100644 tests/memcmptest.c create mode 100644 tests/memtests.cpp create mode 100644 tests/mmaptest.c create mode 100644 tests/oneparam.c create mode 100644 tests/pushfpopf.s create mode 100644 tests/pushfpopf_c.c create mode 100644 tests/rcl_assert.s create mode 100644 tests/rcrl.c create mode 100644 tests/readline1.c create mode 100644 tests/realloc1.c create mode 100644 tests/sha1.test.c create mode 100644 tests/shortpush.c create mode 100644 tests/shorts.c create mode 100644 tests/signal1.c create mode 100644 tests/signal2.c create mode 100644 tests/signal3.c create mode 100644 tests/smc1.c create mode 100644 tests/suppfree.c create mode 100644 tests/tronical.c create mode 100644 tests/tronical.s create mode 100644 tests/twoparams.c create mode 100644 tests/twoparams.s create mode 100644 valgrind.h create mode 100755 valgrind.in create mode 100644 vg_clientmalloc.c create mode 100644 vg_clientperms.c create mode 100644 vg_constants.h create mode 100644 vg_demangle.c create mode 100644 vg_dispatch.S create mode 100644 vg_errcontext.c create mode 100644 vg_execontext.c 
create mode 100644 vg_from_ucode.c create mode 100644 vg_helpers.S create mode 100644 vg_include.h create mode 100644 vg_kerneliface.h create mode 100644 vg_main.c create mode 100644 vg_malloc2.c create mode 100644 vg_memory.c create mode 100644 vg_messages.c create mode 100644 vg_mylibc.c create mode 100644 vg_procselfmaps.c create mode 100644 vg_profile.c create mode 100644 vg_signals.c create mode 100644 vg_startup.S create mode 100644 vg_symtab2.c create mode 100644 vg_syscall.S create mode 100644 vg_syscall_mem.c create mode 100644 vg_to_ucode.c create mode 100644 vg_translate.c create mode 100644 vg_transtab.c create mode 100644 vg_unsafe.h create mode 100644 vg_valgrinq_dummy.c create mode 100644 vg_version.h create mode 100644 vg_vtagops.c diff --git a/ACKNOWLEDGEMENTS b/ACKNOWLEDGEMENTS new file mode 100644 index 000000000..36317308e --- /dev/null +++ b/ACKNOWLEDGEMENTS @@ -0,0 +1,26 @@ + +The following people contributed in some way to valgrind, during its +long journey over the past two years or so. Here's a list. If I have +forgotten you, I do apologise; let me know (jseward@acm.org) and I'll +fix it. + +Donna Robinson + for many reasons, including endless encouragement, and + persuading me I wasn't crazy to try doing this + +Rob Noble + for early encouragement, support, suggestions, and asking of + many questions + +Reuben Thomas + for discussions about value tag operations, and making me + laugh + +Various KDE folks, for suffering recent versions of valgrind, + providing many patches, questions and helpful feedback + Dirk Mueller + Stephan Kulow + Michael Matz + Simon Hausmann + David Faure + Ellis Whitehead diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 000000000..e69de29bb diff --git a/COPYING b/COPYING new file mode 100644 index 000000000..d60c31a97 --- /dev/null +++ b/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. 
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. 
+ + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. 
You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. 
(This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. 
Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 000000000..e69de29bb diff --git a/INSTALL b/INSTALL new file mode 100644 index 000000000..b42a17ac4 --- /dev/null +++ b/INSTALL @@ -0,0 +1,182 @@ +Basic Installation +================== + + These are generic installation instructions. + + The `configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. It uses +those values to create a `Makefile' in each directory of the package. +It may also create one or more `.h' files containing system-dependent +definitions. Finally, it creates a shell script `config.status' that +you can run in the future to recreate the current configuration, a file +`config.cache' that saves the results of its tests to speed up +reconfiguring, and a file `config.log' containing compiler output +(useful mainly for debugging `configure'). 
+ + If you need to do unusual things to compile the package, please try +to figure out how `configure' could check whether to do them, and mail +diffs or instructions to the address given in the `README' so they can +be considered for the next release. If at some point `config.cache' +contains results you don't want to keep, you may remove or edit it. + + The file `configure.in' is used to create `configure' by a program +called `autoconf'. You only need `configure.in' if you want to change +it or regenerate `configure' using a newer version of `autoconf'. + +The simplest way to compile this package is: + + 1. `cd' to the directory containing the package's source code and type + `./configure' to configure the package for your system. If you're + using `csh' on an old version of System V, you might need to type + `sh ./configure' instead to prevent `csh' from trying to execute + `configure' itself. + + Running `configure' takes awhile. While running, it prints some + messages telling which features it is checking for. + + 2. Type `make' to compile the package. + + 3. Optionally, type `make check' to run any self-tests that come with + the package. + + 4. Type `make install' to install the programs and any data files and + documentation. + + 5. You can remove the program binaries and object files from the + source code directory by typing `make clean'. To also remove the + files that `configure' created (so you can compile the package for + a different kind of computer), type `make distclean'. There is + also a `make maintainer-clean' target, but that is intended mainly + for the package's developers. If you use it, you may have to get + all sorts of other programs in order to regenerate files that came + with the distribution. + +Compilers and Options +===================== + + Some systems require unusual options for compilation or linking that +the `configure' script does not know about. 
You can give `configure' +initial values for variables by setting them in the environment. Using +a Bourne-compatible shell, you can do that on the command line like +this: + CC=c89 CFLAGS=-O2 LIBS=-lposix ./configure + +Or on systems that have the `env' program, you can do it like this: + env CPPFLAGS=-I/usr/local/include LDFLAGS=-s ./configure + +Compiling For Multiple Architectures +==================================== + + You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. To do this, you must use a version of `make' that +supports the `VPATH' variable, such as GNU `make'. `cd' to the +directory where you want the object files and executables to go and run +the `configure' script. `configure' automatically checks for the +source code in the directory that `configure' is in and in `..'. + + If you have to use a `make' that does not supports the `VPATH' +variable, you have to compile the package for one architecture at a time +in the source code directory. After you have installed the package for +one architecture, use `make distclean' before reconfiguring for another +architecture. + +Installation Names +================== + + By default, `make install' will install the package's files in +`/usr/local/bin', `/usr/local/man', etc. You can specify an +installation prefix other than `/usr/local' by giving `configure' the +option `--prefix=PATH'. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. If you +give `configure' the option `--exec-prefix=PATH', the package will use +PATH as the prefix for installing programs and libraries. +Documentation and other data files will still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like `--bindir=PATH' to specify different values for particular +kinds of files. 
Run `configure --help' for a list of the directories +you can set and what kinds of files go in them. + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving `configure' the +option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. + +Optional Features +================= + + Some packages pay attention to `--enable-FEATURE' options to +`configure', where FEATURE indicates an optional part of the package. +They may also pay attention to `--with-PACKAGE' options, where PACKAGE +is something like `gnu-as' or `x' (for the X Window System). The +`README' should mention any `--enable-' and `--with-' options that the +package recognizes. + + For packages that use the X Window System, `configure' can usually +find the X include and library files automatically, but if it doesn't, +you can use the `configure' options `--x-includes=DIR' and +`--x-libraries=DIR' to specify their locations. + +Specifying the System Type +========================== + + There may be some features `configure' can not figure out +automatically, but needs to determine by the type of host the package +will run on. Usually `configure' can figure that out, but if it prints +a message saying it can not guess the host type, give it the +`--host=TYPE' option. TYPE can either be a short name for the system +type, such as `sun4', or a canonical name with three fields: + CPU-COMPANY-SYSTEM + +See the file `config.sub' for the possible values of each field. If +`config.sub' isn't included in this package, then this package doesn't +need to know the host type. + + If you are building compiler tools for cross-compiling, you can also +use the `--target=TYPE' option to select the type of system they will +produce code for and the `--build=TYPE' option to select the type of +system on which you are compiling the package. 
+ +Sharing Defaults +================ + + If you want to set default values for `configure' scripts to share, +you can create a site shell script called `config.site' that gives +default values for variables like `CC', `cache_file', and `prefix'. +`configure' looks for `PREFIX/share/config.site' if it exists, then +`PREFIX/etc/config.site' if it exists. Or, you can set the +`CONFIG_SITE' environment variable to the location of the site script. +A warning: not all `configure' scripts look for a site script. + +Operation Controls +================== + + `configure' recognizes the following options to control how it +operates. + +`--cache-file=FILE' + Use and save the results of the tests in FILE instead of + `./config.cache'. Set FILE to `/dev/null' to disable caching, for + debugging `configure'. + +`--help' + Print a summary of the options to `configure', and exit. + +`--quiet' +`--silent' +`-q' + Do not print messages saying which checks are being made. To + suppress all normal output, redirect it to `/dev/null' (any error + messages will still be shown). + +`--srcdir=DIR' + Look for the package's source code in directory DIR. Usually + `configure' can determine that directory automatically. + +`--version' + Print the version of Autoconf used to generate the `configure' + script, and exit. + +`configure' also accepts some other, not widely useful, options. diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 000000000..00387927b --- /dev/null +++ b/Makefile.am @@ -0,0 +1,80 @@ +SUBDIRS = demangle . 
docs tests + +valdir = $(libdir)/valgrind + +LDFLAGS = -Wl,-z -Wl,initfirst + +INCLUDES += -I$(srcdir)/demangle + +bin_SCRIPTS = valgrind + +val_DATA = linux22.supp linux24.supp + +EXTRA_DIST = $(val_DATA) \ + PATCHES_APPLIED ACKNOWLEDGEMENTS \ + README_KDE3_FOLKS \ + README_MISSING_SYSCALL_OR_IOCTL TODO + +val_PROGRAMS = valgrind.so valgrinq.so + +valgrinq_so_SOURCES = vg_valgrinq_dummy.c + +valgrind_so_SOURCES = \ + vg_clientmalloc.c \ + vg_clientperms.c \ + vg_demangle.c \ + vg_dispatch.S \ + vg_errcontext.c \ + vg_execontext.c \ + vg_from_ucode.c \ + vg_helpers.S \ + vg_main.c \ + vg_malloc2.c \ + vg_memory.c \ + vg_messages.c \ + vg_mylibc.c \ + vg_procselfmaps.c \ + vg_profile.c \ + vg_signals.c \ + vg_startup.S \ + vg_symtab2.c \ + vg_syscall_mem.c \ + vg_syscall.S \ + vg_to_ucode.c \ + vg_translate.c \ + vg_transtab.c \ + vg_valgrinq_dummy.c \ + vg_vtagops.c + +valgrind_so_LDADD = \ + demangle/cp-demangle.o \ + demangle/cplus-dem.o \ + demangle/dyn-string.o \ + demangle/safe-ctype.o + +include_HEADERS = valgrind.h + +noinst_HEADERS = \ + vg_kerneliface.h \ + vg_include.h \ + vg_version.h \ + vg_constants.h \ + vg_unsafe.h + + +install-data-hook: + cd ${valdir} && rm -f default.supp && $(LN_S) $(DEFAULT_SUPP) default.supp + +vg_memory.o: + $(COMPILE) -O2 -mpreferred-stack-boundary=2 -c $< + +vg_clientmalloc.o: + $(COMPILE) -fno-omit-frame-pointer -c $< + + +valgrind.so: $(valgrind_so_OBJECTS) + $(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \ + $(valgrind_so_OBJECTS) $(valgrind_so_LDADD) + +valgrinq.so: $(valgrinq_so_OBJECTS) + $(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS) diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 000000000..793ffe1da --- /dev/null +++ b/Makefile.in @@ -0,0 +1,584 @@ +# Makefile.in generated automatically by automake 1.4-p4 from Makefile.am + +# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. 
+# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + + +SHELL = @SHELL@ + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ + +bindir = @bindir@ +sbindir = @sbindir@ +libexecdir = @libexecdir@ +datadir = @datadir@ +sysconfdir = @sysconfdir@ +sharedstatedir = @sharedstatedir@ +localstatedir = @localstatedir@ +libdir = @libdir@ +infodir = @infodir@ +mandir = @mandir@ +includedir = @includedir@ +oldincludedir = /usr/include + +DESTDIR = + +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ + +top_builddir = . + +ACLOCAL = @ACLOCAL@ +AUTOCONF = @AUTOCONF@ +AUTOMAKE = @AUTOMAKE@ +AUTOHEADER = @AUTOHEADER@ + +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS) +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +transform = @program_transform_name@ + +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +host_alias = @host_alias@ +host_triplet = @host@ +CC = @CC@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +DEFAULT_SUPP = @DEFAULT_SUPP@ +LN_S = @LN_S@ +MAKEINFO = @MAKEINFO@ +PACKAGE = @PACKAGE@ +RANLIB = @RANLIB@ +VERSION = @VERSION@ + +SUBDIRS = demangle . 
docs tests + +valdir = $(libdir)/valgrind + +LDFLAGS = -Wl,-z -Wl,initfirst + +INCLUDES = -I$(srcdir)/demangle + +bin_SCRIPTS = valgrind + +val_DATA = linux22.supp linux24.supp + +EXTRA_DIST = $(val_DATA) PATCHES_APPLIED ACKNOWLEDGEMENTS README_KDE3_FOLKS README_MISSING_SYSCALL_OR_IOCTL TODO + + +val_PROGRAMS = valgrind.so valgrinq.so + +valgrinq_so_SOURCES = vg_valgrinq_dummy.c + +valgrind_so_SOURCES = vg_clientmalloc.c vg_clientperms.c vg_demangle.c vg_dispatch.S vg_errcontext.c vg_execontext.c vg_from_ucode.c vg_helpers.S vg_main.c vg_malloc2.c vg_memory.c vg_messages.c vg_mylibc.c vg_procselfmaps.c vg_profile.c vg_signals.c vg_startup.S vg_symtab2.c vg_syscall_mem.c vg_syscall.S vg_to_ucode.c vg_translate.c vg_transtab.c vg_valgrinq_dummy.c vg_vtagops.c + + +valgrind_so_LDADD = demangle/cp-demangle.o demangle/cplus-dem.o demangle/dyn-string.o demangle/safe-ctype.o + + +include_HEADERS = valgrind.h + +noinst_HEADERS = vg_kerneliface.h vg_include.h vg_version.h vg_constants.h vg_unsafe.h + +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = config.h +CONFIG_CLEAN_FILES = valgrind +PROGRAMS = $(val_PROGRAMS) + + +DEFS = @DEFS@ -I. -I$(srcdir) -I. 
+CPPFLAGS = @CPPFLAGS@ +LIBS = @LIBS@ +valgrind_so_OBJECTS = vg_clientmalloc.o vg_clientperms.o vg_demangle.o \ +vg_dispatch.o vg_errcontext.o vg_execontext.o vg_from_ucode.o \ +vg_helpers.o vg_main.o vg_malloc2.o vg_memory.o vg_messages.o \ +vg_mylibc.o vg_procselfmaps.o vg_profile.o vg_signals.o vg_startup.o \ +vg_symtab2.o vg_syscall_mem.o vg_syscall.o vg_to_ucode.o vg_translate.o \ +vg_transtab.o vg_valgrinq_dummy.o vg_vtagops.o +valgrind_so_DEPENDENCIES = demangle/cp-demangle.o demangle/cplus-dem.o \ +demangle/dyn-string.o demangle/safe-ctype.o +valgrind_so_LDFLAGS = +valgrinq_so_OBJECTS = vg_valgrinq_dummy.o +valgrinq_so_LDADD = $(LDADD) +valgrinq_so_DEPENDENCIES = +valgrinq_so_LDFLAGS = +SCRIPTS = $(bin_SCRIPTS) + +COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ +DATA = $(val_DATA) + +HEADERS = $(include_HEADERS) $(noinst_HEADERS) + +DIST_COMMON = README ./stamp-h.in AUTHORS COPYING ChangeLog INSTALL \ +Makefile.am Makefile.in NEWS TODO acconfig.h aclocal.m4 config.guess \ +config.h.in config.sub configure configure.in install-sh ltmain.sh \ +missing mkinstalldirs valgrind.in + + +DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) + +TAR = tar +GZIP_ENV = --best +SOURCES = $(valgrind_so_SOURCES) $(valgrinq_so_SOURCES) +OBJECTS = $(valgrind_so_OBJECTS) $(valgrinq_so_OBJECTS) + +all: all-redirect +.SUFFIXES: +.SUFFIXES: .S .c .o .s +$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) + cd $(top_srcdir) && $(AUTOMAKE) --gnu --include-deps Makefile + +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) \ + && CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status + +$(ACLOCAL_M4): configure.in + cd $(srcdir) && $(ACLOCAL) + +config.status: $(srcdir)/configure.in $(CONFIG_STATUS_DEPENDENCIES) + $(SHELL) ./config.status --recheck +$(srcdir)/configure: $(srcdir)/configure.in 
$(ACLOCAL_M4) $(CONFIGURE_DEPENDENCIES) + cd $(srcdir) && $(AUTOCONF) + +config.h: stamp-h + @if test ! -f $@; then \ + rm -f stamp-h; \ + $(MAKE) stamp-h; \ + else :; fi +stamp-h: $(srcdir)/config.h.in $(top_builddir)/config.status + cd $(top_builddir) \ + && CONFIG_FILES= CONFIG_HEADERS=config.h \ + $(SHELL) ./config.status + @echo timestamp > stamp-h 2> /dev/null +$(srcdir)/config.h.in: $(srcdir)/stamp-h.in + @if test ! -f $@; then \ + rm -f $(srcdir)/stamp-h.in; \ + $(MAKE) $(srcdir)/stamp-h.in; \ + else :; fi +$(srcdir)/stamp-h.in: $(top_srcdir)/configure.in $(ACLOCAL_M4) acconfig.h + cd $(top_srcdir) && $(AUTOHEADER) + @echo timestamp > $(srcdir)/stamp-h.in 2> /dev/null + +mostlyclean-hdr: + +clean-hdr: + +distclean-hdr: + -rm -f config.h + +maintainer-clean-hdr: +valgrind: $(top_builddir)/config.status valgrind.in + cd $(top_builddir) && CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status + +mostlyclean-valPROGRAMS: + +clean-valPROGRAMS: + -test -z "$(val_PROGRAMS)" || rm -f $(val_PROGRAMS) + +distclean-valPROGRAMS: + +maintainer-clean-valPROGRAMS: + +install-valPROGRAMS: $(val_PROGRAMS) + @$(NORMAL_INSTALL) + $(mkinstalldirs) $(DESTDIR)$(valdir) + @list='$(val_PROGRAMS)'; for p in $$list; do \ + if test -f $$p; then \ + echo " $(INSTALL_PROGRAM) $$p $(DESTDIR)$(valdir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`"; \ + $(INSTALL_PROGRAM) $$p $(DESTDIR)$(valdir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`; \ + else :; fi; \ + done + +uninstall-valPROGRAMS: + @$(NORMAL_UNINSTALL) + list='$(val_PROGRAMS)'; for p in $$list; do \ + rm -f $(DESTDIR)$(valdir)/`echo $$p|sed 's/$(EXEEXT)$$//'|sed '$(transform)'|sed 's/$$/$(EXEEXT)/'`; \ + done + +.c.o: + $(COMPILE) -c $< + +.s.o: + $(COMPILE) -c $< + +.S.o: + $(COMPILE) -c $< + +mostlyclean-compile: + -rm -f *.o core *.core + +clean-compile: + +distclean-compile: + -rm -f *.tab.c + +maintainer-clean-compile: + +install-binSCRIPTS: $(bin_SCRIPTS) + 
@$(NORMAL_INSTALL) + $(mkinstalldirs) $(DESTDIR)$(bindir) + @list='$(bin_SCRIPTS)'; for p in $$list; do \ + if test -f $$p; then \ + echo " $(INSTALL_SCRIPT) $$p $(DESTDIR)$(bindir)/`echo $$p|sed '$(transform)'`"; \ + $(INSTALL_SCRIPT) $$p $(DESTDIR)$(bindir)/`echo $$p|sed '$(transform)'`; \ + else if test -f $(srcdir)/$$p; then \ + echo " $(INSTALL_SCRIPT) $(srcdir)/$$p $(DESTDIR)$(bindir)/`echo $$p|sed '$(transform)'`"; \ + $(INSTALL_SCRIPT) $(srcdir)/$$p $(DESTDIR)$(bindir)/`echo $$p|sed '$(transform)'`; \ + else :; fi; fi; \ + done + +uninstall-binSCRIPTS: + @$(NORMAL_UNINSTALL) + list='$(bin_SCRIPTS)'; for p in $$list; do \ + rm -f $(DESTDIR)$(bindir)/`echo $$p|sed '$(transform)'`; \ + done + +install-valDATA: $(val_DATA) + @$(NORMAL_INSTALL) + $(mkinstalldirs) $(DESTDIR)$(valdir) + @list='$(val_DATA)'; for p in $$list; do \ + if test -f $(srcdir)/$$p; then \ + echo " $(INSTALL_DATA) $(srcdir)/$$p $(DESTDIR)$(valdir)/$$p"; \ + $(INSTALL_DATA) $(srcdir)/$$p $(DESTDIR)$(valdir)/$$p; \ + else if test -f $$p; then \ + echo " $(INSTALL_DATA) $$p $(DESTDIR)$(valdir)/$$p"; \ + $(INSTALL_DATA) $$p $(DESTDIR)$(valdir)/$$p; \ + fi; fi; \ + done + +uninstall-valDATA: + @$(NORMAL_UNINSTALL) + list='$(val_DATA)'; for p in $$list; do \ + rm -f $(DESTDIR)$(valdir)/$$p; \ + done + +install-includeHEADERS: $(include_HEADERS) + @$(NORMAL_INSTALL) + $(mkinstalldirs) $(DESTDIR)$(includedir) + @list='$(include_HEADERS)'; for p in $$list; do \ + if test -f "$$p"; then d= ; else d="$(srcdir)/"; fi; \ + echo " $(INSTALL_DATA) $$d$$p $(DESTDIR)$(includedir)/$$p"; \ + $(INSTALL_DATA) $$d$$p $(DESTDIR)$(includedir)/$$p; \ + done + +uninstall-includeHEADERS: + @$(NORMAL_UNINSTALL) + list='$(include_HEADERS)'; for p in $$list; do \ + rm -f $(DESTDIR)$(includedir)/$$p; \ + done + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. 
+# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. + +@SET_MAKE@ + +all-recursive install-data-recursive install-exec-recursive \ +installdirs-recursive install-recursive uninstall-recursive \ +check-recursive installcheck-recursive info-recursive dvi-recursive: + @set fnord $(MAKEFLAGS); amf=$$2; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +mostlyclean-recursive clean-recursive distclean-recursive \ +maintainer-clean-recursive: + @set fnord $(MAKEFLAGS); amf=$$2; \ + dot_seen=no; \ + rev=''; list='$(SUBDIRS)'; for subdir in $$list; do \ + rev="$$subdir $$rev"; \ + test "$$subdir" = "." && dot_seen=yes; \ + done; \ + test "$$dot_seen" = "no" && rev=". $$rev"; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done + +tags: TAGS + +ID: $(HEADERS) $(SOURCES) $(LISP) + list='$(SOURCES) $(HEADERS)'; \ + unique=`for i in $$list; do echo $$i; done | \ + awk ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + here=`pwd` && cd $(srcdir) \ + && mkid -f$$here/ID $$unique $(LISP) + +TAGS: tags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test -f $$subdir/TAGS && tags="$$tags -i $$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS)'; \ + unique=`for i in $$list; do echo $$i; done | \ + awk ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(ETAGS_ARGS)config.h.in$$unique$(LISP)$$tags" \ + || (cd $(srcdir) && etags $(ETAGS_ARGS) $$tags config.h.in $$unique $(LISP) -o $$here/TAGS) + +mostlyclean-tags: + +clean-tags: + +distclean-tags: + -rm -f TAGS ID + +maintainer-clean-tags: + +distdir = $(PACKAGE)-$(VERSION) +top_distdir = $(distdir) + +# This target untars the dist file and tries a VPATH configuration. Then +# it guarantees that the distribution is self-contained by making another +# tarfile. +distcheck: dist + -rm -rf $(distdir) + GZIP=$(GZIP_ENV) $(TAR) zxf $(distdir).tar.gz + mkdir $(distdir)/=build + mkdir $(distdir)/=inst + dc_install_base=`cd $(distdir)/=inst && pwd`; \ + cd $(distdir)/=build \ + && ../configure --srcdir=.. 
--prefix=$$dc_install_base \ + && $(MAKE) $(AM_MAKEFLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) dvi \ + && $(MAKE) $(AM_MAKEFLAGS) check \ + && $(MAKE) $(AM_MAKEFLAGS) install \ + && $(MAKE) $(AM_MAKEFLAGS) installcheck \ + && $(MAKE) $(AM_MAKEFLAGS) dist + -rm -rf $(distdir) + @banner="$(distdir).tar.gz is ready for distribution"; \ + dashes=`echo "$$banner" | sed s/./=/g`; \ + echo "$$dashes"; \ + echo "$$banner"; \ + echo "$$dashes" +dist: distdir + -chmod -R a+r $(distdir) + GZIP=$(GZIP_ENV) $(TAR) chozf $(distdir).tar.gz $(distdir) + -rm -rf $(distdir) +dist-all: distdir + -chmod -R a+r $(distdir) + GZIP=$(GZIP_ENV) $(TAR) chozf $(distdir).tar.gz $(distdir) + -rm -rf $(distdir) +distdir: $(DISTFILES) + -rm -rf $(distdir) + mkdir $(distdir) + -chmod 777 $(distdir) + @for file in $(DISTFILES); do \ + d=$(srcdir); \ + if test -d $$d/$$file; then \ + cp -pr $$d/$$file $(distdir)/$$file; \ + else \ + test -f $(distdir)/$$file \ + || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ + || cp -p $$d/$$file $(distdir)/$$file || :; \ + fi; \ + done + for subdir in $(SUBDIRS); do \ + if test "$$subdir" = .; then :; else \ + test -d $(distdir)/$$subdir \ + || mkdir $(distdir)/$$subdir \ + || exit 1; \ + chmod 777 $(distdir)/$$subdir; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir=../$(distdir) distdir=../$(distdir)/$$subdir distdir) \ + || exit 1; \ + fi; \ + done +vg_clientmalloc.lo vg_clientmalloc.o : vg_clientmalloc.c vg_include.h \ + vg_constants.h vg_kerneliface.h +vg_clientperms.o: vg_clientperms.c vg_include.h vg_constants.h \ + vg_kerneliface.h +vg_demangle.o: vg_demangle.c vg_include.h vg_constants.h \ + vg_kerneliface.h demangle/demangle.h demangle/ansidecl.h +vg_errcontext.o: vg_errcontext.c vg_include.h vg_constants.h \ + vg_kerneliface.h +vg_execontext.o: vg_execontext.c vg_include.h vg_constants.h \ + vg_kerneliface.h +vg_from_ucode.o: vg_from_ucode.c vg_include.h vg_constants.h \ + vg_kerneliface.h +vg_main.o: vg_main.c vg_include.h vg_constants.h 
vg_kerneliface.h \ + vg_version.h +vg_malloc2.o: vg_malloc2.c vg_include.h vg_constants.h vg_kerneliface.h +vg_memory.o: vg_memory.c vg_include.h vg_constants.h vg_kerneliface.h +vg_messages.o: vg_messages.c vg_include.h vg_constants.h \ + vg_kerneliface.h +vg_mylibc.o: vg_mylibc.c vg_include.h vg_constants.h vg_kerneliface.h +vg_procselfmaps.o: vg_procselfmaps.c vg_include.h vg_constants.h \ + vg_kerneliface.h +vg_profile.o: vg_profile.c vg_include.h vg_constants.h vg_kerneliface.h +vg_signals.o: vg_signals.c vg_include.h vg_constants.h vg_kerneliface.h \ + vg_unsafe.h +vg_symtab2.o: vg_symtab2.c vg_include.h vg_constants.h vg_kerneliface.h \ + vg_unsafe.h +vg_syscall_mem.o: vg_syscall_mem.c vg_include.h vg_constants.h \ + vg_kerneliface.h vg_unsafe.h +vg_to_ucode.o: vg_to_ucode.c vg_include.h vg_constants.h \ + vg_kerneliface.h +vg_translate.o: vg_translate.c vg_include.h vg_constants.h \ + vg_kerneliface.h +vg_transtab.o: vg_transtab.c vg_include.h vg_constants.h \ + vg_kerneliface.h +vg_valgrinq_dummy.o: vg_valgrinq_dummy.c +vg_vtagops.o: vg_vtagops.c vg_include.h vg_constants.h vg_kerneliface.h + +info-am: +info: info-recursive +dvi-am: +dvi: dvi-recursive +check-am: all-am +check: check-recursive +installcheck-am: +installcheck: installcheck-recursive +all-recursive-am: config.h + $(MAKE) $(AM_MAKEFLAGS) all-recursive + +install-exec-am: install-binSCRIPTS +install-exec: install-exec-recursive + +install-data-am: install-valPROGRAMS install-valDATA \ + install-includeHEADERS + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-data-hook +install-data: install-data-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am +install: install-recursive +uninstall-am: uninstall-valPROGRAMS uninstall-binSCRIPTS \ + uninstall-valDATA uninstall-includeHEADERS +uninstall: uninstall-recursive +all-am: Makefile $(PROGRAMS) $(SCRIPTS) $(DATA) $(HEADERS) config.h +all-redirect: all-recursive-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) 
AM_INSTALL_PROGRAM_FLAGS=-s install +installdirs: installdirs-recursive +installdirs-am: + $(mkinstalldirs) $(DESTDIR)$(valdir) $(DESTDIR)$(bindir) \ + $(DESTDIR)$(valdir) $(DESTDIR)$(includedir) + + +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + -rm -f config.cache config.log stamp-h stamp-h[0-9]* + +maintainer-clean-generic: +mostlyclean-am: mostlyclean-hdr mostlyclean-valPROGRAMS \ + mostlyclean-compile mostlyclean-tags \ + mostlyclean-generic + +mostlyclean: mostlyclean-recursive + +clean-am: clean-hdr clean-valPROGRAMS clean-compile clean-tags \ + clean-generic mostlyclean-am + +clean: clean-recursive + +distclean-am: distclean-hdr distclean-valPROGRAMS distclean-compile \ + distclean-tags distclean-generic clean-am + +distclean: distclean-recursive + -rm -f config.status + +maintainer-clean-am: maintainer-clean-hdr maintainer-clean-valPROGRAMS \ + maintainer-clean-compile maintainer-clean-tags \ + maintainer-clean-generic distclean-am + @echo "This command is intended for maintainers to use;" + @echo "it deletes files that may require special tools to rebuild." 
+ +maintainer-clean: maintainer-clean-recursive + -rm -f config.status + +.PHONY: mostlyclean-hdr distclean-hdr clean-hdr maintainer-clean-hdr \ +mostlyclean-valPROGRAMS distclean-valPROGRAMS clean-valPROGRAMS \ +maintainer-clean-valPROGRAMS uninstall-valPROGRAMS install-valPROGRAMS \ +mostlyclean-compile distclean-compile clean-compile \ +maintainer-clean-compile uninstall-binSCRIPTS install-binSCRIPTS \ +uninstall-valDATA install-valDATA uninstall-includeHEADERS \ +install-includeHEADERS install-data-recursive uninstall-data-recursive \ +install-exec-recursive uninstall-exec-recursive installdirs-recursive \ +uninstalldirs-recursive all-recursive check-recursive \ +installcheck-recursive info-recursive dvi-recursive \ +mostlyclean-recursive distclean-recursive clean-recursive \ +maintainer-clean-recursive tags tags-recursive mostlyclean-tags \ +distclean-tags clean-tags maintainer-clean-tags distdir info-am info \ +dvi-am dvi check check-am installcheck-am installcheck all-recursive-am \ +install-exec-am install-exec install-data-am install-data install-am \ +install uninstall-am uninstall all-redirect all-am all installdirs-am \ +installdirs mostlyclean-generic distclean-generic clean-generic \ +maintainer-clean-generic clean mostlyclean distclean maintainer-clean + + +install-data-hook: + cd ${valdir} && rm -f default.supp && $(LN_S) $(DEFAULT_SUPP) default.supp + +vg_memory.o: + $(COMPILE) -O2 -mpreferred-stack-boundary=2 -c $< + +vg_clientmalloc.o: + $(COMPILE) -fno-omit-frame-pointer -c $< + +valgrind.so: $(valgrind_so_OBJECTS) + $(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \ + $(valgrind_so_OBJECTS) $(valgrind_so_LDADD) + +valgrinq.so: $(valgrinq_so_OBJECTS) + $(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS) + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/NEWS b/NEWS new file mode 100644 index 000000000..e69de29bb diff --git a/PATCHES_APPLIED b/PATCHES_APPLIED new file mode 100644 index 000000000..888baffbe --- /dev/null +++ b/PATCHES_APPLIED @@ -0,0 +1,392 @@ + +(Started this file at 14 Feb 02, 18:18 GMT) + +I decided it would be clearest for everybody (including me!) if I make +a record of patches that I apply. The starting point for all these is +valgrind-20020214; patches applied to it before that snapshot was +released are not recorded anywhere. + + +------------------------------ 14 Feb 02 ------------------------------ + +Michael Matz : + this fixes the leak (32bytes/demangled symbol)in cplus-dem.c. It also + again localizes the changes to libiberty's cp-demangle.c at the top with + some #defines. (It also has some unrelated changes like header guarding + and making symbol sets disjoint, these are minor and were needed for the + demangle_test program, and are local to the demangler) + + JRS: applied it, but it still seems to leak, running a g++2.96 compiled + program, at least (/usr/bin/kedit as installed with RH72). + (reenable VG_(show_all_arena_stats)() at vg_demangle.c:63 to see this) + + +Michael Matz + here the patch for changing order of restorage of %esp to not trash + simulees stack + + +Simon Hausmann + I attached a patch for the latest vg snapshot to implement better + support for shmat. Vg now correct marks the region returned by shmat + as read or read-writable (we have to do that blindly as there is no + way to figure out if another process initialized the data or not) . + Also the patch adds a paranoia check for the buffer passed with + shmctl. What's left todo is to make shmdt correctly mark the shm + segment as invalid again, but I see no easy way to figure out the + shm segment size just from the base address. Anyone? :) + Otherwise it'll probably require keeping a copy of the allocated + mappings inside vg -- shouldn't be expensive though. + [...] 
+ Oops, my previous shm patch contained a little bug in that it called + must_be_readable/writable for shmctl even when the address was 0 + (which for example is the case when using IPC_RMID to remove a + shared memory segment, which is perfectly valid) . Attached the + updated patch that makes valgrinding Qt/Embedded apps possible :) + + +me: + Check for accessibility changes for first and last 64k of address + space after every system call, so as to catch syscall wrapper + bugs as soon as they happen + + +Simon Hausmann + The socketpair handler checks that the two fds passed must be + readable, but I think the check should use must_be_writable instead + as the kernel does not read those values but fills them in. + + +Simon Hausmann + Here's a patch that implements checking of arguments for + sendmsg/recvmsg, as part of the socket syscall. + [...] + Oops, I just realized that the recvmsg wrapper should mark the + fields of the received message as readable, as the kernel hopefully + initialized them :) + [...] + Corrected patch attached. + + +Nick Nethercote + [Implemented pushaw/pushal/popaw/popal] + + They're kind of ugly due to the special treatment of %esp, but there + didn't seem to be any elegant way around it. You may object to my + heinous magic number use with the registers. + + It seems to work ok, but I'd check it carefully if I were you. In + particular, I've only tried it with POPAD. (I think; gcc only accepts + 'popa' in assembler files, and according to the Intel docs will probably + choose between POPA and POPAD by looking at the D flag of the current + code segment's descriptor. I'm guessing this is 32-bits for my little + test program.) + + +Dirk Mueller + below a patch for checking if allocations are done correctly in C++ + applications. it checks if new is free'ed by delete, new [] by delete [], + and malloc, calloc, realloc by free(). + + It works for me , not intensively tested yet. 
+ + +Michael Matz + [allow specification of length of backtrace, default 8] + > > another small patch. it just increases the stacktrace + > > length to 8 by default + > > (16 with --long-stacktraces=yes). a backtrace of 4 is + > > really short for KHTML, which often deeply nests calls. + > + > Wouldn't it be more usefull, to recognize a --bt-length= + > option, instead of only two sizes? + + I.e. I would find the attached patch more usefull, which introduces an + --bt-size= option (with docu ;-) ) but removes that long=yes/no + option. + + JRS: the final name for the option is --num-callers + + +------------------------------ 15 Feb 02 ------------------------------ + +me: + fix assertion failure in VG_(malloc_aligned) (duh. duh. duh.) + + +Michael Matz + [finally fix the last space leak in the demangler] + this should now fix the rest of the leaks. The old c++ demangler is + really crappy code, therefor it took longer to find the leaks. ;) + + +Simon Hausmann + Here are two micro patches for vg :) + + The first one fixes what I think is a bug: I think the result param + of llseek before the actual syscall should be checked for + writability instead of readability. + + The second one is a quick workaround for an assertion in the + demangler that I hit quite sometimes (I'm surprised noone else has + hit it before, maybe it's gcc3 specific though :) . I'm too lazy to + write a full strtol replacement for now ;) , so just returning an + error will keep the program running and output the mangled symbol + instead. + + +------------------------------ 17 Feb 02 ------------------------------ + +me: + Implement cleverer semantics for 4-byte integer loads from partially + valid addresses, following complains from Coolo re spurious warnings + from gcc-inlined strlen()s. As part of this, hardwire the default + behaviour of --bad-addr-value and get rid of this flag entirely. + New flag --partial-loads-ok=no|yes [default yes] controls use of + new semantics. 
Also as a result, stop recommending people use + --sloppy-malloc=yes by default. + +me: + Options are now read from env var VALGRIND_OPTS. + Also remove need to edit hardware install dir in shell script. + + +Michael Matz +Simon Hausmann + this fixes some leaks in the v3 demangler found by Simon (Julian: i.e. + those we couldn't possibly run into, as we use v2 demangling). + + +------------------------------ 18 Feb 02 ------------------------------ + +me: + Tighten up the conditions for which --partial-loads-ok=yes is allowed + +me: + Error management: slow down collecting errors after the 50th + and stop altogether after the 500th + +me: + Implement --workaround-gcc296-bugs, so RH 7.X and Mandrake users + can use their default compilers. + + +------------------------------ 20 Feb 02 ------------------------------ + +me: + Massive overhaul of the signal machinery so that signals can be + delivered to the client even when it is blocked in a system call. + This fixes the kmail-deadlocks-on-valgrind problem. + +------------------------------ 21 Feb 02 ------------------------------ + +me: + Implement GDB attachment to the program. + +Stephan Kulow + Implement a few more syscall wrappers + +------------------------------ 24 Feb 02 ------------------------------ + +me: + Cleanup of the signal machinery. Now we deal with signals purely + at the kernel interface, rather than muddying glibc's and the kernel's + conflicting concepts of some of the calls and structures. + +Kevin Dwyer + Implement bazillions of syscall wrappers for ioctls + +me: + minor cleanups in vg_memory.c + +------------------------------ 4 Mar 02 ------------------------------- + +This file is falling significantly behind reality. + +Me: get rid of this pointless and incorrect assert: + vg_memory.c:441 (set_address_range_perms): Assertion `len < 30000000' failed. 
+ +Me: remove incorrect assert: + vg_assert(u_in->flags_w != FlagsEmpty); + do to with instrumentation of RCL/RCR + tracked down by Kevin Ryde + +Malcolm Studd +Sanjay Ghemawat + Implement pread (180) + +Byrial Jensen + Implement getsid (147) + Implement TCFLSH + +Me + Implement pause (29) + +------------------------------ 6 Mar 02 ------------------------------- + +Martin Nicolay + Implement fstatfs (100) + Implement ioctl FIOASYNC + Implement ioctl SIOCSPGRP (was previously noted-but-unhandled) + +Sanjay Ghemawat + Implement sync (36) + Make mallopt() into a no-op, rather than panic + +------------------------------ 7 Mar 02 ------------------------------- + +Me + Write a great deal of technical docs about Valgrind internals + See docs/techdocs.html + +Me + Complete renaming of the kernel interface (vg_kerneliface.h) + definitions. + +"Byrial Jensen" + Use kernel rather than glibc sizes for struct termios + +Martin Jones + Implement ioctl MIXER_WRITE(0) + +Jon Trowbridge + Implement syscall pwrite (181) + +Me + Revamp the leak detector; fix various segfaults and bus errors in + it, and add the --show-reachable=no|yes flag, which makes it more + useful. 
+ +------------------------------ 8 Mar 02 ------------------------------- + +Me + Split up huge basic blocks into pieces, to avoid this: + vg_main.c:495 (vgPlain_create_translation_for): Assertion + `trans_size > 0 && trans_size < 65536' failed + +Crispin Flowerday + Implement syscall sendfile (187) + Allow accept to have NULL 3rd argument + +Me + Write some more tech docs on the instrumentation and JITter + +----------------------------- 10 Mar 02 ------------------------------- + +Crispin Flowerday + SYS_SOCKETPAIR: on success, mark file descriptors as readable + +Ulrich Drepper + Be cleverer on SYS_CONNECT + +Me + Fix strange parse error at vg_symtab2.c:1017 + Complete tech docs to do with the instrumenting JITter + Write tech docs re future ideas + +----------------------------- 11 Mar 02 ------------------------------- + +Me + Fix some compile problems on Debian Potato, and add some supps + +Julian Brown + Handle ioctl 0x40045431 + +----------------------------- 12 Mar 02 ------------------------------- + +Martin Burchell + Some ioctls. + +Crispin Flowerday + Improved handling of SIOCGIFCONF. + +Frank Zago + Various ioctls for the SCSI generic (sg) driver. Currently #if 0'd. + +Me + Fix obscure simulated CPU bug causing this: + -1: CMOVW t34, t36 (-rOSZACP) + valgrind: vg_to_ucode.c:4197 (disInstr): Assertion `sane' failed. + (Spotted by Thorsten Schnebeck ) + +Me + Add yet more suppressions to linux24.supp + +Me [subrev b only] + In the leak checker, use __builtin_{setjmp,longjmp} instead of the + glibc (pthread) versions. This avoids a subtle memory corruption + problem caused by pthread_longjmp (or whatever it is called), which + finally manifests itself in this assertion failure: + vgPlain_primary_map[i] == & vg_distinguished_secondary_map + Many thanks to Michael Matz for coming up with an easy solution + which saved me hours of pissing around. 
+ +----------------------------- 15 Mar 02 ------------------------------- + +Stephan Kulow + Ioctls TIOCGPTN and TIOCSPTLCK + +Frank Zago + Various ioctls for the SCSI generic (sg) driver; these are now engaged. + +Harri Porten + Fix "tiny omission" in debug outputs in vg_clientmalloc.c. + +Wolfram Gloger + mallopt returns 1 on success, not 0. + Implement __posix_memalign. + +Alasdair G Kergon + Syscalls + setfsgid (139), setregid(71), setresuid(164), setfsuid(138). + (argc,argv,envp) hack: reject candidate alignments giving argc==0. + +Malte Starostik + Various ioctls, including some ISDN ones. + +Richard Moore + Syscalls mlockall (152), munlockall (153), sched_get_priority_max (159), + sched_getscheduler (156), NR_sched_setscheduler (157). + Ioctl SG_IO. + Print ioctl numbers in hex. + +Me + Syscall _sysctl (149). + +Aaron M. Ucko + Use the size/dir info encoded in the top 16 bits of an ioctl number, + where appropriate. + +Dirk Mueller + Syscall setfsuid32 (215), and bazillions of ioctls, mostly linux + soundcard ones. + +Me + (following request from Dirk Mueller) + Rehash of context management system. Now --num-callers= accepts + values from 2 to 50. Also --leak-resolution=low|med|high adjusts + how the leak detector presents results. RTFM ... + +----------------------------- 15 Mar 02 second snapshot (0315b) ------- + +Me + Allow suppression of invalid free and mismatching free errors. + + Resurrect the internal profiling stuff (VG_PROFILE) and then fail + to find anything obvious that I could speed up :-( + + Start playing with using AMD cache prefetch insns as described + in docs/techdocs.html. + + Wrap ioctl SNDCTL_DSP_GETSPDIF in #ifdef; apparently undefined in + RH 7.2. + +----------------------------- 17 Mar 02 ------------------------------- + +Me + New flag -q --quiet, so it just prints error messages and nothing + else + + Experimental feature: allow clients to set/check memory range + permissions. 
Read documentation pertaining to the --client-perms + flag. diff --git a/README b/README new file mode 100644 index 000000000..89208436e --- /dev/null +++ b/README @@ -0,0 +1,106 @@ + +Release notes for Valgrind, snapshot 20020217 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +KDE3 developers: please read also README_KDE3_FOLKS for guidance +about how to debug KDE3 applications with Valgrind. + +For instructions on how to build/install, see the end of this file. + + +Executive Summary +~~~~~~~~~~~~~~~~~ +Valgrind is a tool to help you find memory-management problems in your +programs. When a program is run under Valgrind's supervision, all +reads and writes of memory are checked, and calls to +malloc/new/free/delete are intercepted. As a result, Valgrind can +detect problems such as: + + Use of uninitialised memory + Reading/writing memory after it has been free'd + Reading/writing off the end of malloc'd blocks + Reading/writing inappropriate areas on the stack + Memory leaks -- where pointers to malloc'd blocks are lost forever + Passing of uninitialised and/or unaddressible memory to system calls + Mismatched use of malloc/new/new [] vs free/delete/delete [] + +Problems like these can be difficult to find by other means, often +lying undetected for long periods, then causing occasional, +difficult-to-diagnose crashes. + +When Valgrind detects such a problem, it can, if you like, attach GDB +to your program, so you can poke around and see what's going on. + +Valgrind is closely tied to details of the CPU, operating system and +to a less extent, compiler and basic C libraries. This makes it +difficult to make it portable, so I have chosen at the outset to +concentrate on what I believe to be a widely used platform: Red Hat +Linux 7.2, on x86s. I believe that it will work without significant +difficulty on other x86 GNU/Linux systems which use the 2.4 kernel and +GNU libc 2.2.X, for example SuSE 7.1 and Mandrake 8.0. Red Hat 6.2 is +also supported. 
It has worked in the past, and probably still does, +on RedHat 7.1 and 6.2. Note that I haven't compiled it on RedHat 7.1 +and 6.2 for a while, so they may no longer work now. + +Valgrind is licensed under the GNU General Public License, version 2. +Read the file LICENSE in the source distribution for details. + + +Documentation +~~~~~~~~~~~~~ +A comprehensive user guide is supplied. Point your browser at +docs/index.html. If your browser doesn't like frames, point it +instead at docs/manual.html. + + +Building and installing it +~~~~~~~~~~~~~~~~~~~~~~~~~~ +At the moment, very rudimentary. + +This tarball is set up for a standard Red Hat 7.2 machine. No +configure script, no autoconf, no nothing. Do the following: + +0. Unpack the sources and cd into the source directory. + +1. Do "make" + +2. See if it works. Try "./valgrind ls -l". Either this works, + or it bombs out complaining it can't find argc/argv/envp. + If this happens, you'll have to futz around with + vg_main.c:710 to vg_main.c:790 to try and find suitable offsets. + It's not hard; many have been successful here. + +Once step 2 is successful, you can now use valgrind. Documentation +is in docs/manual.html. The following auxiliary steps may enhance +your valgrinding experience, though. + +3. Add enough suppressions to the default suppression file + (linux24.supp) so that + ./valgrind xedit + runs without generating any errors. This means you've more + or less suppressed all the scummy errors from the X11 base + libraries and from glibc, which will make it easier to spot + genuine errors in your own code. 
+ + If you are using a machine with a 2.2 kernel (RedHat 6.2, + or Debian) you probably will want to edit the "valgrind" + shell script, to select the linux22.supp suppression file + (uncomment line 13; comment out line 14) + +If any kind soul would like to contribute a basic autoconf-style +configuration/install mechanism (the usual autoconf ; ./configure ; +make ; make install deal), that would be very much appreciated. I +will get round to it eventually, but there are only 24 hours in each +day. + +If you want to install it somewhere other than the build directory: +the files needed for installation are: valgrind.so, valgrinq.so, +valgrind, VERSION, linux24.supp. You can copy these to any directory +you like. + +I find it simplest to symlink /valgrind to somewhere +else in my path, so I can use it in-place. No need to "install" +really. + +Julian Seward (jseward@acm.org) +15 Feb 2002 diff --git a/README_KDE3_FOLKS b/README_KDE3_FOLKS new file mode 100644 index 000000000..18f8e7d3e --- /dev/null +++ b/README_KDE3_FOLKS @@ -0,0 +1,101 @@ + +4 February 2002 + +Greetings, KDE developer. Some of you may have noticed, in recent +days, that I have posted some bug reports to kde-core-devel@kde.org, +containing traces like the following: + + Use of uninitialised CPU condition code + at 0x471A4196: KateBuffer::parseBlock(KateBufBlock *) (katebuffer.cpp:446) + by 0x471A3B58: KateBuffer::line(unsigned int) (katebuffer.cpp:343) + by 0x471C684B: KateDocument::updateLines(int, int) + (../../kdecore/ksharedptr.h:126) + by 0x471C1C3E: KateDocument::makeAttribs() (katedocument.cpp:2302) + +These errors were detected using this tool, Valgrind. + +The purpose of this small doc is to guide you in using Valgrind to +find and fix memory management bugs in KDE3. + + --------------------------------------------------- + +Here's a getting-started-quickly checklist. It might sound daunting, +but once set up things work fairly well. 
+ + +* You need an x86 box running a Linux 2.4 kernel, with glibc-2.2.X and + XFree86 4.1.0. In practice this means practically any recent, + mainstream Linux distro. Valgrind is developed on a vanilla Red Hat + 7.2 installation, so at least works ok there. I imagine Mandrake 8 + and SuSE 7.X would be ok too. It has been known to work (and still + should) on Red Hat 7.1 and 6.2 too. + + +* You need a reasonably fast machine, since programs run 25-100 x + slower on Valgrind. I work with a 400 MHz AMD K6-III with 256 M of + memory. Interactive programs like kate, konqueror, etc, are just + about usable, but a faster machine would be better. + + +* You need at least 256M of memory for reasonable behaviour. Valgrind + inflates the memory use of KDE apps approximately 4-5 x, so (eg) + konqueror needs ~ 140M of memory to get started. This is very bad; + I hope to improve it (the current memory manager has a design problem). + + +* You need to compile the KDE to be debugged, using a decent gcc/g++: + + - gcc 2.96-*, which comes with Red Hat 7.2, is buggy. It sometimes + generates code with reads below %esp, even for simple functions. + This means you will be flooded with errors which are nothing to + do with your program. As of 18 Feb 02, you can use the + --workaround-gcc296-bugs=yes flag to ignore them. See the + manual for details; this is not really a good solution. + + - I recommend you use gcc/g++ 2.95.3. It seems to compile + KDE without problems, and does not suffer from the above bug. It's + what I have been using. + + - gcc-3.0.X -- I have not really tried gcc 3.0.X very much with + Valgrind, but others seem to think it works ok. + + It's ok to build Valgrind with the default gcc on Red Hat 7.2. + + +* So: build valgrind -- see the README file. (it's easy). + +* Build as much of KDE+Qt as you can with -g and without -O, for + the usual reasons. + +* Use it! + /path/to/valgrind $KDEDIR/bin/kate + (or whatever). 
+ +* If you are debugging KDE apps, be prepared for the fact that + Valgrind finds bugs in the underlying Qt (qt-copy from CVS) too. + +* Please read the Valgrind manual, docs/index.html. It contains + considerable details about how to use it, what's really going on, + etc. + +* The source locations in error messages can be way wrong sometimes; + please treat them with suspicion. In particular, it will sometimes + say that a source location is in a header file (.h) when really it + is in some totally unrelated source (.cpp) file. I'm working on it ... + +* There are some significant limitations: + - No threads! You can run programs linked with libpthread.so, + but only until the point where they do clone(); at that point + Valgrind will abort. + - No MMX, SSE, SSE2 insns. Basically a 486 instruction set only. + - Various other minor limitations listed in the manual. + +* Valgrind is still under active development. If you have trouble + with it, please let me know (jseward@acm.org) and I'll see if I + can help you out. + + +Have fun! If you find Valgrind useful in finding and fixing bugs, +I shall consider my efforts to have been worthwhile. + +Julian Seward (jseward@acm.org) diff --git a/README_MISSING_SYSCALL_OR_IOCTL b/README_MISSING_SYSCALL_OR_IOCTL new file mode 100644 index 000000000..4545f831d --- /dev/null +++ b/README_MISSING_SYSCALL_OR_IOCTL @@ -0,0 +1,152 @@ + +Dealing with missing system call or ioctl wrappers in Valgrind +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +You're probably reading this because Valgrind bombed out whilst +running your program, and advised you to read this file. The good +news is that, in general, it's easy to write the missing syscall or +ioctl wrappers you need, so that you can continue your debugging. If +you send the resulting patches to me, then you'll be doing a favour to +all future Valgrind users too. 
+ +Note that an "ioctl" is just a special kind of system call, really; so +there's not a lot of need to distinguish them (at least conceptually) +in the discussion that follows. + +All this machinery is in vg_syscall_mem.c. + + +What are syscall/ioctl wrappers? What do they do? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Valgrind does what it does, in part, by keeping track of the status of +all bytes of memory accessible by your program. When a system call +happens, for example a request to read part of a file, control passes +to the Linux kernel, which fulfills the request, and returns control +to your program. The problem is that the kernel will often change the +status of some part of your program's memory as a result. + +The job of syscall and ioctl wrappers is to spot such system calls, +and update Valgrind's memory status maps accordingly. This is +essential, because not doing so would cause you to be flooded with +errors later on, and, in general, because it's important that +Valgrind's idea of accessible memory corresponds to that of the Linux +kernel's. And for other reasons too. + +In addition, Valgrind takes the opportunity to perform some sanity +checks on the parameters you are presenting to system calls. This +isn't essential for the correct operation of Valgrind, but it does +allow it to warn you about various kinds of misuses which would +otherwise mean your program just dies without warning, usually with a +segmentation fault. + +So, let's look at an example of a wrapper for a system call which +should be familiar to many Unix programmers. 
+ + +The syscall wrapper for read() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Removing the debug printing clutter, it looks like this: + + case __NR_read: /* syscall 3 */ + /* size_t read(int fd, void *buf, size_t count); */ + must_be_writable( "read(buf)", arg2, arg3 ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res > 0) { + make_readable( arg2, res ); + } + break; + +The first thing we do is check that the buffer, which you planned to +have the result written to, really is addressible ("writable", here). +Hence: + + must_be_writable( "read(buf)", arg2, arg3 ); + +which causes Valgrind to issue a warning if the address range +[arg2 .. arg2 + arg3 - 1] is not writable. This is one of those +nice-to-have-but-not-essential checks mentioned above. Note that +the syscall args are always called arg1, arg2, arg3, etc. Here, +arg1 corresponds to "fd" in the prototype, arg2 to "buf", and arg3 +to "count". + +Now Valgrind asks the kernel to do the system call, depositing the +return code in "res": + + KERNEL_DO_SYSCALL(res); + +Finally, the really important bit. If, and only if, the system call +was successful, mark the buffer as readable (ie, as having valid +data), for as many bytes as were actually read: + + if (!VG_(is_kerror)(res) && res > 0) { + make_readable( arg2, res ); + } + +The function VG_(is_kerror) tells you whether or not its argument +represents a Linux kernel return error code. Hence the test. + + +Writing your own syscall wrappers (see below for ioctl wrappers) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If Valgrind tells you that system call NNN is unimplemented, do the +following: + +1. Find out the name of the system call: + + grep NNN /usr/include/asm/unistd.h + + This should tell you something like __NR_mysyscallname. + + +2. Do 'man 2 mysyscallname' to get some idea of what the syscall + does. + + +3. Add a case to the already-huge collection of wrappers in + vg_syscall_mem.c. 
For each in-memory parameter which is read + by the syscall, do a must_be_readable or must_be_readable_asciiz + on that parameter. Then do the syscall. Then, if the syscall + succeeds, issue suitable make_readable/writable/noaccess calls + afterwards, so as to update Valgrind's memory maps to reflect + the state change caused by the call. + + If you find this difficult, read the wrappers for other syscalls + for ideas. A good tip is to look for the wrapper for a syscall + which has a similar behaviour to yours, and use it as a + starting point. + + If you have to #include headers for structure definitions, + put your #includes into vg_unsafe.h. + + Test it. + + Note that a common error is to call make_readable or make_writable + with 0 (NULL) as the first (address) argument. This usually means your + logic is slightly inadequate. It's a sufficiently common bug that + there's a built-in check for it, and you'll get a "probably sanity + check failure" for the syscall wrapper you just made, if this is + the case. + + Note that many syscalls are bracketed by #if defined(__NR_mysyscall) + ... #endif, because they exist only in the 2.4 kernel and not + the 2.2 kernel. This enables the same piece of code to serve both + kernels. Please try and stick to this convention. + + +4. Once happy, send me the patch. Pretty please. + + + + +Writing your own ioctl wrappers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Is pretty much the same as writing syscall wrappers. + +If you can't be bothered, do a cheap hack: add it (the ioctl number +emitted in Valgrind's panic-message) to the long list of IOCTLs which +are noted but not fully handled by Valgrind (search for the text +"noted but unhandled ioctl" in vg_syscall_mem.c). This will get you +going immediately, at the risk of giving you spurious value errors. + +As above, please do send me the resulting patch. 
+ + diff --git a/TODO b/TODO new file mode 100644 index 000000000..4aff04874 --- /dev/null +++ b/TODO @@ -0,0 +1,34 @@ + +Doesn't run +~~~~~~~~~~~ +Anything with the __NR_clone system call. No idea what to do about +threads yet. + +netscape bombs shortly after successful normal startup. +(???) + + +ToDo +~~~~ +Rewrite memory manager, with variable sized red zones. + + +Desirable +~~~~~~~~~ +Demangle C++ names. + +Stack: make return address into NoAccess ? + +Give client programs a way to set memory range permissions. + +Have an --allow-scummy-symbols option, to allow in some of the symbols +currently ignored in vg_symtab2.c. + +Figure out whether cwq (convert signed-32 to signed-64) as +translated into ucode gives correct check semantics. +(I think it does.) + +Check __NR_select; I don't really understand it. +For that matter, __NR_newselect looks wrong too. + + diff --git a/acconfig.h b/acconfig.h new file mode 100644 index 000000000..068c69432 --- /dev/null +++ b/acconfig.h @@ -0,0 +1,5 @@ +#undef KERNEL_2_2 +#undef KERNEL_2_4 + +#undef GLIBC_2_1 +#undef GLIBC_2_2 diff --git a/aclocal.m4 b/aclocal.m4 new file mode 100644 index 000000000..059a8d167 --- /dev/null +++ b/aclocal.m4 @@ -0,0 +1,127 @@ +dnl aclocal.m4 generated automatically by aclocal 1.4-p4 + +dnl Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl This program is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY, to the extent permitted by law; without +dnl even the implied warranty of MERCHANTABILITY or FITNESS FOR A +dnl PARTICULAR PURPOSE. + +# Like AC_CONFIG_HEADER, but automatically create stamp file. + +AC_DEFUN(AM_CONFIG_HEADER, +[AC_PREREQ([2.12]) +AC_CONFIG_HEADER([$1]) +dnl When config.status generates a header, we must update the stamp-h file. 
+dnl This file resides in the same directory as the config header +dnl that is generated. We must strip everything past the first ":", +dnl and everything past the last "/". +AC_OUTPUT_COMMANDS(changequote(<<,>>)dnl +ifelse(patsubst(<<$1>>, <<[^ ]>>, <<>>), <<>>, +<>CONFIG_HEADERS" || echo timestamp > patsubst(<<$1>>, <<^\([^:]*/\)?.*>>, <<\1>>)stamp-h<<>>dnl>>, +<>; do + case " <<$>>CONFIG_HEADERS " in + *" <<$>>am_file "*<<)>> + echo timestamp > `echo <<$>>am_file | sed -e 's%:.*%%' -e 's%[^/]*$%%'`stamp-h$am_indx + ;; + esac + am_indx=`expr "<<$>>am_indx" + 1` +done<<>>dnl>>) +changequote([,]))]) + +# Do all the work for Automake. This macro actually does too much -- +# some checks are only needed if your package does certain things. +# But this isn't really a big deal. + +# serial 1 + +dnl Usage: +dnl AM_INIT_AUTOMAKE(package,version, [no-define]) + +AC_DEFUN(AM_INIT_AUTOMAKE, +[AC_REQUIRE([AC_PROG_INSTALL]) +PACKAGE=[$1] +AC_SUBST(PACKAGE) +VERSION=[$2] +AC_SUBST(VERSION) +dnl test to see if srcdir already configured +if test "`cd $srcdir && pwd`" != "`pwd`" && test -f $srcdir/config.status; then + AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) +fi +ifelse([$3],, +AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package]) +AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])) +AC_REQUIRE([AM_SANITY_CHECK]) +AC_REQUIRE([AC_ARG_PROGRAM]) +dnl FIXME This is truly gross. +missing_dir=`cd $ac_aux_dir && pwd` +AM_MISSING_PROG(ACLOCAL, aclocal, $missing_dir) +AM_MISSING_PROG(AUTOCONF, autoconf, $missing_dir) +AM_MISSING_PROG(AUTOMAKE, automake, $missing_dir) +AM_MISSING_PROG(AUTOHEADER, autoheader, $missing_dir) +AM_MISSING_PROG(MAKEINFO, makeinfo, $missing_dir) +AC_REQUIRE([AC_PROG_MAKE_SET])]) + +# +# Check to make sure that the build environment is sane. 
+# + +AC_DEFUN(AM_SANITY_CHECK, +[AC_MSG_CHECKING([whether build environment is sane]) +# Just in case +sleep 1 +echo timestamp > conftestfile +# Do `set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + set X `ls -Lt $srcdir/configure conftestfile 2> /dev/null` + if test "[$]*" = "X"; then + # -L didn't work. + set X `ls -t $srcdir/configure conftestfile` + fi + if test "[$]*" != "X $srcdir/configure conftestfile" \ + && test "[$]*" != "X conftestfile $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken +alias in your environment]) + fi + + test "[$]2" = conftestfile + ) +then + # Ok. + : +else + AC_MSG_ERROR([newly created file is older than distributed files! +Check your system clock]) +fi +rm -f conftest* +AC_MSG_RESULT(yes)]) + +dnl AM_MISSING_PROG(NAME, PROGRAM, DIRECTORY) +dnl The program must properly implement --version. +AC_DEFUN(AM_MISSING_PROG, +[AC_MSG_CHECKING(for working $2) +# Run test in a subshell; some versions of sh will print an error if +# an executable is not found, even if stderr is redirected. +# Redirect stdin to placate older versions of autoconf. Sigh. +if ($2 --version) < /dev/null > /dev/null 2>&1; then + $1=$2 + AC_MSG_RESULT(found) +else + $1="$3/missing $2" + AC_MSG_RESULT(missing) +fi +AC_SUBST($1)]) + diff --git a/addrcheck/Makefile.am b/addrcheck/Makefile.am new file mode 100644 index 000000000..00387927b --- /dev/null +++ b/addrcheck/Makefile.am @@ -0,0 +1,80 @@ +SUBDIRS = demangle . 
docs tests + +valdir = $(libdir)/valgrind + +LDFLAGS = -Wl,-z -Wl,initfirst + +INCLUDES += -I$(srcdir)/demangle + +bin_SCRIPTS = valgrind + +val_DATA = linux22.supp linux24.supp + +EXTRA_DIST = $(val_DATA) \ + PATCHES_APPLIED ACKNOWLEDGEMENTS \ + README_KDE3_FOLKS \ + README_MISSING_SYSCALL_OR_IOCTL TODO + +val_PROGRAMS = valgrind.so valgrinq.so + +valgrinq_so_SOURCES = vg_valgrinq_dummy.c + +valgrind_so_SOURCES = \ + vg_clientmalloc.c \ + vg_clientperms.c \ + vg_demangle.c \ + vg_dispatch.S \ + vg_errcontext.c \ + vg_execontext.c \ + vg_from_ucode.c \ + vg_helpers.S \ + vg_main.c \ + vg_malloc2.c \ + vg_memory.c \ + vg_messages.c \ + vg_mylibc.c \ + vg_procselfmaps.c \ + vg_profile.c \ + vg_signals.c \ + vg_startup.S \ + vg_symtab2.c \ + vg_syscall_mem.c \ + vg_syscall.S \ + vg_to_ucode.c \ + vg_translate.c \ + vg_transtab.c \ + vg_valgrinq_dummy.c \ + vg_vtagops.c + +valgrind_so_LDADD = \ + demangle/cp-demangle.o \ + demangle/cplus-dem.o \ + demangle/dyn-string.o \ + demangle/safe-ctype.o + +include_HEADERS = valgrind.h + +noinst_HEADERS = \ + vg_kerneliface.h \ + vg_include.h \ + vg_version.h \ + vg_constants.h \ + vg_unsafe.h + + +install-data-hook: + cd ${valdir} && rm -f default.supp && $(LN_S) $(DEFAULT_SUPP) default.supp + +vg_memory.o: + $(COMPILE) -O2 -mpreferred-stack-boundary=2 -c $< + +vg_clientmalloc.o: + $(COMPILE) -fno-omit-frame-pointer -c $< + + +valgrind.so: $(valgrind_so_OBJECTS) + $(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \ + $(valgrind_so_OBJECTS) $(valgrind_so_LDADD) + +valgrinq.so: $(valgrinq_so_OBJECTS) + $(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS) diff --git a/cachegrind/Makefile.am b/cachegrind/Makefile.am new file mode 100644 index 000000000..00387927b --- /dev/null +++ b/cachegrind/Makefile.am @@ -0,0 +1,80 @@ +SUBDIRS = demangle . 
docs tests + +valdir = $(libdir)/valgrind + +LDFLAGS = -Wl,-z -Wl,initfirst + +INCLUDES += -I$(srcdir)/demangle + +bin_SCRIPTS = valgrind + +val_DATA = linux22.supp linux24.supp + +EXTRA_DIST = $(val_DATA) \ + PATCHES_APPLIED ACKNOWLEDGEMENTS \ + README_KDE3_FOLKS \ + README_MISSING_SYSCALL_OR_IOCTL TODO + +val_PROGRAMS = valgrind.so valgrinq.so + +valgrinq_so_SOURCES = vg_valgrinq_dummy.c + +valgrind_so_SOURCES = \ + vg_clientmalloc.c \ + vg_clientperms.c \ + vg_demangle.c \ + vg_dispatch.S \ + vg_errcontext.c \ + vg_execontext.c \ + vg_from_ucode.c \ + vg_helpers.S \ + vg_main.c \ + vg_malloc2.c \ + vg_memory.c \ + vg_messages.c \ + vg_mylibc.c \ + vg_procselfmaps.c \ + vg_profile.c \ + vg_signals.c \ + vg_startup.S \ + vg_symtab2.c \ + vg_syscall_mem.c \ + vg_syscall.S \ + vg_to_ucode.c \ + vg_translate.c \ + vg_transtab.c \ + vg_valgrinq_dummy.c \ + vg_vtagops.c + +valgrind_so_LDADD = \ + demangle/cp-demangle.o \ + demangle/cplus-dem.o \ + demangle/dyn-string.o \ + demangle/safe-ctype.o + +include_HEADERS = valgrind.h + +noinst_HEADERS = \ + vg_kerneliface.h \ + vg_include.h \ + vg_version.h \ + vg_constants.h \ + vg_unsafe.h + + +install-data-hook: + cd ${valdir} && rm -f default.supp && $(LN_S) $(DEFAULT_SUPP) default.supp + +vg_memory.o: + $(COMPILE) -O2 -mpreferred-stack-boundary=2 -c $< + +vg_clientmalloc.o: + $(COMPILE) -fno-omit-frame-pointer -c $< + + +valgrind.so: $(valgrind_so_OBJECTS) + $(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \ + $(valgrind_so_OBJECTS) $(valgrind_so_LDADD) + +valgrinq.so: $(valgrinq_so_OBJECTS) + $(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS) diff --git a/cachegrind/docs/Makefile.am b/cachegrind/docs/Makefile.am new file mode 100644 index 000000000..e8a58fa18 --- /dev/null +++ b/cachegrind/docs/Makefile.am @@ -0,0 +1,5 @@ +docdir = $(datadir)/doc/valgrind + +doc_DATA = index.html manual.html nav.html techdocs.html + +EXTRA_DIST = $(doc_DATA) diff --git a/cachegrind/docs/index.html b/cachegrind/docs/index.html 
new file mode 100644 index 000000000..111170256 --- /dev/null +++ b/cachegrind/docs/index.html @@ -0,0 +1,26 @@ + + + + + + + + + + + Valgrind's user manual + + + + + + + <body> + <p>This page uses frames, but your browser doesn't support them.</p> + </body> + + + + diff --git a/cachegrind/docs/manual.html b/cachegrind/docs/manual.html new file mode 100644 index 000000000..1bcd02a81 --- /dev/null +++ b/cachegrind/docs/manual.html @@ -0,0 +1,1753 @@ + + + + + + + +  +

Valgrind, snapshot 20020317

+ +
+jseward@acm.org
+
http://www.muraroa.demon.co.uk
+Copyright © 2000-2002 Julian Seward +

+Valgrind is licensed under the GNU General Public License, +version 2
+An open-source tool for finding memory-management problems in +Linux-x86 executables. +

+ +

+ +


+ +

Contents of this manual

+ +

Introduction

+ 1.1  What Valgrind is for
+ 1.2  What it does with your program + +

How to use it, and how to make sense + of the results

+ 2.1  Getting started
+ 2.2  The commentary
+ 2.3  Reporting of errors
+ 2.4  Suppressing errors
+ 2.5  Command-line flags
+ 2.6  Explanation of error messages
+ 2.7  Writing suppressions files
+ 2.8  Building and installing
+ 2.9  If you have problems
+ +

Details of the checking machinery

+ 3.1  Valid-value (V) bits
+ 3.2  Valid-address (A) bits
+ 3.3  Putting it all together
+ 3.4  Signals
+ 3.5  Memory leak detection
+ +

Limitations

+ +

How it works -- a rough overview

+ 5.1  Getting started
+ 5.2  The translation/instrumentation engine
+ 5.3  Tracking the status of memory
+ 5.4  System calls
+ 5.5  Signals
+ +

An example

+ +

The design and implementation of Valgrind

+ +
+ + +

1  Introduction

+ + +

1.1  What Valgrind is for

+ +Valgrind is a tool to help you find memory-management problems in your +programs. When a program is run under Valgrind's supervision, all +reads and writes of memory are checked, and calls to +malloc/new/free/delete are intercepted. As a result, Valgrind can +detect problems such as: +
    +
  • Use of uninitialised memory
  • +
  • Reading/writing memory after it has been free'd
  • +
  • Reading/writing off the end of malloc'd blocks
  • +
  • Reading/writing inappropriate areas on the stack
  • +
  • Memory leaks -- where pointers to malloc'd blocks are lost forever
  • +
+ +Problems like these can be difficult to find by other means, often +lying undetected for long periods, then causing occasional, +difficult-to-diagnose crashes. + +

+Valgrind is closely tied to details of the CPU, operating system and +to a lesser extent, compiler and basic C libraries. This makes it +difficult to make it portable, so I have chosen at the outset to +concentrate on what I believe to be a widely used platform: Red Hat +Linux 7.2, on x86s. I believe that it will work without significant +difficulty on other x86 GNU/Linux systems which use the 2.4 kernel and +GNU libc 2.2.X, for example SuSE 7.1 and Mandrake 8.0. Red Hat 6.2 is +also supported. It has worked in the past, and probably still does, +on RedHat 7.1 and 6.2. Note that I haven't compiled it on RedHat 7.1 +and 6.2 for a while, so they may no longer work now. +

+(Early Feb 02: after feedback from the KDE people it also works better +on other Linuxes). +

+At some point in the past, Valgrind has also worked on Red Hat 6.2 +(x86), thanks to the efforts of Rob Noble. + +

+Valgrind is licensed under the GNU General Public License, version +2. Read the file LICENSE in the source distribution for details. + + +

1.2  What it does with your program

+ +Valgrind is designed to be as non-intrusive as possible. It works +directly with existing executables. You don't need to recompile, +relink, or otherwise modify, the program to be checked. Simply place +the word valgrind at the start of the command line +normally used to run the program. So, for example, if you want to run +the command ls -l on Valgrind, simply issue the +command: valgrind ls -l. + +

Valgrind takes control of your program before it starts. Debugging +information is read from the executable and associated libraries, so +that error messages can be phrased in terms of source code +locations. Your program is then run on a synthetic x86 CPU which +checks every memory access. All detected errors are written to a +log. When the program finishes, Valgrind searches for and reports on +leaked memory. + +

You can run pretty much any dynamically linked ELF x86 executable using +Valgrind. Programs run 25 to 50 times slower, and take a lot more +memory, than they usually would. It works well enough to run large +programs. For example, the Konqueror web browser from the KDE Desktop +Environment, version 2.1.1, runs slowly but usably on Valgrind. + +

Valgrind simulates every single instruction your program executes. +Because of this, it finds errors not only in your application but also +in all supporting dynamically-linked (.so-format) libraries, including +the GNU C library, the X client libraries, Qt, if you work with KDE, and +so on. That often includes libraries, for example the GNU C library, +which contain memory access violations, but which you cannot or do not +want to fix. + +

Rather than swamping you with errors in which you are not +interested, Valgrind allows you to selectively suppress errors, by +recording them in a suppressions file which is read when Valgrind +starts up. As supplied, Valgrind comes with a suppressions file +designed to give reasonable behaviour on Red Hat 7.2 (also 7.1 and +6.2) when running text-only and simple X applications. + +

Section 6 shows an example of use. +

+


+ + +

2  How to use it, and how to make sense of the results

+ + +

2.1  Getting started

+ +First off, consider whether it might be beneficial to recompile your +application and supporting libraries with optimisation disabled and +debugging info enabled (the -g flag). You don't have to +do this, but doing so helps Valgrind produce more accurate and less +confusing error reports. Chances are you're set up like this already, +if you intended to debug your program with GNU gdb, or some other +debugger. + +

Then just run your application, but place the word +valgrind in front of your usual command-line invocation. +Note that you should run the real (machine-code) executable here. If +your application is started by, for example, a shell or perl script, +you'll need to modify it to invoke Valgrind on the real executables. +Running such scripts directly under Valgrind will result in you +getting error reports pertaining to /bin/sh, +/usr/bin/perl, or whatever interpreter you're using. +This almost certainly isn't what you want and can be hugely confusing. + +

2.2  The commentary

+ +Valgrind writes a commentary, detailing error reports and other +significant events. The commentary goes to standard output by +default. This may interfere with your program, so you can ask for it +to be directed elsewhere. + +

All lines in the commentary are of the following form:
+

+  ==12345== some-message-from-Valgrind
+
+

The 12345 is the process ID. This scheme makes it easy +to distinguish program output from Valgrind commentary, and also easy +to differentiate commentaries from different processes which have +become merged together, for whatever reason. + +

By default, Valgrind writes only essential messages to the commentary, +so as to avoid flooding you with information of secondary importance. +If you want more information about what is happening, re-run, passing +the -v flag to Valgrind. + + + +

2.3  Reporting of errors

+ +When Valgrind detects something bad happening in the program, an error +message is written to the commentary. For example:
+
+  ==25832== Invalid read of size 4
+  ==25832==    at 0x8048724: BandMatrix::ReSize(int, int, int) (bogon.cpp:45)
+  ==25832==    by 0x80487AF: main (bogon.cpp:66)
+  ==25832==    by 0x40371E5E: __libc_start_main (libc-start.c:129)
+  ==25832==    by 0x80485D1: (within /home/sewardj/newmat10/bogon)
+  ==25832==    Address 0xBFFFF74C is not stack'd, malloc'd or free'd
+
+ +

This message says that the program did an illegal 4-byte read of +address 0xBFFFF74C, which, as far as it can tell, is not a valid stack +address, nor corresponds to any currently malloc'd or free'd blocks. +The read is happening at line 45 of bogon.cpp, called +from line 66 of the same file, etc. For errors associated with an +identified malloc'd/free'd block, for example reading free'd memory, +Valgrind reports not only the location where the error happened, but +also where the associated block was malloc'd/free'd. + +

Valgrind remembers all error reports. When an error is detected, +it is compared against old reports, to see if it is a duplicate. If +so, the error is noted, but no further commentary is emitted. This +avoids you being swamped with bazillions of duplicate error reports. + +

If you want to know how many times each error occurred, run with +the -v option. When execution finishes, all the reports +are printed out, along with, and sorted by, their occurrence counts. +This makes it easy to see which errors have occurred most frequently. + +

Errors are reported before the associated operation actually +happens. For example, if your program decides to read from address +zero, Valgrind will emit a message to this effect, and the program +will then duly die with a segmentation fault. + +

In general, you should try and fix errors in the order that they +are reported. Not doing so can be confusing. For example, a program +which copies uninitialised values to several memory locations, and +later uses them, will generate several error messages. The first such +error message may well give the most direct clue to the root cause of +the problem. + + +

2.4  Suppressing errors

+ +Valgrind detects numerous problems in the base libraries, such as the +GNU C library, and the XFree86 client libraries, which come +pre-installed on your GNU/Linux system. You can't easily fix these, +but you don't want to see these errors (and yes, there are many!) So +Valgrind reads a list of errors to suppress at startup. By default +this file is redhat72.supp, located in the Valgrind +installation directory. + +

You can modify and add to the suppressions file at your leisure, or +write your own. Multiple suppression files are allowed. This is +useful if part of your project contains errors you can't or don't want +to fix, yet you don't want to continuously be reminded of them. + +

Each error to be suppressed is described very specifically, to +minimise the possibility that a suppression-directive inadvertently +suppresses a bunch of similar errors which you did want to see. The +suppression mechanism is designed to allow precise yet flexible +specification of errors to suppress. + +

If you use the -v flag, at the end of execution, Valgrind +prints out one line for each used suppression, giving its name and the +number of times it got used. Here's the suppressions used by a run of +ls -l: +

+  --27579-- supp: 1 socketcall.connect(serv_addr)/__libc_connect/__nscd_getgrgid_r
+  --27579-- supp: 1 socketcall.connect(serv_addr)/__libc_connect/__nscd_getpwuid_r
+  --27579-- supp: 6 strrchr/_dl_map_object_from_fd/_dl_map_object
+
+ + +

2.5  Command-line flags

+ +You invoke Valgrind like this: +
+  valgrind [options-for-Valgrind] your-prog [options for your-prog]
+
+ +

Valgrind's default settings succeed in giving reasonable behaviour +in most cases. Available options, in no particular order, are as +follows: +

    +
  • --help

  • + +
  • --version
    +

    The usual deal.


  • + +

  • -v --verbose
    +

    Be more verbose. Gives extra information on various aspects + of your program, such as: the shared objects loaded, the + suppressions used, the progress of the instrumentation engine, + and warnings about unusual behaviour. +


  • + +

  • -q --quiet
    +

    Run silently, and only print error messages. Useful if you + are running regression tests or have some other automated test + machinery. +


  • + +

  • --demangle=no
    + --demangle=yes [the default] +

    Disable/enable automatic demangling (decoding) of C++ names. + Enabled by default. When enabled, Valgrind will attempt to + translate encoded C++ procedure names back to something + approaching the original. The demangler handles symbols mangled + by g++ versions 2.X and 3.X. + +

    An important fact about demangling is that function + names mentioned in suppressions files should be in their mangled + form. Valgrind does not demangle function names when searching + for applicable suppressions, because to do otherwise would make + suppressions file contents dependent on the state of Valgrind's + demangling machinery, and would also be slow and pointless. +


  • + +

  • --num-callers=<number> [default=4]
    +

    By default, Valgrind shows four levels of function call names + to help you identify program locations. You can change that + number with this option. This can help in determining the + program's location in deeply-nested call chains. Note that errors + are commoned up using only the top three function locations (the + place in the current function, and that of its two immediate + callers). So this doesn't affect the total number of errors + reported. +

    + The maximum value for this is 50. Note that higher settings + will make Valgrind run a bit more slowly and take a bit more + memory, but can be useful when working with programs with + deeply-nested call chains. +


  • + +

  • --gdb-attach=no [the default]
    + --gdb-attach=yes +

    When enabled, Valgrind will pause after every error shown, + and print the line +
    + ---- Attach to GDB ? --- [Return/N/n/Y/y/C/c] ---- +

    + Pressing Ret, or N Ret + or n Ret, causes Valgrind not to + start GDB for this error. +

    + Y Ret + or y Ret causes Valgrind to + start GDB, for the program at this point. When you have + finished with GDB, quit from it, and the program will continue. + Trying to continue from inside GDB doesn't work. +

    + C Ret + or c Ret causes Valgrind not to + start GDB, and not to ask again. +

    + --gdb-attach=yes conflicts with + --trace-children=yes. You can't use them + together. Valgrind refuses to start up in this situation. +


  • + +

  • --partial-loads-ok=yes [the default]
    + --partial-loads-ok=no +

    Controls how Valgrind handles word (4-byte) loads from + addresses for which some bytes are addressible and others + are not. When yes (the default), such loads + do not elicit an address error. Instead, the loaded V bytes + corresponding to the illegal addresses indicate undefined, and + those corresponding to legal addresses are loaded from shadow + memory, as usual. +

    + When no, loads from partially + invalid addresses are treated the same as loads from completely + invalid addresses: an illegal-address error is issued, + and the resulting V bytes indicate valid data. +


  • + +

  • --sloppy-malloc=no [the default]
    + --sloppy-malloc=yes +

    When enabled, all requests for malloc/calloc are rounded up + to a whole number of machine words -- in other words, made + divisible by 4. For example, a request for 17 bytes of space + would result in a 20-byte area being made available. This works + around bugs in sloppy libraries which assume that they can + safely rely on malloc/calloc requests being rounded up in this + fashion. Without the workaround, these libraries tend to + generate large numbers of errors when they access the ends of + these areas. Valgrind snapshots dated 17 Feb 2002 and later are + cleverer about this problem, and you should no longer need to + use this flag. +


  • + +

  • --trace-children=no [the default]
    + --trace-children=yes +

    When enabled, Valgrind will trace into child processes. This + is confusing and usually not what you want, so is disabled by + default.


  • + +

  • --freelist-vol=<number> [default: 1000000] +

    When the client program releases memory using free (in C) or + delete (C++), that memory is not immediately made available for + re-allocation. Instead it is marked inaccessible and placed in + a queue of freed blocks. The purpose is to delay the point at + which freed-up memory comes back into circulation. This + increases the chance that Valgrind will be able to detect + invalid accesses to blocks for some significant period of time + after they have been freed. +

    + This flag specifies the maximum total size, in bytes, of the + blocks in the queue. The default value is one million bytes. + Increasing this increases the total amount of memory used by + Valgrind but may detect invalid uses of freed blocks which would + otherwise go undetected.


  • + +

  • --logfile-fd=<number> [default: 2, stderr] +

    Specifies the file descriptor on which Valgrind communicates + all of its messages. The default, 2, is the standard error + channel. This may interfere with the client's own use of + stderr. To dump Valgrind's commentary in a file without using + stderr, something like the following works well (sh/bash + syntax):
    +    + valgrind --logfile-fd=9 my_prog 9> logfile
    + That is: tell Valgrind to send all output to file descriptor 9, + and ask the shell to route file descriptor 9 to "logfile". +


  • + +

  • --suppressions=<filename> [default: + /installation/directory/redhat72.supp]

    Specifies an extra + file from which to read descriptions of errors to suppress. You + may use as many extra suppressions files as you + like.


  • + +

  • --leak-check=no [default]
    + --leak-check=yes +

    When enabled, search for memory leaks when the client program + finishes. A memory leak means a malloc'd block, which has not + yet been free'd, but to which no pointer can be found. Such a + block can never be free'd by the program, since no pointer to it + exists. Leak checking is disabled by default + because it tends to generate dozens of error messages. +


  • + +

  • --show-reachable=no [default]
    + --show-reachable=yes

    When disabled, the memory + leak detector only shows blocks for which it cannot find a + pointer to at all, or it can only find a pointer to the middle + of. These blocks are prime candidates for memory leaks. When + enabled, the leak detector also reports on blocks which it could + find a pointer to. Your program could, at least in principle, + have freed such blocks before exit. Contrast this to blocks for + which no pointer, or only an interior pointer could be found: + they are more likely to indicate memory leaks, because + you do not actually have a pointer to the start of the block + which you can hand to free(), even if you wanted to. +


  • + +

  • --leak-resolution=low [default]
    + --leak-resolution=med
    + --leak-resolution=high +

    When doing leak checking, determines how willing Valgrind is + to consider different backtraces the same. When set to + low, the default, only the first two entries need + match. When med, four entries have to match. When + high, all entries need to match. +

    + For hardcore leak debugging, you probably want to use + --leak-resolution=high together with + --num-callers=40 or some such large number. Note + however that this can give an overwhelming amount of + information, which is why the defaults are 4 callers and + low-resolution matching. +

    + Note that the --leak-resolution= setting does not + affect Valgrind's ability to find leaks. It only changes how + the results are presented to you. +


  • + +

  • --workaround-gcc296-bugs=no [default]
    + --workaround-gcc296-bugs=yes

    When enabled, + Valgrind assumes that reads and writes some small distance below the stack + pointer %esp are due to bugs in gcc 2.96, and does + not report them. The "small distance" is 256 bytes by default. + Note that gcc 2.96 is the default compiler on some popular Linux + distributions (RedHat 7.X, Mandrake) and so you may well need to + use this flag. Do not use it if you do not have to, as it can + cause real errors to be overlooked. A better option is to use a + gcc/g++ which works properly; 2.95.3 seems to be a good choice. +

    + Unfortunately (27 Feb 02) it looks like g++ 3.0.4 is similarly + buggy, so you may need to issue this flag if you use 3.0.4. +


  • + +

  • --client-perms=no [default]
    + --client-perms=yes

    An experimental feature. +

    + When enabled, and when --instrument=yes (which is + the default), Valgrind honours client directives to set and + query address range permissions. This allows the client program + to tell Valgrind about changes in memory range permissions that + Valgrind would not otherwise know about, and so allows clients + to get Valgrind to do arbitrary custom checks. +

    + Clients need to include the header file valgrind.h + to make this work. The macros therein have the magical property + that they generate code in-line which Valgrind can spot. + However, the code does nothing when not run on Valgrind, so you + are not forced to run your program on Valgrind just because you + use the macros in this file. +

    + A brief description of the available macros: +

      +
    • VALGRIND_MAKE_NOACCESS, + VALGRIND_MAKE_WRITABLE and + VALGRIND_MAKE_READABLE. These mark address + ranges as completely inaccessible, accessible but containing + undefined data, and accessible and containing defined data, + respectively. Subsequent errors may have their faulting + addresses described in terms of these blocks. Returns a + "block handle". +

      +

    • VALGRIND_DISCARD: At some point you may want + Valgrind to stop reporting errors in terms of the blocks + defined by the previous three macros. To do this, the above + macros return a small-integer "block handle". You can pass + this block handle to VALGRIND_DISCARD. After + doing so, Valgrind will no longer be able to relate + addressing errors to the user-defined block associated with + the handle. The permissions settings associated with the + handle remain in place; this just affects how errors are + reported, not whether they are reported. Returns 1 for an + invalid handle and 0 for a valid handle (although passing + invalid handles is harmless). +

      +

    • VALGRIND_CHECK_NOACCESS, + VALGRIND_CHECK_WRITABLE and + VALGRIND_CHECK_READABLE: check immediately + whether or not the given address range has the relevant + property, and if not, print an error message. Also, for the + convenience of the client, returns zero if the relevant + property holds; otherwise, the returned value is the address + of the first byte for which the property is not true. +

      +

    • VALGRIND_CHECK_NOACCESS: a quick and easy way + to find out whether Valgrind thinks a particular variable + (lvalue, to be precise) is addressable and defined. Prints + an error message if not. Returns no value. +

      +

    • VALGRIND_MAKE_NOACCESS_STACK: a highly + experimental feature. Similarly to + VALGRIND_MAKE_NOACCESS, this marks an address + range as inaccessible, so that subsequent accesses to an + address in the range give an error. However, this macro + does not return a block handle. Instead, all annotations + created like this are reviewed at each client + ret (subroutine return) instruction, and those + which now define an address range below the client's stack + pointer register (%esp) are automatically + deleted. +

      + In other words, this macro allows the client to tell + Valgrind about red-zones on its own stack. Valgrind + automatically discards this information when the stack + retreats past such blocks. Beware: hacky and flaky. +

    +
  • +

    + As of 17 March 02 (the time of writing this), there is a small + problem with all of these macros, which is that I haven't + figured out how to make them produce sensible (always-succeeds) + return values when the client is run on the real CPU or on + Valgrind without --client-perms=yes. So if you + write client code which depends on the return values, be aware + that it may misbehave when not run with full Valgrindification. + If you always ignore the return values you should always be + safe. I plan to fix this. +

+ +There are also some options for debugging Valgrind itself. You +shouldn't need to use them in the normal run of things. Nevertheless: + +
    + +
  • --single-step=no [default]
    + --single-step=yes +

    When enabled, each x86 insn is translated separately into + instrumented code. When disabled, translation is done on a + per-basic-block basis, giving much better translations.


  • +

    + +

  • --optimise=no
    + --optimise=yes [default] +

    When enabled, various improvements are applied to the + intermediate code, mainly aimed at allowing the simulated CPU's + registers to be cached in the real CPU's registers over several + simulated instructions.


  • +

    + +

  • --instrument=no
    + --instrument=yes [default] +

    When disabled, the translations don't actually contain any + instrumentation.


  • +

    + +

  • --cleanup=no
    + --cleanup=yes [default] +

    When enabled, various improvements are applied to the + post-instrumented intermediate code, aimed at removing redundant + value checks.


  • +

    + +

  • --trace-syscalls=no [default]
    + --trace-syscalls=yes +

    Enable/disable tracing of system call intercepts.


  • +

    + +

  • --trace-signals=no [default]
    + --trace-signals=yes +

    Enable/disable tracing of signal handling.


  • +

    + +

  • --trace-symtab=no [default]
    + --trace-symtab=yes +

    Enable/disable tracing of symbol table reading.


  • +

    + +

  • --trace-malloc=no [default]
    + --trace-malloc=yes +

    Enable/disable tracing of malloc/free (et al) intercepts. +


  • +

    + +

  • --stop-after=<number> + [default: infinity, more or less] +

    After <number> basic blocks have been executed, shut down + Valgrind and switch back to running the client on the real CPU. +


  • +

    + +

  • --dump-error=<number> + [default: inactive] +

    After the program has exited, show gory details of the + translation of the basic block containing the <number>'th + error context. When used with --single-step=yes, + can show the + exact x86 instruction causing an error.


  • +

    + +

  • --smc-check=none
    + --smc-check=some [default]
    + --smc-check=all +

    How carefully should Valgrind check for self-modifying code + writes, so that translations can be discarded? When + "none", no writes are checked. When "some", only writes + resulting from moves from integer registers to memory are + checked. When "all", all memory writes are checked, even those + with which no sane program would generate code -- for + example, floating-point writes.

  • +
+ + + +

2.6  Explanation of error messages

+ +Despite considerable sophistication under the hood, Valgrind can only +really detect two kinds of errors, use of illegal addresses, and use +of undefined values. Nevertheless, this is enough to help you +discover all sorts of memory-management nasties in your code. This +section presents a quick summary of what error messages mean. The +precise behaviour of the error-checking machinery is described in +
Section 4. + + +

2.6.1  Illegal read / Illegal write errors

+For example: +
+  ==30975== Invalid read of size 4
+  ==30975==    at 0x40F6BBCC: (within /usr/lib/libpng.so.2.1.0.9)
+  ==30975==    by 0x40F6B804: (within /usr/lib/libpng.so.2.1.0.9)
+  ==30975==    by 0x40B07FF4: read_png_image__FP8QImageIO (kernel/qpngio.cpp:326)
+  ==30975==    by 0x40AC751B: QImageIO::read() (kernel/qimage.cpp:3621)
+  ==30975==    Address 0xBFFFF0E0 is not stack'd, malloc'd or free'd
+
+ +

This happens when your program reads or writes memory at a place +which Valgrind reckons it shouldn't. In this example, the program did +a 4-byte read at address 0xBFFFF0E0, somewhere within the +system-supplied library libpng.so.2.1.0.9, which was called from +somewhere else in the same library, called from line 326 of +qpngio.cpp, and so on. + +

Valgrind tries to establish what the illegal address might relate +to, since that's often useful. So, if it points into a block of +memory which has already been freed, you'll be informed of this, and +also where the block was free'd. Likewise, if it should turn out +to be just off the end of a malloc'd block, a common result of +off-by-one errors in array subscripting, you'll be informed of this +fact, and also where the block was malloc'd. + +

In this example, Valgrind can't identify the address. Actually the +address is on the stack, but, for some reason, this is not a valid +stack address -- it is below the stack pointer, %esp, and that isn't +allowed. + +

Note that Valgrind only tells you that your program is about to +access memory at an illegal address. It can't stop the access from +happening. So, if your program makes an access which normally would +result in a segmentation fault, your program will still suffer the same +fate -- but you will get a message from Valgrind immediately prior to +this. In this particular example, reading junk on the stack is +non-fatal, and the program stays alive. + + +

2.6.2  Use of uninitialised values

+For example: +
+  ==19146== Use of uninitialised CPU condition code
+  ==19146==    at 0x402DFA94: _IO_vfprintf (_itoa.h:49)
+  ==19146==    by 0x402E8476: _IO_printf (printf.c:36)
+  ==19146==    by 0x8048472: main (tests/manuel1.c:8)
+  ==19146==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+
+ +

An uninitialised-value use error is reported when your program uses +a value which hasn't been initialised -- in other words, is undefined. +Here, the undefined value is used somewhere inside the printf() +machinery of the C library. This error was reported when running the +following small program: +

+  int main()
+  {
+    int x;
+    printf ("x = %d\n", x);
+  }
+
+ +

It is important to understand that your program can copy around +junk (uninitialised) data to its heart's content. Valgrind observes +this and keeps track of the data, but does not complain. A complaint +is issued only when your program attempts to make use of uninitialised +data. In this example, x is uninitialised. Valgrind observes the +value being passed to _IO_printf and thence to +_IO_vfprintf, but makes no comment. However, +_IO_vfprintf has to examine the value of x +so it can turn it into the corresponding ASCII string, and it is at +this point that Valgrind complains. + +

Sources of uninitialised data tend to be: +

    +
  • Local variables in procedures which have not been initialised, + as in the example above.

  • + +

  • The contents of malloc'd blocks, before you write something + there. In C++, the new operator is a wrapper round malloc, so + if you create an object with new, its fields will be + uninitialised until you fill them in, which is only Right and + Proper.
  • +
+ + + +

2.6.3  Illegal frees

+For example: +
+  ==7593== Invalid free()
+  ==7593==    at 0x4004FFDF: free (ut_clientmalloc.c:577)
+  ==7593==    by 0x80484C7: main (tests/doublefree.c:10)
+  ==7593==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+  ==7593==    by 0x80483B1: (within tests/doublefree)
+  ==7593==    Address 0x3807F7B4 is 0 bytes inside a block of size 177 free'd
+  ==7593==    at 0x4004FFDF: free (ut_clientmalloc.c:577)
+  ==7593==    by 0x80484C7: main (tests/doublefree.c:10)
+  ==7593==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+  ==7593==    by 0x80483B1: (within tests/doublefree)
+
+

Valgrind keeps track of the blocks allocated by your program with +malloc/new, so it can know exactly whether or not the argument to +free/delete is legitimate or not. Here, this test program has +freed the same block twice. As with the illegal read/write errors, +Valgrind attempts to make sense of the address free'd. If, as +here, the address is one which has previously been freed, you will +be told that -- making duplicate frees of the same block easy to spot. + + +

2.6.4  Passing system call parameters with inadequate +read/write permissions

+ +Valgrind checks all parameters to system calls. If a system call +needs to read from a buffer provided by your program, Valgrind checks +that the entire buffer is addressable and has valid data, ie, it is +readable. And if the system call needs to write to a user-supplied +buffer, Valgrind checks that the buffer is addressable. After the +system call, Valgrind updates its administrative information to +precisely reflect any changes in memory permissions caused by the +system call. + +

Here's an example of a system call with an invalid parameter: +

+  #include <stdlib.h>
+  #include <unistd.h>
+  int main( void )
+  {
+    char* arr = malloc(10);
+    (void) write( 1 /* stdout */, arr, 10 );
+    return 0;
+  }
+
+ +

You get this complaint ... +

+  ==8230== Syscall param write(buf) lacks read permissions
+  ==8230==    at 0x4035E072: __libc_write
+  ==8230==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+  ==8230==    by 0x80483B1: (within tests/badwrite)
+  ==8230==    by <bogus frame pointer> ???
+  ==8230==    Address 0x3807E6D0 is 0 bytes inside a block of size 10 alloc'd
+  ==8230==    at 0x4004FEE6: malloc (ut_clientmalloc.c:539)
+  ==8230==    by 0x80484A0: main (tests/badwrite.c:6)
+  ==8230==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+  ==8230==    by 0x80483B1: (within tests/badwrite)
+
+ +

... because the program has tried to write uninitialised junk from +the malloc'd block to the standard output. + + +

2.6.5  Warning messages you might see

+ +Most of these only appear if you run in verbose mode (enabled by +-v): +
    +
  • More than 50 errors detected. Subsequent errors + will still be recorded, but in less detail than before. +
    + After 50 different errors have been shown, Valgrind becomes + more conservative about collecting them. It then requires only + the program counters in the top two stack frames to match when + deciding whether or not two errors are really the same one. + Prior to this point, the PCs in the top four frames are required + to match. This hack has the effect of slowing down the + appearance of new errors after the first 50. The 50 constant can + be changed by recompiling Valgrind. +

    +

  • More than 500 errors detected. I'm not reporting any more. + Final error counts may be inaccurate. Go fix your + program! +
    + After 500 different errors have been detected, Valgrind ignores + any more. It seems unlikely that collecting even more different + ones would be of practical help to anybody, and it avoids the + danger that Valgrind spends more and more of its time comparing + new errors against an ever-growing collection. As above, the 500 + number is a compile-time constant. +

    +

  • Warning: client exiting by calling exit(<number>). + Bye! +
    + Your program has called the exit system call, which + will immediately terminate the process. You'll get no exit-time + error summaries or leak checks. Note that this is not the same + as your program calling the ANSI C function exit() + -- that causes a normal, controlled shutdown of Valgrind. +

    +

  • Warning: client switching stacks? +
    + Valgrind spotted such a large change in the stack pointer, %esp, + that it guesses the client is switching to a different stack. + At this point it makes a kludgey guess where the base of the new + stack is, and sets memory permissions accordingly. You may get + many bogus error messages following this, if Valgrind guesses + wrong. At the moment "large change" is defined as a change of + more than 2000000 in the value of the %esp (stack pointer) + register. +

    +

  • Warning: client attempted to close Valgrind's logfile fd <number> + +
    + Valgrind doesn't allow the client + to close the logfile, because you'd never see any diagnostic + information after that point. If you see this message, + you may want to use the --logfile-fd=<number> + option to specify a different logfile file-descriptor number. +

    +

  • Warning: noted but unhandled ioctl <number> +
    + Valgrind observed a call to one of the vast family of + ioctl system calls, but did not modify its + memory status info (because I have not yet got round to it). + The call will still have gone through, but you may get spurious + errors after this as a result of the non-update of the memory info. +

    +

  • Warning: unblocking signal <number> due to + sigprocmask +
    + Really just a diagnostic from the signal simulation machinery. + This message will appear if your program handles a signal by + first longjmping out of the signal handler, + and then unblocking the signal with sigprocmask + -- a standard signal-handling idiom. +

    +

  • Warning: bad signal number <number> in __NR_sigaction. +
    + Probably indicates a bug in the signal simulation machinery. +

    +

  • Warning: set address range perms: large range <number> +
    + Diagnostic message, mostly for my benefit, to do with memory + permissions. +
+ + + +

2.7  Writing suppressions files

+ +A suppression file describes a bunch of errors which, for one reason +or another, you don't want Valgrind to tell you about. Usually the +reason is that the system libraries are buggy but unfixable, at least +within the scope of the current debugging session. Multiple +suppressions files are allowed. By default, Valgrind uses +linux24.supp in the directory where it is installed. + +

+You can ask to add suppressions from another file, by specifying +--suppressions=/path/to/file.supp. + +

Each suppression has the following components:
+

    + +
  • Its name. This merely gives a handy name to the suppression, by + which it is referred to in the summary of used suppressions + printed out when a program finishes. It's not important what + the name is; any identifying string will do. +

    + +

  • The nature of the error to suppress. Either: + Value1, + Value2, + Value4, + Value8 or + Value0, + meaning an uninitialised-value error when + using a value of 1, 2, 4 or 8 bytes, + or the CPU's condition codes, respectively. Or: + Addr1, + Addr2, + Addr4 or + Addr8, meaning an invalid address during a + memory access of 1, 2, 4 or 8 bytes respectively. Or + Param, + meaning an invalid system call parameter error. Or + Free, meaning an invalid or mismatching free.

  • +

    + +

  • The "immediate location" specification. For Value and Addr + errors, is either the name of the function in which the error + occurred, or, failing that, the full path to the .so file + containing the error location. For Param errors, is the name of + the offending system call parameter. For Free errors, is the + name of the function doing the freeing (eg, free, + __builtin_vec_delete, etc)

  • +

    + +

  • The caller of the above "immediate location". Again, either a + function or shared-object name.

  • +

    + +

  • Optionally, one or two extra calling-function or object names, + for greater precision.
  • +
+ +

+Locations may be either names of shared objects or wildcards matching +function names. They begin obj: and fun: +respectively. Function and object names to match against may use the +wildcard characters * and ?. + +A suppression only suppresses an error when the error matches all the +details in the suppression. Here's an example: +

+  {
+    __gconv_transform_ascii_internal/__mbrtowc/mbtowc
+    Value4
+    fun:__gconv_transform_ascii_internal
+    fun:__mbr*toc
+    fun:mbtowc
+  }
+
+ +

What it means is: suppress a use-of-uninitialised-value error, when +the data size is 4, when it occurs in the function +__gconv_transform_ascii_internal, when that is called +from any function of name matching __mbr*toc, +when that is called from +mbtowc. It doesn't apply under any other circumstances. +The string by which this suppression is identified to the user is +__gconv_transform_ascii_internal/__mbrtowc/mbtowc. + +

Another example: +

+  {
+    libX11.so.6.2/libX11.so.6.2/libXaw.so.7.0
+    Value4
+    obj:/usr/X11R6/lib/libX11.so.6.2
+    obj:/usr/X11R6/lib/libX11.so.6.2
+    obj:/usr/X11R6/lib/libXaw.so.7.0
+  }
+
+ +

Suppress any size 4 uninitialised-value error which occurs anywhere +in libX11.so.6.2, when called from anywhere in the same +library, when called from anywhere in libXaw.so.7.0. The +inexact specification of locations is regrettable, but is about all +you can hope for, given that the X11 libraries shipped with Red Hat +7.2 have had their symbol tables removed. + +

Note -- since the above two examples did not make it clear -- that +you can freely mix the obj: and fun: +styles of description within a single suppression record. + + + +

2.8  Building and installing

+At the moment, very rudimentary. + +

The tarball is set up for a standard Red Hat 7.1 (6.2) machine. To +build, just do "make". No configure script, no autoconf, no nothing. + +

The files needed for installation are: valgrind.so, valgrinq.so, +valgrind, VERSION, redhat72.supp (or redhat62.supp). You can copy +these to any directory you like. However, you then need to edit the +shell script "valgrind". On line 4, set the environment variable +VALGRIND to point to the directory you have copied the +installation into. + + +

2.9  If you have problems

+Mail me (jseward@acm.org). + +

See Section 4 for the known limitations of +Valgrind, and for a list of programs which are known not to work on +it. + +

The translator/instrumentor has a lot of assertions in it. They +are permanently enabled, and I have no plans to disable them. If one +of these breaks, please mail me! + +

If you get an assertion failure on the expression +chunkSane(ch) in vg_free() in +vg_malloc.c, this may have happened because your program +wrote off the end of a malloc'd block, or before its beginning. +Valgrind should have emitted a proper message to that effect before +dying in this way. This is a known problem which I should fix. +

+ +


+ + +

3  Details of the checking machinery

+ +Read this section if you want to know, in detail, exactly what and how +Valgrind is checking. + + +

3.1  Valid-value (V) bits

+ +It is simplest to think of Valgrind implementing a synthetic Intel x86 +CPU which is identical to a real CPU, except for one crucial detail. +Every bit (literally) of data processed, stored and handled by the +real CPU has, in the synthetic CPU, an associated "valid-value" bit, +which says whether or not the accompanying bit has a legitimate value. +In the discussions which follow, this bit is referred to as the V +(valid-value) bit. + +

Each byte in the system therefore has 8 V bits which accompany +it wherever it goes. For example, when the CPU loads a word-size item +(4 bytes) from memory, it also loads the corresponding 32 V bits from +a bitmap which stores the V bits for the process' entire address +space. If the CPU should later write the whole or some part of that +value to memory at a different address, the relevant V bits will be +stored back in the V-bit bitmap. + +

In short, each bit in the system has an associated V bit, which +follows it around everywhere, even inside the CPU. Yes, the CPU's +(integer) registers have their own V bit vectors. + +

Copying values around does not cause Valgrind to check for, or +report on, errors. However, when a value is used in a way which might +conceivably affect the outcome of your program's computation, the +associated V bits are immediately checked. If any of these indicate +that the value is undefined, an error is reported. + +

Here's an (admittedly nonsensical) example: +

+  int i, j;
+  int a[10], b[10];
+  for (i = 0; i < 10; i++) {
+    j = a[i];
+    b[i] = j;
+  }
+
+ +

Valgrind emits no complaints about this, since it merely copies +uninitialised values from a[] into b[], and +doesn't use them in any way. However, if the loop is changed to +

+  for (i = 0; i < 10; i++) {
+    j += a[i];
+  }
+  if (j == 77) 
+     printf("hello there\n");
+
+then Valgrind will complain, at the if, that the +condition depends on uninitialised values. + +

Most low level operations, such as adds, cause Valgrind to +use the V bits for the operands to calculate the V bits for the +result. Even if the result is partially or wholly undefined, +it does not complain. + +

Checks on definedness only occur in two places: when a value is +used to generate a memory address, and where a control-flow decision +needs to be made. Also, when a system call is detected, Valgrind +checks definedness of parameters as required. + +

If a check should detect undefinedness, an error message is +issued. The resulting value is subsequently regarded as well-defined. +To do otherwise would give long chains of error messages. In effect, +we say that undefined values are non-infectious. + +

This sounds overcomplicated. Why not just check all reads from +memory, and complain if an undefined value is loaded into a CPU register? +Well, that doesn't work well, because perfectly legitimate C programs routinely +copy uninitialised values around in memory, and we don't want endless complaints +about that. Here's the canonical example. Consider a struct +like this: +

+  struct S { int x; char c; };
+  struct S s1, s2;
+  s1.x = 42;
+  s1.c = 'z';
+  s2 = s1;
+
+ +

The question to ask is: how large is struct S, in +bytes? An int is 4 bytes and a char one byte, so perhaps a struct S +occupies 5 bytes? Wrong. All (non-toy) compilers I know of will +round the size of struct S up to a whole number of words, +in this case 8 bytes. Not doing this forces compilers to generate +truly appalling code for subscripting arrays of struct +S's. + +

So s1 occupies 8 bytes, yet only 5 of them will be initialised. +For the assignment s2 = s1, gcc generates code to copy +all 8 bytes wholesale into s2 without regard for their +meaning. If Valgrind simply checked values as they came out of +memory, it would yelp every time a structure assignment like this +happened. So the more complicated semantics described above is +necessary. This allows gcc to copy s1 into +s2 any way it likes, and a warning will only be emitted +if the uninitialised values are later used. + +

One final twist to this story. The above scheme allows garbage to +pass through the CPU's integer registers without complaint. It does +this by giving the integer registers V tags, passing these around in +the expected way. This is complicated and computationally expensive to +do, but is necessary. Valgrind is more simplistic about +floating-point loads and stores. In particular, V bits for data read +as a result of floating-point loads are checked at the load +instruction. So if your program uses the floating-point registers to +do memory-to-memory copies, you will get complaints about +uninitialised values. Fortunately, I have not yet encountered a +program which (ab)uses the floating-point registers in this way. + + +

3.2  Valid-address (A) bits

+ +Notice that the previous section describes how the validity of values +is established and maintained without having to say whether the +program does or does not have the right to access any particular +memory location. We now consider the latter issue. + +

As described above, every bit in memory or in the CPU has an +associated valid-value (V) bit. In addition, all bytes in memory, but +not in the CPU, have an associated valid-address (A) bit. This +indicates whether or not the program can legitimately read or write +that location. It does not give any indication of the validity of the +data at that location -- that's the job of the V bits -- only whether +or not the location may be accessed. + +

Every time your program reads or writes memory, Valgrind checks the +A bits associated with the address. If any of them indicate an +invalid address, an error is emitted. Note that the reads and writes +themselves do not change the A bits, only consult them. + +

So how do the A bits get set/cleared? Like this: + +

    +
  • When the program starts, all the global data areas are marked as + accessible.

  • +

    + +

  • When the program does malloc/new, the A bits for exactly the + area allocated, and not a byte more, are marked as accessible. + Upon freeing the area the A bits are changed to indicate + inaccessibility.

  • +

    + +

  • When the stack pointer register (%esp) moves up or down, A bits + are set. The rule is that the area from %esp up to the base of + the stack is marked as accessible, and below %esp is + inaccessible. (If that sounds illogical, bear in mind that the + stack grows down, not up, on almost all Unix systems, including + GNU/Linux.) Tracking %esp like this has the useful side-effect + that the section of stack used by a function for local variables + etc is automatically marked accessible on function entry and + inaccessible on exit.

  • +

    + +

  • When doing system calls, A bits are changed appropriately. For + example, mmap() magically makes files appear in the process's + address space, so the A bits must be updated if mmap() + succeeds.

  • +
+ + + +

3.3  Putting it all together

+Valgrind's checking machinery can be summarised as follows: + +
    +
  • Each byte in memory has 8 associated V (valid-value) bits, + saying whether or not the byte has a defined value, and a single + A (valid-address) bit, saying whether or not the program + currently has the right to read/write that address.

  • +

    + +

  • When memory is read or written, the relevant A bits are + consulted. If they indicate an invalid address, Valgrind emits + an Invalid read or Invalid write error.

  • +

    + +

  • When memory is read into the CPU's integer registers, the + relevant V bits are fetched from memory and stored in the + simulated CPU. They are not consulted.

  • +

    + +

  • When an integer register is written out to memory, the V bits + for that register are written back to memory too.

  • +

    + +

  • When memory is read into the CPU's floating point registers, the + relevant V bits are read from memory and they are immediately + checked. If any are invalid, an uninitialised value error is + emitted. This precludes using the floating-point registers to + copy possibly-uninitialised memory, but simplifies Valgrind in + that it does not have to track the validity status of the + floating-point registers.

  • +

    + +

  • As a result, when a floating-point register is written to + memory, the associated V bits are set to indicate a valid + value.

  • +

    + +

  • When values in integer CPU registers are used to generate a + memory address, or to determine the outcome of a conditional + branch, the V bits for those values are checked, and an error + emitted if any of them are undefined.

  • +

    + +

  • When values in integer CPU registers are used for any other + purpose, Valgrind computes the V bits for the result, but does + not check them.

  • +

    + +

  • Once the V bits for a value in the CPU have been checked, they + are then set to indicate validity. This avoids long chains of + errors.

  • +

    + +

  • When values are loaded from memory, valgrind checks the A bits + for that location and issues an illegal-address warning if + needed. In that case, the V bits loaded are forced to indicate + Valid, despite the location being invalid. +

    + This apparently strange choice reduces the amount of confusing + information presented to the user. It avoids the + unpleasant phenomenon in which memory is read from a place which + is both unaddressible and contains invalid values, and, as a + result, you get not only an invalid-address (read/write) error, + but also a potentially large set of uninitialised-value errors, + one for every time the value is used. +

    + There is a hazy boundary case to do with multi-byte loads from + addresses which are partially valid and partially invalid. See + the description of the flag --partial-loads-ok for details. +


  • +
+ +Valgrind intercepts calls to malloc, calloc, realloc, valloc, +memalign, free, new and delete. The behaviour you get is: + +
    + +
  • malloc/new: the returned memory is marked as addressible but not + having valid values. This means you have to write on it before + you can read it.

  • +

    + +

  • calloc: returned memory is marked both addressible and valid, + since calloc() clears the area to zero.

  • +

    + +

  • realloc: if the new size is larger than the old, the new section + is addressible but invalid, as with malloc.

  • +

    + +

  • If the new size is smaller, the dropped-off section is marked as + unaddressible. You may only pass to realloc a pointer + previously issued to you by malloc/calloc/new/realloc.

  • +

    + +

  • free/delete: you may only pass to free a pointer previously + issued to you by malloc/calloc/new/realloc, or the value + NULL. Otherwise, Valgrind complains. If the pointer is indeed + valid, Valgrind marks the entire area it points at as + unaddressible, and places the block in the freed-blocks-queue. + The aim is to defer as long as possible reallocation of this + block. Until that happens, all attempts to access it will + elicit an invalid-address error, as you would hope.

  • +
+ + + + +

3.4  Signals

+ +Valgrind provides suitable handling of signals, so, provided you stick +to POSIX stuff, you should be ok. Basic sigaction() and sigprocmask() +are handled. Signal handlers may return in the normal way or do +longjmp(); both should work ok. As specified by POSIX, a signal is +blocked in its own handler. Default actions for signals should work +as before. Etc, etc. + +

Under the hood, dealing with signals is a real pain, and Valgrind's +simulation leaves much to be desired. If your program does +way-strange stuff with signals, bad things may happen. If so, let me +know. I don't promise to fix it, but I'd at least like to be aware of +it. + + + +

3.5  Memory leak detection

+ +Valgrind keeps track of all memory blocks issued in response to calls +to malloc/calloc/realloc/new. So when the program exits, it knows +which blocks are still outstanding -- have not been returned, in other +words. Ideally, you want your program to have no blocks still in use +at exit. But many programs do. + +

For each such block, Valgrind scans the entire address space of the +process, looking for pointers to the block. One of three situations +may result: + +

    +
  • A pointer to the start of the block is found. This usually + indicates programming sloppiness; since the block is still + pointed at, the programmer could, at least in principle, have free'd + it before program exit.

  • +

    + +

  • A pointer to the interior of the block is found. The pointer + might originally have pointed to the start and have been moved + along, or it might be entirely unrelated. Valgrind deems such a + block as "dubious", that is, possibly leaked, + because it's unclear whether or + not a pointer to it still exists.

  • +

    + +

  • The worst outcome is that no pointer to the block can be found. + The block is classified as "leaked", because the + programmer could not possibly have free'd it at program exit, + since no pointer to it exists. This might be a symptom of + having lost the pointer at some earlier point in the + program.
  • +
+ +Valgrind reports summaries about leaked and dubious blocks. +For each such block, it will also tell you where the block was +allocated. This should help you figure out why the pointer to it has +been lost. In general, you should attempt to ensure your programs do +not have any leaked or dubious blocks at exit. + +

The precise area of memory in which Valgrind searches for pointers +is: all naturally-aligned 4-byte words for which all A bits indicate +addressibility and all V bits indicate that the stored value is +actually valid. + +


+ + +
+

4  Limitations

+ +The following list of limitations seems depressingly long. However, +most programs actually work fine. + +

Valgrind will run x86-GNU/Linux ELF dynamically linked binaries, on +a kernel 2.4.X system, subject to the following constraints: + +

    +
  • No MMX, SSE, SSE2, 3DNow instructions. If the translator + encounters these, Valgrind will simply give up. It may be + possible to add support for them at a later time. Intel added a + few instructions such as "cmov" to the integer instruction set + on Pentium and later processors, and these are supported. + Nevertheless it's safest to think of Valgrind as implementing + the 486 instruction set.

  • +

    + +

  • Multithreaded programs are not supported, since I haven't yet + figured out how to do this. To be more specific, it is the + "clone" system call which is not supported. A program calls + "clone" to create threads. Valgrind will abort if this + happens.
  • +

    + +

  • Valgrind assumes that the floating point registers are not used + as intermediaries in memory-to-memory copies, so it immediately + checks V bits in floating-point loads/stores. If you want to + write code which copies around possibly-uninitialised values, + you must ensure these travel through the integer registers, not + the FPU.

  • +

    + +

  • If your program does its own memory management, rather than + using malloc/new/free/delete, it should still work, but + Valgrind's error checking won't be so effective.

  • +

    + +

  • Valgrind's signal simulation is not as robust as it could be. + Basic POSIX-compliant sigaction and sigprocmask functionality is + supplied, but it's conceivable that things could go badly awry + if you do weird things with signals. Workaround: don't. + Programs that do non-POSIX signal tricks are in any case + inherently unportable, so should be avoided if + possible.

  • +

    + +

  • I have no idea what happens if programs try to handle signals on + an alternate stack (sigaltstack). YMMV.

  • +

    + +

  • Programs which switch stacks are not well handled. Valgrind + does have support for this, but I don't have great faith in it. + It's difficult -- there's no cast-iron way to decide whether a + large change in %esp is as a result of the program switching + stacks, or merely allocating a large object temporarily on the + current stack -- yet Valgrind needs to handle the two situations + differently.

  • +

    + +

  • x86 instructions, and system calls, have been implemented on + demand. So it's possible, although unlikely, that a program + will fall over with a message to that effect. If this happens, + please mail me ALL the details printed out, so I can try and + implement the missing feature.

  • +

    + +

  • x86 floating point works correctly, but floating-point code may + run even more slowly than integer code, due to my simplistic + approach to FPU emulation.

  • +

    + +

  • You can't Valgrind-ize statically linked binaries. Valgrind + relies on the dynamic-link mechanism to gain control at + startup.

  • +

    + +

  • Memory consumption of your program is majorly increased whilst + running under Valgrind. This is due to the large amount of + administrative information maintained behind the scenes. Another + cause is that Valgrind dynamically translates the original + executable and never throws any translation away, except in + those rare cases where self-modifying code is detected. + Translated, instrumented code is 8-12 times larger than the + original (!) so you can easily end up with 15+ MB of + translations when running (eg) a web browser. There's not a lot + you can do about this -- use Valgrind on a fast machine with a lot + of memory and swap space. At some point I may implement an LRU + caching scheme for translations, so as to bound the maximum + amount of memory devoted to them, to say 8 or 16 MB.
  • +
+ + +Programs which are known not to work are: + +
    +
  • Netscape 4.76 works pretty well on some platforms -- quite + nicely on my AMD K6-III (400 MHz). I can surf, do mail, etc, no + problem. On other platforms it has been observed to crash + during startup. Despite much investigation I can't figure out + why.

  • +

    + +

  • kpackage (a KDE front end to rpm) dies because the CPUID + instruction is unimplemented. Easy to fix.

  • +

    + +

  • knode (a KDE newsreader) tries to do multithreaded things, and + fails.

  • +

    + +

  • emacs starts up but immediately concludes it is out of memory + and aborts. Emacs has its own memory-management scheme, but I + don't understand why this should interact so badly with + Valgrind.

  • +

    + +

  • Gimp and Gnome and GTK-based apps die early on because + of unimplemented system call wrappers. (I'm a KDE user :) + This wouldn't be hard to fix. +

  • +

    + +

  • As a consequence of me being a KDE user, almost all KDE apps + work ok -- except those which are multithreaded. +

  • +

    +

+ + +


+ + + +

5  How it works -- a rough overview

+Some gory details, for those with a passion for gory details. You +don't need to read this section if all you want to do is use Valgrind. + + +

5.1  Getting started

+ +Valgrind is compiled into a shared object, valgrind.so. The shell +script valgrind sets the LD_PRELOAD environment variable to point to +valgrind.so. This causes the .so to be loaded as an extra library to +any subsequently executed dynamically-linked ELF binary, viz, the +program you want to debug. + +

The dynamic linker allows each .so in the process image to have an +initialisation function which is run before main(). It also allows +each .so to have a finalisation function run after main() exits. + +

When valgrind.so's initialisation function is called by the dynamic +linker, the synthetic CPU starts up. The real CPU remains locked +in valgrind.so for the entire rest of the program, but the synthetic +CPU returns from the initialisation function. Startup of the program +now continues as usual -- the dynamic linker calls all the other .so's +initialisation routines, and eventually runs main(). This all runs on +the synthetic CPU, not the real one, but the client program cannot +tell the difference. + +

Eventually main() exits, so the synthetic CPU calls valgrind.so's +finalisation function. Valgrind detects this, and uses it as its cue +to exit. It prints summaries of all errors detected, possibly checks +for memory leaks, and then exits the finalisation routine, but now on +the real CPU. The synthetic CPU has now lost control -- permanently +-- so the program exits back to the OS on the real CPU, just as it +would have done anyway. + +

On entry, Valgrind switches stacks, so it runs on its own stack. +On exit, it switches back. This means that the client program +continues to run on its own stack, so we can switch back and forth +between running it on the simulated and real CPUs without difficulty. +This was an important design decision, because it makes it easy (well, +significantly less difficult) to debug the synthetic CPU. + + + +

5.2  The translation/instrumentation engine

+ +Valgrind does not directly run any of the original program's code. Only +instrumented translations are run. Valgrind maintains a translation +table, which allows it to find the translation quickly for any branch +target (code address). If no translation has yet been made, the +translator - a just-in-time translator - is summoned. This makes an +instrumented translation, which is added to the collection of +translations. Subsequent jumps to that address will use this +translation. + +

Valgrind can optionally check writes made by the application, to +see if they are writing an address contained within code which has +been translated. Such a write invalidates translations of code +bracketing the written address. Valgrind will discard the relevant +translations, which causes them to be re-made, if they are needed +again, reflecting the new updated data stored there. In this way, +self modifying code is supported. In practice I have not found any +Linux applications which use self-modifying-code. + +

The JITter translates basic blocks -- blocks of straight-line-code +-- as single entities. To minimise the considerable difficulties of +dealing with the x86 instruction set, x86 instructions are first +translated to a RISC-like intermediate code, similar to sparc code, +but with an infinite number of virtual integer registers. Initially +each insn is translated separately, and there is no attempt at +instrumentation. + +

The intermediate code is improved, mostly so as to try and cache +the simulated machine's registers in the real machine's registers over +several simulated instructions. This is often very effective. Also, +we try to remove redundant updates of the simulated machine's +condition-code register. + +

The intermediate code is then instrumented, giving more +intermediate code. There are a few extra intermediate-code operations +to support instrumentation; it is all refreshingly simple. After +instrumentation there is a cleanup pass to remove redundant value +checks. + +

This gives instrumented intermediate code which mentions arbitrary +numbers of virtual registers. A linear-scan register allocator is +used to assign real registers and possibly generate spill code. All +of this is still phrased in terms of the intermediate code. This +machinery is inspired by the work of Reuben Thomas (MITE). + +

Then, and only then, is the final x86 code emitted. The +intermediate code is carefully designed so that x86 code can be +generated from it without need for spare registers or other +inconveniences. + +

The translations are managed using a traditional LRU-based caching +scheme. The translation cache has a default size of about 14MB. + + + +

5.3  Tracking the status of memory

Each byte in the +process' address space has nine bits associated with it: one A bit and +eight V bits. The A and V bits for each byte are stored using a +sparse array, which flexibly and efficiently covers arbitrary parts of +the 32-bit address space without imposing significant space or +performance overheads for the parts of the address space never +visited. The scheme used, and speedup hacks, are described in detail +at the top of the source file vg_memory.c, so you should read that for +the gory details. + + + +

5.4 System calls

+All system calls are intercepted. The memory status map is consulted +before and updated after each call. It's all rather tiresome. See +vg_syscall_mem.c for details. + + + +

5.5  Signals

+All system calls to sigaction() and sigprocmask() are intercepted. If +the client program is trying to set a signal handler, Valgrind makes a +note of the handler address and which signal it is for. Valgrind then +arranges for the same signal to be delivered to its own handler. + +

When such a signal arrives, Valgrind's own handler catches it, and +notes the fact. At a convenient safe point in execution, Valgrind +builds a signal delivery frame on the client's stack and runs its +handler. If the handler longjmp()s, there is nothing more to be said. +If the handler returns, Valgrind notices this, zaps the delivery +frame, and carries on where it left off before delivering the signal. + +

The purpose of this nonsense is that setting signal handlers +essentially amounts to giving callback addresses to the Linux kernel. +We can't allow this to happen, because if it did, signal handlers +would run on the real CPU, not the simulated one. This means the +checking machinery would not operate during the handler run, and, +worse, memory permissions maps would not be updated, which could cause +spurious error reports once the handler had returned. + +

An even worse thing would happen if the signal handler longjmp'd +rather than returned: Valgrind would completely lose control of the +client program. + +

Upshot: we can't allow the client to install signal handlers +directly. Instead, Valgrind must catch, on behalf of the client, any +signal the client asks to catch, and must deliver it to the client on +the simulated CPU, not the real one. This involves considerable +gruesome fakery; see vg_signals.c for details.

+ +


+ + +

6  Example

+This is the log for a run of a small program. The program is in fact +correct, and the reported error is as the result of a potentially serious +code generation bug in GNU g++ (snapshot 20010527). +
+sewardj@phoenix:~/newmat10$
+~/Valgrind-6/valgrind -v ./bogon 
+==25832== Valgrind 0.10, a memory error detector for x86 RedHat 7.1.
+==25832== Copyright (C) 2000-2001, and GNU GPL'd, by Julian Seward.
+==25832== Startup, with flags:
+==25832== --suppressions=/home/sewardj/Valgrind/redhat71.supp
+==25832== reading syms from /lib/ld-linux.so.2
+==25832== reading syms from /lib/libc.so.6
+==25832== reading syms from /mnt/pima/jrs/Inst/lib/libgcc_s.so.0
+==25832== reading syms from /lib/libm.so.6
+==25832== reading syms from /mnt/pima/jrs/Inst/lib/libstdc++.so.3
+==25832== reading syms from /home/sewardj/Valgrind/valgrind.so
+==25832== reading syms from /proc/self/exe
+==25832== loaded 5950 symbols, 142333 line number locations
+==25832== 
+==25832== Invalid read of size 4
+==25832==    at 0x8048724: _ZN10BandMatrix6ReSizeEiii (bogon.cpp:45)
+==25832==    by 0x80487AF: main (bogon.cpp:66)
+==25832==    by 0x40371E5E: __libc_start_main (libc-start.c:129)
+==25832==    by 0x80485D1: (within /home/sewardj/newmat10/bogon)
+==25832==    Address 0xBFFFF74C is not stack'd, malloc'd or free'd
+==25832==
+==25832== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+==25832== malloc/free: in use at exit: 0 bytes in 0 blocks.
+==25832== malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+==25832== For a detailed leak analysis, rerun with: --leak-check=yes
+==25832==
+==25832== exiting, did 1881 basic blocks, 0 misses.
+==25832== 223 translations, 3626 bytes in, 56801 bytes out.
+
+

The GCC folks fixed this about a week before gcc-3.0 shipped. +


+

+ + diff --git a/cachegrind/docs/nav.html b/cachegrind/docs/nav.html new file mode 100644 index 000000000..686ac2bde --- /dev/null +++ b/cachegrind/docs/nav.html @@ -0,0 +1,68 @@ + + + Valgrind + + + + + +
+ Contents of this manual
+ 1 Introduction
+ 1.1 What Valgrind is for
+ 1.2 What it does with + your program +

+ 2 How to use it, and how to + make sense of the results
+ 2.1 Getting started
+ 2.2 The commentary
+ 2.3 Reporting of errors
+ 2.4 Suppressing errors
+ 2.5 Command-line flags
+ 2.6 Explanation of error messages
+ 2.7 Writing suppressions files
+ 2.8 Building and installing
+ 2.9 If you have problems +

+ 3 Details of the checking machinery
+ 3.1 Valid-value (V) bits
+ 3.2 Valid-address (A) bits
+ 3.3 Putting it all together
+ 3.4 Signals
+ 3.5 Memory leak detection +

+ 4 Limitations
+

+ 5 How it works -- a rough overview
+ 5.1 Getting started
+ 5.2 The translation/instrumentation engine
+ 5.3 Tracking the status of memory
+ 5.4 System calls
+ 5.5 Signals +

+ 6 An example
+

+ 7 The design and implementation of Valgrind
+ + + diff --git a/cachegrind/docs/techdocs.html b/cachegrind/docs/techdocs.html new file mode 100644 index 000000000..4044d4957 --- /dev/null +++ b/cachegrind/docs/techdocs.html @@ -0,0 +1,2116 @@ + + + + The design and implementation of Valgrind + + + + +  +

The design and implementation of Valgrind

+ +
+Detailed technical notes for hackers, maintainers and the +overly-curious
+These notes pertain to snapshot 20020306
+

+jseward@acm.org
+
http://developer.kde.org/~sewardj
+http://www.muraroa.demon.co.uk
+Copyright © 2000-2002 Julian Seward +

+Valgrind is licensed under the GNU General Public License, +version 2
+An open-source tool for finding memory-management problems in +x86 GNU/Linux executables. +

+ +

+ + + + +


+ +

Introduction

+ +This document contains a detailed, highly-technical description of the +internals of Valgrind. This is not the user manual; if you are an +end-user of Valgrind, you do not want to read this. Conversely, if +you really are a hacker-type and want to know how it works, I assume +that you have read the user manual thoroughly. +

+You may need to read this document several times, and carefully. Some +important things, I only say once. + + +

History

+ +Valgrind came into public view in late Feb 2002. However, it has been +under contemplation for a very long time, perhaps seriously for about +five years. Somewhat over two years ago, I started working on the x86 +code generator for the Glasgow Haskell Compiler +(http://www.haskell.org/ghc), gaining familiarity with x86 internals +on the way. I then did Cacheprof (http://www.cacheprof.org), gaining +further x86 experience. Some time around Feb 2000 I started +experimenting with a user-space x86 interpreter for x86-Linux. This +worked, but it was clear that a JIT-based scheme would be necessary to +give reasonable performance for Valgrind. Design work for the JITter +started in earnest in Oct 2000, and by early 2001 I had an x86-to-x86 +dynamic translator which could run quite large programs. This +translator was in a sense pointless, since it did not do any +instrumentation or checking. + +

+Most of the rest of 2001 was taken up designing and implementing the +instrumentation scheme. The main difficulty, which consumed a lot +of effort, was to design a scheme which did not generate large numbers +of false uninitialised-value warnings. By late 2001 a satisfactory +scheme had been arrived at, and I started to test it on ever-larger +programs, with an eventual eye to making it work well enough so that +it was helpful to folks debugging the upcoming version 3 of KDE. I've +used KDE since before version 1.0, and wanted Valgrind to be an +indirect contribution to the KDE 3 development effort. At the start of +Feb 02 the kde-core-devel crew started using it, and gave a huge +amount of helpful feedback and patches in the space of three weeks. +Snapshot 20020306 is the result. + +

+In the best Unix tradition, or perhaps in the spirit of Fred Brooks' +depressing-but-completely-accurate epitaph "build one to throw away; +you will anyway", much of Valgrind is a second or third rendition of +the initial idea. The instrumentation machinery +(vg_translate.c, vg_memory.c) and core CPU +simulation (vg_to_ucode.c, vg_from_ucode.c) +have had three redesigns and rewrites; the register allocator, +low-level memory manager (vg_malloc2.c) and symbol table +reader (vg_symtab2.c) are on the second rewrite. In a +sense, this document serves to record some of the knowledge gained as +a result. + + +

Design overview

+ +Valgrind is compiled into a Linux shared object, +valgrind.so, and also a dummy one, +valgrinq.so, of which more later. The +valgrind shell script adds valgrind.so to +the LD_PRELOAD list of extra libraries to be +loaded with any dynamically linked library. This is a standard trick, +one which I assume the LD_PRELOAD mechanism was developed +to support. + +

+valgrind.so +is linked with the -z initfirst flag, which requests that +its initialisation code is run before that of any other object in the +executable image. When this happens, valgrind gains control. The +real CPU becomes "trapped" in valgrind.so and the +translations it generates. The synthetic CPU provided by Valgrind +does, however, return from this initialisation function. So the +normal startup actions, orchestrated by the dynamic linker +ld.so, continue as usual, except on the synthetic CPU, +not the real one. Eventually main is run and returns, +and then the finalisation code of the shared objects is run, +presumably in inverse order to which they were initialised. Remember, +this is still all happening on the simulated CPU. Eventually +valgrind.so's own finalisation code is called. It spots +this event, shuts down the simulated CPU, prints any error summaries +and/or does leak detection, and returns from the initialisation code +on the real CPU. At this point, in effect the real and synthetic CPUs +have merged back into one, Valgrind has lost control of the program, +and the program finally exit()s back to the kernel in the +usual way. + +

+The normal course of activity, once Valgrind has started up, is as +follows. Valgrind never runs any part of your program (usually +referred to as the "client"), not a single byte of it, directly. +Instead it uses function VG_(translate) to translate +basic blocks (BBs, straight-line sequences of code) into instrumented +translations, and those are run instead. The translations are stored +in the translation cache (TC), vg_tc, with the +translation table (TT), vg_tt supplying the +original-to-translation code address mapping. Auxiliary array +VG_(tt_fast) is used as a direct-map cache for fast +lookups in TT; it usually achieves a hit rate of around 98% and +facilitates an orig-to-trans lookup in 4 x86 insns, which is not bad. + +

+Function VG_(dispatch) in vg_dispatch.S is +the heart of the JIT dispatcher. Once a translated code address has +been found, it is executed simply by an x86 call +to the translation. At the end of the translation, the next +original code addr is loaded into %eax, and the +translation then does a ret, taking it back to the +dispatch loop, with, interestingly, zero branch mispredictions. +The address requested in %eax is looked up first in +VG_(tt_fast), and, if not found, by calling C helper +VG_(search_transtab). If there is still no translation +available, VG_(dispatch) exits back to the top-level +C dispatcher VG_(toploop), which arranges for +VG_(translate) to make a new translation. All fairly +unsurprising, really. There are various complexities described below. + +

+The translator, orchestrated by VG_(translate), is +complicated but entirely self-contained. It is described in great +detail in subsequent sections. Translations are stored in TC, with TT +tracking administrative information. The translations are subject to +an approximate LRU-based management scheme. With the current +settings, the TC can hold at most about 15MB of translations, and LRU +passes prune it to about 13.5MB. Given that the +orig-to-translation expansion ratio is about 13:1 to 14:1, this means +TC holds translations for more or less a megabyte of original code, +which generally comes to about 70000 basic blocks for C++ compiled +with optimisation on. Generating new translations is expensive, so it +is worth having a large TC to minimise the (capacity) miss rate. + +

+The dispatcher, VG_(dispatch), receives hints from +the translations which allow it to cheaply spot all control +transfers corresponding to x86 call and ret +instructions. It has to do this in order to spot some special events: +

    +
  • Calls to VG_(shutdown). This is Valgrind's cue to + exit. NOTE: actually this is done a different way; it should be + cleaned up. +

    +

  • Returns of system call handlers, to the return address + VG_(signalreturn_bogusRA). The signal simulator + needs to know when a signal handler is returning, so we spot + jumps (returns) to this address. +

    +

  • Calls to vg_trap_here. All malloc, + free, etc calls that the client program makes are + eventually routed to a call to vg_trap_here, + and Valgrind does its own special thing with these calls. + In effect this provides a trapdoor, by which Valgrind can + intercept certain calls on the simulated CPU, run the call as it + sees fit itself (on the real CPU), and return the result to + the simulated CPU, quite transparently to the client program. +
+Valgrind intercepts the client's malloc, +free, etc, +calls, so that it can store additional information. Each block +malloc'd by the client gives rise to a shadow block +in which Valgrind stores the call stack at the time of the +malloc +call. When the client calls free, Valgrind tries to +find the shadow block corresponding to the address passed to +free, and emits an error message if none can be found. +If it is found, the block is placed on the freed blocks queue +vg_freed_list, it is marked as inaccessible, and +its shadow block now records the call stack at the time of the +free call. Keeping free'd blocks in +this queue allows Valgrind to spot all (presumably invalid) accesses +to them. However, once the volume of blocks in the free queue +exceeds VG_(clo_freelist_vol), blocks are finally +removed from the queue. + +

+Keeping track of A and V bits (note: if you don't know what these are, +you haven't read the user guide carefully enough) for memory is done +in vg_memory.c. This implements a sparse array structure +which covers the entire 4G address space in a way which is reasonably +fast and reasonably space efficient. The 4G address space is divided +up into 64K sections, each covering 64Kb of address space. Given a +32-bit address, the top 16 bits are used to select one of the 65536 +entries in VG_(primary_map). The resulting "secondary" +(SecMap) holds A and V bits for the 64k of address space +chunk corresponding to the lower 16 bits of the address. + + +

Design decisions

+ +Some design decisions were motivated by the need to make Valgrind +debuggable. Imagine you are writing a CPU simulator. It works fairly +well. However, you run some large program, like Netscape, and after +tens of millions of instructions, it crashes. How can you figure out +where in your simulator the bug is? + +

+Valgrind's answer is: cheat. Valgrind is designed so that it is +possible to switch back to running the client program on the real +CPU at any point. Using the --stop-after= flag, you can +ask Valgrind to run just some number of basic blocks, and then +run the rest of the way on the real CPU. If you are searching for +a bug in the simulated CPU, you can use this to do a binary search, +which quickly leads you to the specific basic block which is +causing the problem. + +

+This is all very handy. It does constrain the design in certain +unimportant ways. Firstly, the layout of memory, when viewed from the +client's point of view, must be identical regardless of whether it is +running on the real or simulated CPU. This means that Valgrind can't +do pointer swizzling -- well, no great loss -- and it can't run on +the same stack as the client -- again, no great loss. +Valgrind operates on its own stack, VG_(stack), which +it switches to at startup, temporarily switching back to the client's +stack when doing system calls for the client. + +

+Valgrind also receives signals on its own stack, +VG_(sigstack), but for different gruesome reasons +discussed below. + +

+This nice clean switch-back-to-the-real-CPU-whenever-you-like story +is muddied by signals. Problem is that signals arrive at arbitrary +times and tend to slightly perturb the basic block count, with the +result that you can get close to the basic block causing a problem but +can't home in on it exactly. My kludgey hack is to define +SIGNAL_SIMULATION to 1 towards the bottom of +vg_syscall_mem.c, so that signal handlers are run on the +real CPU and don't change the BB counts. + +

+A second hole in the switch-back-to-real-CPU story is that Valgrind's +way of delivering signals to the client is different from that of the +kernel. Specifically, the layout of the signal delivery frame, and +the mechanism used to detect a sighandler returning, are different. +So you can't expect to make the transition inside a sighandler and +still have things working, but in practice that's not much of a +restriction. + +

+Valgrind's implementation of malloc, free, +etc, (in vg_clientmalloc.c, not the low-level stuff in +vg_malloc2.c) is somewhat complicated by the need to +handle switching back at arbitrary points. It does work tho. + + + +

Correctness

+ +There's only one of me, and I have a Real Life (tm) as well as hacking +Valgrind [allegedly :-]. That means I don't have time to waste +chasing endless bugs in Valgrind. My emphasis is therefore on doing +everything as simply as possible, with correctness, stability and +robustness being the number one priority, more important than +performance or functionality. As a result: +
    +
  • The code is absolutely loaded with assertions, and these are + permanently enabled. I have no plan to remove or disable + them later. Over the past couple of months, as valgrind has + become more widely used, they have shown their worth, pulling + up various bugs which would otherwise have appeared as + hard-to-find segmentation faults. +

    + I am of the view that it's acceptable to spend 5% of the total + running time of your valgrindified program doing assertion checks + and other internal sanity checks. +

    +

  • Aside from the assertions, valgrind contains various sets of + internal sanity checks, which get run at varying frequencies + during normal operation. VG_(do_sanity_checks) + runs every 1000 basic blocks, which means 500 to 2000 times/second + for typical machines at present. It checks that Valgrind hasn't + overrun its private stack, and does some simple checks on the + memory permissions maps. Once every 25 calls it does some more + extensive checks on those maps. Etc, etc. +

    + The following components also have sanity check code, which can + be enabled to aid debugging: +

      +
    • The low-level memory-manager + (VG_(mallocSanityCheckArena)). This does a + complete check of all blocks and chains in an arena, which + is very slow. Is not engaged by default. +

      +

    • The symbol table reader(s): various checks to ensure + uniqueness of mappings; see VG_(read_symbols) + for a start. Is permanently engaged. +

      +

    • The A and V bit tracking stuff in vg_memory.c. + This can be compiled with cpp symbol + VG_DEBUG_MEMORY defined, which removes all the + fast, optimised cases, and uses simple-but-slow fallbacks + instead. Not engaged by default. +

      +

    • Ditto VG_DEBUG_LEAKCHECK. +

      +

    • The JITter parses x86 basic blocks into sequences of + UCode instructions. It then sanity checks each one with + VG_(saneUInstr) and sanity checks the sequence + as a whole with VG_(saneUCodeBlock). This stuff + is engaged by default, and has caught some way-obscure bugs + in the simulated CPU machinery in its time. +

      +

    • The system call wrapper does + VG_(first_and_last_secondaries_look_plausible) after + every syscall; this is known to pick up bugs in the syscall + wrappers. Engaged by default. +

      +

    • The main dispatch loop, in VG_(dispatch), checks + that translations do not set %ebp to any value + different from VG_EBP_DISPATCH_CHECKED or + & VG_(baseBlock). In effect this test is free, + and is permanently engaged. +

      +

    • There are a couple of ifdefed-out consistency checks I + inserted whilst debugging the new register allocator, + vg_do_register_allocation. +
    +

    +

  • I try to avoid techniques, algorithms, mechanisms, etc, for which + I can supply neither a convincing argument that they are correct, + nor sanity-check code which might pick up bugs in my + implementation. I don't always succeed in this, but I try. + Basically the idea is: avoid techniques which are, in practice, + unverifiable, in some sense. When doing anything, always have in + mind: "how can I verify that this is correct?" +
+ +

+Some more specific things are: + +

    +
  • Valgrind runs in the same namespace as the client, at least from + ld.so's point of view, and it therefore absolutely + had better not export any symbol with a name which could clash + with that of the client or any of its libraries. Therefore, all + globally visible symbols exported from valgrind.so + are defined using the VG_ CPP macro. As you'll see + from vg_constants.h, this appends some arbitrary + prefix to the symbol, in order that it be, we hope, globally + unique. Currently the prefix is vgPlain_. For + convenience there are also VGM_, VGP_ + and VGOFF_. All locally defined symbols are declared + static and do not appear in the final shared object. +

    + To check this, I periodically do + nm valgrind.so | grep " T ", + which shows you all the globally exported text symbols. + They should all have an approved prefix, except for those like + malloc, free, etc, which we deliberately + want to shadow and take precedence over the same names exported + from glibc.so, so that valgrind can intercept those + calls easily. Similarly, nm valgrind.so | grep " D " + allows you to find any rogue data-segment symbol names. +

    +

  • Valgrind tries, and almost succeeds, in being completely + independent of all other shared objects, in particular of + glibc.so. For example, we have our own low-level + memory manager in vg_malloc2.c, which is a fairly + standard malloc/free scheme augmented with arenas, and + vg_mylibc.c exports reimplementations of various bits + and pieces you'd normally get from the C library. +

    + Why all the hassle? Because imagine the potential chaos of both + the simulated and real CPUs executing in glibc.so. + It just seems simpler and cleaner to be completely self-contained, + so that only the simulated CPU visits glibc.so. In + practice it's not much hassle anyway. Also, valgrind starts up + before glibc has a chance to initialise itself, and who knows what + difficulties that could lead to. Finally, glibc has definitions + for some types, specifically sigset_t, which conflict with + (are different from) the Linux kernel's idea of same. When + Valgrind wants to fiddle around with signal stuff, it wants to + use the kernel's definitions, not glibc's definitions. So it's + simplest just to keep glibc out of the picture entirely. +

    + To find out which glibc symbols are used by Valgrind, reinstate + the link flags -nostdlib -Wl,-no-undefined. This + causes linking to fail, but will tell you what you depend on. + I have mostly, but not entirely, got rid of the glibc + dependencies; what remains is, IMO, fairly harmless. AFAIK the + current dependencies are: memset, + memcmp, stat, system, + sbrk, setjmp and longjmp. + +

    +

  • Similarly, valgrind should not really import any headers other + than the Linux kernel headers, since it knows of no API other than + the kernel interface to talk to. At the moment this is really not + in a good state, and vg_syscall_mem imports, via + vg_unsafe.h, a significant number of C-library + headers so as to know the sizes of various structs passed across + the kernel boundary. This is of course completely bogus, since + there is no guarantee that the C library's definitions of these + structs matches those of the kernel. I have started to sort this + out using vg_kerneliface.h, into which I had intended + to copy all kernel definitions which valgrind could need, but this + has not gotten very far. At the moment it mostly contains + definitions for sigset_t and struct + sigaction, since the kernel's definition for these really + does clash with glibc's. I plan to use a vki_ prefix + on all these types and constants, to denote the fact that they + pertain to Valgrind's Kernel Interface. +

    + Another advantage of having a vg_kerneliface.h file + is that it makes it simpler to interface to a different kernel. + One can, for example, easily imagine writing a new + vg_kerneliface.h for FreeBSD, or x86 NetBSD. + +

+ +

Current limitations

+ +No threads. I think fixing this is close to a research-grade problem. +

+No MMX. Fixing this should be relatively easy, using the same giant +trick used for x86 FPU instructions. See below. +

+Support for weird (non-POSIX) signal stuff is patchy. Does anybody +care? +

+ + + + +


+ +

The instrumenting JITter

+ +This really is the heart of the matter. We begin with various side +issues. + +

Run-time storage, and the use of host registers

+ +Valgrind translates client (original) basic blocks into instrumented +basic blocks, which live in the translation cache TC, until either the +client finishes or the translations are ejected from TC to make room +for newer ones. +

+Since it generates x86 code in memory, Valgrind has complete control +of the use of registers in the translations. Now pay attention. I +shall say this only once, and it is important you understand this. In +what follows I will refer to registers in the host (real) cpu using +their standard names, %eax, %edi, etc. I +refer to registers in the simulated CPU by capitalising them: +%EAX, %EDI, etc. These two sets of +registers usually bear no direct relationship to each other; there is +no fixed mapping between them. This naming scheme is used fairly +consistently in the comments in the sources. +

+Host registers, once things are up and running, are used as follows: +

    +
  • %esp, the real stack pointer, points + somewhere in Valgrind's private stack area, + VG_(stack) or, transiently, into its signal delivery + stack, VG_(sigstack). +

    +

  • %edi is used as a temporary in code generation; it + is almost always dead, except when used for the Left + value-tag operations. +

    +

  • %eax, %ebx, %ecx, + %edx and %esi are available to + Valgrind's register allocator. They are dead (carry unimportant + values) in between translations, and are live only in + translations. The one exception to this is %eax, + which, as mentioned far above, has a special significance to the + dispatch loop VG_(dispatch): when a translation + returns to the dispatch loop, %eax is expected to + contain the original-code-address of the next translation to run. + The register allocator is so good at minimising spill code that + using five regs and not having to save/restore %edi + actually gives better code than allocating to %edi + as well, but then having to push/pop it around special uses. +

    +

  • %ebp points permanently at + VG_(baseBlock). Valgrind's translations are + position-independent, partly because this is convenient, but also + because translations get moved around in TC as part of the LRUing + activity. All static entities which need to be referred to + from generated code, whether data or helper functions, are stored + starting at VG_(baseBlock) and are therefore reached + by indexing from %ebp. There is but one exception, + which is that by placing the value + VG_EBP_DISPATCH_CHECKED + in %ebp just before a return to the dispatcher, + the dispatcher is informed that the next address to run, + in %eax, requires special treatment. +

    +

  • The real machine's FPU state is pretty much unimportant, for + reasons which will become obvious. Ditto its %eflags + register. +
+ +

+The state of the simulated CPU is stored in memory, in +VG_(baseBlock), which is a block of 200 words IIRC. +Recall that %ebp points permanently at the start of this +block. Function vg_init_baseBlock decides what the +offsets of various entities in VG_(baseBlock) are to be, +and allocates word offsets for them. The code generator then emits +%ebp relative addresses to get at those things. The +sequence in which entities are allocated has been carefully chosen so +that the 32 most popular entities come first, because this means 8-bit +offsets can be used in the generated code. + +

+If I was clever, I could make %ebp point 32 words along +VG_(baseBlock), so that I'd have another 32 words of +short-form offsets available, but that's just complicated, and it's +not important -- the first 32 words take 99% (or whatever) of the +traffic. + +

+Currently, the sequence of stuff in VG_(baseBlock) is as +follows: +

    +
  • 9 words, holding the simulated integer registers, + %EAX .. %EDI, and the simulated flags, + %EFLAGS. +

    +

  • Another 9 words, holding the V bit "shadows" for the above 9 regs. +

    +

  • The addresses of various helper routines called from + generated code: + VG_(helper_value_check4_fail), + VG_(helper_value_check0_fail), + which register V-check failures, + VG_(helperc_STOREV4), + VG_(helperc_STOREV1), + VG_(helperc_LOADV4), + VG_(helperc_LOADV1), + which do stores and loads of V bits to/from the + sparse array which keeps track of V bits in memory, + and + VGM_(handle_esp_assignment), which messes with + memory addressibility resulting from changes in %ESP. +

    +

  • The simulated %EIP. +

    +

  • 24 spill words, for when the register allocator can't make it work + with 5 measly registers. +

    +

  • Addresses of helpers VG_(helperc_STOREV2), + VG_(helperc_LOADV2). These are here because 2-byte + loads and stores are relatively rare, so are placed above the + magic 32-word offset boundary. +

    +

  • For similar reasons, addresses of helper functions + VGM_(fpu_write_check) and + VGM_(fpu_read_check), which handle the A/V maps + testing and changes required by FPU writes/reads. +

    +

  • Some other boring helper addresses: + VG_(helper_value_check2_fail) and + VG_(helper_value_check1_fail). These are probably + never emitted now, and should be removed. +

    +

  • The entire state of the simulated FPU, which I believe to be + 108 bytes long. +

    +

  • Finally, the addresses of various other helper functions in + vg_helpers.S, which deal with rare situations which + are tedious or difficult to generate code in-line for. +
+ +

+As a general rule, the simulated machine's state lives permanently in +memory at VG_(baseBlock). However, the JITter does some +optimisations which allow the simulated integer registers to be +cached in real registers over multiple simulated instructions within +the same basic block. These are always flushed back into memory at +the end of every basic block, so that the in-memory state is +up-to-date between basic blocks. (This flushing is implied by the +statement above that the real machine's allocatable registers are +dead in between simulated blocks). + + +

Startup, shutdown, and system calls

+ +Getting into Valgrind (VG_(startup), called from +valgrind.so's initialisation section), really means +copying the real CPU's state into VG_(baseBlock), and +then installing our own stack pointer, etc, into the real CPU, and +then starting up the JITter. Exiting valgrind involves copying the +simulated state back to the real state. + +

+Unfortunately, there's a complication at startup time. Problem is +that at the point where we need to take a snapshot of the real CPU's +state, the offsets in VG_(baseBlock) are not set up yet, +because to do so would involve disrupting the real machine's state +significantly. The way round this is to dump the real machine's state +into a temporary, static block of memory, +VG_(m_state_static). We can then set up the +VG_(baseBlock) offsets at our leisure, and copy into it +from VG_(m_state_static) at some convenient later time. +This copying is done by +VG_(copy_m_state_static_to_baseBlock). + +

+On exit, the inverse transformation is (rather unnecessarily) used: +stuff in VG_(baseBlock) is copied to +VG_(m_state_static), and the assembly stub then copies +from VG_(m_state_static) into the real machine registers. + +

+ +Doing system calls on behalf of the client (vg_syscall.S) +is something of a half-way house. We have to make the world look +sufficiently like that which the client would normally have, to make +the syscall actually work properly, but we can't afford to lose +control. So the trick is to copy all of the client's state, except +its program counter, into the real CPU, do the system call, and +copy the state back out. Note that the client's state includes its +stack pointer register, so one effect of this partial restoration is +to cause the system call to be run on the client's stack, as it should +be. + +

+As ever there are complications. We have to save some of our own state +somewhere when restoring the client's state into the CPU, so that we +can keep going sensibly afterwards. In fact the only thing which is +important is our own stack pointer, but for paranoia reasons I save +and restore our own FPU state as well, even though that's probably +pointless. + +

+The complication on the above complication is, that for horrible +reasons to do with signals, we may have to handle a second client +system call whilst the client is blocked inside some other system +call (unbelievable!). That means there's two sets of places to +dump Valgrind's stack pointer and FPU state across the syscall, +and we decide which to use by consulting +VG_(syscall_depth), which is in turn maintained by +VG_(wrap_syscall). + + + +

Introduction to UCode

+ +UCode lies at the heart of the x86-to-x86 JITter. The basic premise +is that dealing with the x86 instruction set head-on is just too darn +complicated, so we do the traditional compiler-writer's trick and +translate it into a simpler, easier-to-deal-with form. + +

+In normal operation, translation proceeds through six stages, +coordinated by VG_(translate): +

    +
  1. Parsing of an x86 basic block into a sequence of UCode + instructions (VG_(disBB)). +

    +

  2. UCode optimisation (vg_improve), with the aim of + caching simulated registers in real registers over multiple + simulated instructions, and removing redundant simulated + %EFLAGS saving/restoring. +

    +

  3. UCode instrumentation (vg_instrument), which adds + value and address checking code. +

    +

  4. Post-instrumentation cleanup (vg_cleanup), removing + redundant value-check computations. +

    +

  5. Register allocation (vg_do_register_allocation), + which, note, is done on UCode. +

    +

  6. Emission of final instrumented x86 code + (VG_(emit_code)). +
+ +

+Notice how steps 2, 3, 4 and 5 are simple UCode-to-UCode +transformation passes, all on straight-line blocks of UCode (type +UCodeBlock). Steps 2 and 4 are optimisation passes and +can be disabled for debugging purposes, with +--optimise=no and --cleanup=no respectively. + +

+Valgrind can also run in a no-instrumentation mode, given +--instrument=no. This is useful for debugging the JITter +quickly without having to deal with the complexity of the +instrumentation mechanism too. In this mode, steps 3 and 4 are +omitted. + +

+These flags combine, so that --instrument=no together with +--optimise=no means only steps 1, 5 and 6 are used. +--single-step=yes causes each x86 instruction to be +treated as a single basic block. The translations are terrible but +this is sometimes instructive. + +

+The --stop-after=N flag switches back to the real CPU +after N basic blocks. It also re-JITs the final basic +block executed and prints the debugging info resulting, so this +gives you a way to get a quick snapshot of how a basic block looks as +it passes through the six stages mentioned above. If you want to +see full information for every block translated (probably not, but +still ...) find, in VG_(translate), the lines +
dis = True; +
dis = debugging_translation; +
+and comment out the second line. This will spew out debugging +junk faster than you can possibly imagine. + + + +

UCode operand tags: type Tag

+ +UCode is, more or less, a simple two-address RISC-like code. In +keeping with the x86 AT&T assembly syntax, generally speaking the +first operand is the source operand, and the second is the destination +operand, which is modified when the uinstr is notionally executed. + +

+UCode instructions have up to three operand fields, each of which has +a corresponding Tag describing it. Possible values for +the tag are: + +

    +
  • NoValue: indicates that the field is not in use. +

    +

  • Lit16: the field contains a 16-bit literal. +

    +

  • Literal: the field denotes a 32-bit literal, whose + value is stored in the lit32 field of the uinstr + itself. Since there is only one lit32 for the whole + uinstr, only one operand field may contain this tag. +

    +

  • SpillNo: the field contains a spill slot number, in + the range 0 to 23 inclusive, denoting one of the spill slots + contained inside VG_(baseBlock). Such tags only + exist after register allocation. +

    +

  • RealReg: the field contains a number in the range 0 + to 7 denoting an integer x86 ("real") register on the host. The + number is the Intel encoding for integer registers. Such tags + only exist after register allocation. +

    +

  • ArchReg: the field contains a number in the range 0 + to 7 denoting an integer x86 register on the simulated CPU. In + reality this means a reference to one of the first 8 words of + VG_(baseBlock). Such tags can exist at any point in + the translation process. +

    +

  • Last, but not least, TempReg. The field contains the + number of one of an infinite set of virtual (integer) + registers. TempRegs are used everywhere throughout + the translation process; you can have as many as you want. The + register allocator maps as many as it can into + RealRegs and turns the rest into + SpillNos, so TempRegs should not exist + after the register allocation phase. +

    + TempRegs are always 32 bits long, even if the data + they hold is logically shorter. In that case the upper unused + bits are required, and, I think, generally assumed, to be zero. + TempRegs holding V bits for quantities shorter than + 32 bits are expected to have ones in the unused places, since a + one denotes "undefined". +

+ + +

UCode instructions: type UInstr

+ +

+UCode was carefully designed to make it possible to do register +allocation on UCode and then translate the result into x86 code +without needing any extra registers ... well, that was the original +plan, anyway. Things have gotten a little more complicated since +then. In what follows, UCode instructions are referred to as uinstrs, +to distinguish them from x86 instructions. Uinstrs of course have +uopcodes which are (naturally) different from x86 opcodes. + +

+A uinstr (type UInstr) contains +various fields, not all of which are used by any one uopcode: +

    +
  • Three 16-bit operand fields, val1, val2 + and val3. +

    +

  • Three tag fields, tag1, tag2 + and tag3. Each of these has a value of type + Tag, + and they describe what the val1, val2 + and val3 fields contain. +

    +

  • A 32-bit literal field. +

    +

  • Two FlagSets, specifying which x86 condition codes are + read and written by the uinstr. +

    +

  • An opcode byte, containing a value of type Opcode. +

    +

  • A size field, indicating the data transfer size (1/2/4/8/10) in + cases where this makes sense, or zero otherwise. +

    +

  • A condition-code field, which, for jumps, holds a + value of type Condcode, indicating the condition + which applies. The encoding is as it is in the x86 insn stream, + except we add a 17th value CondAlways to indicate + an unconditional transfer. +

    +

  • Various 1-bit flags, indicating whether this insn pertains to an + x86 CALL or RET instruction, whether a widening is signed or not, + etc. +
+ +

+UOpcodes (type Opcode) are divided into two groups: those +necessary merely to express the functionality of the x86 code, and +extra uopcodes needed to express the instrumentation. The former +group contains: +

    +
  • GET and PUT, which move values from the + simulated CPU's integer registers (ArchRegs) into + TempRegs, and back. GETF and + PUTF do the corresponding thing for the simulated + %EFLAGS. There are no corresponding insns for the + FPU register stack, since we don't explicitly simulate its + registers. +

    +

  • LOAD and STORE, which, in RISC-like + fashion, are the only uinstrs able to interact with memory. +

    +

  • MOV and CMOV allow unconditional and + conditional moves of values between TempRegs. +

    +

  • ALU operations. Again in RISC-like fashion, these only operate on + TempRegs (before reg-alloc) or RealRegs + (after reg-alloc). These are: ADD, ADC, + AND, OR, XOR, + SUB, SBB, SHL, + SHR, SAR, ROL, + ROR, RCL, RCR, + NOT, NEG, INC, + DEC, BSWAP, CC2VAL and + WIDEN. WIDEN does signed or unsigned + value widening. CC2VAL is used to convert condition + codes into a value, zero or one. The rest are obvious. +

    + To allow for more efficient code generation, we bend slightly the + restriction at the start of the previous para: for + ADD, ADC, XOR, + SUB and SBB, we allow the first (source) + operand to also be an ArchReg, that is, one of the + simulated machine's registers. Also, many of these ALU ops allow + the source operand to be a literal. See + VG_(saneUInstr) for the final word on the allowable + forms of uinstrs. +

    +

  • LEA1 and LEA2 are not strictly + necessary, but facilitate better translations. They + record the fancy x86 addressing modes in a direct way, which + allows those amodes to be emitted back into the final + instruction stream more or less verbatim. +

    +

  • CALLM calls a machine-code helper, one of the methods + whose address is stored at some VG_(baseBlock) + offset. PUSH and POP move values + to/from TempReg to the real (Valgrind's) stack, and + CLEAR removes values from the stack. + CALLM_S and CALLM_E delimit the + boundaries of call setups and clearings, for the benefit of the + instrumentation passes. Getting this right is critical, and so + VG_(saneUCodeBlock) makes various checks on the use + of these uopcodes. +

    + It is important to understand that these uopcodes have nothing to + do with the x86 call, return, + push or pop instructions, and are not + used to implement them. Those guys turn into combinations of + GET, PUT, LOAD, + STORE, ADD, SUB, and + JMP. What these uopcodes support is calling of + helper functions such as VG_(helper_imul_32_64), + which do stuff which is too difficult or tedious to emit inline. +

    +

  • FPU, FPU_R and FPU_W. + Valgrind doesn't attempt to simulate the internal state of the + FPU at all. Consequently it only needs to be able to distinguish + FPU ops which read and write memory from those that don't, and + for those which do, it needs to know the effective address and + data transfer size. This is made easier because the x86 FP + instruction encoding is very regular, basically consisting of + 16 bits for a non-memory FPU insn and 11 (IIRC) bits + an address mode + for a memory FPU insn. So our FPU uinstr carries + the 16 bits in its val1 field. And + FPU_R and FPU_W carry 11 bits in that + field, together with the identity of a TempReg or + (later) RealReg which contains the address. +

    +

  • JIFZ is unique, in that it allows a control-flow + transfer which is not deemed to end a basic block. It causes a + jump to a literal (original) address if the specified argument + is zero. +

    +

  • Finally, INCEIP advances the simulated + %EIP by the specified literal amount. This supports + lazy %EIP updating, as described below. +
+ +

+Stages 1 and 2 of the 6-stage translation process mentioned above +deal purely with these uopcodes, and no others. They are +sufficient to express pretty much all the x86 32-bit protected-mode +instruction set, at +least everything understood by a pre-MMX original Pentium (P54C). + +

+Stages 3, 4, 5 and 6 also deal with the following extra +"instrumentation" uopcodes. They are used to express all the +definedness-tracking and -checking machinery which valgrind does. In +later sections we show how to create checking code for each of the +uopcodes above. Note that these instrumentation uopcodes, although +some appearing complicated, have been carefully chosen so that +efficient x86 code can be generated for them. GNU superopt v2.5 did a +great job helping out here. Anyways, the uopcodes are as follows: + +

    +
  • GETV and PUTV are analogues to + GET and PUT above. They are identical + except that they move the V bits for the specified values back and + forth to TempRegs, rather than moving the values + themselves. +

    +

  • Similarly, LOADV and STOREV read and + write V bits from the synthesised shadow memory that Valgrind + maintains. In fact they do more than that, since they also do + address-validity checks, and emit complaints if the read/written + addresses are unaddressable. +

    +

  • TESTV, whose parameters are a TempReg + and a size, tests the V bits in the TempReg, at the + specified operation size (0/1/2/4 byte) and emits an error if any + of them indicate undefinedness. This is the only uopcode capable + of doing such tests. +

    +

  • SETV, whose parameters are also TempReg + and a size, makes the V bits in the TempReg indicate + definedness, at the specified operation size. This is usually + used to generate the correct V bits for a literal value, which is + of course fully defined. +

    +

  • GETVF and PUTVF are analogues to + GETF and PUTF. They move the single V + bit used to model definedness of %EFLAGS between its + home in VG_(baseBlock) and the specified + TempReg. +

    +

  • TAG1 denotes one of a family of unary operations on + TempRegs containing V bits. Similarly, + TAG2 denotes one in a family of binary operations on + V bits. +
+ +

+These 10 uopcodes are sufficient to express Valgrind's entire +definedness-checking semantics. In fact most of the interesting magic +is done by the TAG1 and TAG2 +suboperations. + +

+First, however, I need to explain about V-vector operation sizes. +There are 4 sizes: 1, 2 and 4, which operate on groups of 8, 16 and 32 +V bits at a time, supporting the usual 1, 2 and 4 byte x86 operations. +However there is also the mysterious size 0, which really means a +single V bit. Single V bits are used in various circumstances; in +particular, the definedness of %EFLAGS is modelled with a +single V bit. Now might be a good time to also point out that for +V bits, 1 means "undefined" and 0 means "defined". Similarly, for A +bits, 1 means "invalid address" and 0 means "valid address". This +seems counterintuitive (and so it is), but testing against zero on +x86s saves instructions compared to testing against all 1s, because +many ALU operations set the Z flag for free, so to speak. + +

+With that in mind, the tag ops are: + +

    +
  • (UNARY) Pessimising casts: VgT_PCast40, + VgT_PCast20, VgT_PCast10, + VgT_PCast01, VgT_PCast02 and + VgT_PCast04. A "pessimising cast" takes a V-bit + vector at one size, and creates a new one at another size, + pessimised in the sense that if any of the bits in the source + vector indicate undefinedness, then all the bits in the result + indicate undefinedness. In this case the casts are all to or from + a single V bit, so for example VgT_PCast40 is a + pessimising cast from 32 bits to 1, whereas + VgT_PCast04 simply copies the single source V bit + into all 32 bit positions in the result. Surprisingly, these ops + can all be implemented very efficiently. +

    + There are also the pessimising casts VgT_PCast14, + from 8 bits to 32, VgT_PCast12, from 8 bits to 16, + and VgT_PCast11, from 8 bits to 8. This last one + seems nonsensical, but in fact it isn't a no-op because, as + mentioned above, any undefined (1) bits in the source infect the + entire result. +

    +

  • (UNARY) Propagating undefinedness upwards in a word: + VgT_Left4, VgT_Left2 and + VgT_Left1. These are used to simulate the worst-case + effects of carry propagation in adds and subtracts. They return a + V vector identical to the original, except that if the original + contained any undefined bits, then it and all bits above it are + marked as undefined too. Hence the Left bit in the names. +

    +

  • (UNARY) Signed and unsigned value widening: + VgT_SWiden14, VgT_SWiden24, + VgT_SWiden12, VgT_ZWiden14, + VgT_ZWiden24 and VgT_ZWiden12. These + mimic the definedness effects of standard signed and unsigned + integer widening. Unsigned widening creates zero bits in the new + positions, so VgT_ZWiden* accordingly mark + those parts of their argument as defined. Signed widening copies + the sign bit into the new positions, so VgT_SWiden* + copies the definedness of the sign bit into the new positions. + Because 1 means undefined and 0 means defined, these operations + can (fascinatingly) be done by the same operations which they + mimic. Go figure. +

    +

  • (BINARY) Undefined-if-either-Undefined, + Defined-if-either-Defined: VgT_UifU4, + VgT_UifU2, VgT_UifU1, + VgT_UifU0, VgT_DifD4, + VgT_DifD2, VgT_DifD1. These do simple + bitwise operations on pairs of V-bit vectors, with + UifU giving undefined if either arg bit is + undefined, and DifD giving defined if either arg bit + is defined. Abstract interpretation junkies, if any make it this + far, may like to think of them as meets and joins (or is it joins + and meets) in the definedness lattices. +

    +

  • (BINARY; one value, one V bits) Generate argument improvement + terms for AND and OR: VgT_ImproveAND4_TQ, + VgT_ImproveAND2_TQ, VgT_ImproveAND1_TQ, + VgT_ImproveOR4_TQ, VgT_ImproveOR2_TQ, + VgT_ImproveOR1_TQ. These help out with AND and OR + operations. AND and OR have the inconvenient property that the + definedness of the result depends on the actual values of the + arguments as well as their definedness. At the bit level: +
    1 AND undefined = undefined, but +
    0 AND undefined = 0, and similarly +
    0 OR undefined = undefined, but +
    1 OR undefined = 1. +
    +

    + It turns out that gcc (quite legitimately) generates code which + relies on this fact, so we have to model it properly in order to + avoid flooding users with spurious value errors. The ultimate + definedness result of AND and OR is calculated using + UifU on the definedness of the arguments, but we + also DifD in some "improvement" terms which + take into account the above phenomena. +

    + ImproveAND takes as its first argument the actual + value of an argument to AND (the T) and the definedness of that + argument (the Q), and returns a V-bit vector which is defined (0) + for bits which have value 0 and are defined; this, when + DifD into the final result causes those bits to be + defined even if the corresponding bit in the other argument is undefined. +

    + The ImproveOR ops do the dual thing for OR + arguments. Note that XOR does not have this property that one + argument can make the other irrelevant, so there is no need for + such complexity for XOR. +

+ +

+That's all the tag ops. If you stare at this long enough, and then +run Valgrind and stare at the pre- and post-instrumented ucode, it +should be fairly obvious how the instrumentation machinery hangs +together. + +

+One point, if you do this: in order to make it easy to differentiate +TempRegs carrying values from TempRegs +carrying V bit vectors, Valgrind prints the former as (for example) +t28 and the latter as q28; the fact that +they carry the same number serves to indicate their relationship. +This is purely for the convenience of the human reader; the register +allocator and code generator don't regard them as different. + + +

Translation into UCode

+ +VG_(disBB) allocates a new UCodeBlock and +then uses disInstr to translate x86 instructions one at a +time into UCode, dumping the result in the UCodeBlock. +This goes on until a control-flow transfer instruction is encountered. + +

+Despite the large size of vg_to_ucode.c, this translation +is really very simple. Each x86 instruction is translated entirely +independently of its neighbours, merrily allocating new +TempRegs as it goes. The idea is to have a simple +translator -- in reality, no more than a macro-expander -- and the +resulting bad UCode translation is cleaned up by the UCode +optimisation phase which follows. To give you an idea of some x86 +instructions and their translations (this is a complete basic block, +as Valgrind sees it):

+        0x40435A50:  incl %edx
+
+           0: GETL      %EDX, t0
+           1: INCL      t0  (-wOSZAP)
+           2: PUTL      t0, %EDX
+
+        0x40435A51:  movsbl (%edx),%eax
+
+           3: GETL      %EDX, t2
+           4: LDB       (t2), t2
+           5: WIDENL_Bs t2
+           6: PUTL      t2, %EAX
+
+        0x40435A54:  testb $0x20, 1(%ecx,%eax,2)
+
+           7: GETL      %EAX, t6
+           8: GETL      %ECX, t8
+           9: LEA2L     1(t8,t6,2), t4
+          10: LDB       (t4), t10
+          11: MOVB      $0x20, t12
+          12: ANDB      t12, t10  (-wOSZACP)
+          13: INCEIPo   $9
+
+        0x40435A59:  jnz-8 0x40435A50
+
+          14: Jnzo      $0x40435A50  (-rOSZACP)
+          15: JMPo      $0x40435A5B
+
+ +

+Notice how the block always ends with an unconditional jump to the +next block. This is a bit unnecessary, but makes many things simpler. + +

+Most x86 instructions turn into sequences of GET, +PUT, LEA1, LEA2, +LOAD and STORE. Some complicated ones +however rely on calling helper bits of code in +vg_helpers.S. The ucode instructions PUSH, +POP, CALL, CALLM_S and +CALLM_E support this. The calling convention is somewhat +ad-hoc and is not the C calling convention. The helper routines must +save all integer registers, and the flags, that they use. Args are +passed on the stack underneath the return address, as usual, and if +result(s) are to be returned, it (they) are either placed in dummy arg +slots created by the ucode PUSH sequence, or just +overwrite the incoming args. + +

+In order that the instrumentation mechanism can handle calls to these +helpers, VG_(saneUCodeBlock) enforces the following +restrictions on calls to helpers: + +

    +
  • Each CALL uinstr must be bracketed by a preceding + CALLM_S marker (dummy uinstr) and a trailing + CALLM_E marker. These markers are used by the + instrumentation mechanism later to establish the boundaries of the + PUSH, POP and CLEAR + sequences for the call. +

    +

  • PUSH, POP and CLEAR + may only appear inside sections bracketed by CALLM_S + and CALLM_E, and nowhere else. +

    +

  • In any such bracketed section, no two PUSH insns may + push the same TempReg. Dually, no two + POPs may pop the same TempReg. +

    +

  • Finally, although this is not checked, args should be removed from + the stack with CLEAR, rather than POPs + into a TempReg which is not subsequently used. This + is because the instrumentation mechanism assumes that all values + POPped from the stack are actually used. +
+ +Some of the translations may appear to have redundant +TempReg-to-TempReg moves. This helps the +next phase, UCode optimisation, to generate better code. + + + +

UCode optimisation

+ +UCode is then subjected to an improvement pass +(vg_improve()), which blurs the boundaries between the +translations of the original x86 instructions. It's pretty +straightforward. Three transformations are done: + +
    +
  • Redundant GET elimination. Actually, more general + than that -- eliminates redundant fetches of ArchRegs. In our + running example, uinstr 3 GETs %EDX into + t2 despite the fact that, by looking at the previous + uinstr, it is already in t0. The GET is + therefore removed, and t2 renamed to t0. + Assuming t0 is allocated to a host register, it means + the simulated %EDX will exist in a host CPU register + for more than one simulated x86 instruction, which seems to me to + be a highly desirable property. +

    + There is some mucking around to do with subregisters; + %AL vs %AH, %AX vs + %EAX etc. I can't remember how it works, but in + general we are very conservative, and these tend to invalidate the + caching. +

    +

  • Redundant PUT elimination. This annuls + PUTs of values back to simulated CPU registers if a + later PUT would overwrite the earlier + PUT value, and there are no intervening reads of the + simulated register (ArchReg). +

    + As before, we are paranoid when faced with subregister references. + Also, PUTs of %ESP are never annulled, + because it is vital the instrumenter always has an up-to-date + %ESP value available, since %ESP changes + affect the addressability of the memory around the simulated stack + pointer. +

    + The implication of the above paragraph is that the simulated + machine's registers are only lazily updated once the above two + optimisation phases have run, with the exception of + %ESP. TempRegs go dead at the end of + every basic block, from which it is inferable that any + TempReg caching a simulated CPU reg is flushed (back + into the relevant VG_(baseBlock) slot) at the end of + every basic block. The further implication is that the simulated + registers are only up-to-date in between basic blocks, and not + at arbitrary points inside basic blocks. And the consequence of + that is that we can only deliver signals to the client in between + basic blocks. None of this seems to be a problem in practice. +

    +

  • Finally there is a simple def-use thing for condition codes. If + an earlier uinstr writes the condition codes, and the next uinsn + along which actually cares about the condition codes writes the + same or larger set of them, but does not read any, the earlier + uinsn is marked as not writing any condition codes. This saves + a lot of redundant cond-code saving and restoring. +
+ +The effect of these transformations on our short block is rather +unexciting, and shown below. On longer basic blocks they can +dramatically improve code quality. + +
+at 3: delete GET, rename t2 to t0 in (4 .. 6)
+at 7: delete GET, rename t6 to t0 in (8 .. 9)
+at 1: annul flag write OSZAP due to later OSZACP
+
+Improved code:
+           0: GETL      %EDX, t0
+           1: INCL      t0
+           2: PUTL      t0, %EDX
+           4: LDB       (t0), t0
+           5: WIDENL_Bs t0
+           6: PUTL      t0, %EAX
+           8: GETL      %ECX, t8
+           9: LEA2L     1(t8,t0,2), t4
+          10: LDB       (t4), t10
+          11: MOVB      $0x20, t12
+          12: ANDB      t12, t10  (-wOSZACP)
+          13: INCEIPo   $9
+          14: Jnzo      $0x40435A50  (-rOSZACP)
+          15: JMPo      $0x40435A5B
+
+ +

UCode instrumentation

+ +Once you understand the meaning of the instrumentation uinstrs, +discussed in detail above, the instrumentation scheme is fairly +straightforward. Each uinstr is instrumented in isolation, and the +instrumentation uinstrs are placed before the original uinstr. +Our running example continues below. I have placed a blank line +after every original ucode, to make it easier to see which +instrumentation uinstrs correspond to which originals. + +

+As mentioned somewhere above, TempRegs carrying values +have names like t28, and each one has a shadow carrying +its V bits, with names like q28. This pairing aids in +reading instrumented ucode. + +

+One decision about all this is where to have "observation points", +that is, where to check that V bits are valid. I use a minimalistic +scheme, only checking where a failure of validity could cause the +original program to (seg)fault. So the use of values as memory +addresses causes a check, as do conditional jumps (these cause a check +on the definedness of the condition codes). And arguments +PUSHed for helper calls are checked, hence the weird +restrictions on helper call preambles described above. + +

+Another decision is that once a value is tested, it is thereafter +regarded as defined, so that we do not emit multiple undefined-value +errors for the same undefined value. That means that +TESTV uinstrs are always followed by SETV +on the same (shadow) TempRegs. Most of these +SETVs are redundant and are removed by the +post-instrumentation cleanup phase. + +

+The instrumentation for calling helper functions deserves further +comment. The definedness of results from a helper is modelled using +just one V bit. So, in short, we do pessimising casts of the +definedness of all the args, down to a single bit, and then +UifU these bits together. So this single V bit will say +"undefined" if any part of any arg is undefined. This V bit is then +pessimally cast back up to the result(s) sizes, as needed. If, by +seeing that all the args are got rid of with CLEAR and +none with POP, Valgrind sees that the result of the call +is not actually used, it immediately examines the result V bit with a +TESTV -- SETV pair. If it did not do this, +there would be no observation point to detect that some of the +args to the helper were undefined. Of course, if the helper's results +are indeed used, we don't do this, since the result usage will +presumably cause the result definedness to be checked at some suitable +future point. + +

+In general Valgrind tries to track definedness on a bit-for-bit basis, +but as the above para shows, for calls to helpers we throw in the +towel and approximate down to a single bit. This is because it's too +complex and difficult to track bit-level definedness through complex +ops such as integer multiply and divide, and in any case there are no +reasonable code fragments which attempt to (eg) multiply two +partially-defined values and end up with something meaningful, so +there seems little point in modelling multiplies, divides, etc, at +that level of detail. + +

+Integer loads and stores are instrumented with firstly a test of the +definedness of the address, followed by a LOADV or +STOREV respectively. These turn into calls to +(for example) VG_(helperc_LOADV4). These helpers do two +things: they perform an address-valid check, and they load or store V +bits from/to the relevant address in the (simulated V-bit) memory. + +

+FPU loads and stores are different. As above the definedness of the +address is first tested. However, the helper routine for FPU loads +(VGM_(fpu_read_check)) emits an error if either the +address is invalid or the referenced area contains undefined values. +It has to do this because we do not simulate the FPU at all, and so +cannot track definedness of values loaded into it from memory, so we +have to check them as soon as they are loaded into the FPU, ie, at +this point. We notionally assume that everything in the FPU is +defined. + +

+It follows therefore that FPU writes first check the definedness of +the address, then the validity of the address, and finally mark the +written bytes as well-defined. + +

+If anyone is inspired to extend Valgrind to MMX/SSE insns, I suggest +you use the same trick. It works provided that the FPU/MMX unit is +not merely used as a conduit to copy partially undefined data from +one place in memory to another. Unfortunately the integer CPU is used +like that (when copying C structs with holes, for example) and this is +the cause of much of the elaborateness of the instrumentation here +described. + +

+vg_instrument() in vg_translate.c actually +does the instrumentation. There are comments explaining how each +uinstr is handled, so we do not repeat that here. As explained +already, it is bit-accurate, except for calls to helper functions. +Unfortunately the x86 insns bt/bts/btc/btr are done by +helper fns, so bit-level accuracy is lost there. This should be fixed +by doing them inline; it will probably require adding a couple new +uinstrs. Also, left and right rotates through the carry flag (x86 +rcl and rcr) are approximated via a single +V bit; so far this has not caused anyone to complain. The +non-carry rotates, rol and ror, are much +more common and are done exactly. Re-visiting the instrumentation for +AND and OR, they seem rather verbose, and I wonder if it could be done +more concisely now. + +

+The lowercase o on many of the uopcodes in the running +example indicates that the size field is zero, usually meaning a +single-bit operation. + +

+Anyroads, the post-instrumented version of our running example looks +like this: + +

+Instrumented code:
+           0: GETVL     %EDX, q0
+           1: GETL      %EDX, t0
+
+           2: TAG1o     q0 = Left4 ( q0 )
+           3: INCL      t0
+
+           4: PUTVL     q0, %EDX
+           5: PUTL      t0, %EDX
+
+           6: TESTVL    q0
+           7: SETVL     q0
+           8: LOADVB    (t0), q0
+           9: LDB       (t0), t0
+
+          10: TAG1o     q0 = SWiden14 ( q0 )
+          11: WIDENL_Bs t0
+
+          12: PUTVL     q0, %EAX
+          13: PUTL      t0, %EAX
+
+          14: GETVL     %ECX, q8
+          15: GETL      %ECX, t8
+
+          16: MOVL      q0, q4
+          17: SHLL      $0x1, q4
+          18: TAG2o     q4 = UifU4 ( q8, q4 )
+          19: TAG1o     q4 = Left4 ( q4 )
+          20: LEA2L     1(t8,t0,2), t4
+
+          21: TESTVL    q4
+          22: SETVL     q4
+          23: LOADVB    (t4), q10
+          24: LDB       (t4), t10
+
+          25: SETVB     q12
+          26: MOVB      $0x20, t12
+
+          27: MOVL      q10, q14
+          28: TAG2o     q14 = ImproveAND1_TQ ( t10, q14 )
+          29: TAG2o     q10 = UifU1 ( q12, q10 )
+          30: TAG2o     q10 = DifD1 ( q14, q10 )
+          31: MOVL      q12, q14
+          32: TAG2o     q14 = ImproveAND1_TQ ( t12, q14 )
+          33: TAG2o     q10 = DifD1 ( q14, q10 )
+          34: MOVL      q10, q16
+          35: TAG1o     q16 = PCast10 ( q16 )
+          36: PUTVFo    q16
+          37: ANDB      t12, t10  (-wOSZACP)
+
+          38: INCEIPo   $9
+
+          39: GETVFo    q18
+          40: TESTVo    q18
+          41: SETVo     q18
+          42: Jnzo      $0x40435A50  (-rOSZACP)
+
+          43: JMPo      $0x40435A5B
+
+ + +

UCode post-instrumentation cleanup

+ +

+This pass, coordinated by vg_cleanup(), removes redundant +definedness computation created by the simplistic instrumentation +pass. It consists of two passes, +vg_propagate_definedness() followed by +vg_delete_redundant_SETVs. + +

+vg_propagate_definedness() is a simple +constant-propagation and constant-folding pass. It tries to determine +which TempRegs containing V bits will always indicate +"fully defined", and it propagates this information as far as it can, +and folds out as many operations as possible. For example, the +instrumentation for an ADD of a literal to a variable quantity will be +reduced down so that the definedness of the result is simply the +definedness of the variable quantity, since the literal is by +definition fully defined. + +

+vg_delete_redundant_SETVs removes SETVs on +shadow TempRegs for which the next action is a write. +I don't think there's anything else worth saying about this; it is +simple. Read the sources for details. + +

+So the cleaned-up running example looks like this. As above, I have +inserted line breaks after every original (non-instrumentation) uinstr +to aid readability. As with straightforward ucode optimisation, the +results in this block are undramatic because it is so short; longer +blocks benefit more because they have more redundancy which gets +eliminated. + + +

+at 29: delete UifU1 due to defd arg1
+at 32: change ImproveAND1_TQ to MOV due to defd arg2
+at 41: delete SETV
+at 31: delete MOV
+at 25: delete SETV
+at 22: delete SETV
+at 7: delete SETV
+
+           0: GETVL     %EDX, q0
+           1: GETL      %EDX, t0
+
+           2: TAG1o     q0 = Left4 ( q0 )
+           3: INCL      t0
+
+           4: PUTVL     q0, %EDX
+           5: PUTL      t0, %EDX
+
+           6: TESTVL    q0
+           8: LOADVB    (t0), q0
+           9: LDB       (t0), t0
+
+          10: TAG1o     q0 = SWiden14 ( q0 )
+          11: WIDENL_Bs t0
+
+          12: PUTVL     q0, %EAX
+          13: PUTL      t0, %EAX
+
+          14: GETVL     %ECX, q8
+          15: GETL      %ECX, t8
+
+          16: MOVL      q0, q4
+          17: SHLL      $0x1, q4
+          18: TAG2o     q4 = UifU4 ( q8, q4 )
+          19: TAG1o     q4 = Left4 ( q4 )
+          20: LEA2L     1(t8,t0,2), t4
+
+          21: TESTVL    q4
+          23: LOADVB    (t4), q10
+          24: LDB       (t4), t10
+
+          26: MOVB      $0x20, t12
+
+          27: MOVL      q10, q14
+          28: TAG2o     q14 = ImproveAND1_TQ ( t10, q14 )
+          30: TAG2o     q10 = DifD1 ( q14, q10 )
+          32: MOVL      t12, q14
+          33: TAG2o     q10 = DifD1 ( q14, q10 )
+          34: MOVL      q10, q16
+          35: TAG1o     q16 = PCast10 ( q16 )
+          36: PUTVFo    q16
+          37: ANDB      t12, t10  (-wOSZACP)
+
+          38: INCEIPo   $9
+          39: GETVFo    q18
+          40: TESTVo    q18
+          42: Jnzo      $0x40435A50  (-rOSZACP)
+
+          43: JMPo      $0x40435A5B
+
+ + +

Translation from UCode

+ +This is all very simple, even though vg_from_ucode.c +is a big file. Position-independent x86 code is generated into +a dynamically allocated array emitted_code; this is +doubled in size when it overflows. Eventually the array is handed +back to the caller of VG_(translate), who must copy +the result into TC and TT, and free the array. + +

+This file is structured into four layers of abstraction, which, +thankfully, are glued back together with extensive +__inline__ directives. From the bottom upwards: + +

    +
  • Address-mode emitters, emit_amode_regmem_reg et al. +

    +

  • Emitters for specific x86 instructions. There are quite a lot of + these, with names such as emit_movv_offregmem_reg. + The v suffix is Intel parlance for a 16/32 bit insn; + there are also b suffixes for 8 bit insns. +

    +

  • The next level up are the synth_* functions, which + synthesise possibly a sequence of raw x86 instructions to do some + simple task. Some of these are quite complex because they have to + work around Intel's silly restrictions on subregister naming. See + synth_nonshiftop_reg_reg for example. +

    +

  • Finally, at the top of the heap, we have + emitUInstr(), + which emits code for a single uinstr. +
+ +

+Some comments: +

    +
  • The hack for FPU instructions becomes apparent here. To do a + FPU ucode instruction, we load the simulated FPU's + state from its VG_(baseBlock) into the real FPU + using an x86 frstor insn, do the ucode + FPU insn on the real CPU, and write the updated FPU + state back into VG_(baseBlock) using an + fnsave instruction. This is pretty brutal, but is + simple and it works, and even seems tolerably efficient. There is + no attempt to cache the simulated FPU state in the real FPU over + multiple back-to-back ucode FPU instructions. +

    + FPU_R and FPU_W are also done this way, + with the minor complication that we need to patch in some + addressing mode bits so the resulting insn knows the effective + address to use. This is easy because of the regularity of the x86 + FPU instruction encodings. +

    +

  • An analogous trick is done with ucode insns which claim, in their + flags_r and flags_w fields, that they + read or write the simulated %EFLAGS. For such cases + we first copy the simulated %EFLAGS into the real + %eflags, then do the insn, then, if the insn says it + writes the flags, copy back to %EFLAGS. This is a + bit expensive, which is why the ucode optimisation pass goes to + some effort to remove redundant flag-update annotations. +
+ +

+And so ... that's the end of the documentation for the instrumenting +translator! It's really not that complex, because it's composed as a +sequence of simple(ish) self-contained transformations on +straight-line blocks of code. + + +

Top-level dispatch loop

+ +Urk. In VG_(toploop). This is basically boring and +unsurprising, not to mention fiddly and fragile. It needs to be +cleaned up. + +

+Perhaps the only surprise is that the whole thing is run +on top of a setjmp-installed exception handler, because, +supposing a translation got a segfault, we have to bail out of the +Valgrind-supplied exception handler VG_(oursignalhandler) +and immediately start running the client's segfault handler, if it has +one. In particular we can't finish the current basic block and then +deliver the signal at some convenient future point, because signals +like SIGILL, SIGSEGV and SIGBUS mean that the faulting insn should not +simply be re-tried. (I'm sure there is a clearer way to explain this). + + +

Exceptions, creating new translations

+

Self-modifying code

+ +

Lazy updates of the simulated program counter

+ +Simulated %EIP is not updated after every simulated x86 +insn as this was regarded as too expensive. Instead ucode +INCEIP insns move it along as and when necessary. +Currently we don't allow it to fall more than 4 bytes behind reality +(see VG_(disBB) for the way this works). +

+Note that %EIP is always brought up to date by the inner +dispatch loop in VG_(dispatch), so that if the client +takes a fault we know at least which basic block this happened in. + + +

The translation cache and translation table

+ +

Signals

+ +Horrible, horrible. vg_signals.c. +Basically, since we have to intercept all system +calls anyway, we can see when the client tries to install a signal +handler. If it does so, we make a note of what the client asked to +happen, and ask the kernel to route the signal to our own signal +handler, VG_(oursignalhandler). This simply notes the +delivery of signals, and returns. + +

+Every 1000 basic blocks, we see if more signals have arrived. If so, +VG_(deliver_signals) builds signal delivery frames on the +client's stack, and allows their handlers to be run. Valgrind places +in these signal delivery frames a bogus return address, +VG_(signalreturn_bogusRA), and checks all jumps to see +if any jump to it. If so, this is a sign that a signal handler is +returning, and if so Valgrind removes the relevant signal frame from +the client's stack, restores from the signal frame the simulated +state before the signal was delivered, and allows the client to run +onwards. We have to do it this way because some signal handlers never +return, they just longjmp(), which nukes the signal +delivery frame. + +

+The Linux kernel has a different but equally horrible hack for +detecting signal handler returns. Discovering it is left as an +exercise for the reader. + + + +

Errors, error contexts, error reporting, suppressions

+

Client malloc/free

+

Low-level memory management

+

A and V bitmaps

+

Symbol table management

+

Dealing with system calls

+

Namespace management

+

GDB attaching

+

Non-dependence on glibc or anything else

+

The leak detector

+

Performance problems

+

Continuous sanity checking

+

Tracing, or not tracing, child processes

+

Assembly glue for syscalls

+ + +
+ +

Extensions

+ +Some comments about Stuff To Do. + +

Bugs

+ +Stephan Kulow and Marc Mutz report problems with kmail in KDE 3 CVS +(RC2 ish) when run on Valgrind. Stephan has it deadlocking; Marc has +it looping at startup. I can't repro either behaviour. Needs +repro-ing and fixing. + + +

Threads

+ +Doing a good job of thread support strikes me as almost a +research-level problem. The central issues are how to do fast cheap +locking of the VG_(primary_map) structure, whether or not +accesses to the individual secondary maps need locking, what +race-condition issues result, and whether the already-nasty mess that +is the signal simulator needs further hackery. + +

+I realise that threads are the most-frequently-requested feature, and +I am thinking about it all. If you have guru-level understanding of +fast mutual exclusion mechanisms and race conditions, I would be +interested in hearing from you. + + +

Verification suite

+ +Directory tests/ contains various ad-hoc tests for +Valgrind. However, there is no systematic verification or regression +suite, that, for example, exercises all the stuff in +vg_memory.c, to ensure that illegal memory accesses and +undefined value uses are detected as they should be. It would be good +to have such a suite. + + +

Porting to other platforms

+ +It would be great if Valgrind was ported to FreeBSD and x86 NetBSD, +and to x86 OpenBSD, if it's possible (doesn't OpenBSD use a.out-style +executables, not ELF ?) + +

+The main difficulties, for an x86-ELF platform, seem to be: + +

    +
  • You'd need to rewrite the /proc/self/maps parser + (vg_procselfmaps.c). + Easy. +

    +

  • You'd need to rewrite vg_syscall_mem.c, or, more + specifically, provide one for your OS. This is tedious, but you + can implement syscalls on demand, and the Linux kernel interface + is, for the most part, going to look very similar to the *BSD + interfaces, so it's really a copy-paste-and-modify-on-demand job. + As part of this, you'd need to supply a new + vg_kerneliface.h file. +

    +

  • You'd also need to change the syscall wrappers for Valgrind's + internal use, in vg_mylibc.c. +
+ +All in all, I think a port to x86-ELF *BSDs is not really very +difficult, and in some ways I would like to see it happen, because +that would force a more clear factoring of Valgrind into platform +dependent and independent pieces. Not to mention, *BSD folks also +deserve to use Valgrind just as much as the Linux crew do. + + +

+


+ +

Easy stuff which ought to be done

+ +

MMX instructions

+ +MMX insns should be supported, using the same trick as for FPU insns. +If the MMX registers are not used to copy uninitialised junk from one +place to another in memory, this means we don't have to actually +simulate the internal MMX unit state, so the FPU hack applies. This +should be fairly easy. + + + +

Fix stabs-info reader

+ +The machinery in vg_symtab2.c which reads "stabs" style +debugging info is pretty weak. It usually correctly translates +simulated program counter values into line numbers and procedure +names, but the file name is often completely wrong. I think the +logic used to parse "stabs" entries is weak. It should be fixed. +The simplest solution, IMO, is to copy either the logic or simply the +code out of GNU binutils which does this; since GDB can clearly get it +right, binutils (or GDB?) must have code to do this somewhere. + + + + + +

BT/BTC/BTS/BTR

+ +These are x86 instructions which test, complement, set, or reset, a +single bit in a word. At the moment they are both incorrectly +implemented and incorrectly instrumented. + +

+The incorrect instrumentation is due to use of helper functions. This +means we lose bit-level definedness tracking, which could wind up +giving spurious uninitialised-value use errors. The Right Thing to do +is to invent a couple of new UOpcodes, I think GET_BIT +and SET_BIT, which can be used to implement all 4 x86 +insns, get rid of the helpers, and give bit-accurate instrumentation +rules for the two new UOpcodes. + +

+I realised the other day that they are mis-implemented too. The x86 +insns take a bit-index and a register or memory location to access. +For registers the bit index clearly can only be in the range zero to +register-width minus 1, and I assumed the same applied to memory +locations too. But evidently not; for memory locations the index can +be arbitrary, and the processor will index arbitrarily into memory as +a result. This too should be fixed. Sigh. Presumably indexing +outside the immediate word is not actually used by any programs yet +tested on Valgrind, for otherwise they (presumably) would simply not +work at all. If you plan to hack on this, first check the Intel docs +to make sure my understanding is really correct. + + + +

Using PREFETCH instructions

+ +Here's a small but potentially interesting project for performance +junkies. Experiments with valgrind's code generator and optimiser(s) +suggest that reducing the number of instructions executed in the +translations and mem-check helpers gives disappointingly small +performance improvements. Perhaps this is because performance of +Valgrindified code is limited by cache misses. After all, each read +in the original program now gives rise to at least three reads, one +for the VG_(primary_map), one of the resulting +secondary, and the original. Not to mention, the instrumented +translations are 13 to 14 times larger than the originals. All in all +one would expect the memory system to be hammered to hell and then +some. + +

+So here's an idea. An x86 insn involving a read from memory, after +instrumentation, will turn into ucode of the following form: +

+    ... calculate effective addr, into ta and qa ...
+    TESTVL qa             -- is the addr defined?
+    LOADV (ta), qloaded   -- fetch V bits for the addr
+    LOAD  (ta), tloaded   -- do the original load
+
+At the point where the LOADV is done, we know the actual +address (ta) from which the real LOAD will +be done. We also know that the LOADV will take around +20 x86 insns to do. So it seems plausible that doing a prefetch of +ta just before the LOADV might just avoid a +miss at the LOAD point, and that might be a significant +performance win. + +

+Prefetch insns are notoriously temperamental, more often than not +making things worse rather than better, so this would require +considerable fiddling around. It's complicated because Intels and +AMDs have different prefetch insns with different semantics, so that +too needs to be taken into account. As a general rule, even placing +the prefetches before the LOADV insn is too near the +LOAD; the ideal distance is apparently circa 200 CPU +cycles. So it might be worth having another analysis/transformation +pass which pushes prefetches as far back as possible, hopefully +immediately after the effective address becomes available. + +

+Doing too many prefetches is also bad because they soak up bus +bandwidth / cpu resources, so some cleverness in deciding which loads +to prefetch and which to not might be helpful. One can imagine not +prefetching client-stack-relative (%EBP or +%ESP) accesses, since the stack in general tends to show +good locality anyway. + +

+There's quite a lot of experimentation to do here, but I think it +might make an interesting week's work for someone. + +

+As of 15-ish March 2002, I've started to experiment with this, using +the AMD prefetch/prefetchw insns. + + + +

User-defined permission ranges

+ +This is quite a large project -- perhaps a month's hacking for a +capable hacker to do a good job -- but it's potentially very +interesting. The outcome would be that Valgrind could detect a +whole class of bugs which it currently cannot. + +

+The presentation falls into two pieces. + +

+Part 1: user-defined address-range permission setting +

+ +Valgrind intercepts the client's malloc, +free, etc calls, watches system calls, and watches the +stack pointer move. This is currently the only way it knows about +which addresses are valid and which not. Sometimes the client program +knows extra information about its memory areas. For example, the +client could at some point know that all elements of an array are +out-of-date. We would like to be able to convey to Valgrind this +information that the array is now addressable-but-uninitialised, so +that Valgrind can then warn if elements are used before they get new +values. + +

+What I would like are some macros like this: +

+   VALGRIND_MAKE_NOACCESS(addr, len)
+   VALGRIND_MAKE_WRITABLE(addr, len)
+   VALGRIND_MAKE_READABLE(addr, len)
+
+and also, to check that memory is addressible/initialised, +
+   VALGRIND_CHECK_ADDRESSIBLE(addr, len)
+   VALGRIND_CHECK_INITIALISED(addr, len)
+
+ +

+I then include in my sources a header defining these macros, rebuild +my app, run under Valgrind, and get user-defined checks. + +

+Now here's a neat trick. It's a nuisance to have to re-link the app +with some new library which implements the above macros. So the idea +is to define the macros so that the resulting executable is still +completely stand-alone, and can be run without Valgrind, in which case +the macros do nothing, but when run on Valgrind, the Right Thing +happens. How to do this? The idea is for these macros to turn into a +piece of inline assembly code, which (1) has no effect when run on the +real CPU, (2) is easily spotted by Valgrind's JITter, and (3) no sane +person would ever write, which is important for avoiding false matches +in (2). So here's a suggestion: +

+   VALGRIND_MAKE_NOACCESS(addr, len)
+
+becomes (roughly speaking) +
+   movl addr, %eax
+   movl len,  %ebx
+   movl $1,   %ecx   -- 1 describes the action; MAKE_WRITABLE might be
+                     -- 2, etc
+   rorl $13, %ecx
+   rorl $19, %ecx
+   rorl $11, %eax
+   rorl $21, %eax
+
+The rotate sequences have no effect, and it's unlikely they would +appear for any other reason, but they define a unique byte-sequence +which the JITter can easily spot. Using the operand constraints +section at the end of a gcc inline-assembly statement, we can tell gcc +that the assembly fragment kills %eax, %ebx, +%ecx and the condition codes, so this fragment is made +harmless when not running on Valgrind, runs quickly when not on +Valgrind, and does not require any other library support. + + +

+Part 2: using it to detect interference between stack variables +

+ +Currently Valgrind cannot detect errors of the following form: +

+void fooble ( void )
+{
+   int a[10];
+   int b[10];
+   a[10] = 99;
+}
+
+Now imagine rewriting this as +
+void fooble ( void )
+{
+   int spacer0;
+   int a[10];
+   int spacer1;
+   int b[10];
+   int spacer2;
+   VALGRIND_MAKE_NOACCESS(&spacer0, sizeof(int));
+   VALGRIND_MAKE_NOACCESS(&spacer1, sizeof(int));
+   VALGRIND_MAKE_NOACCESS(&spacer2, sizeof(int));
+   a[10] = 99;
+}
+
+Now the invalid write is certain to hit spacer0 or +spacer1, so Valgrind will spot the error. + +

+There are two complications. + +

+The first is that we don't want to annotate sources by hand, so the +Right Thing to do is to write a C/C++ parser, annotator, prettyprinter +which does this automatically, and run it on post-CPP'd C/C++ source. +See http://www.cacheprof.org for an example of a system which +transparently inserts another phase into the gcc/g++ compilation +route. The parser/prettyprinter is probably not as hard as it sounds; +I would write it in Haskell, a powerful functional language well +suited to doing symbolic computation, with which I am intimately +familiar. There is already a C parser written in Haskell by someone in +the Haskell community, and that would probably be a good starting +point. + +

+The second complication is how to get rid of these +NOACCESS records inside Valgrind when the instrumented +function exits; after all, these refer to stack addresses and will +make no sense whatever when some other function happens to re-use the +same stack address range, probably shortly afterwards. I think I +would be inclined to define a special stack-specific macro +

+   VALGRIND_MAKE_NOACCESS_STACK(addr, len)
+
+which causes Valgrind to record the client's %ESP at the +time it is executed. Valgrind will then watch for changes in +%ESP and discard such records as soon as the protected +area is uncovered by an increase in %ESP. I hesitate +with this scheme only because it is potentially expensive, if there +are hundreds of such records, and considering that changes in +%ESP already require expensive messing with stack access +permissions. + +

+This is probably easier and more robust than for the instrumenter +program to try and spot all exit points for the procedure and place +suitable deallocation annotations there. Plus C++ procedures can +bomb out at any point if they get an exception, so spotting return +points at the source level just won't work at all. + +

+Although some work, it's all eminently doable, and it would make +Valgrind into an even-more-useful tool. + +

+Update: as of 17 March 2002, this (these hooks) are done. + + +

+ + diff --git a/config.guess b/config.guess new file mode 100755 index 000000000..db494f806 --- /dev/null +++ b/config.guess @@ -0,0 +1,1320 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002 Free Software Foundation, Inc. + +timestamp='2002-02-19' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Originally written by Per Bothner . +# Please send patches to . Submit a context +# diff and a properly formatted ChangeLog entry. +# +# This script attempts to guess a canonical system name similar to +# config.sub. If it succeeds, it prints the system name on stdout, and +# exits with 0. Otherwise, it exits with 1. +# +# The plan is that this can be called by configure scripts if you +# don't specify an explicit build system type. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] + +Output the configuration name of the system \`$me' is run on. 
+ +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.guess ($timestamp) + +Originally written by Per Bothner. +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001 +Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit 0 ;; + --version | -v ) + echo "$version" ; exit 0 ;; + --help | --h* | -h ) + echo "$usage"; exit 0 ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + * ) + break ;; + esac +done + +if test $# != 0; then + echo "$me: too many arguments$help" >&2 + exit 1 +fi + + +dummy=dummy-$$ +trap 'rm -f $dummy.c $dummy.o $dummy.rel $dummy; exit 1' 1 2 15 + +# CC_FOR_BUILD -- compiler used by this script. +# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still +# use `HOST_CC' if defined, but it is deprecated. + +set_cc_for_build='case $CC_FOR_BUILD,$HOST_CC,$CC in + ,,) echo "int dummy(){}" > $dummy.c ; + for c in cc gcc c89 ; do + ($c $dummy.c -c -o $dummy.o) >/dev/null 2>&1 ; + if test $? = 0 ; then + CC_FOR_BUILD="$c"; break ; + fi ; + done ; + rm -f $dummy.c $dummy.o $dummy.rel ; + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found ; + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; +esac' + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. 
+# (ghazi@noc.rutgers.edu 1994-08-24) +if (test -f /.attbin/uname) >/dev/null 2>&1 ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +# Note: order is significant - the case branches are not exclusive. + +case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". + UNAME_MACHINE_ARCH=`(uname -p) 2>/dev/null` || \ + UNAME_MACHINE_ARCH=unknown + case "${UNAME_MACHINE_ARCH}" in + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + *) machine=${UNAME_MACHINE_ARCH}-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently, or will in the future. + case "${UNAME_MACHINE_ARCH}" in + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + eval $set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep __ELF__ >/dev/null + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? 
+ os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # The OS release + release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. + echo "${machine}-${os}${release}" + exit 0 ;; + amiga:OpenBSD:*:*) + echo m68k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + arc:OpenBSD:*:*) + echo mipsel-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + hp300:OpenBSD:*:*) + echo m68k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + mac68k:OpenBSD:*:*) + echo m68k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + macppc:OpenBSD:*:*) + echo powerpc-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + mvme68k:OpenBSD:*:*) + echo m68k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + mvme88k:OpenBSD:*:*) + echo m88k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + mvmeppc:OpenBSD:*:*) + echo powerpc-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + pmax:OpenBSD:*:*) + echo mipsel-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + sgi:OpenBSD:*:*) + echo mipseb-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + sun3:OpenBSD:*:*) + echo m68k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + wgrisc:OpenBSD:*:*) + echo mipsel-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + *:OpenBSD:*:*) + echo ${UNAME_MACHINE}-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + alpha:OSF1:*:*) + if test $UNAME_RELEASE = "V4.0"; then + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + fi + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. 
+ cat <$dummy.s + .data +\$Lformat: + .byte 37,100,45,37,120,10,0 # "%d-%x\n" + + .text + .globl main + .align 4 + .ent main +main: + .frame \$30,16,\$26,0 + ldgp \$29,0(\$27) + .prologue 1 + .long 0x47e03d80 # implver \$0 + lda \$2,-1 + .long 0x47e20c21 # amask \$2,\$1 + lda \$16,\$Lformat + mov \$0,\$17 + not \$1,\$18 + jsr \$26,printf + ldgp \$29,0(\$26) + mov 0,\$16 + jsr \$26,exit + .end main +EOF + eval $set_cc_for_build + $CC_FOR_BUILD $dummy.s -o $dummy 2>/dev/null + if test "$?" = 0 ; then + case `./$dummy` in + 0-0) + UNAME_MACHINE="alpha" + ;; + 1-0) + UNAME_MACHINE="alphaev5" + ;; + 1-1) + UNAME_MACHINE="alphaev56" + ;; + 1-101) + UNAME_MACHINE="alphapca56" + ;; + 2-303) + UNAME_MACHINE="alphaev6" + ;; + 2-307) + UNAME_MACHINE="alphaev67" + ;; + 2-1307) + UNAME_MACHINE="alphaev68" + ;; + esac + fi + rm -f $dummy.s $dummy + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[VTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + exit 0 ;; + Alpha\ *:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # Should we change UNAME_MACHINE based on the output of uname instead + # of the specific Alpha model? + echo alpha-pc-interix + exit 0 ;; + 21064:Windows_NT:50:3) + echo alpha-dec-winnt3.5 + exit 0 ;; + Amiga*:UNIX_System_V:4.0:*) + echo m68k-unknown-sysv4 + exit 0;; + *:[Aa]miga[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-amigaos + exit 0 ;; + *:[Mm]orph[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-morphos + exit 0 ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit 0 ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix${UNAME_RELEASE} + exit 0;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + echo hppa1.1-hitachi-hiuxmpp + exit 0;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. 
+ if test "`(/bin/universe) 2>/dev/null`" = att ; then + echo pyramid-pyramid-sysv3 + else + echo pyramid-pyramid-bsd + fi + exit 0 ;; + NILE*:*:*:dcosx) + echo pyramid-pyramid-svr4 + exit 0 ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit 0 ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit 0 ;; + i86pc:SunOS:5.*:*) + echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit 0 ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit 0 ;; + sun4*:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. + echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` + exit 0 ;; + sun3*:SunOS:*:*) + echo m68k-sun-sunos${UNAME_RELEASE} + exit 0 ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 + case "`/bin/arch`" in + sun3) + echo m68k-sun-sunos${UNAME_RELEASE} + ;; + sun4) + echo sparc-sun-sunos${UNAME_RELEASE} + ;; + esac + exit 0 ;; + aushp:SunOS:*:*) + echo sparc-auspex-sunos${UNAME_RELEASE} + exit 0 ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. 
+ atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit 0 ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit 0 ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit 0 ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + echo m68k-milan-mint${UNAME_RELEASE} + exit 0 ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + echo m68k-hades-mint${UNAME_RELEASE} + exit 0 ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + echo m68k-unknown-mint${UNAME_RELEASE} + exit 0 ;; + powerpc:machten:*:*) + echo powerpc-apple-machten${UNAME_RELEASE} + exit 0 ;; + RISC*:Mach:*:*) + echo mips-dec-mach_bsd4.3 + exit 0 ;; + RISC*:ULTRIX:*:*) + echo mips-dec-ultrix${UNAME_RELEASE} + exit 0 ;; + VAX*:ULTRIX*:*:*) + echo vax-dec-ultrix${UNAME_RELEASE} + exit 0 ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + echo clipper-intergraph-clix${UNAME_RELEASE} + exit 0 ;; + mips:*:*:UMIPS | mips:*:*:RISCos) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c +#ifdef __cplusplus +#include /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD $dummy.c -o $dummy \ + && ./$dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \ + && rm -f $dummy.c $dummy && exit 0 + rm -f $dummy.c $dummy + echo mips-mips-riscos${UNAME_RELEASE} + exit 0 ;; + Motorola:PowerMAX_OS:*:*) + echo powerpc-motorola-powermax + exit 0 ;; + Night_Hawk:Power_UNIX:*:*) 
+ echo powerpc-harris-powerunix + exit 0 ;; + m88k:CX/UX:7*:*) + echo m88k-harris-cxux7 + exit 0 ;; + m88k:*:4*:R4*) + echo m88k-motorola-sysv4 + exit 0 ;; + m88k:*:3*:R3*) + echo m88k-motorola-sysv3 + exit 0 ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] + then + if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ + [ ${TARGET_BINARY_INTERFACE}x = x ] + then + echo m88k-dg-dgux${UNAME_RELEASE} + else + echo m88k-dg-dguxbcs${UNAME_RELEASE} + fi + else + echo i586-dg-dgux${UNAME_RELEASE} + fi + exit 0 ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + echo m88k-dolphin-sysv3 + exit 0 ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + echo m88k-motorola-sysv3 + exit 0 ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + echo m88k-tektronix-sysv3 + exit 0 ;; + Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) + echo m68k-tektronix-bsd + exit 0 ;; + *:IRIX*:*:*) + echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` + exit 0 ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
+ echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit 0 ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + echo i386-ibm-aix + exit 0 ;; + ia64:AIX:*:*) + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} + exit 0 ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + $CC_FOR_BUILD $dummy.c -o $dummy && ./$dummy && rm -f $dummy.c $dummy && exit 0 + rm -f $dummy.c $dummy + echo rs6000-ibm-aix3.2.5 + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + echo rs6000-ibm-aix3.2.4 + else + echo rs6000-ibm-aix3.2 + fi + exit 0 ;; + *:AIX:*:[45]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${IBM_ARCH}-ibm-aix${IBM_REV} + exit 0 ;; + *:AIX:*:*) + echo rs6000-ibm-aix + exit 0 ;; + ibmrt:4.4BSD:*|romp-ibm:BSD:*) + echo romp-ibm-bsd4.4 + exit 0 ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to + exit 0 ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + echo rs6000-bull-bosx + exit 0 ;; + DPX/2?00:B.O.S.:*:*) + echo m68k-bull-sysv3 + exit 0 ;; + 9000/[34]??:4.3bsd:1.*:*) + echo m68k-hp-bsd + exit 0 ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + echo m68k-hp-bsd4.4 + exit 0 ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + case "${UNAME_MACHINE}" in + 9000/31? ) HP_ARCH=m68000 ;; + 9000/[34]?? 
) HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if [ -x /usr/bin/getconf ]; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "${sc_cpu_version}" in + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "${sc_kernel_bits}" in + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; + '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 + esac ;; + esac + fi + if [ "${HP_ARCH}" = "" ]; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + + #define _HPUX_SOURCE + #include + #include + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS= $CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null) && HP_ARCH=`./$dummy` + if test -z "$HP_ARCH"; then HP_ARCH=hppa; fi + rm -f $dummy.c $dummy + fi ;; + esac + echo ${HP_ARCH}-hp-hpux${HPUX_REV} + exit 0 ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + echo ia64-hp-hpux${HPUX_REV} + exit 0 ;; + 3050*:HI-UX:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. 
*/ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD $dummy.c -o $dummy && ./$dummy && rm -f $dummy.c $dummy && exit 0 + rm -f $dummy.c $dummy + echo unknown-hitachi-hiuxwe2 + exit 0 ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) + echo hppa1.1-hp-bsd + exit 0 ;; + 9000/8??:4.3bsd:*:*) + echo hppa1.0-hp-bsd + exit 0 ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit 0 ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) + echo hppa1.1-hp-osf + exit 0 ;; + hp8??:OSF1:*:*) + echo hppa1.0-hp-osf + exit 0 ;; + i*86:OSF1:*:*) + if [ -x /usr/sbin/sysversion ] ; then + echo ${UNAME_MACHINE}-unknown-osf1mk + else + echo ${UNAME_MACHINE}-unknown-osf1 + fi + exit 0 ;; + parisc*:Lites*:*:*) + echo hppa1.1-hp-lites + exit 0 ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd + exit 0 ;; + C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit 0 ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd + exit 0 ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd + exit 0 ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd + exit 0 ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit 0 ;; + CRAY*[A-Z]90:*:*:*) + echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit 0 ;; + CRAY*TS:*:*:*) + echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit 0 ;; + 
CRAY*T3D:*:*:*) + echo alpha-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit 0 ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit 0 ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit 0 ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit 0 ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} + exit 0 ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi${UNAME_RELEASE} + exit 0 ;; + *:BSD/OS:*:*) + echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} + exit 0 ;; + *:FreeBSD:*:*) + echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + exit 0 ;; + i*:CYGWIN*:*) + echo ${UNAME_MACHINE}-pc-cygwin + exit 0 ;; + i*:MINGW*:*) + echo ${UNAME_MACHINE}-pc-mingw32 + exit 0 ;; + i*:PW*:*) + echo ${UNAME_MACHINE}-pc-pw32 + exit 0 ;; + x86:Interix*:3*) + echo i386-pc-interix3 + exit 0 ;; + i*:Windows_NT*:* | Pentium*:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we + # UNAME_MACHINE based on the output of uname instead of i386? 
+ echo i386-pc-interix + exit 0 ;; + i*:UWIN*:*) + echo ${UNAME_MACHINE}-pc-uwin + exit 0 ;; + p*:CYGWIN*:*) + echo powerpcle-unknown-cygwin + exit 0 ;; + prep*:SunOS:5.*:*) + echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit 0 ;; + *:GNU:*:*) + echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + exit 0 ;; + i*86:Minix:*:*) + echo ${UNAME_MACHINE}-pc-minix + exit 0 ;; + arm*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit 0 ;; + ia64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux + exit 0 ;; + m68*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit 0 ;; + mips:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef mips + #undef mipsel + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=mipsel + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=mips + #else + CPU= + #endif + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=` + rm -f $dummy.c + test x"${CPU}" != x && echo "${CPU}-pc-linux-gnu" && exit 0 + ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-gnu + exit 0 ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-gnu + exit 0 ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null + if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi + echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + exit 0 ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) echo hppa1.1-unknown-linux-gnu ;; + PA8*) echo hppa2.0-unknown-linux-gnu ;; + *) echo hppa-unknown-linux-gnu ;; + esac + exit 0 ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-gnu + exit 0 ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo ${UNAME_MACHINE}-ibm-linux + exit 0 ;; + sh*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit 0 ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit 0 ;; + x86_64:Linux:*:*) + echo x86_64-unknown-linux-gnu + exit 0 ;; + i*86:Linux:*:*) + # The BFD linker knows what the default object file format is, so + # first see if it will tell us. cd to the root directory to prevent + # problems with other programs or directories called `ld' in the path. + # Set LC_ALL=C to ensure ld outputs messages in English. + ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \ + | sed -ne '/supported targets:/!d + s/[ ][ ]*/ /g + s/.*supported targets: *// + s/ .*// + p'` + case "$ld_supported_targets" in + elf32-i386) + TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" + ;; + a.out-i386-linux) + echo "${UNAME_MACHINE}-pc-linux-gnuaout" + exit 0 ;; + coff-i386) + echo "${UNAME_MACHINE}-pc-linux-gnucoff" + exit 0 ;; + "") + # Either a pre-BFD a.out linker (linux-gnuoldld) or + # one that does not give us useful --help. 
+ echo "${UNAME_MACHINE}-pc-linux-gnuoldld" + exit 0 ;; + esac + # Determine whether the default compiler is a.out or elf + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + #ifdef __ELF__ + # ifdef __GLIBC__ + # if __GLIBC__ >= 2 + LIBC=gnu + # else + LIBC=gnulibc1 + # endif + # else + LIBC=gnulibc1 + # endif + #else + #ifdef __INTEL_COMPILER + LIBC=gnu + #else + LIBC=gnuaout + #endif + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=` + rm -f $dummy.c + test x"${LIBC}" != x && echo "${UNAME_MACHINE}-pc-linux-${LIBC}" && exit 0 + test x"${TENTATIVE}" != x && echo "${TENTATIVE}" && exit 0 + ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. + echo i386-sequent-sysv4 + exit 0 ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. 
+ echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} + exit 0 ;; + i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) + UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} + else + echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} + fi + exit 0 ;; + i*86:*:5:[78]*) + case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; + *Pent*|*Celeron) UNAME_MACHINE=i686 ;; + esac + echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + exit 0 ;; + i*86:*:3.2:*) + if test -f /usr/options/cb.name; then + UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then + UNAME_REL=`(/bin/uname -X|egrep Release|sed -e 's/.*= //')` + (/bin/uname -X|egrep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|egrep '^Machine.*Pentium' >/dev/null) \ + && UNAME_MACHINE=i586 + (/bin/uname -X|egrep '^Machine.*Pent ?II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|egrep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 + echo ${UNAME_MACHINE}-pc-sco$UNAME_REL + else + echo ${UNAME_MACHINE}-pc-sysv32 + fi + exit 0 ;; + i*86:*DOS:*:*) + echo ${UNAME_MACHINE}-pc-msdosdjgpp + exit 0 ;; + pc:*:*:*) + # Left here for compatibility: + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i386. + echo i386-pc-msdosdjgpp + exit 0 ;; + Intel:Mach:3*:*) + echo i386-pc-mach3 + exit 0 ;; + paragon:*:*:*) + echo i860-intel-osf1 + exit 0 ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered. 
+ echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 + fi + exit 0 ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + echo m68010-convergent-sysv + exit 0 ;; + M68*:*:R3V[567]*:*) + test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;; + 3[34]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && echo i486-ncr-sysv4.3${OS_REL} && exit 0 + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && echo i586-ncr-sysv4.3${OS_REL} && exit 0 ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && echo i486-ncr-sysv4 && exit 0 ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + echo m68k-unknown-lynxos${UNAME_RELEASE} + exit 0 ;; + mc68030:UNIX_System_V:4.*:*) + echo m68k-atari-sysv4 + exit 0 ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) + echo i386-unknown-lynxos${UNAME_RELEASE} + exit 0 ;; + TSUNAMI:LynxOS:2.*:*) + echo sparc-unknown-lynxos${UNAME_RELEASE} + exit 0 ;; + rs6000:LynxOS:2.*:*) + echo rs6000-unknown-lynxos${UNAME_RELEASE} + exit 0 ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) + echo powerpc-unknown-lynxos${UNAME_RELEASE} + exit 0 ;; + SM[BE]S:UNIX_SV:*:*) + echo mips-dde-sysv${UNAME_RELEASE} + exit 0 ;; + RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit 0 ;; + RM*:SINIX-*:*:*) + echo mips-sni-sysv4 + exit 0 ;; + *:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + echo ${UNAME_MACHINE}-sni-sysv4 + else + echo ns32k-sni-sysv + fi + exit 0 ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + echo i586-unisys-sysv4 + exit 0 ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes . + # How about differentiating between stratus architectures? 
-djm + echo hppa1.1-stratus-sysv4 + exit 0 ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + echo i860-stratus-sysv4 + exit 0 ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + echo hppa1.1-stratus-vos + exit 0 ;; + mc68*:A/UX:*:*) + echo m68k-apple-aux${UNAME_RELEASE} + exit 0 ;; + news*:NEWS-OS:6*:*) + echo mips-sony-newsos6 + exit 0 ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then + echo mips-nec-sysv${UNAME_RELEASE} + else + echo mips-unknown-sysv${UNAME_RELEASE} + fi + exit 0 ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit 0 ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit 0 ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. + echo i586-pc-beos + exit 0 ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux${UNAME_RELEASE} + exit 0 ;; + SX-5:SUPER-UX:*:*) + echo sx5-nec-superux${UNAME_RELEASE} + exit 0 ;; + Power*:Rhapsody:*:*) + echo powerpc-apple-rhapsody${UNAME_RELEASE} + exit 0 ;; + *:Rhapsody:*:*) + echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} + exit 0 ;; + *:Darwin:*:*) + echo `uname -p`-apple-darwin${UNAME_RELEASE} + exit 0 ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + if test "${UNAME_MACHINE}" = "x86pc"; then + UNAME_MACHINE=pc + echo i386-${UNAME_MACHINE}-nto-qnx + else + echo `uname -p`-${UNAME_MACHINE}-nto-qnx + fi + exit 0 ;; + *:QNX:*:4*) + echo i386-pc-qnx + exit 0 ;; + NSR-[GKLNPTVW]:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk${UNAME_RELEASE} + exit 0 ;; + *:NonStop-UX:*:*) + echo mips-compaq-nonstopux + exit 0 ;; + BS2000:POSIX*:*:*) + echo bs2000-siemens-sysv + exit 0 ;; + DS/*:UNIX_System_V:*:*) + echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} + exit 0 ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. 
+ if test "$cputype" = "386"; then + UNAME_MACHINE=i386 + else + UNAME_MACHINE="$cputype" + fi + echo ${UNAME_MACHINE}-unknown-plan9 + exit 0 ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. + echo ${UNAME_MACHINE}-pc-os2-emx + exit 0 ;; + *:TOPS-10:*:*) + echo pdp10-unknown-tops10 + exit 0 ;; + *:TENEX:*:*) + echo pdp10-unknown-tenex + exit 0 ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + echo pdp10-dec-tops20 + exit 0 ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + echo pdp10-xkl-tops20 + exit 0 ;; + *:TOPS-20:*:*) + echo pdp10-unknown-tops20 + exit 0 ;; + *:ITS:*:*) + echo pdp10-unknown-its + exit 0 ;; + i*86:XTS-300:*:STOP) + echo ${UNAME_MACHINE}-unknown-stop + exit 0 ;; + i*86:atheos:*:*) + echo ${UNAME_MACHINE}-unknown-atheos + exit 0 ;; +esac + +#echo '(No uname command or uname output not recognized.)' 1>&2 +#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 + +eval $set_cc_for_build +cat >$dummy.c < +# include +#endif +main () +{ +#if defined (sony) +#if defined (MIPSEB) + /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, + I don't know.... 
*/ + printf ("mips-sony-bsd\n"); exit (0); +#else +#include + printf ("m68k-sony-newsos%s\n", +#ifdef NEWSOS4 + "4" +#else + "" +#endif + ); exit (0); +#endif +#endif + +#if defined (__arm) && defined (__acorn) && defined (__unix) + printf ("arm-acorn-riscix"); exit (0); +#endif + +#if defined (hp300) && !defined (hpux) + printf ("m68k-hp-bsd\n"); exit (0); +#endif + +#if defined (NeXT) +#if !defined (__ARCHITECTURE__) +#define __ARCHITECTURE__ "m68k" +#endif + int version; + version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); + exit (0); +#endif + +#if defined (MULTIMAX) || defined (n16) +#if defined (UMAXV) + printf ("ns32k-encore-sysv\n"); exit (0); +#else +#if defined (CMU) + printf ("ns32k-encore-mach\n"); exit (0); +#else + printf ("ns32k-encore-bsd\n"); exit (0); +#endif +#endif +#endif + +#if defined (__386BSD__) + printf ("i386-pc-bsd\n"); exit (0); +#endif + +#if defined (sequent) +#if defined (i386) + printf ("i386-sequent-dynix\n"); exit (0); +#endif +#if defined (ns32000) + printf ("ns32k-sequent-dynix\n"); exit (0); +#endif +#endif + +#if defined (_SEQUENT_) + struct utsname un; + + uname(&un); + + if (strncmp(un.version, "V2", 2) == 0) { + printf ("i386-sequent-ptx2\n"); exit (0); + } + if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ + printf ("i386-sequent-ptx1\n"); exit (0); + } + printf ("i386-sequent-ptx\n"); exit (0); + +#endif + +#if defined (vax) +# if !defined (ultrix) +# include +# if defined (BSD) +# if BSD == 43 + printf ("vax-dec-bsd4.3\n"); exit (0); +# else +# if BSD == 199006 + printf ("vax-dec-bsd4.3reno\n"); exit (0); +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# endif +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# else + printf ("vax-dec-ultrix\n"); exit (0); +# endif +#endif + +#if defined (alliant) && defined (i860) + printf ("i860-alliant-bsd\n"); exit (0); +#endif + + exit (1); +} +EOF + +$CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null && ./$dummy && rm -f $dummy.c $dummy && exit 0 +rm -f $dummy.c $dummy + +# Apollos put the system type in the environment. + +test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit 0; } + +# Convex versions that predate uname can use getsysinfo(1) + +if [ -x /usr/convex/getsysinfo ] +then + case `getsysinfo -f cpu_type` in + c1*) + echo c1-convex-bsd + exit 0 ;; + c2*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit 0 ;; + c34*) + echo c34-convex-bsd + exit 0 ;; + c38*) + echo c38-convex-bsd + exit 0 ;; + c4*) + echo c4-convex-bsd + exit 0 ;; + esac +fi + +cat >&2 < in order to provide the needed +information to handle your system. 
+ +config.guess timestamp = $timestamp + +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = ${UNAME_MACHINE} +UNAME_RELEASE = ${UNAME_RELEASE} +UNAME_SYSTEM = ${UNAME_SYSTEM} +UNAME_VERSION = ${UNAME_VERSION} +EOF + +exit 1 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/config.h.in b/config.h.in new file mode 100644 index 000000000..5b3ab7f63 --- /dev/null +++ b/config.h.in @@ -0,0 +1,101 @@ +/* config.h.in. Generated automatically from configure.in by autoheader 2.13. */ + +/* Define to empty if the keyword does not work. */ +#undef const + +/* Define to `int' if doesn't define. */ +#undef gid_t + +/* Define if you have a working `mmap' system call. */ +#undef HAVE_MMAP + +/* Define to `long' if doesn't define. */ +#undef off_t + +/* Define as the return type of signal handlers (int or void). */ +#undef RETSIGTYPE + +/* Define to `unsigned' if doesn't define. */ +#undef size_t + +/* Define if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Define if you can safely include both and . */ +#undef TIME_WITH_SYS_TIME + +/* Define to `int' if doesn't define. */ +#undef uid_t + +#undef KERNEL_2_2 +#undef KERNEL_2_4 + +#undef GLIBC_2_1 +#undef GLIBC_2_2 + +/* Define if you have the floor function. 
*/ +#undef HAVE_FLOOR + +/* Define if you have the getpagesize function. */ +#undef HAVE_GETPAGESIZE + +/* Define if you have the memchr function. */ +#undef HAVE_MEMCHR + +/* Define if you have the memset function. */ +#undef HAVE_MEMSET + +/* Define if you have the mkdir function. */ +#undef HAVE_MKDIR + +/* Define if you have the strchr function. */ +#undef HAVE_STRCHR + +/* Define if you have the strdup function. */ +#undef HAVE_STRDUP + +/* Define if you have the strpbrk function. */ +#undef HAVE_STRPBRK + +/* Define if you have the strrchr function. */ +#undef HAVE_STRRCHR + +/* Define if you have the strstr function. */ +#undef HAVE_STRSTR + +/* Define if you have the header file. */ +#undef HAVE_FCNTL_H + +/* Define if you have the header file. */ +#undef HAVE_MALLOC_H + +/* Define if you have the header file. */ +#undef HAVE_STDLIB_H + +/* Define if you have the header file. */ +#undef HAVE_STRING_H + +/* Define if you have the header file. */ +#undef HAVE_SYS_SOCKET_H + +/* Define if you have the header file. */ +#undef HAVE_SYS_STATFS_H + +/* Define if you have the header file. */ +#undef HAVE_SYS_TIME_H + +/* Define if you have the header file. */ +#undef HAVE_TERMIOS_H + +/* Define if you have the header file. */ +#undef HAVE_UNISTD_H + +/* Define if you have the header file. */ +#undef HAVE_UTIME_H + +/* Name of package */ +#undef PACKAGE + +/* Version number of package */ +#undef VERSION + diff --git a/config.sub b/config.sub new file mode 100755 index 000000000..4ac7ab2f0 --- /dev/null +++ b/config.sub @@ -0,0 +1,1443 @@ +#! /bin/sh +# Configuration validation subroutine script. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002 Free Software Foundation, Inc. + +timestamp='2002-02-22' + +# This file is (in principle) common to ALL GNU software. +# The presence of a machine in this file suggests that SOME GNU software +# can handle that machine. It does not imply ALL GNU software can. 
+# +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Please send patches to . Submit a context +# diff and a properly formatted ChangeLog entry. +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1. +# Otherwise, we print the canonical config type on stdout and succeed. + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. 
+ +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] CPU-MFR-OPSYS + $0 [OPTION] ALIAS + +Canonicalize a configuration name. + +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.sub ($timestamp) + +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001 +Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit 0 ;; + --version | -v ) + echo "$version" ; exit 0 ;; + --help | --h* | -h ) + echo "$usage"; exit 0 ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" + exit 1 ;; + + *local*) + # First pass through any local machine types. + echo $1 + exit 0;; + + * ) + break ;; + esac +done + +case $# in + 0) echo "$me: missing argument$help" >&2 + exit 1;; + 1) ;; + *) echo "$me: too many arguments$help" >&2 + exit 1;; +esac + +# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). +# Here we must recognize all the valid KERNEL-OS combinations. 
+maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` +case $maybe_os in + nto-qnx* | linux-gnu* | storm-chaos* | os2-emx* | windows32-* | rtmk-nova*) + os=-$maybe_os + basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` + ;; + *) + basic_machine=`echo $1 | sed 's/-[^-]*$//'` + if [ $basic_machine != $1 ] + then os=`echo $1 | sed 's/.*-/-/'` + else os=; fi + ;; +esac + +### Let's recognize common machines as not being operating systems so +### that things like config.sub decstation-3100 work. We also +### recognize some manufacturers as not being operating systems, so we +### can provide default operating systems below. +case $os in + -sun*os*) + # Prevent following clause from handling this invalid input. + ;; + -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ + -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ + -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ + -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ + -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ + -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ + -apple | -axis) + os= + basic_machine=$1 + ;; + -sim | -cisco | -oki | -wec | -winbond) + os= + basic_machine=$1 + ;; + -scout) + ;; + -wrs) + os=-vxworks + basic_machine=$1 + ;; + -chorusos*) + os=-chorusos + basic_machine=$1 + ;; + -chorusrdb) + os=-chorusrdb + basic_machine=$1 + ;; + -hiux*) + os=-hiuxwe2 + ;; + -sco5) + os=-sco3.2v5 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco4) + os=-sco3.2v4 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2.[4-9]*) + os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2v[4-9]*) + # Don't forget version if it is 3.2v4 or newer. 
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco*) + os=-sco3.2v2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -udk*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -isc) + os=-isc2.2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -clix*) + basic_machine=clipper-intergraph + ;; + -isc*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -lynx*) + os=-lynxos + ;; + -ptx*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` + ;; + -windowsnt*) + os=`echo $os | sed -e 's/windowsnt/winnt/'` + ;; + -psos*) + os=-psos + ;; + -mint | -mint[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; +esac + +# Decode aliases for certain CPU-COMPANY combinations. +case $basic_machine in + # Recognize the basic CPU types without company name. + # Some are omitted here because they have special meanings below. + 1750a | 580 \ + | a29k \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ + | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr \ + | c4x | clipper \ + | d10v | d30v | dsp16xx \ + | fr30 \ + | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | i370 | i860 | i960 | ia64 \ + | m32r | m68000 | m68k | m88k | mcore \ + | mips | mips16 | mips64 | mips64el | mips64orion | mips64orionel \ + | mips64vr4100 | mips64vr4100el | mips64vr4300 \ + | mips64vr4300el | mips64vr5000 | mips64vr5000el \ + | mipsbe | mipseb | mipsel | mipsle | mipstx39 | mipstx39el \ + | mipsisa32 \ + | mn10200 | mn10300 \ + | ns16k | ns32k \ + | openrisc | or32 \ + | pdp10 | pdp11 | pj | pjl \ + | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \ + | pyramid \ + | sh | sh[34] | sh[34]eb | shbe | shle | sh64 \ + | sparc | sparc64 | sparclet | sparclite | sparcv9 | sparcv9b \ + | strongarm \ + | tahoe | thumb | tic80 | tron \ + | v850 | v850e \ + | we32k \ + | x86 | xscale | xstormy16 | xtensa \ + | z8k) + 
basic_machine=$basic_machine-unknown + ;; + m6811 | m68hc11 | m6812 | m68hc12) + # Motorola 68HC11/12. + basic_machine=$basic_machine-unknown + os=-none + ;; + m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) + ;; + + # We use `pc' rather than `unknown' + # because (1) that's what they normally are, and + # (2) the word "unknown" tends to confuse beginning users. + i*86 | x86_64) + basic_machine=$basic_machine-pc + ;; + # Object if more than one company name word. + *-*-*) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; + # Recognize the basic CPU types with company name. + 580-* \ + | a29k-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ + | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ + | arm-* | armbe-* | armle-* | armv*-* \ + | avr-* \ + | bs2000-* \ + | c[123]* | c30-* | [cjt]90-* | c54x-* \ + | clipper-* | cydra-* \ + | d10v-* | d30v-* \ + | elxsi-* \ + | f30[01]-* | f700-* | fr30-* | fx80-* \ + | h8300-* | h8500-* \ + | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ + | i*86-* | i860-* | i960-* | ia64-* \ + | m32r-* \ + | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ + | m88110-* | m88k-* | mcore-* \ + | mips-* | mips16-* | mips64-* | mips64el-* | mips64orion-* \ + | mips64orionel-* | mips64vr4100-* | mips64vr4100el-* \ + | mips64vr4300-* | mips64vr4300el-* | mipsbe-* | mipseb-* \ + | mipsle-* | mipsel-* | mipstx39-* | mipstx39el-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ + | orion-* \ + | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ + | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ + | pyramid-* \ + | romp-* | rs6000-* \ + | sh-* | sh[34]-* | sh[34]eb-* | shbe-* | shle-* | sh64-* \ + | sparc-* | sparc64-* | sparc86x-* | sparclite-* \ + | sparcv9-* | sparcv9b-* | strongarm-* | sv1-* | sx?-* \ + | tahoe-* | thumb-* | tic30-* | tic54x-* | tic80-* | 
tron-* \ + | v850-* | v850e-* | vax-* \ + | we32k-* \ + | x86-* | x86_64-* | xps100-* | xscale-* | xstormy16-* \ + | xtensa-* \ + | ymp-* \ + | z8k-*) + ;; + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. + 386bsd) + basic_machine=i386-unknown + os=-bsd + ;; + 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + basic_machine=m68000-att + ;; + 3b*) + basic_machine=we32k-att + ;; + a29khif) + basic_machine=a29k-amd + os=-udi + ;; + adobe68k) + basic_machine=m68010-adobe + os=-scout + ;; + alliant | fx80) + basic_machine=fx80-alliant + ;; + altos | altos3068) + basic_machine=m68k-altos + ;; + am29k) + basic_machine=a29k-none + os=-bsd + ;; + amdahl) + basic_machine=580-amdahl + os=-sysv + ;; + amiga | amiga-*) + basic_machine=m68k-unknown + ;; + amigaos | amigados) + basic_machine=m68k-unknown + os=-amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + os=-sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + os=-sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + os=-bsd + ;; + aux) + basic_machine=m68k-apple + os=-aux + ;; + balance) + basic_machine=ns32k-sequent + os=-dynix + ;; + c90) + basic_machine=c90-cray + os=-unicos + ;; + convex-c1) + basic_machine=c1-convex + os=-bsd + ;; + convex-c2) + basic_machine=c2-convex + os=-bsd + ;; + convex-c32) + basic_machine=c32-convex + os=-bsd + ;; + convex-c34) + basic_machine=c34-convex + os=-bsd + ;; + convex-c38) + basic_machine=c38-convex + os=-bsd + ;; + cray | j90) + basic_machine=j90-cray + os=-unicos + ;; + crds | unos) + basic_machine=m68k-crds + ;; + cris | cris-* | etrax*) + basic_machine=cris-axis + ;; + da30 | da30-*) + basic_machine=m68k-da30 + ;; + decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) + basic_machine=mips-dec + ;; + decsystem10* | dec10*) + basic_machine=pdp10-dec + os=-tops10 + ;; + decsystem20* | dec20*) + basic_machine=pdp10-dec + os=-tops20 + ;; + delta | 3300 | 
motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + basic_machine=m68k-motorola + ;; + delta88) + basic_machine=m88k-motorola + os=-sysv3 + ;; + dpx20 | dpx20-*) + basic_machine=rs6000-bull + os=-bosx + ;; + dpx2* | dpx2*-bull) + basic_machine=m68k-bull + os=-sysv3 + ;; + ebmon29k) + basic_machine=a29k-amd + os=-ebmon + ;; + elxsi) + basic_machine=elxsi-elxsi + os=-bsd + ;; + encore | umax | mmax) + basic_machine=ns32k-encore + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + os=-ose + ;; + fx2800) + basic_machine=i860-alliant + ;; + genix) + basic_machine=ns32k-ns + ;; + gmicro) + basic_machine=tron-gmicro + os=-sysv + ;; + go32) + basic_machine=i386-pc + os=-go32 + ;; + h3050r* | hiux*) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + h8300hms) + basic_machine=h8300-hitachi + os=-hms + ;; + h8300xray) + basic_machine=h8300-hitachi + os=-xray + ;; + h8500hms) + basic_machine=h8500-hitachi + os=-hms + ;; + harris) + basic_machine=m88k-harris + os=-sysv3 + ;; + hp300-*) + basic_machine=m68k-hp + ;; + hp300bsd) + basic_machine=m68k-hp + os=-bsd + ;; + hp300hpux) + basic_machine=m68k-hp + os=-hpux + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + basic_machine=m68000-hp + ;; + hp9k3[2-9][0-9]) + basic_machine=m68k-hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + basic_machine=hppa1.1-hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hppa-next) + os=-nextstep3 + ;; + hppaosf) + basic_machine=hppa1.1-hp + os=-osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=-proelf + ;; 
+ i370-ibm* | ibm*) + basic_machine=i370-ibm + ;; +# I'm not sure what "Sysv32" means. Should this be sysv3.2? + i*86v32) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv32 + ;; + i*86v4*) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv4 + ;; + i*86v) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv + ;; + i*86sol2) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-solaris2 + ;; + i386mach) + basic_machine=i386-mach + os=-mach + ;; + i386-vsta | vsta) + basic_machine=i386-unknown + os=-vsta + ;; + iris | iris4d) + basic_machine=mips-sgi + case $os in + -irix*) + ;; + *) + os=-irix4 + ;; + esac + ;; + isi68 | isi) + basic_machine=m68k-isi + os=-sysv + ;; + m88k-omron*) + basic_machine=m88k-omron + ;; + magnum | m3230) + basic_machine=mips-mips + os=-sysv + ;; + merlin) + basic_machine=ns32k-utek + os=-sysv + ;; + mingw32) + basic_machine=i386-pc + os=-mingw32 + ;; + miniframe) + basic_machine=m68000-convergent + ;; + *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; + mips3*-*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` + ;; + mips3*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown + ;; + mmix*) + basic_machine=mmix-knuth + os=-mmixware + ;; + monitor) + basic_machine=m68k-rom68k + os=-coff + ;; + morphos) + basic_machine=powerpc-unknown + os=-morphos + ;; + msdos) + basic_machine=i386-pc + os=-msdos + ;; + mvs) + basic_machine=i370-ibm + os=-mvs + ;; + ncr3000) + basic_machine=i486-ncr + os=-sysv4 + ;; + netbsd386) + basic_machine=i386-unknown + os=-netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + os=-linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + os=-newsos + ;; + news1000) + basic_machine=m68030-sony + os=-newsos + ;; + news-3600 | risc-news) + basic_machine=mips-sony + os=-newsos + ;; + necv70) + basic_machine=v70-nec + os=-sysv + ;; + next | m*-next ) + basic_machine=m68k-next + case $os in + 
-nextstep* ) + ;; + -ns2*) + os=-nextstep2 + ;; + *) + os=-nextstep3 + ;; + esac + ;; + nh3000) + basic_machine=m68k-harris + os=-cxux + ;; + nh[45]000) + basic_machine=m88k-harris + os=-cxux + ;; + nindy960) + basic_machine=i960-intel + os=-nindy + ;; + mon960) + basic_machine=i960-intel + os=-mon960 + ;; + nonstopux) + basic_machine=mips-compaq + os=-nonstopux + ;; + np1) + basic_machine=np1-gould + ;; + nsr-tandem) + basic_machine=nsr-tandem + ;; + op50n-* | op60c-*) + basic_machine=hppa1.1-oki + os=-proelf + ;; + or32 | or32-*) + basic_machine=or32-unknown + os=-coff + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=-ose + ;; + os68k) + basic_machine=m68k-none + os=-os68k + ;; + pa-hitachi) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + paragon) + basic_machine=i860-intel + os=-osf + ;; + pbd) + basic_machine=sparc-tti + ;; + pbb) + basic_machine=m68k-tti + ;; + pc532 | pc532-*) + basic_machine=ns32k-pc532 + ;; + pentium | p5 | k5 | k6 | nexgen | viac3) + basic_machine=i586-pc + ;; + pentiumpro | p6 | 6x86 | athlon) + basic_machine=i686-pc + ;; + pentiumii | pentium2) + basic_machine=i686-pc + ;; + pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) + basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumpro-* | p6-* | 6x86-* | athlon-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumii-* | pentium2-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pn) + basic_machine=pn-gould + ;; + power) basic_machine=power-ibm + ;; + ppc) basic_machine=powerpc-unknown + ;; + ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppcle | powerpclittle | ppc-le | powerpc-little) + basic_machine=powerpcle-unknown + ;; + ppcle-* | powerpclittle-*) + basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64) basic_machine=powerpc64-unknown + ;; + ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64le | 
powerpc64little | ppc64-le | powerpc64-little) + basic_machine=powerpc64le-unknown + ;; + ppc64le-* | powerpc64little-*) + basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ps2) + basic_machine=i386-ibm + ;; + pw32) + basic_machine=i586-unknown + os=-pw32 + ;; + rom68k) + basic_machine=m68k-rom68k + os=-coff + ;; + rm[46]00) + basic_machine=mips-siemens + ;; + rtpc | rtpc-*) + basic_machine=romp-ibm + ;; + s390 | s390-*) + basic_machine=s390-ibm + ;; + s390x | s390x-*) + basic_machine=s390x-ibm + ;; + sa29200) + basic_machine=a29k-amd + os=-udi + ;; + sequent) + basic_machine=i386-sequent + ;; + sh) + basic_machine=sh-hitachi + os=-hms + ;; + sparclite-wrs | simso-wrs) + basic_machine=sparclite-wrs + os=-vxworks + ;; + sps7) + basic_machine=m68k-bull + os=-sysv2 + ;; + spur) + basic_machine=spur-unknown + ;; + st2000) + basic_machine=m68k-tandem + ;; + stratus) + basic_machine=i860-stratus + os=-sysv4 + ;; + sun2) + basic_machine=m68000-sun + ;; + sun2os3) + basic_machine=m68000-sun + os=-sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + os=-sunos4 + ;; + sun3os3) + basic_machine=m68k-sun + os=-sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + os=-sunos4 + ;; + sun4os3) + basic_machine=sparc-sun + os=-sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + os=-sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + os=-solaris2 + ;; + sun3 | sun3-*) + basic_machine=m68k-sun + ;; + sun4) + basic_machine=sparc-sun + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + ;; + sv1) + basic_machine=sv1-cray + os=-unicos + ;; + symmetry) + basic_machine=i386-sequent + os=-dynix + ;; + t3d) + basic_machine=alpha-cray + os=-unicos + ;; + t3e) + basic_machine=alphaev5-cray + os=-unicos + ;; + t90) + basic_machine=t90-cray + os=-unicos + ;; + tic54x | c54x*) + basic_machine=tic54x-unknown + os=-coff + ;; + tx39) + basic_machine=mipstx39-unknown + ;; + tx39el) + basic_machine=mipstx39el-unknown + ;; + toad1) + basic_machine=pdp10-xkl + os=-tops20 + 
;; + tower | tower-32) + basic_machine=m68k-ncr + ;; + udi29k) + basic_machine=a29k-amd + os=-udi + ;; + ultra3) + basic_machine=a29k-nyu + os=-sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + os=-none + ;; + vaxv) + basic_machine=vax-dec + os=-sysv + ;; + vms) + basic_machine=vax-dec + os=-vms + ;; + vpp*|vx|vx-*) + basic_machine=f301-fujitsu + ;; + vxworks960) + basic_machine=i960-wrs + os=-vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + os=-vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + os=-vxworks + ;; + w65*) + basic_machine=w65-wdc + os=-none + ;; + w89k-*) + basic_machine=hppa1.1-winbond + os=-proelf + ;; + windows32) + basic_machine=i386-pc + os=-windows32-msvcrt + ;; + xps | xps100) + basic_machine=xps100-honeywell + ;; + ymp) + basic_machine=ymp-cray + os=-unicos + ;; + z8k-*-coff) + basic_machine=z8k-unknown + os=-sim + ;; + none) + basic_machine=none-none + os=-none + ;; + +# Here we handle the default manufacturer of certain CPU types. It is in +# some cases the only manufacturer, in others, it is the most popular. 
+ w89k) + basic_machine=hppa1.1-winbond + ;; + op50n) + basic_machine=hppa1.1-oki + ;; + op60c) + basic_machine=hppa1.1-oki + ;; + romp) + basic_machine=romp-ibm + ;; + rs6000) + basic_machine=rs6000-ibm + ;; + vax) + basic_machine=vax-dec + ;; + pdp10) + # there are many clones, so DEC is not a safe bet + basic_machine=pdp10-unknown + ;; + pdp11) + basic_machine=pdp11-dec + ;; + we32k) + basic_machine=we32k-att + ;; + sh3 | sh4 | sh3eb | sh4eb) + basic_machine=sh-unknown + ;; + sh64) + basic_machine=sh64-unknown + ;; + sparc | sparcv9 | sparcv9b) + basic_machine=sparc-sun + ;; + cydra) + basic_machine=cydra-cydrome + ;; + orion) + basic_machine=orion-highlevel + ;; + orion105) + basic_machine=clipper-highlevel + ;; + mac | mpw | mac-mpw) + basic_machine=m68k-apple + ;; + pmac | pmac-mpw) + basic_machine=powerpc-apple + ;; + c4x*) + basic_machine=c4x-none + os=-coff + ;; + *-unknown) + # Make sure to match an already-canonicalized machine name. + ;; + *) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. +case $basic_machine in + *-digital*) + basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` + ;; + *-commodore*) + basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if [ x"$os" != x"" ] +then +case $os in + # First match some system type aliases + # that might get confused with valid system types. + # -solaris* is a basic system type, with this one exception. + -solaris1 | -solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; + -solaris) + os=-solaris2 + ;; + -svr4*) + os=-sysv4 + ;; + -unixware*) + os=-sysv4.2uw + ;; + -gnu/linux*) + os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` + ;; + # First accept the basic system types. + # The portable systems comes first. 
+ # Each alternative MUST END IN A *, to match a version number. + # -sysv* is not here because it comes later, after sysvr4. + -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \ + | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ + | -aos* \ + | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ + | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ + | -hiux* | -386bsd* | -netbsd* | -openbsd* | -freebsd* | -riscix* \ + | -lynxos* | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ + | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ + | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ + | -chorusos* | -chorusrdb* \ + | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -mingw32* | -linux-gnu* | -uxpv* | -beos* | -mpeix* | -udk* \ + | -interix* | -uwin* | -rhapsody* | -darwin* | -opened* \ + | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ + | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ + | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ + | -morphos* | -superux* | -rtmk* | -rtmk-nova*) + # Remember, each alternative MUST END IN *, to match a version number. 
+ ;; + -qnx*) + case $basic_machine in + x86-* | i*86-*) + ;; + *) + os=-nto$os + ;; + esac + ;; + -nto*) + os=-nto-qnx + ;; + -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ + | -windows* | -osx | -abug | -netware* | -os9* | -beos* \ + | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) + ;; + -mac*) + os=`echo $os | sed -e 's|mac|macos|'` + ;; + -linux*) + os=`echo $os | sed -e 's|linux|linux-gnu|'` + ;; + -sunos5*) + os=`echo $os | sed -e 's|sunos5|solaris2|'` + ;; + -sunos6*) + os=`echo $os | sed -e 's|sunos6|solaris3|'` + ;; + -opened*) + os=-openedition + ;; + -wince*) + os=-wince + ;; + -osfrose*) + os=-osfrose + ;; + -osf*) + os=-osf + ;; + -utek*) + os=-bsd + ;; + -dynix*) + os=-bsd + ;; + -acis*) + os=-aos + ;; + -atheos*) + os=-atheos + ;; + -386bsd) + os=-bsd + ;; + -ctix* | -uts*) + os=-sysv + ;; + -nova*) + os=-rtmk-nova + ;; + -ns2 ) + os=-nextstep2 + ;; + -nsk*) + os=-nsk + ;; + # Preserve the version number of sinix5. + -sinix5.*) + os=`echo $os | sed -e 's|sinix|sysv|'` + ;; + -sinix*) + os=-sysv4 + ;; + -triton*) + os=-sysv3 + ;; + -oss*) + os=-sysv3 + ;; + -svr4) + os=-sysv4 + ;; + -svr3) + os=-sysv3 + ;; + -sysvr4) + os=-sysv4 + ;; + # This must come after -sysvr4. + -sysv*) + ;; + -ose*) + os=-ose + ;; + -es1800*) + os=-ose + ;; + -xenix) + os=-xenix + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + os=-mint + ;; + -none) + ;; + *) + # Get rid of the `-' at the beginning of $os. + os=`echo $os | sed 's/[^-]*-//'` + echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 + exit 1 + ;; +esac +else + +# Here we handle the default operating systems that come with various machines. +# The value should be what the vendor currently ships out the door with their +# machine or put another way, the most popular os provided with the machine. 
+ +# Note that if you're going to try to match "-MANUFACTURER" here (say, +# "-sun"), then you have to tell the case statement up towards the top +# that MANUFACTURER isn't an operating system. Otherwise, code above +# will signal an error saying that MANUFACTURER isn't an operating +# system, and we'll never get to this point. + +case $basic_machine in + *-acorn) + os=-riscix1.2 + ;; + arm*-rebel) + os=-linux + ;; + arm*-semi) + os=-aout + ;; + # This must come before the *-dec entry. + pdp10-*) + os=-tops20 + ;; + pdp11-*) + os=-none + ;; + *-dec | vax-*) + os=-ultrix4.2 + ;; + m68*-apollo) + os=-domain + ;; + i386-sun) + os=-sunos4.0.2 + ;; + m68000-sun) + os=-sunos3 + # This also exists in the configure program, but was not the + # default. + # os=-sunos4 + ;; + m68*-cisco) + os=-aout + ;; + mips*-cisco) + os=-elf + ;; + mips*-*) + os=-elf + ;; + or32-*) + os=-coff + ;; + *-tti) # must be before sparc entry or we get the wrong os. + os=-sysv3 + ;; + sparc-* | *-sun) + os=-sunos4.1.1 + ;; + *-be) + os=-beos + ;; + *-ibm) + os=-aix + ;; + *-wec) + os=-proelf + ;; + *-winbond) + os=-proelf + ;; + *-oki) + os=-proelf + ;; + *-hp) + os=-hpux + ;; + *-hitachi) + os=-hiux + ;; + i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) + os=-sysv + ;; + *-cbm) + os=-amigaos + ;; + *-dg) + os=-dgux + ;; + *-dolphin) + os=-sysv3 + ;; + m68k-ccur) + os=-rtu + ;; + m88k-omron*) + os=-luna + ;; + *-next ) + os=-nextstep + ;; + *-sequent) + os=-ptx + ;; + *-crds) + os=-unos + ;; + *-ns) + os=-genix + ;; + i370-*) + os=-mvs + ;; + *-next) + os=-nextstep3 + ;; + *-gould) + os=-sysv + ;; + *-highlevel) + os=-bsd + ;; + *-encore) + os=-bsd + ;; + *-sgi) + os=-irix + ;; + *-siemens) + os=-sysv4 + ;; + *-masscomp) + os=-rtu + ;; + f30[01]-fujitsu | f700-fujitsu) + os=-uxpv + ;; + *-rom68k) + os=-coff + ;; + *-*bug) + os=-coff + ;; + *-apple) + os=-macos + ;; + *-atari*) + os=-mint + ;; + *) + os=-none + ;; +esac +fi + +# Here we handle the case where we know the os, and the 
CPU type, but not the +# manufacturer. We pick the logical manufacturer. +vendor=unknown +case $basic_machine in + *-unknown) + case $os in + -riscix*) + vendor=acorn + ;; + -sunos*) + vendor=sun + ;; + -aix*) + vendor=ibm + ;; + -beos*) + vendor=be + ;; + -hpux*) + vendor=hp + ;; + -mpeix*) + vendor=hp + ;; + -hiux*) + vendor=hitachi + ;; + -unos*) + vendor=crds + ;; + -dgux*) + vendor=dg + ;; + -luna*) + vendor=omron + ;; + -genix*) + vendor=ns + ;; + -mvs* | -opened*) + vendor=ibm + ;; + -ptx*) + vendor=sequent + ;; + -vxsim* | -vxworks*) + vendor=wrs + ;; + -aux*) + vendor=apple + ;; + -hms*) + vendor=hitachi + ;; + -mpw* | -macos*) + vendor=apple + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + vendor=atari + ;; + -vos*) + vendor=stratus + ;; + esac + basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` + ;; +esac + +echo $basic_machine$os +exit 0 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/configure b/configure new file mode 100755 index 000000000..7d53263c7 --- /dev/null +++ b/configure @@ -0,0 +1,2443 @@ +#! /bin/sh + +# Guess values for system-dependent variables and create Makefiles. +# Generated automatically using autoconf version 2.13 +# Copyright (C) 1992, 93, 94, 95, 96 Free Software Foundation, Inc. +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. + +# Defaults: +ac_help= +ac_default_prefix=/usr/local +# Any additions from configure.in: + +# Initialize some variables set by options. +# The variables have the same names as the options, with +# dashes changed to underlines. 
+build=NONE +cache_file=./config.cache +exec_prefix=NONE +host=NONE +no_create= +nonopt=NONE +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +target=NONE +verbose= +x_includes=NONE +x_libraries=NONE +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datadir='${prefix}/share' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +libdir='${exec_prefix}/lib' +includedir='${prefix}/include' +oldincludedir='/usr/include' +infodir='${prefix}/info' +mandir='${prefix}/man' + +# Initialize some other variables. +subdirs= +MFLAGS= MAKEFLAGS= +SHELL=${CONFIG_SHELL-/bin/sh} +# Maximum number of lines to put in a shell here document. +ac_max_here_lines=12 + +ac_prev= +for ac_option +do + + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval "$ac_prev=\$ac_option" + ac_prev= + continue + fi + + case "$ac_option" in + -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;; + *) ac_optarg= ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case "$ac_option" in + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir="$ac_optarg" ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build="$ac_optarg" ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file="$ac_optarg" ;; + + -datadir | --datadir | --datadi | --datad | --data | --dat | --da) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ + | --da=*) + datadir="$ac_optarg" ;; + + -disable-* | --disable-*) + ac_feature=`echo $ac_option|sed -e 's/-*disable-//'` + # Reject names that are not valid shell variable names. + if test -n "`echo $ac_feature| sed 's/[-a-zA-Z0-9_]//g'`"; then + { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } + fi + ac_feature=`echo $ac_feature| sed 's/-/_/g'` + eval "enable_${ac_feature}=no" ;; + + -enable-* | --enable-*) + ac_feature=`echo $ac_option|sed -e 's/-*enable-//' -e 's/=.*//'` + # Reject names that are not valid shell variable names. 
+ if test -n "`echo $ac_feature| sed 's/[-_a-zA-Z0-9]//g'`"; then + { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } + fi + ac_feature=`echo $ac_feature| sed 's/-/_/g'` + case "$ac_option" in + *=*) ;; + *) ac_optarg=yes ;; + esac + eval "enable_${ac_feature}='$ac_optarg'" ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix="$ac_optarg" ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. + with_gas=yes ;; + + -help | --help | --hel | --he) + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat << EOF +Usage: configure [options] [host] +Options: [defaults in brackets after descriptions] +Configuration: + --cache-file=FILE cache test results in FILE + --help print this message + --no-create do not create output files + --quiet, --silent do not print \`checking...' 
messages + --version print the version of autoconf that created configure +Directory and file names: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [same as prefix] + --bindir=DIR user executables in DIR [EPREFIX/bin] + --sbindir=DIR system admin executables in DIR [EPREFIX/sbin] + --libexecdir=DIR program executables in DIR [EPREFIX/libexec] + --datadir=DIR read-only architecture-independent data in DIR + [PREFIX/share] + --sysconfdir=DIR read-only single-machine data in DIR [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data in DIR + [PREFIX/com] + --localstatedir=DIR modifiable single-machine data in DIR [PREFIX/var] + --libdir=DIR object code libraries in DIR [EPREFIX/lib] + --includedir=DIR C header files in DIR [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc in DIR [/usr/include] + --infodir=DIR info documentation in DIR [PREFIX/info] + --mandir=DIR man documentation in DIR [PREFIX/man] + --srcdir=DIR find the sources in DIR [configure dir or ..] 
+ --program-prefix=PREFIX prepend PREFIX to installed program names + --program-suffix=SUFFIX append SUFFIX to installed program names + --program-transform-name=PROGRAM + run sed PROGRAM on installed program names +EOF + cat << EOF +Host type: + --build=BUILD configure for building on BUILD [BUILD=HOST] + --host=HOST configure for HOST [guessed] + --target=TARGET configure for TARGET [TARGET=HOST] +Features and packages: + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --x-includes=DIR X include files are in DIR + --x-libraries=DIR X library files are in DIR +EOF + if test -n "$ac_help"; then + echo "--enable and --with options recognized:$ac_help" + fi + exit 0 ;; + + -host | --host | --hos | --ho) + ac_prev=host ;; + -host=* | --host=* | --hos=* | --ho=*) + host="$ac_optarg" ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir="$ac_optarg" ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir="$ac_optarg" ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir="$ac_optarg" ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir="$ac_optarg" ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | 
--localst \ + | --locals | --local | --loca | --loc | --lo) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* \ + | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) + localstatedir="$ac_optarg" ;; + + -mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir="$ac_optarg" ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir="$ac_optarg" ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix="$ac_optarg" ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix="$ac_optarg" ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | 
--program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix="$ac_optarg" ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | --program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name="$ac_optarg" ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir="$ac_optarg" ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir="$ac_optarg" ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site="$ac_optarg" ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + 
srcdir="$ac_optarg" ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir="$ac_optarg" ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target="$ac_optarg" ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers) + echo "configure generated by autoconf version 2.13" + exit 0 ;; + + -with-* | --with-*) + ac_package=`echo $ac_option|sed -e 's/-*with-//' -e 's/=.*//'` + # Reject names that are not valid shell variable names. + if test -n "`echo $ac_package| sed 's/[-_a-zA-Z0-9]//g'`"; then + { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } + fi + ac_package=`echo $ac_package| sed 's/-/_/g'` + case "$ac_option" in + *=*) ;; + *) ac_optarg=yes ;; + esac + eval "with_${ac_package}='$ac_optarg'" ;; + + -without-* | --without-*) + ac_package=`echo $ac_option|sed -e 's/-*without-//'` + # Reject names that are not valid shell variable names. + if test -n "`echo $ac_package| sed 's/[-a-zA-Z0-9_]//g'`"; then + { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } + fi + ac_package=`echo $ac_package| sed 's/-/_/g'` + eval "with_${ac_package}=no" ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes="$ac_optarg" ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries="$ac_optarg" ;; + + -*) { echo "configure: error: $ac_option: invalid option; use --help to show usage" 1>&2; exit 1; } + ;; + + *) + if test -n "`echo $ac_option| sed 's/[-a-z0-9.]//g'`"; then + echo "configure: warning: $ac_option: invalid host type" 1>&2 + fi + if test "x$nonopt" != xNONE; then + { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; } + fi + nonopt="$ac_option" + ;; + + esac +done + +if test -n "$ac_prev"; then + { echo "configure: error: missing argument to --`echo $ac_prev | sed 's/_/-/g'`" 1>&2; exit 1; } +fi + +trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 + +# File descriptor usage: +# 0 standard input +# 1 file creation +# 2 errors and warnings +# 3 some systems may open it to /dev/tty +# 4 used on the Kubota Titan +# 6 checking for... messages and results +# 5 compiler messages saved in config.log +if test "$silent" = yes; then + exec 6>/dev/null +else + exec 6>&1 +fi +exec 5>./config.log + +echo "\ +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. +" 1>&5 + +# Strip out --no-create and --no-recursion so they do not pile up. +# Also quote any args containing shell metacharacters. 
+ac_configure_args= +for ac_arg +do + case "$ac_arg" in + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c) ;; + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) ;; + *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*) + ac_configure_args="$ac_configure_args '$ac_arg'" ;; + *) ac_configure_args="$ac_configure_args $ac_arg" ;; + esac +done + +# NLS nuisances. +# Only set these to C if already set. These must not be set unconditionally +# because not all systems understand e.g. LANG=C (notably SCO). +# Fixing LC_MESSAGES prevents Solaris sh from translating var values in `set'! +# Non-C LC_CTYPE values break the ctype check. +if test "${LANG+set}" = set; then LANG=C; export LANG; fi +if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi +if test "${LC_MESSAGES+set}" = set; then LC_MESSAGES=C; export LC_MESSAGES; fi +if test "${LC_CTYPE+set}" = set; then LC_CTYPE=C; export LC_CTYPE; fi + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -rf conftest* confdefs.h +# AIX cpp loses on an empty file, so make sure it contains at least a newline. +echo > confdefs.h + +# A filename unique to this package, relative to the directory that +# configure is in, which we can look for to find out if srcdir is correct. +ac_unique_file=vg_clientmalloc.c + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then its parent. + ac_prog=$0 + ac_confdir=`echo $ac_prog|sed 's%/[^/][^/]*$%%'` + test "x$ac_confdir" = "x$ac_prog" && ac_confdir=. + srcdir=$ac_confdir + if test ! -r $srcdir/$ac_unique_file; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r $srcdir/$ac_unique_file; then + if test "$ac_srcdir_defaulted" = yes; then + { echo "configure: error: can not find sources in $ac_confdir or .." 
1>&2; exit 1; } + else + { echo "configure: error: can not find sources in $srcdir" 1>&2; exit 1; } + fi +fi +srcdir=`echo "${srcdir}" | sed 's%\([^/]\)/*$%\1%'` + +# Prefer explicitly selected file to automatically selected ones. +if test -z "$CONFIG_SITE"; then + if test "x$prefix" != xNONE; then + CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site" + else + CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" + fi +fi +for ac_site_file in $CONFIG_SITE; do + if test -r "$ac_site_file"; then + echo "loading site script $ac_site_file" + . "$ac_site_file" + fi +done + +if test -r "$cache_file"; then + echo "loading cache $cache_file" + . $cache_file +else + echo "creating cache $cache_file" + > $cache_file +fi + +ac_ext=c +# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. +ac_cpp='$CPP $CPPFLAGS' +ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' +ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' +cross_compiling=$ac_cv_prog_cc_cross + +ac_exeext= +ac_objext=o +if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then + # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu. + if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then + ac_n= ac_c=' +' ac_t=' ' + else + ac_n=-n ac_c= ac_t= + fi +else + ac_n= ac_c='\c' ac_t= +fi + + + + + +ac_aux_dir= +for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do + if test -f $ac_dir/install-sh; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f $ac_dir/install.sh; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + fi +done +if test -z "$ac_aux_dir"; then + { echo "configure: error: can not find install-sh or install.sh in $srcdir $srcdir/.. $srcdir/../.." 
1>&2; exit 1; } +fi +ac_config_guess=$ac_aux_dir/config.guess +ac_config_sub=$ac_aux_dir/config.sub +ac_configure=$ac_aux_dir/configure # This should be Cygnus configure. + +# Find a good install program. We prefer a C program (faster), +# so one script is as good as another. But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# ./install, which can be erroneously created by make from ./install.sh. +echo $ac_n "checking for a BSD compatible install""... $ac_c" 1>&6 +echo "configure:559: checking for a BSD compatible install" >&5 +if test -z "$INSTALL"; then +if eval "test \"`echo '$''{'ac_cv_path_install'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS=":" + for ac_dir in $PATH; do + # Account for people who put trailing slashes in PATH elements. + case "$ac_dir/" in + /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. + for ac_prog in ginstall scoinst install; do + if test -f $ac_dir/$ac_prog; then + if test $ac_prog = install && + grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. + : + else + ac_cv_path_install="$ac_dir/$ac_prog -c" + break 2 + fi + fi + done + ;; + esac + done + IFS="$ac_save_IFS" + +fi + if test "${ac_cv_path_install+set}" = set; then + INSTALL="$ac_cv_path_install" + else + # As a last resort, use the slow shell script. 
We don't cache a + # path for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the path is relative. + INSTALL="$ac_install_sh" + fi +fi +echo "$ac_t""$INSTALL" 1>&6 + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. +test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL_PROGRAM}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + +echo $ac_n "checking whether build environment is sane""... $ac_c" 1>&6 +echo "configure:612: checking whether build environment is sane" >&5 +# Just in case +sleep 1 +echo timestamp > conftestfile +# Do `set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + set X `ls -Lt $srcdir/configure conftestfile 2> /dev/null` + if test "$*" = "X"; then + # -L didn't work. + set X `ls -t $srcdir/configure conftestfile` + fi + if test "$*" != "X $srcdir/configure conftestfile" \ + && test "$*" != "X conftestfile $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + { echo "configure: error: ls -t appears to fail. Make sure there is not a broken +alias in your environment" 1>&2; exit 1; } + fi + + test "$2" = conftestfile + ) +then + # Ok. + : +else + { echo "configure: error: newly created file is older than distributed files! 
+Check your system clock" 1>&2; exit 1; } +fi +rm -f conftest* +echo "$ac_t""yes" 1>&6 +if test "$program_transform_name" = s,x,x,; then + program_transform_name= +else + # Double any \ or $. echo might interpret backslashes. + cat <<\EOF_SED > conftestsed +s,\\,\\\\,g; s,\$,$$,g +EOF_SED + program_transform_name="`echo $program_transform_name|sed -f conftestsed`" + rm -f conftestsed +fi +test "$program_prefix" != NONE && + program_transform_name="s,^,${program_prefix},; $program_transform_name" +# Use a double $ so make ignores it. +test "$program_suffix" != NONE && + program_transform_name="s,\$\$,${program_suffix},; $program_transform_name" + +# sed with no file args requires a program. +test "$program_transform_name" = "" && program_transform_name="s,x,x," + +echo $ac_n "checking whether ${MAKE-make} sets \${MAKE}""... $ac_c" 1>&6 +echo "configure:669: checking whether ${MAKE-make} sets \${MAKE}" >&5 +set dummy ${MAKE-make}; ac_make=`echo "$2" | sed 'y%./+-%__p_%'` +if eval "test \"`echo '$''{'ac_cv_prog_make_${ac_make}_set'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftestmake <<\EOF +all: + @echo 'ac_maketemp="${MAKE}"' +EOF +# GNU make sometimes prints "make[1]: Entering...", which would confuse us. 
+eval `${MAKE-make} -f conftestmake 2>/dev/null | grep temp=` +if test -n "$ac_maketemp"; then + eval ac_cv_prog_make_${ac_make}_set=yes +else + eval ac_cv_prog_make_${ac_make}_set=no +fi +rm -f conftestmake +fi +if eval "test \"`echo '$ac_cv_prog_make_'${ac_make}_set`\" = yes"; then + echo "$ac_t""yes" 1>&6 + SET_MAKE= +else + echo "$ac_t""no" 1>&6 + SET_MAKE="MAKE=${MAKE-make}" +fi + + +PACKAGE=valgrind + +VERSION=20020317 + +if test "`cd $srcdir && pwd`" != "`pwd`" && test -f $srcdir/config.status; then + { echo "configure: error: source directory already configured; run "make distclean" there first" 1>&2; exit 1; } +fi +cat >> confdefs.h <> confdefs.h <&6 +echo "configure:715: checking for working aclocal" >&5 +# Run test in a subshell; some versions of sh will print an error if +# an executable is not found, even if stderr is redirected. +# Redirect stdin to placate older versions of autoconf. Sigh. +if (aclocal --version) < /dev/null > /dev/null 2>&1; then + ACLOCAL=aclocal + echo "$ac_t""found" 1>&6 +else + ACLOCAL="$missing_dir/missing aclocal" + echo "$ac_t""missing" 1>&6 +fi + +echo $ac_n "checking for working autoconf""... $ac_c" 1>&6 +echo "configure:728: checking for working autoconf" >&5 +# Run test in a subshell; some versions of sh will print an error if +# an executable is not found, even if stderr is redirected. +# Redirect stdin to placate older versions of autoconf. Sigh. +if (autoconf --version) < /dev/null > /dev/null 2>&1; then + AUTOCONF=autoconf + echo "$ac_t""found" 1>&6 +else + AUTOCONF="$missing_dir/missing autoconf" + echo "$ac_t""missing" 1>&6 +fi + +echo $ac_n "checking for working automake""... $ac_c" 1>&6 +echo "configure:741: checking for working automake" >&5 +# Run test in a subshell; some versions of sh will print an error if +# an executable is not found, even if stderr is redirected. +# Redirect stdin to placate older versions of autoconf. Sigh. 
+if (automake --version) < /dev/null > /dev/null 2>&1; then + AUTOMAKE=automake + echo "$ac_t""found" 1>&6 +else + AUTOMAKE="$missing_dir/missing automake" + echo "$ac_t""missing" 1>&6 +fi + +echo $ac_n "checking for working autoheader""... $ac_c" 1>&6 +echo "configure:754: checking for working autoheader" >&5 +# Run test in a subshell; some versions of sh will print an error if +# an executable is not found, even if stderr is redirected. +# Redirect stdin to placate older versions of autoconf. Sigh. +if (autoheader --version) < /dev/null > /dev/null 2>&1; then + AUTOHEADER=autoheader + echo "$ac_t""found" 1>&6 +else + AUTOHEADER="$missing_dir/missing autoheader" + echo "$ac_t""missing" 1>&6 +fi + +echo $ac_n "checking for working makeinfo""... $ac_c" 1>&6 +echo "configure:767: checking for working makeinfo" >&5 +# Run test in a subshell; some versions of sh will print an error if +# an executable is not found, even if stderr is redirected. +# Redirect stdin to placate older versions of autoconf. Sigh. +if (makeinfo --version) < /dev/null > /dev/null 2>&1; then + MAKEINFO=makeinfo + echo "$ac_t""found" 1>&6 +else + MAKEINFO="$missing_dir/missing makeinfo" + echo "$ac_t""missing" 1>&6 +fi + + + +# Checks for programs. +echo $ac_n "checking whether ln -s works""... $ac_c" 1>&6 +echo "configure:783: checking whether ln -s works" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_LN_S'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + rm -f conftestdata +if ln -s X conftestdata 2>/dev/null +then + rm -f conftestdata + ac_cv_prog_LN_S="ln -s" +else + ac_cv_prog_LN_S=ln +fi +fi +LN_S="$ac_cv_prog_LN_S" +if test "$ac_cv_prog_LN_S" = "ln -s"; then + echo "$ac_t""yes" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +# Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +echo $ac_n "checking for $ac_word""... 
$ac_c" 1>&6 +echo "configure:806: checking for $ac_word" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_prog_CC="gcc" + break + fi + done + IFS="$ac_save_ifs" +fi +fi +CC="$ac_cv_prog_CC" +if test -n "$CC"; then + echo "$ac_t""$CC" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +echo "configure:836: checking for $ac_word" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_prog_rejected=no + ac_dummy="$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + if test "$ac_dir/$ac_word" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + break + fi + done + IFS="$ac_save_ifs" +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# -gt 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + set dummy "$ac_dir/$ac_word" "$@" + shift + ac_cv_prog_CC="$@" + fi +fi +fi +fi +CC="$ac_cv_prog_CC" +if test -n "$CC"; then + echo "$ac_t""$CC" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + + if test -z "$CC"; then + case "`uname -s`" in + *win32* | *WIN32*) + # Extract the first word of "cl", so it can be a program name with args. +set dummy cl; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +echo "configure:887: checking for $ac_word" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_prog_CC="cl" + break + fi + done + IFS="$ac_save_ifs" +fi +fi +CC="$ac_cv_prog_CC" +if test -n "$CC"; then + echo "$ac_t""$CC" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + ;; + esac + fi + test -z "$CC" && { echo "configure: error: no acceptable cc found in \$PATH" 1>&2; exit 1; } +fi + +echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works""... $ac_c" 1>&6 +echo "configure:919: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 + +ac_ext=c +# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. +ac_cpp='$CPP $CPPFLAGS' +ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' +ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' +cross_compiling=$ac_cv_prog_cc_cross + +cat > conftest.$ac_ext << EOF + +#line 930 "configure" +#include "confdefs.h" + +main(){return(0);} +EOF +if { (eval echo configure:935: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + ac_cv_prog_cc_works=yes + # If we can't run a trivial program, we are probably using a cross compiler. 
+ if (./conftest; exit) 2>/dev/null; then + ac_cv_prog_cc_cross=no + else + ac_cv_prog_cc_cross=yes + fi +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + ac_cv_prog_cc_works=no +fi +rm -fr conftest* +ac_ext=c +# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. +ac_cpp='$CPP $CPPFLAGS' +ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' +ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' +cross_compiling=$ac_cv_prog_cc_cross + +echo "$ac_t""$ac_cv_prog_cc_works" 1>&6 +if test $ac_cv_prog_cc_works = no; then + { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 1; } +fi +echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler""... $ac_c" 1>&6 +echo "configure:961: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 +echo "$ac_t""$ac_cv_prog_cc_cross" 1>&6 +cross_compiling=$ac_cv_prog_cc_cross + +echo $ac_n "checking whether we are using GNU C""... $ac_c" 1>&6 +echo "configure:966: checking whether we are using GNU C" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.c <&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then + ac_cv_prog_gcc=yes +else + ac_cv_prog_gcc=no +fi +fi + +echo "$ac_t""$ac_cv_prog_gcc" 1>&6 + +if test $ac_cv_prog_gcc = yes; then + GCC=yes +else + GCC= +fi + +ac_test_CFLAGS="${CFLAGS+set}" +ac_save_CFLAGS="$CFLAGS" +CFLAGS= +echo $ac_n "checking whether ${CC-cc} accepts -g""... 
$ac_c" 1>&6 +echo "configure:994: checking whether ${CC-cc} accepts -g" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_cc_g'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + echo 'void f(){}' > conftest.c +if test -z "`${CC-cc} -g -c conftest.c 2>&1`"; then + ac_cv_prog_cc_g=yes +else + ac_cv_prog_cc_g=no +fi +rm -f conftest* + +fi + +echo "$ac_t""$ac_cv_prog_cc_g" 1>&6 +if test "$ac_test_CFLAGS" = set; then + CFLAGS="$ac_save_CFLAGS" +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi + +echo $ac_n "checking how to run the C preprocessor""... $ac_c" 1>&6 +echo "configure:1026: checking how to run the C preprocessor" >&5 +# On Suns, sometimes $CPP names a directory. +if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then +if eval "test \"`echo '$''{'ac_cv_prog_CPP'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + # This must be in double quotes, not single quotes, because CPP may get + # substituted into the Makefile and "${CC-cc}" will confuse make. + CPP="${CC-cc} -E" + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. 
+ cat > conftest.$ac_ext < +Syntax Error +EOF +ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" +{ (eval echo configure:1047: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` +if test -z "$ac_err"; then + : +else + echo "$ac_err" >&5 + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + CPP="${CC-cc} -E -traditional-cpp" + cat > conftest.$ac_ext < +Syntax Error +EOF +ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" +{ (eval echo configure:1064: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` +if test -z "$ac_err"; then + : +else + echo "$ac_err" >&5 + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + CPP="${CC-cc} -nologo -E" + cat > conftest.$ac_ext < +Syntax Error +EOF +ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" +{ (eval echo configure:1081: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` +if test -z "$ac_err"; then + : +else + echo "$ac_err" >&5 + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + CPP=/lib/cpp +fi +rm -f conftest* +fi +rm -f conftest* +fi +rm -f conftest* + ac_cv_prog_CPP="$CPP" +fi + CPP="$ac_cv_prog_CPP" +else + ac_cv_prog_CPP="$CPP" +fi +echo "$ac_t""$CPP" 1>&6 + +# Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +echo "configure:1108: checking for $ac_word" >&5 +if eval "test \"`echo '$''{'ac_cv_prog_RANLIB'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. 
+else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_prog_RANLIB="ranlib" + break + fi + done + IFS="$ac_save_ifs" + test -z "$ac_cv_prog_RANLIB" && ac_cv_prog_RANLIB=":" +fi +fi +RANLIB="$ac_cv_prog_RANLIB" +if test -n "$RANLIB"; then + echo "$ac_t""$RANLIB" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + + +CFLAGS="-Winline -Wall -Wshadow -O -fomit-frame-pointer -g" + + +# Checks for the platform + +# Make sure we can run config.sub. +if ${CONFIG_SHELL-/bin/sh} $ac_config_sub sun4 >/dev/null 2>&1; then : +else { echo "configure: error: can not run $ac_config_sub" 1>&2; exit 1; } +fi + +echo $ac_n "checking host system type""... $ac_c" 1>&6 +echo "configure:1147: checking host system type" >&5 + +host_alias=$host +case "$host_alias" in +NONE) + case $nonopt in + NONE) + if host_alias=`${CONFIG_SHELL-/bin/sh} $ac_config_guess`; then : + else { echo "configure: error: can not guess host type; you must specify one" 1>&2; exit 1; } + fi ;; + *) host_alias=$nonopt ;; + esac ;; +esac + +host=`${CONFIG_SHELL-/bin/sh} $ac_config_sub $host_alias` +host_cpu=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'` +host_vendor=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'` +host_os=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'` +echo "$ac_t""$host" 1>&6 + + +echo $ac_n "checking for a supported CPU""... $ac_c" 1>&6 +echo "configure:1169: checking for a supported CPU" >&5 + +case ${host_cpu} in + i?86) + echo "$ac_t""ok (${host_cpu})" 1>&6 + ;; + + *) + echo "$ac_t""no (${host_cpu})" 1>&6 + { echo "configure: error: Valgrind is ix86 specific. Sorry" 1>&2; exit 1; } + ;; +esac + +echo $ac_n "checking for a supported OS""... 
$ac_c" 1>&6 +echo "configure:1183: checking for a supported OS" >&5 + +case ${host_os} in + *linux*) + echo "$ac_t""ok (${host_os})" 1>&6 + ;; + + *) + echo "$ac_t""no (${host_os})" 1>&6 + { echo "configure: error: Valgrind is Linux specific. Sorry" 1>&2; exit 1; } + ;; +esac + + +# Ok, this is linux. Check the kernel version +echo $ac_n "checking for the kernel version""... $ac_c" 1>&6 +echo "configure:1199: checking for the kernel version" >&5 + +kernel=`uname -r` + +case ${kernel} in + 2.4.*) + echo "$ac_t""2.4 family (${kernel})" 1>&6 + cat >> confdefs.h <<\EOF +#define KERNEL_2_4 1 +EOF + + DEFAULT_SUPP="linux24.supp" + ;; + + 2.2.*) + echo "$ac_t""2.2 family (${kernel})" 1>&6 + cat >> confdefs.h <<\EOF +#define KERNEL_2_2 1 +EOF + + DEFAULT_SUPP="linux22.supp" + ;; + + *) + echo "$ac_t""unsupported (${kernel})" 1>&6 + { echo "configure: error: Valgrind works on kernels 2.2 and 2.4" 1>&2; exit 1; } + ;; +esac + + + + +# Ok, this is linux. Check the kernel version +echo $ac_n "checking the glibc version""... 
$ac_c" 1>&6 +echo "configure:1233: checking the glibc version" >&5 + +glibc="" + +cat > conftest.$ac_ext < +#ifdef __GNU_LIBRARY__ + #if (__GLIBC__ == 2 && __GLIBC_MINOR__ == 1) + GLIBC_21 + #endif +#endif + +EOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + egrep "GLIBC_21" >/dev/null 2>&1; then + rm -rf conftest* + glibc="2.1" +fi +rm -f conftest* + + +cat > conftest.$ac_ext < +#ifdef __GNU_LIBRARY__ + #if (__GLIBC__ == 2 && __GLIBC_MINOR__ == 2) + GLIBC_22 + #endif +#endif + +EOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + egrep "GLIBC_22" >/dev/null 2>&1; then + rm -rf conftest* + glibc="2.2" +fi +rm -f conftest* + + +case ${glibc} in + 2.1) + echo "$ac_t""2.1 family" 1>&6 + cat >> confdefs.h <<\EOF +#define GLIBC_2_1 1 +EOF + + ;; + + 2.2) + echo "$ac_t""2.2 family" 1>&6 + cat >> confdefs.h <<\EOF +#define GLIBC_2_2 1 +EOF + + ;; + + *) + echo "$ac_t""unsupported version" 1>&6 + { echo "configure: error: Valgrind requires the glibc version 2.1 or 2.2" 1>&2; exit 1; } + ;; +esac + +# try to detect the XFree version + +# Checks for header files. +echo $ac_n "checking for ANSI C header files""... $ac_c" 1>&6 +echo "configure:1304: checking for ANSI C header files" >&5 +if eval "test \"`echo '$''{'ac_cv_header_stdc'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext < +#include +#include +#include +EOF +ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" +{ (eval echo configure:1317: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` +if test -z "$ac_err"; then + rm -rf conftest* + ac_cv_header_stdc=yes +else + echo "$ac_err" >&5 + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + ac_cv_header_stdc=no +fi +rm -f conftest* + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. 
+cat > conftest.$ac_ext < +EOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + egrep "memchr" >/dev/null 2>&1; then + : +else + rm -rf conftest* + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. +cat > conftest.$ac_ext < +EOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + egrep "free" >/dev/null 2>&1; then + : +else + rm -rf conftest* + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. +if test "$cross_compiling" = yes; then + : +else + cat > conftest.$ac_ext < +#define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +#define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int main () { int i; for (i = 0; i < 256; i++) +if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) exit(2); +exit (0); } + +EOF +if { (eval echo configure:1384: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +then + : +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -fr conftest* + ac_cv_header_stdc=no +fi +rm -fr conftest* +fi + +fi +fi + +echo "$ac_t""$ac_cv_header_stdc" 1>&6 +if test $ac_cv_header_stdc = yes; then + cat >> confdefs.h <<\EOF +#define STDC_HEADERS 1 +EOF + +fi + +for ac_hdr in fcntl.h malloc.h stdlib.h string.h sys/socket.h sys/statfs.h sys/time.h termios.h unistd.h utime.h +do +ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` +echo $ac_n "checking for $ac_hdr""... 
$ac_c" 1>&6 +echo "configure:1411: checking for $ac_hdr" >&5 +if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext < +EOF +ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" +{ (eval echo configure:1421: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` +if test -z "$ac_err"; then + rm -rf conftest* + eval "ac_cv_header_$ac_safe=yes" +else + echo "$ac_err" >&5 + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + eval "ac_cv_header_$ac_safe=no" +fi +rm -f conftest* +fi +if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then + echo "$ac_t""yes" 1>&6 + ac_tr_hdr=HAVE_`echo $ac_hdr | sed 'y%abcdefghijklmnopqrstuvwxyz./-%ABCDEFGHIJKLMNOPQRSTUVWXYZ___%'` + cat >> confdefs.h <&6 +fi +done + + +# Checks for typedefs, structures, and compiler characteristics. +echo $ac_n "checking for working const""... $ac_c" 1>&6 +echo "configure:1450: checking for working const" >&5 +if eval "test \"`echo '$''{'ac_cv_c_const'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext <j = 5; +} +{ /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ + const int foo = 10; +} + +; return 0; } +EOF +if { (eval echo configure:1504: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then + rm -rf conftest* + ac_cv_c_const=yes +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + ac_cv_c_const=no +fi +rm -f conftest* +fi + +echo "$ac_t""$ac_cv_c_const" 1>&6 +if test $ac_cv_c_const = no; then + cat >> confdefs.h <<\EOF +#define const +EOF + +fi + +echo $ac_n "checking for uid_t in sys/types.h""... 
$ac_c" 1>&6 +echo "configure:1525: checking for uid_t in sys/types.h" >&5 +if eval "test \"`echo '$''{'ac_cv_type_uid_t'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext < +EOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + egrep "uid_t" >/dev/null 2>&1; then + rm -rf conftest* + ac_cv_type_uid_t=yes +else + rm -rf conftest* + ac_cv_type_uid_t=no +fi +rm -f conftest* + +fi + +echo "$ac_t""$ac_cv_type_uid_t" 1>&6 +if test $ac_cv_type_uid_t = no; then + cat >> confdefs.h <<\EOF +#define uid_t int +EOF + + cat >> confdefs.h <<\EOF +#define gid_t int +EOF + +fi + +echo $ac_n "checking for off_t""... $ac_c" 1>&6 +echo "configure:1559: checking for off_t" >&5 +if eval "test \"`echo '$''{'ac_cv_type_off_t'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext < +#if STDC_HEADERS +#include +#include +#endif +EOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + egrep "(^|[^a-zA-Z_0-9])off_t[^a-zA-Z_0-9]" >/dev/null 2>&1; then + rm -rf conftest* + ac_cv_type_off_t=yes +else + rm -rf conftest* + ac_cv_type_off_t=no +fi +rm -f conftest* + +fi +echo "$ac_t""$ac_cv_type_off_t" 1>&6 +if test $ac_cv_type_off_t = no; then + cat >> confdefs.h <<\EOF +#define off_t long +EOF + +fi + +echo $ac_n "checking for size_t""... $ac_c" 1>&6 +echo "configure:1592: checking for size_t" >&5 +if eval "test \"`echo '$''{'ac_cv_type_size_t'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext < +#if STDC_HEADERS +#include +#include +#endif +EOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + egrep "(^|[^a-zA-Z_0-9])size_t[^a-zA-Z_0-9]" >/dev/null 2>&1; then + rm -rf conftest* + ac_cv_type_size_t=yes +else + rm -rf conftest* + ac_cv_type_size_t=no +fi +rm -f conftest* + +fi +echo "$ac_t""$ac_cv_type_size_t" 1>&6 +if test $ac_cv_type_size_t = no; then + cat >> confdefs.h <<\EOF +#define size_t unsigned +EOF + +fi + +echo $ac_n "checking whether time.h and sys/time.h may both be included""... 
$ac_c" 1>&6 +echo "configure:1625: checking whether time.h and sys/time.h may both be included" >&5 +if eval "test \"`echo '$''{'ac_cv_header_time'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext < +#include +#include +int main() { +struct tm *tp; +; return 0; } +EOF +if { (eval echo configure:1639: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then + rm -rf conftest* + ac_cv_header_time=yes +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + ac_cv_header_time=no +fi +rm -f conftest* +fi + +echo "$ac_t""$ac_cv_header_time" 1>&6 +if test $ac_cv_header_time = yes; then + cat >> confdefs.h <<\EOF +#define TIME_WITH_SYS_TIME 1 +EOF + +fi + + +# Checks for library functions. +echo $ac_n "checking for 8-bit clean memcmp""... $ac_c" 1>&6 +echo "configure:1662: checking for 8-bit clean memcmp" >&5 +if eval "test \"`echo '$''{'ac_cv_func_memcmp_clean'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test "$cross_compiling" = yes; then + ac_cv_func_memcmp_clean=no +else + cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +then + ac_cv_func_memcmp_clean=yes +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -fr conftest* + ac_cv_func_memcmp_clean=no +fi +rm -fr conftest* +fi + +fi + +echo "$ac_t""$ac_cv_func_memcmp_clean" 1>&6 +test $ac_cv_func_memcmp_clean = no && LIBOBJS="$LIBOBJS memcmp.${ac_objext}" + +for ac_hdr in unistd.h +do +ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` +echo $ac_n "checking for $ac_hdr""... 
$ac_c" 1>&6 +echo "configure:1701: checking for $ac_hdr" >&5 +if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext < +EOF +ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" +{ (eval echo configure:1711: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` +if test -z "$ac_err"; then + rm -rf conftest* + eval "ac_cv_header_$ac_safe=yes" +else + echo "$ac_err" >&5 + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + eval "ac_cv_header_$ac_safe=no" +fi +rm -f conftest* +fi +if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then + echo "$ac_t""yes" 1>&6 + ac_tr_hdr=HAVE_`echo $ac_hdr | sed 'y%abcdefghijklmnopqrstuvwxyz./-%ABCDEFGHIJKLMNOPQRSTUVWXYZ___%'` + cat >> confdefs.h <&6 +fi +done + +for ac_func in getpagesize +do +echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 +echo "configure:1740: checking for $ac_func" >&5 +if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext < +/* Override any gcc2 internal prototype to avoid an error. */ +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char $ac_func(); + +int main() { + +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined (__stub_$ac_func) || defined (__stub___$ac_func) +choke me +#else +$ac_func(); +#endif + +; return 0; } +EOF +if { (eval echo configure:1768: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + eval "ac_cv_func_$ac_func=yes" +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + eval "ac_cv_func_$ac_func=no" +fi +rm -f conftest* +fi + +if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then + echo "$ac_t""yes" 1>&6 + ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` + cat >> confdefs.h <&6 +fi +done + +echo $ac_n "checking for working mmap""... $ac_c" 1>&6 +echo "configure:1793: checking for working mmap" >&5 +if eval "test \"`echo '$''{'ac_cv_func_mmap_fixed_mapped'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test "$cross_compiling" = yes; then + ac_cv_func_mmap_fixed_mapped=no +else + cat > conftest.$ac_ext < +#include +#include + +/* This mess was copied from the GNU getpagesize.h. */ +#ifndef HAVE_GETPAGESIZE +# ifdef HAVE_UNISTD_H +# include +# endif + +/* Assume that all systems that can run configure have sys/param.h. 
*/ +# ifndef HAVE_SYS_PARAM_H +# define HAVE_SYS_PARAM_H 1 +# endif + +# ifdef _SC_PAGESIZE +# define getpagesize() sysconf(_SC_PAGESIZE) +# else /* no _SC_PAGESIZE */ +# ifdef HAVE_SYS_PARAM_H +# include +# ifdef EXEC_PAGESIZE +# define getpagesize() EXEC_PAGESIZE +# else /* no EXEC_PAGESIZE */ +# ifdef NBPG +# define getpagesize() NBPG * CLSIZE +# ifndef CLSIZE +# define CLSIZE 1 +# endif /* no CLSIZE */ +# else /* no NBPG */ +# ifdef NBPC +# define getpagesize() NBPC +# else /* no NBPC */ +# ifdef PAGESIZE +# define getpagesize() PAGESIZE +# endif /* PAGESIZE */ +# endif /* no NBPC */ +# endif /* no NBPG */ +# endif /* no EXEC_PAGESIZE */ +# else /* no HAVE_SYS_PARAM_H */ +# define getpagesize() 8192 /* punt totally */ +# endif /* no HAVE_SYS_PARAM_H */ +# endif /* no _SC_PAGESIZE */ + +#endif /* no HAVE_GETPAGESIZE */ + +#ifdef __cplusplus +extern "C" { void *malloc(unsigned); } +#else +char *malloc(); +#endif + +int +main() +{ + char *data, *data2, *data3; + int i, pagesize; + int fd; + + pagesize = getpagesize(); + + /* + * First, make a file with some known garbage in it. + */ + data = malloc(pagesize); + if (!data) + exit(1); + for (i = 0; i < pagesize; ++i) + *(data + i) = rand(); + umask(0); + fd = creat("conftestmmap", 0600); + if (fd < 0) + exit(1); + if (write(fd, data, pagesize) != pagesize) + exit(1); + close(fd); + + /* + * Next, try to mmap the file at a fixed address which + * already has something else allocated at it. If we can, + * also make sure that we see the same garbage. 
+ */ + fd = open("conftestmmap", O_RDWR); + if (fd < 0) + exit(1); + data2 = malloc(2 * pagesize); + if (!data2) + exit(1); + data2 += (pagesize - ((int) data2 & (pagesize - 1))) & (pagesize - 1); + if (data2 != mmap(data2, pagesize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_FIXED, fd, 0L)) + exit(1); + for (i = 0; i < pagesize; ++i) + if (*(data + i) != *(data2 + i)) + exit(1); + + /* + * Finally, make sure that changes to the mapped area + * do not percolate back to the file as seen by read(). + * (This is a bug on some variants of i386 svr4.0.) + */ + for (i = 0; i < pagesize; ++i) + *(data2 + i) = *(data2 + i) + 1; + data3 = malloc(pagesize); + if (!data3) + exit(1); + if (read(fd, data3, pagesize) != pagesize) + exit(1); + for (i = 0; i < pagesize; ++i) + if (*(data + i) != *(data3 + i)) + exit(1); + close(fd); + unlink("conftestmmap"); + exit(0); +} + +EOF +if { (eval echo configure:1941: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +then + ac_cv_func_mmap_fixed_mapped=yes +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -fr conftest* + ac_cv_func_mmap_fixed_mapped=no +fi +rm -fr conftest* +fi + +fi + +echo "$ac_t""$ac_cv_func_mmap_fixed_mapped" 1>&6 +if test $ac_cv_func_mmap_fixed_mapped = yes; then + cat >> confdefs.h <<\EOF +#define HAVE_MMAP 1 +EOF + +fi + +echo $ac_n "checking return type of signal handlers""... 
$ac_c" 1>&6 +echo "configure:1964: checking return type of signal handlers" >&5 +if eval "test \"`echo '$''{'ac_cv_type_signal'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext < +#include +#ifdef signal +#undef signal +#endif +#ifdef __cplusplus +extern "C" void (*signal (int, void (*)(int)))(int); +#else +void (*signal ()) (); +#endif + +int main() { +int i; +; return 0; } +EOF +if { (eval echo configure:1986: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then + rm -rf conftest* + ac_cv_type_signal=void +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + ac_cv_type_signal=int +fi +rm -f conftest* +fi + +echo "$ac_t""$ac_cv_type_signal" 1>&6 +cat >> confdefs.h <&6 +echo "configure:2008: checking for $ac_func" >&5 +if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext < +/* Override any gcc2 internal prototype to avoid an error. */ +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char $ac_func(); + +int main() { + +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined (__stub_$ac_func) || defined (__stub___$ac_func) +choke me +#else +$ac_func(); +#endif + +; return 0; } +EOF +if { (eval echo configure:2036: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + eval "ac_cv_func_$ac_func=yes" +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + eval "ac_cv_func_$ac_func=no" +fi +rm -f conftest* +fi + +if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then + echo "$ac_t""yes" 1>&6 + ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` + cat >> confdefs.h <&6 +fi +done + + +trap '' 1 2 15 +cat > confcache <<\EOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs. It is not useful on other systems. +# If it contains results you don't want to keep, you may remove or edit it. +# +# By default, configure uses ./config.cache as the cache file, +# creating it if it does not exist already. You can give configure +# the --cache-file=FILE option to use a different cache file; that is +# what configure does when it calls configure scripts in +# subdirectories, so they share the cache. +# Giving --cache-file=/dev/null disables caching, for debugging configure. +# config.status only pays attention to the cache file if you give it the +# --recheck option to rerun configure. +# +EOF +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, don't put newlines in cache variables' values. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. 
+(set) 2>&1 | + case `(ac_space=' '; set | grep ac_space) 2>&1` in + *ac_space=\ *) + # `set' does not quote correctly, so add quotes (double-quote substitution + # turns \\\\ into \\, and sed turns \\ into \). + sed -n \ + -e "s/'/'\\\\''/g" \ + -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p" + ;; + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p' + ;; + esac >> confcache +if cmp -s $cache_file confcache; then + : +else + if test -w $cache_file; then + echo "updating cache $cache_file" + cat confcache > $cache_file + else + echo "not updating unwritable cache $cache_file" + fi +fi +rm -f confcache + +trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +# Any assignment to VPATH causes Sun make to only execute +# the first set of double-colon rules, so remove it if not needed. +# If there is a colon in the path, we need to keep it. +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[^:]*$/d' +fi + +trap 'rm -f $CONFIG_STATUS conftest*; exit 1' 1 2 15 + +DEFS=-DHAVE_CONFIG_H + +# Without the "./", some shells look in PATH for config.status. +: ${CONFIG_STATUS=./config.status} + +echo creating $CONFIG_STATUS +rm -f $CONFIG_STATUS +cat > $CONFIG_STATUS </dev/null | sed 1q`: +# +# $0 $ac_configure_args +# +# Compiler output produced by configure, useful for debugging +# configure, is in ./config.log if it exists. 
+ +ac_cs_usage="Usage: $CONFIG_STATUS [--recheck] [--version] [--help]" +for ac_option +do + case "\$ac_option" in + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + echo "running \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion" + exec \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion ;; + -version | --version | --versio | --versi | --vers | --ver | --ve | --v) + echo "$CONFIG_STATUS generated by autoconf version 2.13" + exit 0 ;; + -help | --help | --hel | --he | --h) + echo "\$ac_cs_usage"; exit 0 ;; + *) echo "\$ac_cs_usage"; exit 1 ;; + esac +done + +ac_given_srcdir=$srcdir +ac_given_INSTALL="$INSTALL" + +trap 'rm -fr `echo "valgrind + Makefile + docs/Makefile + tests/Makefile + demangle/Makefile config.h" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15 +EOF +cat >> $CONFIG_STATUS < conftest.subs <<\\CEOF +$ac_vpsub +$extrasub +s%@SHELL@%$SHELL%g +s%@CFLAGS@%$CFLAGS%g +s%@CPPFLAGS@%$CPPFLAGS%g +s%@CXXFLAGS@%$CXXFLAGS%g +s%@FFLAGS@%$FFLAGS%g +s%@DEFS@%$DEFS%g +s%@LDFLAGS@%$LDFLAGS%g +s%@LIBS@%$LIBS%g +s%@exec_prefix@%$exec_prefix%g +s%@prefix@%$prefix%g +s%@program_transform_name@%$program_transform_name%g +s%@bindir@%$bindir%g +s%@sbindir@%$sbindir%g +s%@libexecdir@%$libexecdir%g +s%@datadir@%$datadir%g +s%@sysconfdir@%$sysconfdir%g +s%@sharedstatedir@%$sharedstatedir%g +s%@localstatedir@%$localstatedir%g +s%@libdir@%$libdir%g +s%@includedir@%$includedir%g +s%@oldincludedir@%$oldincludedir%g +s%@infodir@%$infodir%g +s%@mandir@%$mandir%g +s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%g +s%@INSTALL_SCRIPT@%$INSTALL_SCRIPT%g +s%@INSTALL_DATA@%$INSTALL_DATA%g +s%@PACKAGE@%$PACKAGE%g +s%@VERSION@%$VERSION%g +s%@ACLOCAL@%$ACLOCAL%g +s%@AUTOCONF@%$AUTOCONF%g +s%@AUTOMAKE@%$AUTOMAKE%g +s%@AUTOHEADER@%$AUTOHEADER%g +s%@MAKEINFO@%$MAKEINFO%g +s%@SET_MAKE@%$SET_MAKE%g +s%@LN_S@%$LN_S%g +s%@CC@%$CC%g +s%@CPP@%$CPP%g +s%@RANLIB@%$RANLIB%g +s%@host@%$host%g +s%@host_alias@%$host_alias%g 
+s%@host_cpu@%$host_cpu%g +s%@host_vendor@%$host_vendor%g +s%@host_os@%$host_os%g +s%@DEFAULT_SUPP@%$DEFAULT_SUPP%g +s%@LIBOBJS@%$LIBOBJS%g + +CEOF +EOF + +cat >> $CONFIG_STATUS <<\EOF + +# Split the substitutions into bite-sized pieces for seds with +# small command number limits, like on Digital OSF/1 and HP-UX. +ac_max_sed_cmds=90 # Maximum number of lines to put in a sed script. +ac_file=1 # Number of current file. +ac_beg=1 # First line for current file. +ac_end=$ac_max_sed_cmds # Line after last line for current file. +ac_more_lines=: +ac_sed_cmds="" +while $ac_more_lines; do + if test $ac_beg -gt 1; then + sed "1,${ac_beg}d; ${ac_end}q" conftest.subs > conftest.s$ac_file + else + sed "${ac_end}q" conftest.subs > conftest.s$ac_file + fi + if test ! -s conftest.s$ac_file; then + ac_more_lines=false + rm -f conftest.s$ac_file + else + if test -z "$ac_sed_cmds"; then + ac_sed_cmds="sed -f conftest.s$ac_file" + else + ac_sed_cmds="$ac_sed_cmds | sed -f conftest.s$ac_file" + fi + ac_file=`expr $ac_file + 1` + ac_beg=$ac_end + ac_end=`expr $ac_end + $ac_max_sed_cmds` + fi +done +if test -z "$ac_sed_cmds"; then + ac_sed_cmds=cat +fi +EOF + +cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF +for ac_file in .. $CONFIG_FILES; do if test "x$ac_file" != x..; then + # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". + case "$ac_file" in + *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'` + ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; + *) ac_file_in="${ac_file}.in" ;; + esac + + # Adjust a relative srcdir, top_srcdir, and INSTALL for subdirectories. + + # Remove last slash and all that follows it. Not all systems have dirname. + ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'` + if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then + # The file is in a subdirectory. + test ! -d "$ac_dir" && mkdir "$ac_dir" + ac_dir_suffix="/`echo $ac_dir|sed 's%^\./%%'`" + # A "../" for each directory in $ac_dir_suffix. 
+ ac_dots=`echo $ac_dir_suffix|sed 's%/[^/]*%../%g'` + else + ac_dir_suffix= ac_dots= + fi + + case "$ac_given_srcdir" in + .) srcdir=. + if test -z "$ac_dots"; then top_srcdir=. + else top_srcdir=`echo $ac_dots|sed 's%/$%%'`; fi ;; + /*) srcdir="$ac_given_srcdir$ac_dir_suffix"; top_srcdir="$ac_given_srcdir" ;; + *) # Relative path. + srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix" + top_srcdir="$ac_dots$ac_given_srcdir" ;; + esac + + case "$ac_given_INSTALL" in + [/$]*) INSTALL="$ac_given_INSTALL" ;; + *) INSTALL="$ac_dots$ac_given_INSTALL" ;; + esac + + echo creating "$ac_file" + rm -f "$ac_file" + configure_input="Generated automatically from `echo $ac_file_in|sed 's%.*/%%'` by configure." + case "$ac_file" in + *Makefile*) ac_comsub="1i\\ +# $configure_input" ;; + *) ac_comsub= ;; + esac + + ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"` + sed -e "$ac_comsub +s%@configure_input@%$configure_input%g +s%@srcdir@%$srcdir%g +s%@top_srcdir@%$top_srcdir%g +s%@INSTALL@%$INSTALL%g +" $ac_file_inputs | (eval "$ac_sed_cmds") > $ac_file +fi; done +rm -f conftest.s* + +# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where +# NAME is the cpp macro being defined and VALUE is the value it is being given. +# +# ac_d sets the value in "#define NAME VALUE" lines. +ac_dA='s%^\([ ]*\)#\([ ]*define[ ][ ]*\)' +ac_dB='\([ ][ ]*\)[^ ]*%\1#\2' +ac_dC='\3' +ac_dD='%g' +# ac_u turns "#undef NAME" with trailing blanks into "#define NAME VALUE". +ac_uA='s%^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' +ac_uB='\([ ]\)%\1#\2define\3' +ac_uC=' ' +ac_uD='\4%g' +# ac_e turns "#undef NAME" without trailing blanks into "#define NAME VALUE". +ac_eA='s%^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' +ac_eB='$%\1#\2define\3' +ac_eC=' ' +ac_eD='%g' + +if test "${CONFIG_HEADERS+set}" != set; then +EOF +cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF +fi +for ac_file in .. 
$CONFIG_HEADERS; do if test "x$ac_file" != x..; then + # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". + case "$ac_file" in + *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'` + ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; + *) ac_file_in="${ac_file}.in" ;; + esac + + echo creating $ac_file + + rm -f conftest.frag conftest.in conftest.out + ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"` + cat $ac_file_inputs > conftest.in + +EOF + +# Transform confdefs.h into a sed script conftest.vals that substitutes +# the proper values into config.h.in to produce config.h. And first: +# Protect against being on the right side of a sed subst in config.status. +# Protect against being in an unquoted here document in config.status. +rm -f conftest.vals +cat > conftest.hdr <<\EOF +s/[\\&%]/\\&/g +s%[\\$`]%\\&%g +s%#define \([A-Za-z_][A-Za-z0-9_]*\) *\(.*\)%${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD}%gp +s%ac_d%ac_u%gp +s%ac_u%ac_e%gp +EOF +sed -n -f conftest.hdr confdefs.h > conftest.vals +rm -f conftest.hdr + +# This sed command replaces #undef with comments. This is necessary, for +# example, in the case of _POSIX_SOURCE, which is predefined and required +# on some systems where configure will not decide to define it. +cat >> conftest.vals <<\EOF +s%^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*%/* & */% +EOF + +# Break up conftest.vals because some shells have a limit on +# the size of here documents, and old seds have small limits too. + +rm -f conftest.tail +while : +do + ac_lines=`grep -c . conftest.vals` + # grep -c gives empty output for an empty file on some AIX systems. + if test -z "$ac_lines" || test "$ac_lines" -eq 0; then break; fi + # Write a limited-size here document to conftest.frag. 
+ echo ' cat > conftest.frag <> $CONFIG_STATUS + sed ${ac_max_here_lines}q conftest.vals >> $CONFIG_STATUS + echo 'CEOF + sed -f conftest.frag conftest.in > conftest.out + rm -f conftest.in + mv conftest.out conftest.in +' >> $CONFIG_STATUS + sed 1,${ac_max_here_lines}d conftest.vals > conftest.tail + rm -f conftest.vals + mv conftest.tail conftest.vals +done +rm -f conftest.vals + +cat >> $CONFIG_STATUS <<\EOF + rm -f conftest.frag conftest.h + echo "/* $ac_file. Generated automatically by configure. */" > conftest.h + cat conftest.in >> conftest.h + rm -f conftest.in + if cmp -s $ac_file conftest.h 2>/dev/null; then + echo "$ac_file is unchanged" + rm -f conftest.h + else + # Remove last slash and all that follows it. Not all systems have dirname. + ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'` + if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then + # The file is in a subdirectory. + test ! -d "$ac_dir" && mkdir "$ac_dir" + fi + rm -f $ac_file + mv conftest.h $ac_file + fi +fi; done + +EOF +cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF +test -z "$CONFIG_HEADERS" || echo timestamp > stamp-h + +exit 0 +EOF +chmod +x $CONFIG_STATUS +rm -fr confdefs* $ac_clean_files +test "$no_create" = yes || ${CONFIG_SHELL-/bin/sh} $CONFIG_STATUS || exit 1 + diff --git a/configure.in b/configure.in new file mode 100644 index 000000000..3bda390f1 --- /dev/null +++ b/configure.in @@ -0,0 +1,138 @@ +# Process this file with autoconf to produce a configure script. +AC_INIT(vg_clientmalloc.c) +AM_CONFIG_HEADER(config.h) +AM_INIT_AUTOMAKE(valgrind, 20020317) + +# Checks for programs. +AC_PROG_LN_S +AC_PROG_CC +AC_PROG_CPP +AC_PROG_RANLIB + +CFLAGS="-Winline -Wall -Wshadow -O -fomit-frame-pointer -g" +AC_SUBST(CFLAGS) + +# Checks for the platform +AC_CANONICAL_HOST + +AC_MSG_CHECKING([for a supported CPU]) + +case ${host_cpu} in + i?86) + AC_MSG_RESULT([ok (${host_cpu})]) + ;; + + *) + AC_MSG_RESULT([no (${host_cpu})]) + AC_MSG_ERROR([Valgrind is ix86 specific. 
Sorry]) + ;; +esac + +AC_MSG_CHECKING([for a supported OS]) + +case ${host_os} in + *linux*) + AC_MSG_RESULT([ok (${host_os})]) + ;; + + *) + AC_MSG_RESULT([no (${host_os})]) + AC_MSG_ERROR([Valgrind is Linux specific. Sorry]) + ;; +esac + + +# Ok, this is linux. Check the kernel version +AC_MSG_CHECKING([for the kernel version]) + +kernel=`uname -r` + +case ${kernel} in + 2.4.*) + AC_MSG_RESULT([2.4 family (${kernel})]) + AC_DEFINE(KERNEL_2_4) + DEFAULT_SUPP="linux24.supp" + ;; + + 2.2.*) + AC_MSG_RESULT([2.2 family (${kernel})]) + AC_DEFINE(KERNEL_2_2) + DEFAULT_SUPP="linux22.supp" + ;; + + *) + AC_MSG_RESULT([unsupported (${kernel})]) + AC_MSG_ERROR([Valgrind works on kernels 2.2 and 2.4]) + ;; +esac + +AC_SUBST(DEFAULT_SUPP) + + +# Ok, this is linux. Check the kernel version +AC_MSG_CHECKING([the glibc version]) + +glibc="" + +AC_EGREP_CPP([GLIBC_21], [ +#include +#ifdef __GNU_LIBRARY__ + #if (__GLIBC__ == 2 && __GLIBC_MINOR__ == 1) + GLIBC_21 + #endif +#endif +], +glibc="2.1") + +AC_EGREP_CPP([GLIBC_22], [ +#include +#ifdef __GNU_LIBRARY__ + #if (__GLIBC__ == 2 && __GLIBC_MINOR__ == 2) + GLIBC_22 + #endif +#endif +], +glibc="2.2") + +case ${glibc} in + 2.1) + AC_MSG_RESULT(2.1 family) + AC_DEFINE(GLIBC_2_1) + ;; + + 2.2) + AC_MSG_RESULT(2.2 family) + AC_DEFINE(GLIBC_2_2) + ;; + + *) + AC_MSG_RESULT(unsupported version) + AC_MSG_ERROR([Valgrind requires the glibc version 2.1 or 2.2]) + ;; +esac + +# try to detect the XFree version + +# Checks for header files. +AC_HEADER_STDC +AC_CHECK_HEADERS([fcntl.h malloc.h stdlib.h string.h sys/socket.h sys/statfs.h sys/time.h termios.h unistd.h utime.h]) + +# Checks for typedefs, structures, and compiler characteristics. +AC_C_CONST +AC_TYPE_UID_T +AC_TYPE_OFF_T +AC_TYPE_SIZE_T +AC_HEADER_TIME + +# Checks for library functions. 
+AC_FUNC_MEMCMP +AC_FUNC_MMAP +AC_TYPE_SIGNAL + +AC_CHECK_FUNCS([floor memchr memset mkdir strchr strdup strpbrk strrchr strstr]) + +AC_OUTPUT(valgrind + Makefile + docs/Makefile + tests/Makefile + demangle/Makefile) diff --git a/corecheck/Makefile.am b/corecheck/Makefile.am new file mode 100644 index 000000000..00387927b --- /dev/null +++ b/corecheck/Makefile.am @@ -0,0 +1,80 @@ +SUBDIRS = demangle . docs tests + +valdir = $(libdir)/valgrind + +LDFLAGS = -Wl,-z -Wl,initfirst + +INCLUDES += -I$(srcdir)/demangle + +bin_SCRIPTS = valgrind + +val_DATA = linux22.supp linux24.supp + +EXTRA_DIST = $(val_DATA) \ + PATCHES_APPLIED ACKNOWLEDGEMENTS \ + README_KDE3_FOLKS \ + README_MISSING_SYSCALL_OR_IOCTL TODO + +val_PROGRAMS = valgrind.so valgrinq.so + +valgrinq_so_SOURCES = vg_valgrinq_dummy.c + +valgrind_so_SOURCES = \ + vg_clientmalloc.c \ + vg_clientperms.c \ + vg_demangle.c \ + vg_dispatch.S \ + vg_errcontext.c \ + vg_execontext.c \ + vg_from_ucode.c \ + vg_helpers.S \ + vg_main.c \ + vg_malloc2.c \ + vg_memory.c \ + vg_messages.c \ + vg_mylibc.c \ + vg_procselfmaps.c \ + vg_profile.c \ + vg_signals.c \ + vg_startup.S \ + vg_symtab2.c \ + vg_syscall_mem.c \ + vg_syscall.S \ + vg_to_ucode.c \ + vg_translate.c \ + vg_transtab.c \ + vg_valgrinq_dummy.c \ + vg_vtagops.c + +valgrind_so_LDADD = \ + demangle/cp-demangle.o \ + demangle/cplus-dem.o \ + demangle/dyn-string.o \ + demangle/safe-ctype.o + +include_HEADERS = valgrind.h + +noinst_HEADERS = \ + vg_kerneliface.h \ + vg_include.h \ + vg_version.h \ + vg_constants.h \ + vg_unsafe.h + + +install-data-hook: + cd ${valdir} && rm -f default.supp && $(LN_S) $(DEFAULT_SUPP) default.supp + +vg_memory.o: + $(COMPILE) -O2 -mpreferred-stack-boundary=2 -c $< + +vg_clientmalloc.o: + $(COMPILE) -fno-omit-frame-pointer -c $< + + +valgrind.so: $(valgrind_so_OBJECTS) + $(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \ + $(valgrind_so_OBJECTS) $(valgrind_so_LDADD) + +valgrinq.so: $(valgrinq_so_OBJECTS) + $(CC) $(CFLAGS) -shared -o 
valgrinq.so $(valgrinq_so_OBJECTS) diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am new file mode 100644 index 000000000..00387927b --- /dev/null +++ b/coregrind/Makefile.am @@ -0,0 +1,80 @@ +SUBDIRS = demangle . docs tests + +valdir = $(libdir)/valgrind + +LDFLAGS = -Wl,-z -Wl,initfirst + +INCLUDES += -I$(srcdir)/demangle + +bin_SCRIPTS = valgrind + +val_DATA = linux22.supp linux24.supp + +EXTRA_DIST = $(val_DATA) \ + PATCHES_APPLIED ACKNOWLEDGEMENTS \ + README_KDE3_FOLKS \ + README_MISSING_SYSCALL_OR_IOCTL TODO + +val_PROGRAMS = valgrind.so valgrinq.so + +valgrinq_so_SOURCES = vg_valgrinq_dummy.c + +valgrind_so_SOURCES = \ + vg_clientmalloc.c \ + vg_clientperms.c \ + vg_demangle.c \ + vg_dispatch.S \ + vg_errcontext.c \ + vg_execontext.c \ + vg_from_ucode.c \ + vg_helpers.S \ + vg_main.c \ + vg_malloc2.c \ + vg_memory.c \ + vg_messages.c \ + vg_mylibc.c \ + vg_procselfmaps.c \ + vg_profile.c \ + vg_signals.c \ + vg_startup.S \ + vg_symtab2.c \ + vg_syscall_mem.c \ + vg_syscall.S \ + vg_to_ucode.c \ + vg_translate.c \ + vg_transtab.c \ + vg_valgrinq_dummy.c \ + vg_vtagops.c + +valgrind_so_LDADD = \ + demangle/cp-demangle.o \ + demangle/cplus-dem.o \ + demangle/dyn-string.o \ + demangle/safe-ctype.o + +include_HEADERS = valgrind.h + +noinst_HEADERS = \ + vg_kerneliface.h \ + vg_include.h \ + vg_version.h \ + vg_constants.h \ + vg_unsafe.h + + +install-data-hook: + cd ${valdir} && rm -f default.supp && $(LN_S) $(DEFAULT_SUPP) default.supp + +vg_memory.o: + $(COMPILE) -O2 -mpreferred-stack-boundary=2 -c $< + +vg_clientmalloc.o: + $(COMPILE) -fno-omit-frame-pointer -c $< + + +valgrind.so: $(valgrind_so_OBJECTS) + $(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \ + $(valgrind_so_OBJECTS) $(valgrind_so_LDADD) + +valgrinq.so: $(valgrinq_so_OBJECTS) + $(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS) diff --git a/coregrind/arch/x86-linux/vg_syscall.S b/coregrind/arch/x86-linux/vg_syscall.S new file mode 100644 index 000000000..210328a69 --- 
/dev/null +++ b/coregrind/arch/x86-linux/vg_syscall.S @@ -0,0 +1,179 @@ + +##--------------------------------------------------------------------## +##--- Support for doing system calls. ---## +##--- vg_syscall.S ---## +##--------------------------------------------------------------------## + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_constants.h" + + +.globl VG_(do_syscall) + +# NOTE that this routine expects the simulated machines state +# to be in m_state_static. Therefore it needs to be wrapped by +# code which copies from baseBlock before the call, into +# m_state_static, and back afterwards. + +VG_(do_syscall): + cmpl $2, VG_(syscall_depth) + jz do_syscall_DEPTH_2 + + # depth 1 copy follows ... + # Save all the int registers of the real machines state on the + # simulators stack. 
+ pushal + + # and save the real FPU state too + fwait + fnsave VG_(real_fpu_state_saved_over_syscall_d1) + frstor VG_(real_fpu_state_saved_over_syscall_d1) + + # remember what the simulators stack pointer is + movl %esp, VG_(esp_saved_over_syscall_d1) + + # Now copy the simulated machines state into the real one + # esp still refers to the simulators stack + frstor VG_(m_state_static)+40 + movl VG_(m_state_static)+32, %eax + pushl %eax + popfl + movl VG_(m_state_static)+0, %eax + movl VG_(m_state_static)+4, %ecx + movl VG_(m_state_static)+8, %edx + movl VG_(m_state_static)+12, %ebx + movl VG_(m_state_static)+16, %esp + movl VG_(m_state_static)+20, %ebp + movl VG_(m_state_static)+24, %esi + movl VG_(m_state_static)+28, %edi + + # esp now refers to the simulatees stack + # Do the actual system call + int $0x80 + + # restore stack as soon as possible + # esp refers to simulatees stack + movl %esp, VG_(m_state_static)+16 + movl VG_(esp_saved_over_syscall_d1), %esp + # esp refers to simulators stack + + # ... and undo everything else. + # Copy real state back to simulated state. + movl %eax, VG_(m_state_static)+0 + movl %ecx, VG_(m_state_static)+4 + movl %edx, VG_(m_state_static)+8 + movl %ebx, VG_(m_state_static)+12 + movl %ebp, VG_(m_state_static)+20 + movl %esi, VG_(m_state_static)+24 + movl %edi, VG_(m_state_static)+28 + pushfl + popl %eax + movl %eax, VG_(m_state_static)+32 + fwait + fnsave VG_(m_state_static)+40 + frstor VG_(m_state_static)+40 + + # Restore the state of the simulator + frstor VG_(real_fpu_state_saved_over_syscall_d1) + popal + + ret + + + + + + + + +do_syscall_DEPTH_2: + + # depth 2 copy follows ... + # Save all the int registers of the real machines state on the + # simulators stack. 
+ pushal + + # and save the real FPU state too + fwait + fnsave VG_(real_fpu_state_saved_over_syscall_d2) + frstor VG_(real_fpu_state_saved_over_syscall_d2) + + # remember what the simulators stack pointer is + movl %esp, VG_(esp_saved_over_syscall_d2) + + # Now copy the simulated machines state into the real one + # esp still refers to the simulators stack + frstor VG_(m_state_static)+40 + movl VG_(m_state_static)+32, %eax + pushl %eax + popfl + movl VG_(m_state_static)+0, %eax + movl VG_(m_state_static)+4, %ecx + movl VG_(m_state_static)+8, %edx + movl VG_(m_state_static)+12, %ebx + movl VG_(m_state_static)+16, %esp + movl VG_(m_state_static)+20, %ebp + movl VG_(m_state_static)+24, %esi + movl VG_(m_state_static)+28, %edi + + # esp now refers to the simulatees stack + # Do the actual system call + int $0x80 + + # restore stack as soon as possible + # esp refers to simulatees stack + movl %esp, VG_(m_state_static)+16 + movl VG_(esp_saved_over_syscall_d2), %esp + # esp refers to simulators stack + + # ... and undo everything else. + # Copy real state back to simulated state. 
+ movl %eax, VG_(m_state_static)+0 + movl %ecx, VG_(m_state_static)+4 + movl %edx, VG_(m_state_static)+8 + movl %ebx, VG_(m_state_static)+12 + movl %ebp, VG_(m_state_static)+20 + movl %esi, VG_(m_state_static)+24 + movl %edi, VG_(m_state_static)+28 + pushfl + popl %eax + movl %eax, VG_(m_state_static)+32 + fwait + fnsave VG_(m_state_static)+40 + frstor VG_(m_state_static)+40 + + # Restore the state of the simulator + frstor VG_(real_fpu_state_saved_over_syscall_d2) + popal + + ret + + +##--------------------------------------------------------------------## +##--- end vg_syscall.S ---## +##--------------------------------------------------------------------## diff --git a/coregrind/demangle/Makefile.am b/coregrind/demangle/Makefile.am new file mode 100644 index 000000000..1188b7c89 --- /dev/null +++ b/coregrind/demangle/Makefile.am @@ -0,0 +1,23 @@ +INCLUDES += -I$(top_srcdir) + +noinst_HEADERS = \ + ansidecl.h \ + dyn-string.h \ + demangle.h \ + safe-ctype.h + +noinst_LIBRARIES = libdemangle.a + +libdemangle_a_SOURCES = \ + cp-demangle.c cplus-dem.c dyn-string.c safe-ctype.c + +# some files don't like my config.h, so just pretend it does not exist... + +cp-demangle.o: + $(COMPILE) -Wno-unused -Wno-shadow -c $< -UHAVE_CONFIG_H + +dyn-string.o: + $(COMPILE) -c $< -UHAVE_CONFIG_H + +cplus-dem.o: + $(COMPILE) -Wno-unused -c $< diff --git a/coregrind/demangle/ansidecl.h b/coregrind/demangle/ansidecl.h new file mode 100644 index 000000000..9a7c5777f --- /dev/null +++ b/coregrind/demangle/ansidecl.h @@ -0,0 +1,295 @@ +/* ANSI and traditional C compatability macros + Copyright 1991, 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +/* ANSI and traditional C compatibility macros + + ANSI C is assumed if __STDC__ is #defined. + + Macro ANSI C definition Traditional C definition + ----- ---- - ---------- ----------- - ---------- + ANSI_PROTOTYPES 1 not defined + PTR `void *' `char *' + PTRCONST `void *const' `char *' + LONG_DOUBLE `long double' `double' + const not defined `' + volatile not defined `' + signed not defined `' + VA_START(ap, var) va_start(ap, var) va_start(ap) + + Note that it is safe to write "void foo();" indicating a function + with no return value, in all K+R compilers we have been able to test. + + For declaring functions with prototypes, we also provide these: + + PARAMS ((prototype)) + -- for functions which take a fixed number of arguments. Use this + when declaring the function. When defining the function, write a + K+R style argument list. For example: + + char *strcpy PARAMS ((char *dest, char *source)); + ... + char * + strcpy (dest, source) + char *dest; + char *source; + { ... } + + + VPARAMS ((prototype, ...)) + -- for functions which take a variable number of arguments. Use + PARAMS to declare the function, VPARAMS to define it. For example: + + int printf PARAMS ((const char *format, ...)); + ... + int + printf VPARAMS ((const char *format, ...)) + { + ... + } + + For writing functions which take variable numbers of arguments, we + also provide the VA_OPEN, VA_CLOSE, and VA_FIXEDARG macros. These + hide the differences between K+R and C89 more + thoroughly than the simple VA_START() macro mentioned above. 
+ + VA_OPEN and VA_CLOSE are used *instead of* va_start and va_end. + Immediately after VA_OPEN, put a sequence of VA_FIXEDARG calls + corresponding to the list of fixed arguments. Then use va_arg + normally to get the variable arguments, or pass your va_list object + around. You do not declare the va_list yourself; VA_OPEN does it + for you. + + Here is a complete example: + + int + printf VPARAMS ((const char *format, ...)) + { + int result; + + VA_OPEN (ap, format); + VA_FIXEDARG (ap, const char *, format); + + result = vfprintf (stdout, format, ap); + VA_CLOSE (ap); + + return result; + } + + + You can declare variables either before or after the VA_OPEN, + VA_FIXEDARG sequence. Also, VA_OPEN and VA_CLOSE are the beginning + and end of a block. They must appear at the same nesting level, + and any variables declared after VA_OPEN go out of scope at + VA_CLOSE. Unfortunately, with a K+R compiler, that includes the + argument list. You can have multiple instances of VA_OPEN/VA_CLOSE + pairs in a single function in case you need to traverse the + argument list more than once. + + For ease of writing code which uses GCC extensions but needs to be + portable to other compilers, we provide the GCC_VERSION macro that + simplifies testing __GNUC__ and __GNUC_MINOR__ together, and various + wrappers around __attribute__. Also, __extension__ will be #defined + to nothing if it doesn't work. See below. + + This header also defines a lot of obsolete macros: + CONST, VOLATILE, SIGNED, PROTO, EXFUN, DEFUN, DEFUN_VOID, + AND, DOTS, NOARGS. Don't use them. */ + +#ifndef _ANSIDECL_H +#define _ANSIDECL_H 1 + +/* Every source file includes this file, + so they will all get the switch for lint. */ +/* LINTLIBRARY */ + +/* Using MACRO(x,y) in cpp #if conditionals does not work with some + older preprocessors. 
Thus we can't define something like this: + +#define HAVE_GCC_VERSION(MAJOR, MINOR) \ + (__GNUC__ > (MAJOR) || (__GNUC__ == (MAJOR) && __GNUC_MINOR__ >= (MINOR))) + +and then test "#if HAVE_GCC_VERSION(2,7)". + +So instead we use the macro below and test it against specific values. */ + +/* This macro simplifies testing whether we are using gcc, and if it + is of a particular minimum version. (Both major & minor numbers are + significant.) This macro will evaluate to 0 if we are not using + gcc at all. */ +#ifndef GCC_VERSION +#define GCC_VERSION (__GNUC__ * 1000 + __GNUC_MINOR__) +#endif /* GCC_VERSION */ + +#if defined (__STDC__) || defined (_AIX) || (defined (__mips) && defined (_SYSTYPE_SVR4)) || defined(_WIN32) +/* All known AIX compilers implement these things (but don't always + define __STDC__). The RISC/OS MIPS compiler defines these things + in SVR4 mode, but does not define __STDC__. */ + +#define ANSI_PROTOTYPES 1 +#define PTR void * +#define PTRCONST void *const +#define LONG_DOUBLE long double + +#define PARAMS(ARGS) ARGS +#define VPARAMS(ARGS) ARGS +#define VA_START(VA_LIST, VAR) va_start(VA_LIST, VAR) + +/* variadic function helper macros */ +/* "struct Qdmy" swallows the semicolon after VA_OPEN/VA_FIXEDARG's + use without inhibiting further decls and without declaring an + actual variable. */ +#define VA_OPEN(AP, VAR) { va_list AP; va_start(AP, VAR); { struct Qdmy +#define VA_CLOSE(AP) } va_end(AP); } +#define VA_FIXEDARG(AP, T, N) struct Qdmy + +#undef const +#undef volatile +#undef signed + +/* inline requires special treatment; it's in C99, and GCC >=2.7 supports + it too, but it's not in C89. */ +#undef inline +#if __STDC_VERSION__ > 199901L +/* it's a keyword */ +#else +# if GCC_VERSION >= 2007 +# define inline __inline__ /* __inline__ prevents -pedantic warnings */ +# else +# define inline /* nothing */ +# endif +#endif + +/* These are obsolete. Do not use. 
*/ +#ifndef IN_GCC +#define CONST const +#define VOLATILE volatile +#define SIGNED signed + +#define PROTO(type, name, arglist) type name arglist +#define EXFUN(name, proto) name proto +#define DEFUN(name, arglist, args) name(args) +#define DEFUN_VOID(name) name(void) +#define AND , +#define DOTS , ... +#define NOARGS void +#endif /* ! IN_GCC */ + +#else /* Not ANSI C. */ + +#undef ANSI_PROTOTYPES +#define PTR char * +#define PTRCONST PTR +#define LONG_DOUBLE double + +#define PARAMS(args) () +#define VPARAMS(args) (va_alist) va_dcl +#define VA_START(va_list, var) va_start(va_list) + +#define VA_OPEN(AP, VAR) { va_list AP; va_start(AP); { struct Qdmy +#define VA_CLOSE(AP) } va_end(AP); } +#define VA_FIXEDARG(AP, TYPE, NAME) TYPE NAME = va_arg(AP, TYPE) + +/* some systems define these in header files for non-ansi mode */ +#undef const +#undef volatile +#undef signed +#undef inline +#define const +#define volatile +#define signed +#define inline + +#ifndef IN_GCC +#define CONST +#define VOLATILE +#define SIGNED + +#define PROTO(type, name, arglist) type name () +#define EXFUN(name, proto) name() +#define DEFUN(name, arglist, args) name arglist args; +#define DEFUN_VOID(name) name() +#define AND ; +#define DOTS +#define NOARGS +#endif /* ! IN_GCC */ + +#endif /* ANSI C. */ + +/* Define macros for some gcc attributes. This permits us to use the + macros freely, and know that they will come into play for the + version of gcc in which they are supported. */ + +#if (GCC_VERSION < 2007) +# define __attribute__(x) +#endif + +/* Attribute __malloc__ on functions was valid as of gcc 2.96. */ +#ifndef ATTRIBUTE_MALLOC +# if (GCC_VERSION >= 2096) +# define ATTRIBUTE_MALLOC __attribute__ ((__malloc__)) +# else +# define ATTRIBUTE_MALLOC +# endif /* GNUC >= 2.96 */ +#endif /* ATTRIBUTE_MALLOC */ + +/* Attributes on labels were valid as of gcc 2.93. 
*/ +#ifndef ATTRIBUTE_UNUSED_LABEL +# if (GCC_VERSION >= 2093) +# define ATTRIBUTE_UNUSED_LABEL ATTRIBUTE_UNUSED +# else +# define ATTRIBUTE_UNUSED_LABEL +# endif /* GNUC >= 2.93 */ +#endif /* ATTRIBUTE_UNUSED_LABEL */ + +#ifndef ATTRIBUTE_UNUSED +#define ATTRIBUTE_UNUSED __attribute__ ((__unused__)) +#endif /* ATTRIBUTE_UNUSED */ + +#ifndef ATTRIBUTE_NORETURN +#define ATTRIBUTE_NORETURN __attribute__ ((__noreturn__)) +#endif /* ATTRIBUTE_NORETURN */ + +#ifndef ATTRIBUTE_PRINTF +#define ATTRIBUTE_PRINTF(m, n) __attribute__ ((__format__ (__printf__, m, n))) +#define ATTRIBUTE_PRINTF_1 ATTRIBUTE_PRINTF(1, 2) +#define ATTRIBUTE_PRINTF_2 ATTRIBUTE_PRINTF(2, 3) +#define ATTRIBUTE_PRINTF_3 ATTRIBUTE_PRINTF(3, 4) +#define ATTRIBUTE_PRINTF_4 ATTRIBUTE_PRINTF(4, 5) +#define ATTRIBUTE_PRINTF_5 ATTRIBUTE_PRINTF(5, 6) +#endif /* ATTRIBUTE_PRINTF */ + +/* We use __extension__ in some places to suppress -pedantic warnings + about GCC extensions. This feature didn't work properly before + gcc 2.8. */ +#if GCC_VERSION < 2008 +#define __extension__ +#endif + +/* Bootstrap support: Adjust certain macros defined by Autoconf, + which are only valid for the stage1 compiler. If we detect + a modern version of GCC, we are probably in stage2 or beyond, + so unconditionally reset the values. Note that const, inline, + etc. have been dealt with above. */ +#if (GCC_VERSION >= 2007) +# ifndef HAVE_LONG_DOUBLE +# define HAVE_LONG_DOUBLE 1 +# endif +#endif /* GCC >= 2.7 */ + +#endif /* ansidecl.h */ diff --git a/coregrind/demangle/cp-demangle.c b/coregrind/demangle/cp-demangle.c new file mode 100644 index 000000000..76c669a1a --- /dev/null +++ b/coregrind/demangle/cp-demangle.c @@ -0,0 +1,4170 @@ +/* Demangler for IA64 / g++ V3 ABI. + Copyright (C) 2000, 2001 Free Software Foundation, Inc. + Written by Alex Samuel . + + This file is part of GNU CC. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +*/ + +/* This file implements demangling of C++ names mangled according to + the IA64 / g++ V3 ABI. Use the cp_demangle function to + demangle a mangled name, or compile with the preprocessor macro + STANDALONE_DEMANGLER defined to create a demangling filter + executable (functionally similar to c++filt, but includes this + demangler only). */ + +#include + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_STDLIB_H +#include +#endif + +#ifdef HAVE_STRING_H +#include +#endif + +#include "vg_include.h" +#include "ansidecl.h" +#include "dyn-string.h" +#include "demangle.h" + +#ifndef STANDALONE +#define malloc(s) VG_(malloc)(VG_AR_DEMANGLE, s) +#define free(p) VG_(free)(VG_AR_DEMANGLE, p) +#define realloc(p,s) VG_(realloc)(VG_AR_DEMANGLE, p, s) +#endif + +/* If CP_DEMANGLE_DEBUG is defined, a trace of the grammar evaluation, + and other debugging output, will be generated. */ +#ifdef CP_DEMANGLE_DEBUG +#define DEMANGLE_TRACE(PRODUCTION, DM) \ + fprintf (stderr, " -> %-24s at position %3d\n", \ + (PRODUCTION), current_position (DM)); +#else +#define DEMANGLE_TRACE(PRODUCTION, DM) +#endif + +/* Don't include , to prevent additional unresolved symbols + from being dragged into the C++ runtime library. 
*/ +#define IS_DIGIT(CHAR) ((CHAR) >= '0' && (CHAR) <= '9') +#define IS_ALPHA(CHAR) \ + (((CHAR) >= 'a' && (CHAR) <= 'z') \ + || ((CHAR) >= 'A' && (CHAR) <= 'Z')) + +/* The prefix prepended by GCC to an identifier represnting the + anonymous namespace. */ +#define ANONYMOUS_NAMESPACE_PREFIX "_GLOBAL_" + +/* Character(s) to use for namespace separation in demangled output */ +#define NAMESPACE_SEPARATOR (dm->style == DMGL_JAVA ? "." : "::") + +/* If flag_verbose is zero, some simplifications will be made to the + output to make it easier to read and supress details that are + generally not of interest to the average C++ programmer. + Otherwise, the demangled representation will attempt to convey as + much information as the mangled form. */ +static int flag_verbose; + +/* If flag_strict is non-zero, demangle strictly according to the + specification -- don't demangle special g++ manglings. */ +static int flag_strict; + +/* String_list_t is an extended form of dyn_string_t which provides a + link field and a caret position for additions to the string. A + string_list_t may safely be cast to and used as a dyn_string_t. */ + +struct string_list_def +{ + /* The dyn_string; must be first. */ + struct dyn_string string; + + /* The position at which additional text is added to this string + (using the result_add* macros). This value is an offset from the + end of the string, not the beginning (and should be + non-positive). */ + int caret_position; + + /* The next string in the list. */ + struct string_list_def *next; +}; + +typedef struct string_list_def *string_list_t; + +/* Data structure representing a potential substitution. */ + +struct substitution_def +{ + /* The demangled text of the substitution. */ + dyn_string_t text; + + /* Whether this substitution represents a template item. */ + int template_p : 1; +}; + +/* Data structure representing a template argument list. 
*/ + +struct template_arg_list_def +{ + /* The next (lower) template argument list in the stack of currently + active template arguments. */ + struct template_arg_list_def *next; + + /* The first element in the list of template arguments in + left-to-right order. */ + string_list_t first_argument; + + /* The last element in the arguments lists. */ + string_list_t last_argument; +}; + +typedef struct template_arg_list_def *template_arg_list_t; + +/* Data structure to maintain the state of the current demangling. */ + +struct demangling_def +{ + /* The full mangled name being mangled. */ + const char *name; + + /* Pointer into name at the current position. */ + const char *next; + + /* Stack for strings containing demangled result generated so far. + Text is emitted to the topmost (first) string. */ + string_list_t result; + + /* The number of presently available substitutions. */ + int num_substitutions; + + /* The allocated size of the substitutions array. */ + int substitutions_allocated; + + /* An array of available substitutions. The number of elements in + the array is given by num_substitions, and the allocated array + size in substitutions_size. + + The most recent substition is at the end, so + + - `S_' corresponds to substititutions[num_substitutions - 1] + - `S0_' corresponds to substititutions[num_substitutions - 2] + + etc. */ + struct substitution_def *substitutions; + + /* The stack of template argument lists. */ + template_arg_list_t template_arg_lists; + + /* The most recently demangled source-name. */ + dyn_string_t last_source_name; + + /* Language style to use for demangled output. */ + int style; + + /* Set to non-zero iff this name is a constructor. The actual value + indicates what sort of constructor this is; see demangle.h. */ + enum gnu_v3_ctor_kinds is_constructor; + + /* Set to non-zero iff this name is a destructor. The actual value + indicates what sort of destructor this is; see demangle.h. 
*/ + enum gnu_v3_dtor_kinds is_destructor; + +}; + +typedef struct demangling_def *demangling_t; + +/* This type is the standard return code from most functions. Values + other than STATUS_OK contain descriptive messages. */ +typedef const char *status_t; + +/* Special values that can be used as a status_t. */ +#define STATUS_OK NULL +#define STATUS_ERROR "Error." +#define STATUS_UNIMPLEMENTED "Unimplemented." +#define STATUS_INTERNAL_ERROR "Internal error." + +/* This status code indicates a failure in malloc or realloc. */ +static const char *const status_allocation_failed = "Allocation failed."; +#define STATUS_ALLOCATION_FAILED status_allocation_failed + +/* Non-zero if STATUS indicates that no error has occurred. */ +#define STATUS_NO_ERROR(STATUS) ((STATUS) == STATUS_OK) + +/* Evaluate EXPR, which must produce a status_t. If the status code + indicates an error, return from the current function with that + status code. */ +#define RETURN_IF_ERROR(EXPR) \ + do \ + { \ + status_t s = EXPR; \ + if (!STATUS_NO_ERROR (s)) \ + return s; \ + } \ + while (0) + +static status_t int_to_dyn_string + PARAMS ((int, dyn_string_t)); +static string_list_t string_list_new + PARAMS ((int)); +static void string_list_delete + PARAMS ((string_list_t)); +static status_t result_add_separated_char + PARAMS ((demangling_t, int)); +static status_t result_push + PARAMS ((demangling_t)); +static string_list_t result_pop + PARAMS ((demangling_t)); +static int substitution_start + PARAMS ((demangling_t)); +static status_t substitution_add + PARAMS ((demangling_t, int, int)); +static dyn_string_t substitution_get + PARAMS ((demangling_t, int, int *)); +#ifdef CP_DEMANGLE_DEBUG +static void substitutions_print + PARAMS ((demangling_t, FILE *)); +#endif +static template_arg_list_t template_arg_list_new + PARAMS ((void)); +static void template_arg_list_delete + PARAMS ((template_arg_list_t)); +static void template_arg_list_add_arg + PARAMS ((template_arg_list_t, string_list_t)); +static 
string_list_t template_arg_list_get_arg + PARAMS ((template_arg_list_t, int)); +static void push_template_arg_list + PARAMS ((demangling_t, template_arg_list_t)); +static void pop_to_template_arg_list + PARAMS ((demangling_t, template_arg_list_t)); +#ifdef CP_DEMANGLE_DEBUG +static void template_arg_list_print + PARAMS ((template_arg_list_t, FILE *)); +#endif +static template_arg_list_t current_template_arg_list + PARAMS ((demangling_t)); +static demangling_t demangling_new + PARAMS ((const char *, int)); +static void demangling_delete + PARAMS ((demangling_t)); + +/* The last character of DS. Warning: DS is evaluated twice. */ +#define dyn_string_last_char(DS) \ + (dyn_string_buf (DS)[dyn_string_length (DS) - 1]) + +/* Append a space character (` ') to DS if it does not already end + with one. Evaluates to 1 on success, or 0 on allocation failure. */ +#define dyn_string_append_space(DS) \ + ((dyn_string_length (DS) > 0 \ + && dyn_string_last_char (DS) != ' ') \ + ? dyn_string_append_char ((DS), ' ') \ + : 1) + +/* Returns the index of the current position in the mangled name. */ +#define current_position(DM) ((DM)->next - (DM)->name) + +/* Returns the character at the current position of the mangled name. */ +#define peek_char(DM) (*((DM)->next)) + +/* Returns the character one past the current position of the mangled + name. */ +#define peek_char_next(DM) \ + (peek_char (DM) == '\0' ? '\0' : (*((DM)->next + 1))) + +/* Returns the character at the current position, and advances the + current position to the next character. */ +#define next_char(DM) (*((DM)->next)++) + +/* Returns non-zero if the current position is the end of the mangled + name, i.e. one past the last character. */ +#define end_of_name_p(DM) (peek_char (DM) == '\0') + +/* Advances the current position by one character. */ +#define advance_char(DM) (++(DM)->next) + +/* Returns the string containing the current demangled result. 
*/ +#define result_string(DM) (&(DM)->result->string) + +/* Returns the position at which new text is inserted into the + demangled result. */ +#define result_caret_pos(DM) \ + (result_length (DM) + \ + ((string_list_t) result_string (DM))->caret_position) + +/* Adds a dyn_string_t to the demangled result. */ +#define result_add_string(DM, STRING) \ + (dyn_string_insert (&(DM)->result->string, \ + result_caret_pos (DM), (STRING)) \ + ? STATUS_OK : STATUS_ALLOCATION_FAILED) + +/* Adds NUL-terminated string CSTR to the demangled result. */ +#define result_add(DM, CSTR) \ + (dyn_string_insert_cstr (&(DM)->result->string, \ + result_caret_pos (DM), (CSTR)) \ + ? STATUS_OK : STATUS_ALLOCATION_FAILED) + +/* Adds character CHAR to the demangled result. */ +#define result_add_char(DM, CHAR) \ + (dyn_string_insert_char (&(DM)->result->string, \ + result_caret_pos (DM), (CHAR)) \ + ? STATUS_OK : STATUS_ALLOCATION_FAILED) + +/* Inserts a dyn_string_t to the demangled result at position POS. */ +#define result_insert_string(DM, POS, STRING) \ + (dyn_string_insert (&(DM)->result->string, (POS), (STRING)) \ + ? STATUS_OK : STATUS_ALLOCATION_FAILED) + +/* Inserts NUL-terminated string CSTR to the demangled result at + position POS. */ +#define result_insert(DM, POS, CSTR) \ + (dyn_string_insert_cstr (&(DM)->result->string, (POS), (CSTR)) \ + ? STATUS_OK : STATUS_ALLOCATION_FAILED) + +/* Inserts character CHAR to the demangled result at position POS. */ +#define result_insert_char(DM, POS, CHAR) \ + (dyn_string_insert_char (&(DM)->result->string, (POS), (CHAR)) \ + ? STATUS_OK : STATUS_ALLOCATION_FAILED) + +/* The length of the current demangled result. */ +#define result_length(DM) \ + dyn_string_length (&(DM)->result->string) + +/* Appends a (less-than, greater-than) character to the result in DM + to (open, close) a template argument or parameter list. Appends a + space first if necessary to prevent spurious elision of angle + brackets with the previous character. 
*/ +#define result_open_template_list(DM) result_add_separated_char(DM, '<') +#define result_close_template_list(DM) result_add_separated_char(DM, '>') + +/* Appends a base 10 representation of VALUE to DS. STATUS_OK on + success. On failure, deletes DS and returns an error code. */ + +static status_t +int_to_dyn_string (value, ds) + int value; + dyn_string_t ds; +{ + int i; + int mask = 1; + + /* Handle zero up front. */ + if (value == 0) + { + if (!dyn_string_append_char (ds, '0')) + return STATUS_ALLOCATION_FAILED; + return STATUS_OK; + } + + /* For negative numbers, emit a minus sign. */ + if (value < 0) + { + if (!dyn_string_append_char (ds, '-')) + return STATUS_ALLOCATION_FAILED; + value = -value; + } + + /* Find the power of 10 of the first digit. */ + i = value; + while (i > 9) + { + mask *= 10; + i /= 10; + } + + /* Write the digits. */ + while (mask > 0) + { + int digit = value / mask; + + if (!dyn_string_append_char (ds, '0' + digit)) + return STATUS_ALLOCATION_FAILED; + + value -= digit * mask; + mask /= 10; + } + + return STATUS_OK; +} + +/* Creates a new string list node. The contents of the string are + empty, but the initial buffer allocation is LENGTH. The string + list node should be deleted with string_list_delete. Returns NULL + if allocation fails. */ + +static string_list_t +string_list_new (length) + int length; +{ + string_list_t s = (string_list_t) malloc (sizeof (struct string_list_def)); + s->caret_position = 0; + if (s == NULL) + return NULL; + if (!dyn_string_init ((dyn_string_t) s, length)) + return NULL; + return s; +} + +/* Deletes the entire string list starting at NODE. */ + +static void +string_list_delete (node) + string_list_t node; +{ + while (node != NULL) + { + string_list_t next = node->next; + dyn_string_delete ((dyn_string_t) node); + node = next; + } +} + +/* Appends CHARACTER to the demangled result. If the current trailing + character of the result is CHARACTER, a space is inserted first. 
*/ + +static status_t +result_add_separated_char (dm, character) + demangling_t dm; + int character; +{ + char *result = dyn_string_buf (result_string (dm)); + int caret_pos = result_caret_pos (dm); + + /* Add a space if the last character is already the character we + want to add. */ + if (caret_pos > 0 && result[caret_pos - 1] == character) + RETURN_IF_ERROR (result_add_char (dm, ' ')); + /* Add the character. */ + RETURN_IF_ERROR (result_add_char (dm, character)); + + return STATUS_OK; +} + +/* Allocates and pushes a new string onto the demangled results stack + for DM. Subsequent demangling with DM will emit to the new string. + Returns STATUS_OK on success, STATUS_ALLOCATION_FAILED on + allocation failure. */ + +static status_t +result_push (dm) + demangling_t dm; +{ + string_list_t new_string = string_list_new (0); + if (new_string == NULL) + /* Allocation failed. */ + return STATUS_ALLOCATION_FAILED; + + /* Link the new string to the front of the list of result strings. */ + new_string->next = (string_list_t) dm->result; + dm->result = new_string; + return STATUS_OK; +} + +/* Removes and returns the topmost element on the demangled results + stack for DM. The caller assumes ownership for the returned + string. */ + +static string_list_t +result_pop (dm) + demangling_t dm; +{ + string_list_t top = dm->result; + dm->result = top->next; + return top; +} + +/* Returns the current value of the caret for the result string. The + value is an offet from the end of the result string. */ + +static int +result_get_caret (dm) + demangling_t dm; +{ + return ((string_list_t) result_string (dm))->caret_position; +} + +/* Sets the value of the caret for the result string, counted as an + offet from the end of the result string. */ + +static void +result_set_caret (dm, position) + demangling_t dm; + int position; +{ + ((string_list_t) result_string (dm))->caret_position = position; +} + +/* Shifts the position of the next addition to the result by + POSITION_OFFSET. 
A negative value shifts the caret to the left. */ + +static void +result_shift_caret (dm, position_offset) + demangling_t dm; + int position_offset; +{ + ((string_list_t) result_string (dm))->caret_position += position_offset; +} + +/* Returns non-zero if the character that comes right before the place + where text will be added to the result is a space. In this case, + the caller should supress adding another space. */ + +static int +result_previous_char_is_space (dm) + demangling_t dm; +{ + char *result = dyn_string_buf (result_string (dm)); + int pos = result_caret_pos (dm); + return pos > 0 && result[pos - 1] == ' '; +} + +/* Returns the start position of a fragment of the demangled result + that will be a substitution candidate. Should be called at the + start of productions that can add substitutions. */ + +static int +substitution_start (dm) + demangling_t dm; +{ + return result_caret_pos (dm); +} + +/* Adds the suffix of the current demangled result of DM starting at + START_POSITION as a potential substitution. If TEMPLATE_P is + non-zero, this potential substitution is a template-id. */ + +static status_t +substitution_add (dm, start_position, template_p) + demangling_t dm; + int start_position; + int template_p; +{ + dyn_string_t result = result_string (dm); + dyn_string_t substitution = dyn_string_new (0); + int i; + + if (substitution == NULL) + return STATUS_ALLOCATION_FAILED; + + /* Extract the substring of the current demangling result that + represents the subsitution candidate. */ + if (!dyn_string_substring (substitution, + result, start_position, result_caret_pos (dm))) + { + dyn_string_delete (substitution); + return STATUS_ALLOCATION_FAILED; + } + + /* If there's no room for the new entry, grow the array. 
*/ + if (dm->substitutions_allocated == dm->num_substitutions) + { + size_t new_array_size; + if (dm->substitutions_allocated > 0) + dm->substitutions_allocated *= 2; + else + dm->substitutions_allocated = 2; + new_array_size = + sizeof (struct substitution_def) * dm->substitutions_allocated; + + dm->substitutions = (struct substitution_def *) + realloc (dm->substitutions, new_array_size); + if (dm->substitutions == NULL) + /* Realloc failed. */ + { + dyn_string_delete (substitution); + return STATUS_ALLOCATION_FAILED; + } + } + + /* Add the substitution to the array. */ + i = dm->num_substitutions++; + dm->substitutions[i].text = substitution; + dm->substitutions[i].template_p = template_p; + +#ifdef CP_DEMANGLE_DEBUG + substitutions_print (dm, stderr); +#endif + + return STATUS_OK; +} + +/* Returns the Nth-most-recent substitution. Sets *TEMPLATE_P to + non-zero if the substitution is a template-id, zero otherwise. + N is numbered from zero. DM retains ownership of the returned + string. If N is negative, or equal to or greater than the current + number of substitution candidates, returns NULL. */ + +static dyn_string_t +substitution_get (dm, n, template_p) + demangling_t dm; + int n; + int *template_p; +{ + struct substitution_def *sub; + + /* Make sure N is in the valid range. */ + if (n < 0 || n >= dm->num_substitutions) + return NULL; + + sub = &(dm->substitutions[n]); + *template_p = sub->template_p; + return sub->text; +} + +#ifdef CP_DEMANGLE_DEBUG +/* Debugging routine to print the current substitutions to FP. */ + +static void +substitutions_print (dm, fp) + demangling_t dm; + FILE *fp; +{ + int seq_id; + int num = dm->num_substitutions; + + fprintf (fp, "SUBSTITUTIONS:\n"); + for (seq_id = -1; seq_id < num - 1; ++seq_id) + { + int template_p; + dyn_string_t text = substitution_get (dm, seq_id + 1, &template_p); + + if (seq_id == -1) + fprintf (fp, " S_ "); + else + fprintf (fp, " S%d_", seq_id); + fprintf (fp, " %c: %s\n", template_p ? 
'*' : ' ', dyn_string_buf (text)); + } +} + +#endif /* CP_DEMANGLE_DEBUG */ + +/* Creates a new template argument list. Returns NULL if allocation + fails. */ + +static template_arg_list_t +template_arg_list_new () +{ + template_arg_list_t new_list = + (template_arg_list_t) malloc (sizeof (struct template_arg_list_def)); + if (new_list == NULL) + return NULL; + /* Initialize the new list to have no arguments. */ + new_list->first_argument = NULL; + new_list->last_argument = NULL; + /* Return the new list. */ + return new_list; +} + +/* Deletes a template argument list and the template arguments it + contains. */ + +static void +template_arg_list_delete (list) + template_arg_list_t list; +{ + /* If there are any arguments on LIST, delete them. */ + if (list->first_argument != NULL) + string_list_delete (list->first_argument); + /* Delete LIST. */ + free (list); +} + +/* Adds ARG to the template argument list ARG_LIST. */ + +static void +template_arg_list_add_arg (arg_list, arg) + template_arg_list_t arg_list; + string_list_t arg; +{ + if (arg_list->first_argument == NULL) + /* If there were no arguments before, ARG is the first one. */ + arg_list->first_argument = arg; + else + /* Make ARG the last argument on the list. */ + arg_list->last_argument->next = arg; + /* Make ARG the last on the list. */ + arg_list->last_argument = arg; + arg->next = NULL; +} + +/* Returns the template arugment at position INDEX in template + argument list ARG_LIST. */ + +static string_list_t +template_arg_list_get_arg (arg_list, index) + template_arg_list_t arg_list; + int index; +{ + string_list_t arg = arg_list->first_argument; + /* Scan down the list of arguments to find the one at position + INDEX. */ + while (index--) + { + arg = arg->next; + if (arg == NULL) + /* Ran out of arguments before INDEX hit zero. That's an + error. */ + return NULL; + } + /* Return the argument at position INDEX. */ + return arg; +} + +/* Pushes ARG_LIST onto the top of the template argument list stack. 
*/ + +static void +push_template_arg_list (dm, arg_list) + demangling_t dm; + template_arg_list_t arg_list; +{ + arg_list->next = dm->template_arg_lists; + dm->template_arg_lists = arg_list; +#ifdef CP_DEMANGLE_DEBUG + fprintf (stderr, " ** pushing template arg list\n"); + template_arg_list_print (arg_list, stderr); +#endif +} + +/* Pops and deletes elements on the template argument list stack until + arg_list is the topmost element. If arg_list is NULL, all elements + are popped and deleted. */ + +static void +pop_to_template_arg_list (dm, arg_list) + demangling_t dm; + template_arg_list_t arg_list; +{ + while (dm->template_arg_lists != arg_list) + { + template_arg_list_t top = dm->template_arg_lists; + /* Disconnect the topmost element from the list. */ + dm->template_arg_lists = top->next; + /* Delete the popped element. */ + template_arg_list_delete (top); +#ifdef CP_DEMANGLE_DEBUG + fprintf (stderr, " ** removing template arg list\n"); +#endif + } +} + +#ifdef CP_DEMANGLE_DEBUG + +/* Prints the contents of ARG_LIST to FP. */ + +static void +template_arg_list_print (arg_list, fp) + template_arg_list_t arg_list; + FILE *fp; +{ + string_list_t arg; + int index = -1; + + fprintf (fp, "TEMPLATE ARGUMENT LIST:\n"); + for (arg = arg_list->first_argument; arg != NULL; arg = arg->next) + { + if (index == -1) + fprintf (fp, " T_ : "); + else + fprintf (fp, " T%d_ : ", index); + ++index; + fprintf (fp, "%s\n", dyn_string_buf ((dyn_string_t) arg)); + } +} + +#endif /* CP_DEMANGLE_DEBUG */ + +/* Returns the topmost element on the stack of template argument + lists. If there is no list of template arguments, returns NULL. */ + +static template_arg_list_t +current_template_arg_list (dm) + demangling_t dm; +{ + return dm->template_arg_lists; +} + +/* Allocates a demangling_t object for demangling mangled NAME. A new + result must be pushed before the returned object can be used. + Returns NULL if allocation fails. 
*/ + +static demangling_t +demangling_new (name, style) + const char *name; + int style; +{ + demangling_t dm; + dm = (demangling_t) malloc (sizeof (struct demangling_def)); + if (dm == NULL) + return NULL; + + dm->name = name; + dm->next = name; + dm->result = NULL; + dm->num_substitutions = 0; + dm->substitutions_allocated = 10; + dm->template_arg_lists = NULL; + dm->last_source_name = dyn_string_new (0); + if (dm->last_source_name == NULL) + return NULL; + dm->substitutions = (struct substitution_def *) + malloc (dm->substitutions_allocated * sizeof (struct substitution_def)); + if (dm->substitutions == NULL) + { + dyn_string_delete (dm->last_source_name); + return NULL; + } + dm->style = style; + dm->is_constructor = 0; + dm->is_destructor = 0; + + return dm; +} + +/* Deallocates a demangling_t object and all memory associated with + it. */ + +static void +demangling_delete (dm) + demangling_t dm; +{ + int i; + template_arg_list_t arg_list = dm->template_arg_lists; + + /* Delete the stack of template argument lists. */ + while (arg_list != NULL) + { + template_arg_list_t next = arg_list->next; + template_arg_list_delete (arg_list); + arg_list = next; + } + /* Delete the list of substitutions. */ + for (i = dm->num_substitutions; --i >= 0; ) + dyn_string_delete (dm->substitutions[i].text); + free (dm->substitutions); + /* Delete the demangled result. */ + string_list_delete (dm->result); + /* Delete the stored identifier name. */ + dyn_string_delete (dm->last_source_name); + /* Delete the context object itself. */ + free (dm); +} + +/* These functions demangle an alternative of the corresponding + production in the mangling spec. The first argument of each is a + demangling context structure for the current demangling + operation. Most emit demangled text directly to the topmost result + string on the result string stack in the demangling context + structure. 
*/ + +static status_t demangle_char + PARAMS ((demangling_t, int)); +static status_t demangle_mangled_name + PARAMS ((demangling_t)); +static status_t demangle_encoding + PARAMS ((demangling_t)); +static status_t demangle_name + PARAMS ((demangling_t, int *)); +static status_t demangle_nested_name + PARAMS ((demangling_t, int *)); +static status_t demangle_prefix_v3 + PARAMS ((demangling_t, int *)); +static status_t demangle_unqualified_name + PARAMS ((demangling_t, int *)); +static status_t demangle_source_name + PARAMS ((demangling_t)); +static status_t demangle_number + PARAMS ((demangling_t, int *, int, int)); +static status_t demangle_number_literally + PARAMS ((demangling_t, dyn_string_t, int, int)); +static status_t demangle_identifier + PARAMS ((demangling_t, int, dyn_string_t)); +static status_t demangle_operator_name + PARAMS ((demangling_t, int, int *)); +static status_t demangle_nv_offset + PARAMS ((demangling_t)); +static status_t demangle_v_offset + PARAMS ((demangling_t)); +static status_t demangle_call_offset + PARAMS ((demangling_t)); +static status_t demangle_special_name + PARAMS ((demangling_t)); +static status_t demangle_ctor_dtor_name + PARAMS ((demangling_t)); +static status_t demangle_type_ptr + PARAMS ((demangling_t, int *, int)); +static status_t demangle_type + PARAMS ((demangling_t)); +static status_t demangle_CV_qualifiers + PARAMS ((demangling_t, dyn_string_t)); +static status_t demangle_builtin_type + PARAMS ((demangling_t)); +static status_t demangle_function_type + PARAMS ((demangling_t, int *)); +static status_t demangle_bare_function_type + PARAMS ((demangling_t, int *)); +static status_t demangle_class_enum_type + PARAMS ((demangling_t, int *)); +static status_t demangle_array_type + PARAMS ((demangling_t, int *)); +static status_t demangle_template_param + PARAMS ((demangling_t)); +static status_t demangle_template_args_1 + PARAMS ((demangling_t, template_arg_list_t)); +static status_t demangle_template_args + PARAMS 
((demangling_t)); +static status_t demangle_literal + PARAMS ((demangling_t)); +static status_t demangle_template_arg + PARAMS ((demangling_t)); +static status_t demangle_expression_v3 + PARAMS ((demangling_t)); +static status_t demangle_scope_expression + PARAMS ((demangling_t)); +static status_t demangle_expr_primary + PARAMS ((demangling_t)); +static status_t demangle_substitution + PARAMS ((demangling_t, int *)); +static status_t demangle_local_name + PARAMS ((demangling_t)); +static status_t demangle_discriminator + PARAMS ((demangling_t, int)); +static status_t cp_demangle + PARAMS ((const char *, dyn_string_t, int)); +#ifdef IN_LIBGCC2 +static status_t cp_demangle_type + PARAMS ((const char*, dyn_string_t)); +#endif + +/* When passed to demangle_bare_function_type, indicates that the + function's return type is not encoded before its parameter types. */ +#define BFT_NO_RETURN_TYPE NULL + +/* Check that the next character is C. If so, consume it. If not, + return an error. */ + +static status_t +demangle_char (dm, c) + demangling_t dm; + int c; +{ + static char *error_message = NULL; + + if (peek_char (dm) == c) + { + advance_char (dm); + return STATUS_OK; + } + else + { + vg_assert (0); + /* + if (error_message == NULL) + error_message = strdup ("Expected ?"); + error_message[9] = c; + return error_message; + */ + } +} + +/* Demangles and emits a . + + ::= _Z */ + +static status_t +demangle_mangled_name (dm) + demangling_t dm; +{ + DEMANGLE_TRACE ("mangled-name", dm); + RETURN_IF_ERROR (demangle_char (dm, '_')); + RETURN_IF_ERROR (demangle_char (dm, 'Z')); + RETURN_IF_ERROR (demangle_encoding (dm)); + return STATUS_OK; +} + +/* Demangles and emits an . + + ::= + ::= + ::= */ + +static status_t +demangle_encoding (dm) + demangling_t dm; +{ + int encode_return_type; + int start_position; + template_arg_list_t old_arg_list = current_template_arg_list (dm); + char peek = peek_char (dm); + + DEMANGLE_TRACE ("encoding", dm); + + /* Remember where the name starts. 
If it turns out to be a template + function, we'll have to insert the return type here. */ + start_position = result_caret_pos (dm); + + if (peek == 'G' || peek == 'T') + RETURN_IF_ERROR (demangle_special_name (dm)); + else + { + /* Now demangle the name. */ + RETURN_IF_ERROR (demangle_name (dm, &encode_return_type)); + + /* If there's anything left, the name was a function name, with + maybe its return type, and its parameter types, following. */ + if (!end_of_name_p (dm) + && peek_char (dm) != 'E') + { + if (encode_return_type) + /* Template functions have their return type encoded. The + return type should be inserted at start_position. */ + RETURN_IF_ERROR + (demangle_bare_function_type (dm, &start_position)); + else + /* Non-template functions don't have their return type + encoded. */ + RETURN_IF_ERROR + (demangle_bare_function_type (dm, BFT_NO_RETURN_TYPE)); + } + } + + /* Pop off template argument lists that were built during the + mangling of this name, to restore the old template context. */ + pop_to_template_arg_list (dm, old_arg_list); + + return STATUS_OK; +} + +/* Demangles and emits a . + + ::= + ::= + ::= + ::= + + ::= + ::= St # ::std:: + + + ::= + ::= */ + +static status_t +demangle_name (dm, encode_return_type) + demangling_t dm; + int *encode_return_type; +{ + int start = substitution_start (dm); + char peek = peek_char (dm); + int is_std_substitution = 0; + + /* Generally, the return type is encoded if the function is a + template-id, and suppressed otherwise. There are a few cases, + though, in which the return type is not encoded even for a + templated function. In these cases, this flag is set. */ + int suppress_return_type = 0; + + DEMANGLE_TRACE ("name", dm); + + switch (peek) + { + case 'N': + /* This is a . 
*/ + RETURN_IF_ERROR (demangle_nested_name (dm, encode_return_type)); + break; + + case 'Z': + RETURN_IF_ERROR (demangle_local_name (dm)); + *encode_return_type = 0; + break; + + case 'S': + /* The `St' substitution allows a name nested in std:: to appear + without being enclosed in a nested name. */ + if (peek_char_next (dm) == 't') + { + (void) next_char (dm); + (void) next_char (dm); + RETURN_IF_ERROR (result_add (dm, "std::")); + RETURN_IF_ERROR + (demangle_unqualified_name (dm, &suppress_return_type)); + is_std_substitution = 1; + } + else + RETURN_IF_ERROR (demangle_substitution (dm, encode_return_type)); + /* Check if a template argument list immediately follows. + If so, then we just demangled an . */ + if (peek_char (dm) == 'I') + { + /* A template name of the form std:: is a + substitution candidate. */ + if (is_std_substitution) + RETURN_IF_ERROR (substitution_add (dm, start, 0)); + /* Demangle the here. */ + RETURN_IF_ERROR (demangle_template_args (dm)); + *encode_return_type = !suppress_return_type; + } + else + *encode_return_type = 0; + + break; + + default: + /* This is an or . */ + RETURN_IF_ERROR (demangle_unqualified_name (dm, &suppress_return_type)); + + /* If the is followed by template args, this + is an . */ + if (peek_char (dm) == 'I') + { + /* Add a substitution for the unqualified template name. */ + RETURN_IF_ERROR (substitution_add (dm, start, 0)); + + RETURN_IF_ERROR (demangle_template_args (dm)); + *encode_return_type = !suppress_return_type; + } + else + *encode_return_type = 0; + + break; + } + + return STATUS_OK; +} + +/* Demangles and emits a . + + ::= N [] E */ + +static status_t +demangle_nested_name (dm, encode_return_type) + demangling_t dm; + int *encode_return_type; +{ + char peek; + + DEMANGLE_TRACE ("nested-name", dm); + + RETURN_IF_ERROR (demangle_char (dm, 'N')); + + peek = peek_char (dm); + if (peek == 'r' || peek == 'V' || peek == 'K') + { + dyn_string_t cv_qualifiers; + status_t status; + + /* Snarf up CV qualifiers. 
*/ + cv_qualifiers = dyn_string_new (24); + if (cv_qualifiers == NULL) + return STATUS_ALLOCATION_FAILED; + demangle_CV_qualifiers (dm, cv_qualifiers); + + /* Emit them, preceded by a space. */ + status = result_add_char (dm, ' '); + if (STATUS_NO_ERROR (status)) + status = result_add_string (dm, cv_qualifiers); + /* The CV qualifiers that occur in a will be + qualifiers for member functions. These are placed at the end + of the function. Therefore, shift the caret to the left by + the length of the qualifiers, so other text is inserted + before them and they stay at the end. */ + result_shift_caret (dm, -dyn_string_length (cv_qualifiers) - 1); + /* Clean up. */ + dyn_string_delete (cv_qualifiers); + RETURN_IF_ERROR (status); + } + + RETURN_IF_ERROR (demangle_prefix_v3 (dm, encode_return_type)); + /* No need to demangle the final ; demangle_prefix + will handle it. */ + RETURN_IF_ERROR (demangle_char (dm, 'E')); + + return STATUS_OK; +} + +/* Demangles and emits a . + + ::= + ::= + ::= # empty + ::= + + ::= + ::= */ + +static status_t +demangle_prefix_v3 (dm, encode_return_type) + demangling_t dm; + int *encode_return_type; +{ + int start = substitution_start (dm); + int nested = 0; + + /* ENCODE_RETURN_TYPE is updated as we decend the nesting chain. + After , it is set to non-zero; after everything + else it is set to zero. */ + + /* Generally, the return type is encoded if the function is a + template-id, and suppressed otherwise. There are a few cases, + though, in which the return type is not encoded even for a + templated function. In these cases, this flag is set. */ + int suppress_return_type = 0; + + DEMANGLE_TRACE ("prefix", dm); + + while (1) + { + char peek; + + if (end_of_name_p (dm)) + return "Unexpected end of name in ."; + + peek = peek_char (dm); + + /* We'll initialize suppress_return_type to false, and set it to true + if we end up demangling a constructor name. 
However, make + sure we're not actually about to demangle template arguments + -- if so, this is the following a + , so we'll want the previous flag value + around. */ + if (peek != 'I') + suppress_return_type = 0; + + if (IS_DIGIT ((unsigned char) peek) + || (peek >= 'a' && peek <= 'z') + || peek == 'C' || peek == 'D' + || peek == 'S') + { + /* We have another level of scope qualification. */ + if (nested) + RETURN_IF_ERROR (result_add (dm, NAMESPACE_SEPARATOR)); + else + nested = 1; + + if (peek == 'S') + /* The substitution determines whether this is a + template-id. */ + RETURN_IF_ERROR (demangle_substitution (dm, encode_return_type)); + else + { + /* It's just a name. */ + RETURN_IF_ERROR + (demangle_unqualified_name (dm, &suppress_return_type)); + *encode_return_type = 0; + } + } + else if (peek == 'Z') + RETURN_IF_ERROR (demangle_local_name (dm)); + else if (peek == 'I') + { + RETURN_IF_ERROR (demangle_template_args (dm)); + + /* Now we want to indicate to the caller that we've + demangled template arguments, thus the prefix was a + . That's so that the caller knows to + demangle the function's return type, if this turns out to + be a function name. But, if it's a member template + constructor or a templated conversion operator, report it + as untemplated. Those never get encoded return types. */ + *encode_return_type = !suppress_return_type; + } + else if (peek == 'E') + /* All done. */ + return STATUS_OK; + else + return "Unexpected character in ."; + + if (peek != 'S' + && peek_char (dm) != 'E') + /* Add a new substitution for the prefix thus far. */ + RETURN_IF_ERROR (substitution_add (dm, start, *encode_return_type)); + } +} + +/* Demangles and emits an . If this + is for a special function type that should never + have its return type encoded (particularly, a constructor or + conversion operator), *SUPPRESS_RETURN_TYPE is set to 1; otherwise, + it is set to zero. 
+ + ::= + ::= + ::= */ + +static status_t +demangle_unqualified_name (dm, suppress_return_type) + demangling_t dm; + int *suppress_return_type; +{ + char peek = peek_char (dm); + + DEMANGLE_TRACE ("unqualified-name", dm); + + /* By default, don't force suppression of the return type (though + non-template functions still don't get a return type encoded). */ + *suppress_return_type = 0; + + if (IS_DIGIT ((unsigned char) peek)) + RETURN_IF_ERROR (demangle_source_name (dm)); + else if (peek >= 'a' && peek <= 'z') + { + int num_args; + + /* Conversion operators never have a return type encoded. */ + if (peek == 'c' && peek_char_next (dm) == 'v') + *suppress_return_type = 1; + + RETURN_IF_ERROR (demangle_operator_name (dm, 0, &num_args)); + } + else if (peek == 'C' || peek == 'D') + { + /* Constructors never have a return type encoded. */ + if (peek == 'C') + *suppress_return_type = 1; + + RETURN_IF_ERROR (demangle_ctor_dtor_name (dm)); + } + else + return "Unexpected character in ."; + + return STATUS_OK; +} + +/* Demangles and emits . + + ::= */ + +static status_t +demangle_source_name (dm) + demangling_t dm; +{ + int length; + + DEMANGLE_TRACE ("source-name", dm); + + /* Decode the length of the identifier. */ + RETURN_IF_ERROR (demangle_number (dm, &length, 10, 0)); + if (length == 0) + return "Zero length in ."; + + /* Now the identifier itself. It's placed into last_source_name, + where it can be used to build a constructor or destructor name. */ + RETURN_IF_ERROR (demangle_identifier (dm, length, + dm->last_source_name)); + + /* Emit it. */ + RETURN_IF_ERROR (result_add_string (dm, dm->last_source_name)); + + return STATUS_OK; +} + +/* Demangles a number, either a or a at the + current position, consuming all consecutive digit characters. Sets + *VALUE to the resulting numberand returns STATUS_OK. The number is + interpreted as BASE, which must be either 10 or 36. If IS_SIGNED + is non-zero, negative numbers -- prefixed with `n' -- are accepted. 
+ + ::= [n] + + ::= */ + +static status_t +demangle_number (dm, value, base, is_signed) + demangling_t dm; + int *value; + int base; + int is_signed; +{ + dyn_string_t number = dyn_string_new (10); + + DEMANGLE_TRACE ("number", dm); + + if (number == NULL) + return STATUS_ALLOCATION_FAILED; + + demangle_number_literally (dm, number, base, is_signed); + /* + *value = strtol (dyn_string_buf (number), NULL, base); + */ + /* vg_assert( base == 10 ); */ + if ( base != 10 ) { + dyn_string_delete(number); + return STATUS_UNIMPLEMENTED; + } + + *value = VG_(atoll) (dyn_string_buf (number)); + dyn_string_delete (number); + + return STATUS_OK; +} + +/* Demangles a number at the current position. The digits (and minus + sign, if present) that make up the number are appended to STR. + Only base-BASE digits are accepted; BASE must be either 10 or 36. + If IS_SIGNED, negative numbers -- prefixed with `n' -- are + accepted. Does not consume a trailing underscore or other + terminating character. */ + +static status_t +demangle_number_literally (dm, str, base, is_signed) + demangling_t dm; + dyn_string_t str; + int base; + int is_signed; +{ + DEMANGLE_TRACE ("number*", dm); + + if (base != 10 && base != 36) + return STATUS_INTERNAL_ERROR; + + /* An `n' denotes a negative number. */ + if (is_signed && peek_char (dm) == 'n') + { + /* Skip past the n. */ + advance_char (dm); + /* The normal way to write a negative number is with a minus + sign. */ + if (!dyn_string_append_char (str, '-')) + return STATUS_ALLOCATION_FAILED; + } + + /* Loop until we hit a non-digit. */ + while (1) + { + char peek = peek_char (dm); + if (IS_DIGIT ((unsigned char) peek) + || (base == 36 && peek >= 'A' && peek <= 'Z')) + { + /* Accumulate digits. */ + if (!dyn_string_append_char (str, next_char (dm))) + return STATUS_ALLOCATION_FAILED; + } + else + /* Not a digit? All done. 
*/ + break; + } + + return STATUS_OK; +} + +/* Demangles an identifier at the current position of LENGTH + characters and places it in IDENTIFIER. */ + +static status_t +demangle_identifier (dm, length, identifier) + demangling_t dm; + int length; + dyn_string_t identifier; +{ + DEMANGLE_TRACE ("identifier", dm); + + dyn_string_clear (identifier); + if (!dyn_string_resize (identifier, length)) + return STATUS_ALLOCATION_FAILED; + + while (length-- > 0) + { + if (end_of_name_p (dm)) + return "Unexpected end of name in ."; + if (!dyn_string_append_char (identifier, next_char (dm))) + return STATUS_ALLOCATION_FAILED; + } + + /* GCC encodes anonymous namespaces using a `_GLOBAL_[_.$]N.' + followed by the source file name and some random characters. + Unless we're in strict mode, decipher these names appropriately. */ + if (!flag_strict) + { + char *name = dyn_string_buf (identifier); + int prefix_length = VG_(strlen) (ANONYMOUS_NAMESPACE_PREFIX); + + /* Compare the first, fixed part. */ + if (VG_(strncmp) (name, ANONYMOUS_NAMESPACE_PREFIX, prefix_length) == 0) + { + name += prefix_length; + /* The next character might be a period, an underscore, or + dollar sign, depending on the target architecture's + assembler's capabilities. After that comes an `N'. */ + if ((*name == '.' || *name == '_' || *name == '$') + && *(name + 1) == 'N') + /* This looks like the anonymous namespace identifier. + Replace it with something comprehensible. */ + dyn_string_copy_cstr (identifier, "(anonymous namespace)"); + } + } + + return STATUS_OK; +} + +/* Demangles and emits an . If SHORT_NAME is non-zero, + the short form is emitted; otherwise the full source form + (`operator +' etc.) is emitted. *NUM_ARGS is set to the number of + operands that the operator takes. 
+ + + ::= nw # new + ::= na # new[] + ::= dl # delete + ::= da # delete[] + ::= ps # + (unary) + ::= ng # - (unary) + ::= ad # & (unary) + ::= de # * (unary) + ::= co # ~ + ::= pl # + + ::= mi # - + ::= ml # * + ::= dv # / + ::= rm # % + ::= an # & + ::= or # | + ::= eo # ^ + ::= aS # = + ::= pL # += + ::= mI # -= + ::= mL # *= + ::= dV # /= + ::= rM # %= + ::= aN # &= + ::= oR # |= + ::= eO # ^= + ::= ls # << + ::= rs # >> + ::= lS # <<= + ::= rS # >>= + ::= eq # == + ::= ne # != + ::= lt # < + ::= gt # > + ::= le # <= + ::= ge # >= + ::= nt # ! + ::= aa # && + ::= oo # || + ::= pp # ++ + ::= mm # -- + ::= cm # , + ::= pm # ->* + ::= pt # -> + ::= cl # () + ::= ix # [] + ::= qu # ? + ::= sz # sizeof + ::= cv # cast + ::= v [0-9] # vendor extended operator */ + +static status_t +demangle_operator_name (dm, short_name, num_args) + demangling_t dm; + int short_name; + int *num_args; +{ + struct operator_code + { + /* The mangled code for this operator. */ + const char *const code; + /* The source name of this operator. */ + const char *const name; + /* The number of arguments this operator takes. */ + const int num_args; + }; + + static const struct operator_code operators[] = + { + { "aN", "&=" , 2 }, + { "aS", "=" , 2 }, + { "aa", "&&" , 2 }, + { "ad", "&" , 1 }, + { "an", "&" , 2 }, + { "cl", "()" , 0 }, + { "cm", "," , 2 }, + { "co", "~" , 1 }, + { "dV", "/=" , 2 }, + { "da", " delete[]", 1 }, + { "de", "*" , 1 }, + { "dl", " delete" , 1 }, + { "dv", "/" , 2 }, + { "eO", "^=" , 2 }, + { "eo", "^" , 2 }, + { "eq", "==" , 2 }, + { "ge", ">=" , 2 }, + { "gt", ">" , 2 }, + { "ix", "[]" , 2 }, + { "lS", "<<=" , 2 }, + { "le", "<=" , 2 }, + { "ls", "<<" , 2 }, + { "lt", "<" , 2 }, + { "mI", "-=" , 2 }, + { "mL", "*=" , 2 }, + { "mi", "-" , 2 }, + { "ml", "*" , 2 }, + { "mm", "--" , 1 }, + { "na", " new[]" , 1 }, + { "ne", "!=" , 2 }, + { "ng", "-" , 1 }, + { "nt", "!" 
, 1 }, + { "nw", " new" , 1 }, + { "oR", "|=" , 2 }, + { "oo", "||" , 2 }, + { "or", "|" , 2 }, + { "pL", "+=" , 2 }, + { "pl", "+" , 2 }, + { "pm", "->*" , 2 }, + { "pp", "++" , 1 }, + { "ps", "+" , 1 }, + { "pt", "->" , 2 }, + { "qu", "?" , 3 }, + { "rM", "%=" , 2 }, + { "rS", ">>=" , 2 }, + { "rm", "%" , 2 }, + { "rs", ">>" , 2 }, + { "sz", " sizeof" , 1 } + }; + + const int num_operators = + sizeof (operators) / sizeof (struct operator_code); + + int c0 = next_char (dm); + int c1 = next_char (dm); + const struct operator_code* p1 = operators; + const struct operator_code* p2 = operators + num_operators; + + DEMANGLE_TRACE ("operator-name", dm); + + /* Is this a vendor-extended operator? */ + if (c0 == 'v' && IS_DIGIT (c1)) + { + RETURN_IF_ERROR (result_add (dm, "operator ")); + RETURN_IF_ERROR (demangle_source_name (dm)); + *num_args = 0; + return STATUS_OK; + } + + /* Is this a conversion operator? */ + if (c0 == 'c' && c1 == 'v') + { + RETURN_IF_ERROR (result_add (dm, "operator ")); + /* Demangle the converted-to type. */ + RETURN_IF_ERROR (demangle_type (dm)); + *num_args = 0; + return STATUS_OK; + } + + /* Perform a binary search for the operator code. */ + while (1) + { + const struct operator_code* p = p1 + (p2 - p1) / 2; + char match0 = p->code[0]; + char match1 = p->code[1]; + + if (c0 == match0 && c1 == match1) + /* Found it. */ + { + if (!short_name) + RETURN_IF_ERROR (result_add (dm, "operator")); + RETURN_IF_ERROR (result_add (dm, p->name)); + *num_args = p->num_args; + + return STATUS_OK; + } + + if (p == p1) + /* Couldn't find it. */ + return "Unknown code in ."; + + /* Try again. */ + if (c0 < match0 || (c0 == match0 && c1 < match1)) + p2 = p; + else + p1 = p; + } +} + +/* Demangles and omits an . + + ::= # non-virtual base override */ + +static status_t +demangle_nv_offset (dm) + demangling_t dm; +{ + dyn_string_t number; + status_t status = STATUS_OK; + + DEMANGLE_TRACE ("h-offset", dm); + + /* Demangle the offset. 
*/ + number = dyn_string_new (4); + if (number == NULL) + return STATUS_ALLOCATION_FAILED; + demangle_number_literally (dm, number, 10, 1); + + /* Don't display the offset unless in verbose mode. */ + if (flag_verbose) + { + status = result_add (dm, " [nv:"); + if (STATUS_NO_ERROR (status)) + status = result_add_string (dm, number); + if (STATUS_NO_ERROR (status)) + status = result_add_char (dm, ']'); + } + + /* Clean up. */ + dyn_string_delete (number); + RETURN_IF_ERROR (status); + return STATUS_OK; +} + +/* Demangles and emits a . + + ::= _ + # virtual base override, with vcall offset */ + +static status_t +demangle_v_offset (dm) + demangling_t dm; +{ + dyn_string_t number; + status_t status = STATUS_OK; + + DEMANGLE_TRACE ("v-offset", dm); + + /* Demangle the offset. */ + number = dyn_string_new (4); + if (number == NULL) + return STATUS_ALLOCATION_FAILED; + demangle_number_literally (dm, number, 10, 1); + + /* Don't display the offset unless in verbose mode. */ + if (flag_verbose) + { + status = result_add (dm, " [v:"); + if (STATUS_NO_ERROR (status)) + status = result_add_string (dm, number); + if (STATUS_NO_ERROR (status)) + result_add_char (dm, ','); + } + dyn_string_delete (number); + RETURN_IF_ERROR (status); + + /* Demangle the separator. */ + RETURN_IF_ERROR (demangle_char (dm, '_')); + + /* Demangle the vcall offset. */ + number = dyn_string_new (4); + if (number == NULL) + return STATUS_ALLOCATION_FAILED; + demangle_number_literally (dm, number, 10, 1); + + /* Don't display the vcall offset unless in verbose mode. */ + if (flag_verbose) + { + status = result_add_string (dm, number); + if (STATUS_NO_ERROR (status)) + status = result_add_char (dm, ']'); + } + dyn_string_delete (number); + RETURN_IF_ERROR (status); + + return STATUS_OK; +} + +/* Demangles and emits a . 
+ + ::= h _ + ::= v _ */ + +static status_t +demangle_call_offset (dm) + demangling_t dm; +{ + DEMANGLE_TRACE ("call-offset", dm); + + switch (peek_char (dm)) + { + case 'h': + advance_char (dm); + /* Demangle the offset. */ + RETURN_IF_ERROR (demangle_nv_offset (dm)); + /* Demangle the separator. */ + RETURN_IF_ERROR (demangle_char (dm, '_')); + break; + + case 'v': + advance_char (dm); + /* Demangle the offset. */ + RETURN_IF_ERROR (demangle_v_offset (dm)); + /* Demangle the separator. */ + RETURN_IF_ERROR (demangle_char (dm, '_')); + break; + + default: + return "Unrecognized ."; + } + + return STATUS_OK; +} + +/* Demangles and emits a . + + ::= GV # Guard variable + ::= TV # virtual table + ::= TT # VTT + ::= TI # typeinfo structure + ::= TS # typeinfo name + + Other relevant productions include thunks: + + ::= T + # base is the nominal target function of thunk + + ::= Tc + # base is the nominal target function of thunk + # first call-offset is 'this' adjustment + # second call-offset is result adjustment + + where + + ::= h _ + ::= v _ + + Also demangles the special g++ manglings, + + ::= TC _ + # construction vtable + ::= TF # typeinfo function (old ABI only) + ::= TJ # java Class structure */ + +static status_t +demangle_special_name (dm) + demangling_t dm; +{ + dyn_string_t number; + int unused; + char peek = peek_char (dm); + + DEMANGLE_TRACE ("special-name", dm); + + if (peek == 'G') + { + /* Consume the G. */ + advance_char (dm); + switch (peek_char (dm)) + { + case 'V': + /* A guard variable name. */ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "guard variable for ")); + RETURN_IF_ERROR (demangle_name (dm, &unused)); + break; + + case 'R': + /* A reference temporary. 
*/ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "reference temporary for ")); + RETURN_IF_ERROR (demangle_name (dm, &unused)); + break; + + default: + return "Unrecognized ."; + } + } + else if (peek == 'T') + { + status_t status = STATUS_OK; + + /* Other C++ implementation miscellania. Consume the T. */ + advance_char (dm); + + switch (peek_char (dm)) + { + case 'V': + /* Virtual table. */ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "vtable for ")); + RETURN_IF_ERROR (demangle_type (dm)); + break; + + case 'T': + /* VTT structure. */ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "VTT for ")); + RETURN_IF_ERROR (demangle_type (dm)); + break; + + case 'I': + /* Typeinfo structure. */ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "typeinfo for ")); + RETURN_IF_ERROR (demangle_type (dm)); + break; + + case 'F': + /* Typeinfo function. Used only in old ABI with new mangling. */ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "typeinfo fn for ")); + RETURN_IF_ERROR (demangle_type (dm)); + break; + + case 'S': + /* Character string containing type name, used in typeinfo. */ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "typeinfo name for ")); + RETURN_IF_ERROR (demangle_type (dm)); + break; + + case 'J': + /* The java Class variable corresponding to a C++ class. */ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "java Class for ")); + RETURN_IF_ERROR (demangle_type (dm)); + break; + + case 'h': + /* Non-virtual thunk. */ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "non-virtual thunk")); + RETURN_IF_ERROR (demangle_nv_offset (dm)); + /* Demangle the separator. */ + RETURN_IF_ERROR (demangle_char (dm, '_')); + /* Demangle and emit the target name and function type. */ + RETURN_IF_ERROR (result_add (dm, " to ")); + RETURN_IF_ERROR (demangle_encoding (dm)); + break; + + case 'v': + /* Virtual thunk. 
*/ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "virtual thunk")); + RETURN_IF_ERROR (demangle_v_offset (dm)); + /* Demangle the separator. */ + RETURN_IF_ERROR (demangle_char (dm, '_')); + /* Demangle and emit the target function. */ + RETURN_IF_ERROR (result_add (dm, " to ")); + RETURN_IF_ERROR (demangle_encoding (dm)); + break; + + case 'c': + /* Covariant return thunk. */ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "covariant return thunk")); + RETURN_IF_ERROR (demangle_call_offset (dm)); + RETURN_IF_ERROR (demangle_call_offset (dm)); + /* Demangle and emit the target function. */ + RETURN_IF_ERROR (result_add (dm, " to ")); + RETURN_IF_ERROR (demangle_encoding (dm)); + break; + + case 'C': + /* TC is a special g++ mangling for a construction vtable. */ + if (!flag_strict) + { + dyn_string_t derived_type; + + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "construction vtable for ")); + + /* Demangle the derived type off to the side. */ + RETURN_IF_ERROR (result_push (dm)); + RETURN_IF_ERROR (demangle_type (dm)); + derived_type = (dyn_string_t) result_pop (dm); + + /* Demangle the offset. */ + number = dyn_string_new (4); + if (number == NULL) + { + dyn_string_delete (derived_type); + return STATUS_ALLOCATION_FAILED; + } + demangle_number_literally (dm, number, 10, 1); + /* Demangle the underscore separator. */ + status = demangle_char (dm, '_'); + + /* Demangle the base type. */ + if (STATUS_NO_ERROR (status)) + status = demangle_type (dm); + + /* Emit the derived type. */ + if (STATUS_NO_ERROR (status)) + status = result_add (dm, "-in-"); + if (STATUS_NO_ERROR (status)) + status = result_add_string (dm, derived_type); + dyn_string_delete (derived_type); + + /* Don't display the offset unless in verbose mode. 
*/ + if (flag_verbose) + { + status = result_add_char (dm, ' '); + if (STATUS_NO_ERROR (status)) + result_add_string (dm, number); + } + dyn_string_delete (number); + RETURN_IF_ERROR (status); + break; + } + /* If flag_strict, fall through. */ + + default: + return "Unrecognized ."; + } + } + else + return STATUS_ERROR; + + return STATUS_OK; +} + +/* Demangles and emits a . + + + ::= C1 # complete object (in-charge) ctor + ::= C2 # base object (not-in-charge) ctor + ::= C3 # complete object (in-charge) allocating ctor + ::= D0 # deleting (in-charge) dtor + ::= D1 # complete object (in-charge) dtor + ::= D2 # base object (not-in-charge) dtor */ + +static status_t +demangle_ctor_dtor_name (dm) + demangling_t dm; +{ + static const char *const ctor_flavors[] = + { + "in-charge", + "not-in-charge", + "allocating" + }; + static const char *const dtor_flavors[] = + { + "in-charge deleting", + "in-charge", + "not-in-charge" + }; + + int flavor; + char peek = peek_char (dm); + + DEMANGLE_TRACE ("ctor-dtor-name", dm); + + if (peek == 'C') + { + /* A constructor name. Consume the C. */ + advance_char (dm); + flavor = next_char (dm); + if (flavor < '1' || flavor > '3') + return "Unrecognized constructor."; + RETURN_IF_ERROR (result_add_string (dm, dm->last_source_name)); + switch (flavor) + { + case '1': dm->is_constructor = gnu_v3_complete_object_ctor; + break; + case '2': dm->is_constructor = gnu_v3_base_object_ctor; + break; + case '3': dm->is_constructor = gnu_v3_complete_object_allocating_ctor; + break; + } + /* Print the flavor of the constructor if in verbose mode. */ + if (flag_verbose) + { + RETURN_IF_ERROR (result_add (dm, "[")); + RETURN_IF_ERROR (result_add (dm, ctor_flavors[flavor - '1'])); + RETURN_IF_ERROR (result_add_char (dm, ']')); + } + } + else if (peek == 'D') + { + /* A destructor name. Consume the D. 
*/ + advance_char (dm); + flavor = next_char (dm); + if (flavor < '0' || flavor > '2') + return "Unrecognized destructor."; + RETURN_IF_ERROR (result_add_char (dm, '~')); + RETURN_IF_ERROR (result_add_string (dm, dm->last_source_name)); + switch (flavor) + { + case '0': dm->is_destructor = gnu_v3_deleting_dtor; + break; + case '1': dm->is_destructor = gnu_v3_complete_object_dtor; + break; + case '2': dm->is_destructor = gnu_v3_base_object_dtor; + break; + } + /* Print the flavor of the destructor if in verbose mode. */ + if (flag_verbose) + { + RETURN_IF_ERROR (result_add (dm, " [")); + RETURN_IF_ERROR (result_add (dm, dtor_flavors[flavor - '0'])); + RETURN_IF_ERROR (result_add_char (dm, ']')); + } + } + else + return STATUS_ERROR; + + return STATUS_OK; +} + +/* Handle pointer, reference, and pointer-to-member cases for + demangle_type. All consecutive `P's, `R's, and 'M's are joined to + build a pointer/reference type. We snarf all these, plus the + following , all at once since we need to know whether we have + a pointer to data or pointer to function to construct the right + output syntax. C++'s pointer syntax is hairy. + + This function adds substitution candidates for every nested + pointer/reference type it processes, including the outermost, final + type, assuming the substitution starts at SUBSTITUTION_START in the + demangling result. For example, if this function demangles + `PP3Foo', it will add a substitution for `Foo', `Foo*', and + `Foo**', in that order. + + *INSERT_POS is a quantity used internally, when this function calls + itself recursively, to figure out where to insert pointer + punctuation on the way up. On entry to this function, INSERT_POS + should point to a temporary value, but that value need not be + initialized. 
+ + ::= P + ::= R + ::= + + ::= M */ + +static status_t +demangle_type_ptr (dm, insert_pos, substitution_start) + demangling_t dm; + int *insert_pos; + int substitution_start; +{ + status_t status; + int is_substitution_candidate = 1; + + DEMANGLE_TRACE ("type*", dm); + + /* Scan forward, collecting pointers and references into symbols, + until we hit something else. Then emit the type. */ + switch (peek_char (dm)) + { + case 'P': + /* A pointer. Snarf the `P'. */ + advance_char (dm); + /* Demangle the underlying type. */ + RETURN_IF_ERROR (demangle_type_ptr (dm, insert_pos, + substitution_start)); + /* Insert an asterisk where we're told to; it doesn't + necessarily go at the end. If we're doing Java style output, + there is no pointer symbol. */ + if (dm->style != DMGL_JAVA) + RETURN_IF_ERROR (result_insert_char (dm, *insert_pos, '*')); + /* The next (outermost) pointer or reference character should go + after this one. */ + ++(*insert_pos); + break; + + case 'R': + /* A reference. Snarf the `R'. */ + advance_char (dm); + /* Demangle the underlying type. */ + RETURN_IF_ERROR (demangle_type_ptr (dm, insert_pos, + substitution_start)); + /* Insert an ampersand where we're told to; it doesn't + necessarily go at the end. */ + RETURN_IF_ERROR (result_insert_char (dm, *insert_pos, '&')); + /* The next (outermost) pointer or reference character should go + after this one. */ + ++(*insert_pos); + break; + + case 'M': + { + /* A pointer-to-member. */ + dyn_string_t class_type; + + /* Eat the 'M'. */ + advance_char (dm); + + /* Capture the type of which this is a pointer-to-member. */ + RETURN_IF_ERROR (result_push (dm)); + RETURN_IF_ERROR (demangle_type (dm)); + class_type = (dyn_string_t) result_pop (dm); + + if (peek_char (dm) == 'F') + /* A pointer-to-member function. We want output along the + lines of `void (C::*) (int, int)'. 
Demangle the function + type, which would in this case give `void () (int, int)' + and set *insert_pos to the spot between the first + parentheses. */ + status = demangle_type_ptr (dm, insert_pos, substitution_start); + else if (peek_char (dm) == 'A') + /* A pointer-to-member array variable. We want output that + looks like `int (Klass::*) [10]'. Demangle the array type + as `int () [10]', and set *insert_pos to the spot between + the parentheses. */ + status = demangle_array_type (dm, insert_pos); + else + { + /* A pointer-to-member variable. Demangle the type of the + pointed-to member. */ + status = demangle_type (dm); + /* Make it pretty. */ + if (STATUS_NO_ERROR (status) + && !result_previous_char_is_space (dm)) + status = result_add_char (dm, ' '); + /* The pointer-to-member notation (e.g. `C::*') follows the + member's type. */ + *insert_pos = result_caret_pos (dm); + } + + /* Build the pointer-to-member notation. */ + if (STATUS_NO_ERROR (status)) + status = result_insert (dm, *insert_pos, "::*"); + if (STATUS_NO_ERROR (status)) + status = result_insert_string (dm, *insert_pos, class_type); + /* There may be additional levels of (pointer or reference) + indirection in this type. If so, the `*' and `&' should be + added after the pointer-to-member notation (e.g. `C::*&' for + a reference to a pointer-to-member of class C). */ + *insert_pos += dyn_string_length (class_type) + 3; + + /* Clean up. */ + dyn_string_delete (class_type); + + RETURN_IF_ERROR (status); + } + break; + + case 'F': + /* Ooh, tricky, a pointer-to-function. When we demangle the + function type, the return type should go at the very + beginning. */ + *insert_pos = result_caret_pos (dm); + /* The parentheses indicate this is a function pointer or + reference type. */ + RETURN_IF_ERROR (result_add (dm, "()")); + /* Now demangle the function type. The return type will be + inserted before the `()', and the argument list will go after + it. 
*/ + RETURN_IF_ERROR (demangle_function_type (dm, insert_pos)); + /* We should now have something along the lines of + `void () (int, int)'. The pointer or reference characters + have to inside the first set of parentheses. *insert_pos has + already been updated to point past the end of the return + type. Move it one character over so it points inside the + `()'. */ + ++(*insert_pos); + break; + + case 'A': + /* An array pointer or reference. demangle_array_type will figure + out where the asterisks and ampersands go. */ + RETURN_IF_ERROR (demangle_array_type (dm, insert_pos)); + break; + + default: + /* No more pointer or reference tokens; this is therefore a + pointer to data. Finish up by demangling the underlying + type. */ + RETURN_IF_ERROR (demangle_type (dm)); + /* The pointer or reference characters follow the underlying + type, as in `int*&'. */ + *insert_pos = result_caret_pos (dm); + /* Because of the production ::= , + demangle_type will already have added the underlying type as + a substitution candidate. Don't do it again. */ + is_substitution_candidate = 0; + break; + } + + if (is_substitution_candidate) + RETURN_IF_ERROR (substitution_add (dm, substitution_start, 0)); + + return STATUS_OK; +} + +/* Demangles and emits a . + + ::= + ::= + ::= + ::= + ::= + ::= + ::= + ::= + ::= P # pointer-to + ::= R # reference-to + ::= C # complex pair (C 2000) + ::= G # imaginary (C 2000) + ::= U # vendor extended type qualifier + ::= */ + +static status_t +demangle_type (dm) + demangling_t dm; +{ + int start = substitution_start (dm); + char peek = peek_char (dm); + char peek_next; + int encode_return_type = 0; + template_arg_list_t old_arg_list = current_template_arg_list (dm); + int insert_pos; + + /* A can be a ; therefore, this is a + substitution candidate unless a special condition holds (see + below). */ + int is_substitution_candidate = 1; + + DEMANGLE_TRACE ("type", dm); + + /* A can start with a digit (a ), an + N (a ), or a Z (a ). 
*/ + if (IS_DIGIT ((unsigned char) peek) || peek == 'N' || peek == 'Z') + RETURN_IF_ERROR (demangle_class_enum_type (dm, &encode_return_type)); + /* Lower-case letters begin s, except for `r', which + denotes restrict. */ + else if (peek >= 'a' && peek <= 'z' && peek != 'r') + { + RETURN_IF_ERROR (demangle_builtin_type (dm)); + /* Built-in types are not substitution candidates. */ + is_substitution_candidate = 0; + } + else + switch (peek) + { + case 'r': + case 'V': + case 'K': + /* CV-qualifiers (including restrict). We have to demangle + them off to the side, since C++ syntax puts them in a funny + place for qualified pointer and reference types. */ + { + status_t status; + dyn_string_t cv_qualifiers = dyn_string_new (24); + int old_caret_position = result_get_caret (dm); + + if (cv_qualifiers == NULL) + return STATUS_ALLOCATION_FAILED; + + /* Decode all adjacent CV qualifiers. */ + demangle_CV_qualifiers (dm, cv_qualifiers); + /* Emit them, and shift the caret left so that the + underlying type will be emitted before the qualifiers. */ + status = result_add_string (dm, cv_qualifiers); + result_shift_caret (dm, -dyn_string_length (cv_qualifiers)); + /* Clean up. */ + dyn_string_delete (cv_qualifiers); + RETURN_IF_ERROR (status); + /* Also prepend a blank, if needed. */ + RETURN_IF_ERROR (result_add_char (dm, ' ')); + result_shift_caret (dm, -1); + + /* Demangle the underlying type. It will be emitted before + the CV qualifiers, since we moved the caret. */ + RETURN_IF_ERROR (demangle_type (dm)); + + /* Put the caret back where it was previously. */ + result_set_caret (dm, old_caret_position); + } + break; + + case 'F': + return "Non-pointer or -reference function type."; + + case 'A': + RETURN_IF_ERROR (demangle_array_type (dm, NULL)); + break; + + case 'T': + /* It's either a or a + . In either case, demangle the + `T' token first. 
*/ + RETURN_IF_ERROR (demangle_template_param (dm)); + + /* Check for a template argument list; if one is found, it's a + ::= + ::= */ + if (peek_char (dm) == 'I') + { + /* Add a substitution candidate. The template parameter + `T' token is a substitution candidate by itself, + without the template argument list. */ + RETURN_IF_ERROR (substitution_add (dm, start, encode_return_type)); + + /* Now demangle the template argument list. */ + RETURN_IF_ERROR (demangle_template_args (dm)); + /* The entire type, including the template template + parameter and its argument list, will be added as a + substitution candidate below. */ + } + + break; + + case 'S': + /* First check if this is a special substitution. If it is, + this is a . Special substitutions have a + letter following the `S'; other substitutions have a digit + or underscore. */ + peek_next = peek_char_next (dm); + if (IS_DIGIT (peek_next) || peek_next == '_') + { + RETURN_IF_ERROR (demangle_substitution (dm, &encode_return_type)); + + /* The substituted name may have been a template name. + Check if template arguments follow, and if so, demangle + them. */ + if (peek_char (dm) == 'I') + RETURN_IF_ERROR (demangle_template_args (dm)); + else + /* A substitution token is not itself a substitution + candidate. (However, if the substituted template is + instantiated, the resulting type is.) */ + is_substitution_candidate = 0; + } + else + { + /* Now some trickiness. We have a special substitution + here. Often, the special substitution provides the + name of a template that's subsequently instantiated, + for instance `SaIcE' => std::allocator. In these + cases we need to add a substitution candidate for the + entire and thus don't want to clear + the is_substitution_candidate flag. + + However, it's possible that what we have here is a + substitution token representing an entire type, such as + `Ss' => std::string. In this case, we mustn't add a + new substitution candidate for this substitution token. 
+ To detect this case, remember where the start of the + substitution token is. */ + const char *next = dm->next; + /* Now demangle the . */ + RETURN_IF_ERROR + (demangle_class_enum_type (dm, &encode_return_type)); + /* If all that was just demangled is the two-character + special substitution token, supress the addition of a + new candidate for it. */ + if (dm->next == next + 2) + is_substitution_candidate = 0; + } + + break; + + case 'P': + case 'R': + case 'M': + RETURN_IF_ERROR (demangle_type_ptr (dm, &insert_pos, start)); + /* demangle_type_ptr adds all applicable substitution + candidates. */ + is_substitution_candidate = 0; + break; + + case 'C': + /* A C99 complex type. */ + RETURN_IF_ERROR (result_add (dm, "complex ")); + advance_char (dm); + RETURN_IF_ERROR (demangle_type (dm)); + break; + + case 'G': + /* A C99 imaginary type. */ + RETURN_IF_ERROR (result_add (dm, "imaginary ")); + advance_char (dm); + RETURN_IF_ERROR (demangle_type (dm)); + break; + + case 'U': + /* Vendor-extended type qualifier. */ + advance_char (dm); + RETURN_IF_ERROR (demangle_source_name (dm)); + RETURN_IF_ERROR (result_add_char (dm, ' ')); + RETURN_IF_ERROR (demangle_type (dm)); + break; + + default: + return "Unexpected character in ."; + } + + if (is_substitution_candidate) + /* Add a new substitution for the type. If this type was a + , pass its index since from the point of + substitutions; a token is a substitution + candidate distinct from the type that is substituted for it. */ + RETURN_IF_ERROR (substitution_add (dm, start, encode_return_type)); + + /* Pop off template argument lists added during mangling of this + type. */ + pop_to_template_arg_list (dm, old_arg_list); + + return STATUS_OK; +} + +/* C++ source names of builtin types, indexed by the mangled code + letter's position in the alphabet ('a' -> 0, 'b' -> 1, etc). 
*/ +static const char *const builtin_type_names[26] = +{ + "signed char", /* a */ + "bool", /* b */ + "char", /* c */ + "double", /* d */ + "long double", /* e */ + "float", /* f */ + "__float128", /* g */ + "unsigned char", /* h */ + "int", /* i */ + "unsigned", /* j */ + NULL, /* k */ + "long", /* l */ + "unsigned long", /* m */ + "__int128", /* n */ + "unsigned __int128", /* o */ + NULL, /* p */ + NULL, /* q */ + NULL, /* r */ + "short", /* s */ + "unsigned short", /* t */ + NULL, /* u */ + "void", /* v */ + "wchar_t", /* w */ + "long long", /* x */ + "unsigned long long", /* y */ + "..." /* z */ +}; + +/* Java source names of builtin types. Types that arn't valid in Java + are also included here - we don't fail if someone attempts to demangle a + C++ symbol in Java style. */ +static const char *const java_builtin_type_names[26] = +{ + "signed char", /* a */ + "boolean", /* C++ "bool" */ /* b */ + "byte", /* C++ "char" */ /* c */ + "double", /* d */ + "long double", /* e */ + "float", /* f */ + "__float128", /* g */ + "unsigned char", /* h */ + "int", /* i */ + "unsigned", /* j */ + NULL, /* k */ + "long", /* l */ + "unsigned long", /* m */ + "__int128", /* n */ + "unsigned __int128", /* o */ + NULL, /* p */ + NULL, /* q */ + NULL, /* r */ + "short", /* s */ + "unsigned short", /* t */ + NULL, /* u */ + "void", /* v */ + "char", /* C++ "wchar_t" */ /* w */ + "long", /* C++ "long long" */ /* x */ + "unsigned long long", /* y */ + "..." /* z */ +}; + +/* Demangles and emits a . 
+ + ::= v # void + ::= w # wchar_t + ::= b # bool + ::= c # char + ::= a # signed char + ::= h # unsigned char + ::= s # short + ::= t # unsigned short + ::= i # int + ::= j # unsigned int + ::= l # long + ::= m # unsigned long + ::= x # long long, __int64 + ::= y # unsigned long long, __int64 + ::= n # __int128 + ::= o # unsigned __int128 + ::= f # float + ::= d # double + ::= e # long double, __float80 + ::= g # __float128 + ::= z # ellipsis + ::= u # vendor extended type */ + +static status_t +demangle_builtin_type (dm) + demangling_t dm; +{ + + char code = peek_char (dm); + + DEMANGLE_TRACE ("builtin-type", dm); + + if (code == 'u') + { + advance_char (dm); + RETURN_IF_ERROR (demangle_source_name (dm)); + return STATUS_OK; + } + else if (code >= 'a' && code <= 'z') + { + const char *type_name; + /* Java uses different names for some built-in types. */ + if (dm->style == DMGL_JAVA) + type_name = java_builtin_type_names[code - 'a']; + else + type_name = builtin_type_names[code - 'a']; + if (type_name == NULL) + return "Unrecognized code."; + + RETURN_IF_ERROR (result_add (dm, type_name)); + advance_char (dm); + return STATUS_OK; + } + else + return "Non-alphabetic code."; +} + +/* Demangles all consecutive CV-qualifiers (const, volatile, and + restrict) at the current position. The qualifiers are appended to + QUALIFIERS. Returns STATUS_OK. 
*/ + +static status_t +demangle_CV_qualifiers (dm, qualifiers) + demangling_t dm; + dyn_string_t qualifiers; +{ + DEMANGLE_TRACE ("CV-qualifiers", dm); + + while (1) + { + switch (peek_char (dm)) + { + case 'r': + if (!dyn_string_append_space (qualifiers)) + return STATUS_ALLOCATION_FAILED; + if (!dyn_string_append_cstr (qualifiers, "restrict")) + return STATUS_ALLOCATION_FAILED; + break; + + case 'V': + if (!dyn_string_append_space (qualifiers)) + return STATUS_ALLOCATION_FAILED; + if (!dyn_string_append_cstr (qualifiers, "volatile")) + return STATUS_ALLOCATION_FAILED; + break; + + case 'K': + if (!dyn_string_append_space (qualifiers)) + return STATUS_ALLOCATION_FAILED; + if (!dyn_string_append_cstr (qualifiers, "const")) + return STATUS_ALLOCATION_FAILED; + break; + + default: + return STATUS_OK; + } + + advance_char (dm); + } +} + +/* Demangles and emits a . *FUNCTION_NAME_POS is the + position in the result string of the start of the function + identifier, at which the function's return type will be inserted; + *FUNCTION_NAME_POS is updated to position past the end of the + function's return type. + + ::= F [Y] E */ + +static status_t +demangle_function_type (dm, function_name_pos) + demangling_t dm; + int *function_name_pos; +{ + DEMANGLE_TRACE ("function-type", dm); + RETURN_IF_ERROR (demangle_char (dm, 'F')); + if (peek_char (dm) == 'Y') + { + /* Indicate this function has C linkage if in verbose mode. */ + if (flag_verbose) + RETURN_IF_ERROR (result_add (dm, " [extern \"C\"] ")); + advance_char (dm); + } + RETURN_IF_ERROR (demangle_bare_function_type (dm, function_name_pos)); + RETURN_IF_ERROR (demangle_char (dm, 'E')); + return STATUS_OK; +} + +/* Demangles and emits a . RETURN_TYPE_POS is the + position in the result string at which the function return type + should be inserted. If RETURN_TYPE_POS is BFT_NO_RETURN_TYPE, the + function's return type is assumed not to be encoded. 
+ + ::= + */ + +static status_t +demangle_bare_function_type (dm, return_type_pos) + demangling_t dm; + int *return_type_pos; +{ + /* Sequence is the index of the current function parameter, counting + from zero. The value -1 denotes the return type. */ + int sequence = + (return_type_pos == BFT_NO_RETURN_TYPE ? 0 : -1); + + DEMANGLE_TRACE ("bare-function-type", dm); + + RETURN_IF_ERROR (result_add_char (dm, '(')); + while (!end_of_name_p (dm) && peek_char (dm) != 'E') + { + if (sequence == -1) + /* We're decoding the function's return type. */ + { + dyn_string_t return_type; + status_t status = STATUS_OK; + + /* Decode the return type off to the side. */ + RETURN_IF_ERROR (result_push (dm)); + RETURN_IF_ERROR (demangle_type (dm)); + return_type = (dyn_string_t) result_pop (dm); + + /* Add a space to the end of the type. Insert the return + type where we've been asked to. */ + if (!dyn_string_append_space (return_type)) + status = STATUS_ALLOCATION_FAILED; + if (STATUS_NO_ERROR (status)) + { + if (!dyn_string_insert (result_string (dm), *return_type_pos, + return_type)) + status = STATUS_ALLOCATION_FAILED; + else + *return_type_pos += dyn_string_length (return_type); + } + + dyn_string_delete (return_type); + RETURN_IF_ERROR (status); + } + else + { + /* Skip `void' parameter types. One should only occur as + the only type in a parameter list; in that case, we want + to print `foo ()' instead of `foo (void)'. */ + if (peek_char (dm) == 'v') + /* Consume the v. */ + advance_char (dm); + else + { + /* Separate parameter types by commas. */ + if (sequence > 0) + RETURN_IF_ERROR (result_add (dm, ", ")); + /* Demangle the type. */ + RETURN_IF_ERROR (demangle_type (dm)); + } + } + + ++sequence; + } + RETURN_IF_ERROR (result_add_char (dm, ')')); + + /* We should have demangled at least one parameter type (which would + be void, for a function that takes no parameters), plus the + return type, if we were supposed to demangle that. 
*/ + if (sequence == -1) + return "Missing function return type."; + else if (sequence == 0) + return "Missing function parameter."; + + return STATUS_OK; +} + +/* Demangles and emits a . *ENCODE_RETURN_TYPE is set to + non-zero if the type is a template-id, zero otherwise. + + ::= */ + +static status_t +demangle_class_enum_type (dm, encode_return_type) + demangling_t dm; + int *encode_return_type; +{ + DEMANGLE_TRACE ("class-enum-type", dm); + + RETURN_IF_ERROR (demangle_name (dm, encode_return_type)); + return STATUS_OK; +} + +/* Demangles and emits an . + + If PTR_INSERT_POS is not NULL, the array type is formatted as a + pointer or reference to an array, except that asterisk and + ampersand punctuation is omitted (since it's not know at this + point). *PTR_INSERT_POS is set to the position in the demangled + name at which this punctuation should be inserted. For example, + `A10_i' is demangled to `int () [10]' and *PTR_INSERT_POS points + between the parentheses. + + If PTR_INSERT_POS is NULL, the array type is assumed not to be + pointer- or reference-qualified. Then, for example, `A10_i' is + demangled simply as `int[10]'. + + ::= A [] _ + ::= A _ */ + +static status_t +demangle_array_type (dm, ptr_insert_pos) + demangling_t dm; + int *ptr_insert_pos; +{ + status_t status = STATUS_OK; + dyn_string_t array_size = NULL; + char peek; + + DEMANGLE_TRACE ("array-type", dm); + + RETURN_IF_ERROR (demangle_char (dm, 'A')); + + /* Demangle the array size into array_size. */ + peek = peek_char (dm); + if (peek == '_') + /* Array bound is omitted. This is a C99-style VLA. */ + ; + else if (IS_DIGIT (peek_char (dm))) + { + /* It looks like a constant array bound. */ + array_size = dyn_string_new (10); + if (array_size == NULL) + return STATUS_ALLOCATION_FAILED; + status = demangle_number_literally (dm, array_size, 10, 0); + } + else + { + /* Anything is must be an expression for a nont-constant array + bound. 
This happens if the array type occurs in a template + and the array bound references a template parameter. */ + RETURN_IF_ERROR (result_push (dm)); + RETURN_IF_ERROR (demangle_expression_v3 (dm)); + array_size = (dyn_string_t) result_pop (dm); + } + /* array_size may have been allocated by now, so we can't use + RETURN_IF_ERROR until it's been deallocated. */ + + /* Demangle the base type of the array. */ + if (STATUS_NO_ERROR (status)) + status = demangle_char (dm, '_'); + if (STATUS_NO_ERROR (status)) + status = demangle_type (dm); + + if (ptr_insert_pos != NULL) + { + /* This array is actually part of an pointer- or + reference-to-array type. Format appropriately, except we + don't know which and how much punctuation to use. */ + if (STATUS_NO_ERROR (status)) + status = result_add (dm, " () "); + /* Let the caller know where to insert the punctuation. */ + *ptr_insert_pos = result_caret_pos (dm) - 2; + } + + /* Emit the array dimension syntax. */ + if (STATUS_NO_ERROR (status)) + status = result_add_char (dm, '['); + if (STATUS_NO_ERROR (status) && array_size != NULL) + status = result_add_string (dm, array_size); + if (STATUS_NO_ERROR (status)) + status = result_add_char (dm, ']'); + if (array_size != NULL) + dyn_string_delete (array_size); + + RETURN_IF_ERROR (status); + + return STATUS_OK; +} + +/* Demangles and emits a . + + ::= T_ # first template parameter + ::= T _ */ + +static status_t +demangle_template_param (dm) + demangling_t dm; +{ + int parm_number; + template_arg_list_t current_arg_list = current_template_arg_list (dm); + string_list_t arg; + + DEMANGLE_TRACE ("template-param", dm); + + /* Make sure there is a template argmust list in which to look up + this parameter reference. 
*/ + if (current_arg_list == NULL) + return "Template parameter outside of template."; + + RETURN_IF_ERROR (demangle_char (dm, 'T')); + if (peek_char (dm) == '_') + parm_number = 0; + else + { + RETURN_IF_ERROR (demangle_number (dm, &parm_number, 10, 0)); + ++parm_number; + } + RETURN_IF_ERROR (demangle_char (dm, '_')); + + arg = template_arg_list_get_arg (current_arg_list, parm_number); + if (arg == NULL) + /* parm_number exceeded the number of arguments in the current + template argument list. */ + return "Template parameter number out of bounds."; + RETURN_IF_ERROR (result_add_string (dm, (dyn_string_t) arg)); + + return STATUS_OK; +} + +/* Demangles and emits a . + + ::= I + E */ + +static status_t +demangle_template_args_1 (dm, arg_list) + demangling_t dm; + template_arg_list_t arg_list; +{ + int first = 1; + + DEMANGLE_TRACE ("template-args", dm); + + RETURN_IF_ERROR (demangle_char (dm, 'I')); + RETURN_IF_ERROR (result_open_template_list (dm)); + do + { + string_list_t arg; + + if (first) + first = 0; + else + RETURN_IF_ERROR (result_add (dm, ", ")); + + /* Capture the template arg. */ + RETURN_IF_ERROR (result_push (dm)); + RETURN_IF_ERROR (demangle_template_arg (dm)); + arg = result_pop (dm); + + /* Emit it in the demangled name. */ + RETURN_IF_ERROR (result_add_string (dm, (dyn_string_t) arg)); + + /* Save it for use in expanding s. */ + template_arg_list_add_arg (arg_list, arg); + } + while (peek_char (dm) != 'E'); + /* Append the '>'. */ + RETURN_IF_ERROR (result_close_template_list (dm)); + + /* Consume the 'E'. */ + advance_char (dm); + + return STATUS_OK; +} + +static status_t +demangle_template_args (dm) + demangling_t dm; +{ + int first = 1; + dyn_string_t old_last_source_name; + dyn_string_t new_name; + template_arg_list_t arg_list = template_arg_list_new (); + status_t status; + + if (arg_list == NULL) + return STATUS_ALLOCATION_FAILED; + + /* Preserve the most recently demangled source name. 
*/ + old_last_source_name = dm->last_source_name; + new_name = dyn_string_new (0); + + if (new_name == NULL) + { + template_arg_list_delete (arg_list); + return STATUS_ALLOCATION_FAILED; + } + + dm->last_source_name = new_name; + + status = demangle_template_args_1 (dm, arg_list); + /* Restore the most recent demangled source name. */ + dyn_string_delete (dm->last_source_name); + dm->last_source_name = old_last_source_name; + + if (!STATUS_NO_ERROR (status)) + { + template_arg_list_delete (arg_list); + return status; + } + + /* Push the list onto the top of the stack of template argument + lists, so that arguments from it are used from now on when + expanding s. */ + push_template_arg_list (dm, arg_list); + + return STATUS_OK; +} + +/* This function, which does not correspond to a production in the + mangling spec, handles the `literal' production for both + and . It does not expect or consume + the initial `L' or final `E'. The demangling is given by: + + ::= + + and the emitted output is `(type)number'. */ + +static status_t +demangle_literal (dm) + demangling_t dm; +{ + char peek = peek_char (dm); + dyn_string_t value_string; + status_t status; + + DEMANGLE_TRACE ("literal", dm); + + if (!flag_verbose && peek >= 'a' && peek <= 'z') + { + /* If not in verbose mode and this is a builtin type, see if we + can produce simpler numerical output. In particular, for + integer types shorter than `long', just write the number + without type information; for bools, write `true' or `false'. + Other refinements could be made here too. */ + + /* This constant string is used to map from codes + (26 letters of the alphabet) to codes that determine how the + value will be displayed. The codes are: + b: display as bool + i: display as int + l: display as long + A space means the value will be represented using cast + notation. */ + static const char *const code_map = "ibi iii ll ii i "; + + char code = code_map[peek - 'a']; + /* FIXME: Implement demangling of floats and doubles. 
*/ + if (code == 'u') + return STATUS_UNIMPLEMENTED; + if (code == 'b') + { + /* It's a boolean. */ + char value; + + /* Consume the b. */ + advance_char (dm); + /* Look at the next character. It should be 0 or 1, + corresponding to false or true, respectively. */ + value = peek_char (dm); + if (value == '0') + RETURN_IF_ERROR (result_add (dm, "false")); + else if (value == '1') + RETURN_IF_ERROR (result_add (dm, "true")); + else + return "Unrecognized bool constant."; + /* Consume the 0 or 1. */ + advance_char (dm); + return STATUS_OK; + } + else if (code == 'i' || code == 'l') + { + /* It's an integer or long. */ + + /* Consume the type character. */ + advance_char (dm); + + /* Demangle the number and write it out. */ + value_string = dyn_string_new (0); + status = demangle_number_literally (dm, value_string, 10, 1); + if (STATUS_NO_ERROR (status)) + status = result_add_string (dm, value_string); + /* For long integers, append an l. */ + if (code == 'l' && STATUS_NO_ERROR (status)) + status = result_add_char (dm, code); + dyn_string_delete (value_string); + + RETURN_IF_ERROR (status); + return STATUS_OK; + } + /* ...else code == ' ', so fall through to represent this + literal's type explicitly using cast syntax. */ + } + + RETURN_IF_ERROR (result_add_char (dm, '(')); + RETURN_IF_ERROR (demangle_type (dm)); + RETURN_IF_ERROR (result_add_char (dm, ')')); + + value_string = dyn_string_new (0); + if (value_string == NULL) + return STATUS_ALLOCATION_FAILED; + + status = demangle_number_literally (dm, value_string, 10, 1); + if (STATUS_NO_ERROR (status)) + status = result_add_string (dm, value_string); + dyn_string_delete (value_string); + RETURN_IF_ERROR (status); + + return STATUS_OK; +} + +/* Demangles and emits a . 
+ + ::= # type + ::= L E # literal + ::= LZ E # external name + ::= X E # expression */ + +static status_t +demangle_template_arg (dm) + demangling_t dm; +{ + DEMANGLE_TRACE ("template-arg", dm); + + switch (peek_char (dm)) + { + case 'L': + advance_char (dm); + + if (peek_char (dm) == 'Z') + { + /* External name. */ + advance_char (dm); + /* FIXME: Standard is contradictory here. */ + RETURN_IF_ERROR (demangle_encoding (dm)); + } + else + RETURN_IF_ERROR (demangle_literal (dm)); + RETURN_IF_ERROR (demangle_char (dm, 'E')); + break; + + case 'X': + /* Expression. */ + advance_char (dm); + RETURN_IF_ERROR (demangle_expression_v3 (dm)); + RETURN_IF_ERROR (demangle_char (dm, 'E')); + break; + + default: + RETURN_IF_ERROR (demangle_type (dm)); + break; + } + + return STATUS_OK; +} + +/* Demangles and emits an . + + ::= + ::= + ::= + ::= */ + +static status_t +demangle_expression_v3 (dm) + demangling_t dm; +{ + char peek = peek_char (dm); + + DEMANGLE_TRACE ("expression", dm); + + if (peek == 'L' || peek == 'T') + RETURN_IF_ERROR (demangle_expr_primary (dm)); + else if (peek == 's' && peek_char_next (dm) == 'r') + RETURN_IF_ERROR (demangle_scope_expression (dm)); + else + /* An operator expression. */ + { + int num_args; + status_t status = STATUS_OK; + dyn_string_t operator_name; + + /* We have an operator name. Since we want to output binary + operations in infix notation, capture the operator name + first. */ + RETURN_IF_ERROR (result_push (dm)); + RETURN_IF_ERROR (demangle_operator_name (dm, 1, &num_args)); + operator_name = (dyn_string_t) result_pop (dm); + + /* If it's binary, do an operand first. */ + if (num_args > 1) + { + status = result_add_char (dm, '('); + if (STATUS_NO_ERROR (status)) + status = demangle_expression_v3 (dm); + if (STATUS_NO_ERROR (status)) + status = result_add_char (dm, ')'); + } + + /* Emit the operator. 
*/ + if (STATUS_NO_ERROR (status)) + status = result_add_string (dm, operator_name); + dyn_string_delete (operator_name); + RETURN_IF_ERROR (status); + + /* Emit its second (if binary) or only (if unary) operand. */ + RETURN_IF_ERROR (result_add_char (dm, '(')); + RETURN_IF_ERROR (demangle_expression_v3 (dm)); + RETURN_IF_ERROR (result_add_char (dm, ')')); + + /* The ternary operator takes a third operand. */ + if (num_args == 3) + { + RETURN_IF_ERROR (result_add (dm, ":(")); + RETURN_IF_ERROR (demangle_expression_v3 (dm)); + RETURN_IF_ERROR (result_add_char (dm, ')')); + } + } + + return STATUS_OK; +} + +/* Demangles and emits a . + + ::= sr + ::= sr */ + +static status_t +demangle_scope_expression (dm) + demangling_t dm; +{ + RETURN_IF_ERROR (demangle_char (dm, 's')); + RETURN_IF_ERROR (demangle_char (dm, 'r')); + RETURN_IF_ERROR (demangle_type (dm)); + RETURN_IF_ERROR (result_add (dm, "::")); + RETURN_IF_ERROR (demangle_encoding (dm)); + return STATUS_OK; +} + +/* Demangles and emits an . + + ::= + ::= L E # literal + ::= L E # external name */ + +static status_t +demangle_expr_primary (dm) + demangling_t dm; +{ + char peek = peek_char (dm); + + DEMANGLE_TRACE ("expr-primary", dm); + + if (peek == 'T') + RETURN_IF_ERROR (demangle_template_param (dm)); + else if (peek == 'L') + { + /* Consume the `L'. */ + advance_char (dm); + peek = peek_char (dm); + + if (peek == '_') + RETURN_IF_ERROR (demangle_mangled_name (dm)); + else + RETURN_IF_ERROR (demangle_literal (dm)); + + RETURN_IF_ERROR (demangle_char (dm, 'E')); + } + else + return STATUS_ERROR; + + return STATUS_OK; +} + +/* Demangles and emits a . Sets *TEMPLATE_P to non-zero + if the substitution is the name of a template, zero otherwise. 
+ + ::= S _ + ::= S_ + + ::= St # ::std:: + ::= Sa # ::std::allocator + ::= Sb # ::std::basic_string + ::= Ss # ::std::basic_string, + ::std::allocator > + ::= Si # ::std::basic_istream > + ::= So # ::std::basic_ostream > + ::= Sd # ::std::basic_iostream > +*/ + +static status_t +demangle_substitution (dm, template_p) + demangling_t dm; + int *template_p; +{ + int seq_id; + int peek; + dyn_string_t text; + + DEMANGLE_TRACE ("substitution", dm); + + RETURN_IF_ERROR (demangle_char (dm, 'S')); + + /* Scan the substitution sequence index. A missing number denotes + the first index. */ + peek = peek_char (dm); + if (peek == '_') + seq_id = -1; + /* If the following character is 0-9 or a capital letter, interpret + the sequence up to the next underscore as a base-36 substitution + index. */ + else if (IS_DIGIT ((unsigned char) peek) + || (peek >= 'A' && peek <= 'Z')) + RETURN_IF_ERROR (demangle_number (dm, &seq_id, 36, 0)); + else + { + const char *new_last_source_name = NULL; + + switch (peek) + { + case 't': + RETURN_IF_ERROR (result_add (dm, "std")); + break; + + case 'a': + RETURN_IF_ERROR (result_add (dm, "std::allocator")); + new_last_source_name = "allocator"; + *template_p = 1; + break; + + case 'b': + RETURN_IF_ERROR (result_add (dm, "std::basic_string")); + new_last_source_name = "basic_string"; + *template_p = 1; + break; + + case 's': + if (!flag_verbose) + { + RETURN_IF_ERROR (result_add (dm, "std::string")); + new_last_source_name = "string"; + } + else + { + RETURN_IF_ERROR (result_add (dm, "std::basic_string, std::allocator >")); + new_last_source_name = "basic_string"; + } + *template_p = 0; + break; + + case 'i': + if (!flag_verbose) + { + RETURN_IF_ERROR (result_add (dm, "std::istream")); + new_last_source_name = "istream"; + } + else + { + RETURN_IF_ERROR (result_add (dm, "std::basic_istream >")); + new_last_source_name = "basic_istream"; + } + *template_p = 0; + break; + + case 'o': + if (!flag_verbose) + { + RETURN_IF_ERROR (result_add (dm, 
"std::ostream")); + new_last_source_name = "ostream"; + } + else + { + RETURN_IF_ERROR (result_add (dm, "std::basic_ostream >")); + new_last_source_name = "basic_ostream"; + } + *template_p = 0; + break; + + case 'd': + if (!flag_verbose) + { + RETURN_IF_ERROR (result_add (dm, "std::iostream")); + new_last_source_name = "iostream"; + } + else + { + RETURN_IF_ERROR (result_add (dm, "std::basic_iostream >")); + new_last_source_name = "basic_iostream"; + } + *template_p = 0; + break; + + default: + return "Unrecognized ."; + } + + /* Consume the character we just processed. */ + advance_char (dm); + + if (new_last_source_name != NULL) + { + if (!dyn_string_copy_cstr (dm->last_source_name, + new_last_source_name)) + return STATUS_ALLOCATION_FAILED; + } + + return STATUS_OK; + } + + /* Look up the substitution text. Since `S_' is the most recent + substitution, `S0_' is the second-most-recent, etc., shift the + numbering by one. */ + text = substitution_get (dm, seq_id + 1, template_p); + if (text == NULL) + return "Substitution number out of range."; + + /* Emit the substitution text. */ + RETURN_IF_ERROR (result_add_string (dm, text)); + + RETURN_IF_ERROR (demangle_char (dm, '_')); + return STATUS_OK; +} + +/* Demangles and emits a . + + := Z E [] + := Z E s [] */ + +static status_t +demangle_local_name (dm) + demangling_t dm; +{ + DEMANGLE_TRACE ("local-name", dm); + + RETURN_IF_ERROR (demangle_char (dm, 'Z')); + RETURN_IF_ERROR (demangle_encoding (dm)); + RETURN_IF_ERROR (demangle_char (dm, 'E')); + RETURN_IF_ERROR (result_add (dm, "::")); + + if (peek_char (dm) == 's') + { + /* Local character string literal. */ + RETURN_IF_ERROR (result_add (dm, "string literal")); + /* Consume the s. */ + advance_char (dm); + RETURN_IF_ERROR (demangle_discriminator (dm, 0)); + } + else + { + int unused; + /* Local name for some other entity. Demangle its name. 
*/ + RETURN_IF_ERROR (demangle_name (dm, &unused)); + RETURN_IF_ERROR (demangle_discriminator (dm, 1)); + } + + return STATUS_OK; + } + + /* Optimonally demangles and emits a . If there is no + at the current position in the mangled string, the + descriminator is assumed to be zero. Emit the discriminator number + in parentheses, unless SUPPRESS_FIRST is non-zero and the + discriminator is zero. + + ::= _ */ + +static status_t +demangle_discriminator (dm, suppress_first) + demangling_t dm; + int suppress_first; +{ + /* Output for s to the demangled name is completely + suppressed if not in verbose mode. */ + + if (peek_char (dm) == '_') + { + /* Consume the underscore. */ + advance_char (dm); + if (flag_verbose) + RETURN_IF_ERROR (result_add (dm, " [#")); + /* Check if there's a number following the underscore. */ + if (IS_DIGIT ((unsigned char) peek_char (dm))) + { + int discriminator; + /* Demangle the number. */ + RETURN_IF_ERROR (demangle_number (dm, &discriminator, 10, 0)); + if (flag_verbose) + /* Write the discriminator. The mangled number is two + less than the discriminator ordinal, counting from + zero. */ + RETURN_IF_ERROR (int_to_dyn_string (discriminator + 1, + (dyn_string_t) dm->result)); + } + else + return STATUS_ERROR; + if (flag_verbose) + RETURN_IF_ERROR (result_add_char (dm, ']')); + } + else if (!suppress_first) + { + if (flag_verbose) + RETURN_IF_ERROR (result_add (dm, " [#0]")); + } + + return STATUS_OK; +} + +/* Demangle NAME into RESULT, which must be an initialized + dyn_string_t. On success, returns STATUS_OK. On failure, returns + an error message, and the contents of RESULT are unchanged. 
*/ + +static status_t +cp_demangle (name, result, style) + const char *name; + dyn_string_t result; + int style; +{ + status_t status; + int length = VG_(strlen) (name); + + if (length > 2 && name[0] == '_' && name[1] == 'Z') + { + demangling_t dm = demangling_new (name, style); + if (dm == NULL) + return STATUS_ALLOCATION_FAILED; + + status = result_push (dm); + if (status != STATUS_OK) + { + demangling_delete (dm); + return status; + } + + status = demangle_mangled_name (dm); + if (STATUS_NO_ERROR (status)) + { + dyn_string_t demangled = (dyn_string_t) result_pop (dm); + if (!dyn_string_copy (result, demangled)) + { + demangling_delete (dm); + return STATUS_ALLOCATION_FAILED; + } + dyn_string_delete (demangled); + } + + demangling_delete (dm); + } + else + { + /* It's evidently not a mangled C++ name. It could be the name + of something with C linkage, though, so just copy NAME into + RESULT. */ + if (!dyn_string_copy_cstr (result, name)) + return STATUS_ALLOCATION_FAILED; + status = STATUS_OK; + } + + return status; +} + +/* Demangle TYPE_NAME into RESULT, which must be an initialized + dyn_string_t. On success, returns STATUS_OK. On failiure, returns + an error message, and the contents of RESULT are unchanged. */ + +#ifdef IN_LIBGCC2 +static status_t +cp_demangle_type (type_name, result) + const char* type_name; + dyn_string_t result; +{ + status_t status; + demangling_t dm = demangling_new (type_name); + + if (dm == NULL) + return STATUS_ALLOCATION_FAILED; + + /* Demangle the type name. The demangled name is stored in dm. */ + status = result_push (dm); + if (status != STATUS_OK) + { + demangling_delete (dm); + return status; + } + + status = demangle_type (dm); + + if (STATUS_NO_ERROR (status)) + { + /* The demangling succeeded. Pop the result out of dm and copy + it into RESULT. 
*/ + dyn_string_t demangled = (dyn_string_t) result_pop (dm); + if (!dyn_string_copy (result, demangled)) + return STATUS_ALLOCATION_FAILED; + dyn_string_delete (demangled); + } + + /* Clean up. */ + demangling_delete (dm); + + return status; +} + +extern char *__cxa_demangle PARAMS ((const char *, char *, size_t *, int *)); + +/* ia64 ABI-mandated entry point in the C++ runtime library for performing + demangling. MANGLED_NAME is a NUL-terminated character string + containing the name to be demangled. + + OUTPUT_BUFFER is a region of memory, allocated with malloc, of + *LENGTH bytes, into which the demangled name is stored. If + OUTPUT_BUFFER is not long enough, it is expanded using realloc. + OUTPUT_BUFFER may instead be NULL; in that case, the demangled name + is placed in a region of memory allocated with malloc. + + If LENGTH is non-NULL, the length of the buffer conaining the + demangled name, is placed in *LENGTH. + + The return value is a pointer to the start of the NUL-terminated + demangled name, or NULL if the demangling fails. The caller is + responsible for deallocating this memory using free. + + *STATUS is set to one of the following values: + 0: The demangling operation succeeded. + -1: A memory allocation failiure occurred. + -2: MANGLED_NAME is not a valid name under the C++ ABI mangling rules. + -3: One of the arguments is invalid. + + The demagling is performed using the C++ ABI mangling rules, with + GNU extensions. */ + +char * +__cxa_demangle (mangled_name, output_buffer, length, status) + const char *mangled_name; + char *output_buffer; + size_t *length; + int *status; +{ + struct dyn_string demangled_name; + status_t result; + + if (status == NULL) + return NULL; + + if (mangled_name == NULL) { + *status = -3; + return NULL; + } + + /* Did the caller provide a buffer for the demangled name? */ + if (output_buffer == NULL) { + /* No; dyn_string will malloc a buffer for us. 
*/ + if (!dyn_string_init (&demangled_name, 0)) + { + *status = -1; + return NULL; + } + } + else { + /* Yes. Check that the length was provided. */ + if (length == NULL) { + *status = -3; + return NULL; + } + /* Install the buffer into a dyn_string. */ + demangled_name.allocated = *length; + demangled_name.length = 0; + demangled_name.s = output_buffer; + } + + if (mangled_name[0] == '_' && mangled_name[1] == 'Z') + /* MANGLED_NAME apprears to be a function or variable name. + Demangle it accordingly. */ + result = cp_demangle (mangled_name, &demangled_name, 0); + else + /* Try to demangled MANGLED_NAME as the name of a type. */ + result = cp_demangle_type (mangled_name, &demangled_name); + + if (result == STATUS_OK) + /* The demangling succeeded. */ + { + /* If LENGTH isn't NULL, store the allocated buffer length + there; the buffer may have been realloced by dyn_string + functions. */ + if (length != NULL) + *length = demangled_name.allocated; + /* The operation was a success. */ + *status = 0; + return dyn_string_buf (&demangled_name); + } + else if (result == STATUS_ALLOCATION_FAILED) + /* A call to malloc or realloc failed during the demangling + operation. */ + { + *status = -1; + return NULL; + } + else + /* The demangling failed for another reason, most probably because + MANGLED_NAME isn't a valid mangled name. */ + { + /* If the buffer containing the demangled name wasn't provided + by the caller, free it. */ + if (output_buffer == NULL) + free (dyn_string_buf (&demangled_name)); + *status = -2; + return NULL; + } +} + +#else /* !IN_LIBGCC2 */ + +/* Variant entry point for integration with the existing cplus-dem + demangler. Attempts to demangle MANGLED. If the demangling + succeeds, returns a buffer, allocated with malloc, containing the + demangled name. The caller must deallocate the buffer using free. + If the demangling failes, returns NULL. 
*/ + +char * +VG_(cplus_demangle_v3) (mangled) + const char* mangled; +{ + dyn_string_t demangled; + status_t status; + + /* If this isn't a mangled name, don't pretend to demangle it. */ + if (VG_(strncmp) (mangled, "_Z", 2) != 0) + return NULL; + + /* Create a dyn_string to hold the demangled name. */ + demangled = dyn_string_new (0); + /* Attempt the demangling. */ + status = cp_demangle ((char *) mangled, demangled, 0); + + if (STATUS_NO_ERROR (status)) + /* Demangling succeeded. */ + { + /* Grab the demangled result from the dyn_string. It was + allocated with malloc, so we can return it directly. */ + char *return_value = dyn_string_release (demangled); + /* Hand back the demangled name. */ + return return_value; + } + else if (status == STATUS_ALLOCATION_FAILED) + { + vg_assert (0); + /* + fprintf (stderr, "Memory allocation failed.\n"); + abort (); + */ + } + else + /* Demangling failed. */ + { + dyn_string_delete (demangled); + return NULL; + } +} + +/* Demangle a Java symbol. Java uses a subset of the V3 ABI C++ mangling + conventions, but the output formatting is a little different. + This instructs the C++ demangler not to emit pointer characters ("*"), and + to use Java's namespace separator symbol ("." instead of "::"). It then + does an additional pass over the demangled output to replace instances + of JArray with TYPE[]. */ + +char * +VG_(java_demangle_v3) (mangled) + const char* mangled; +{ + dyn_string_t demangled; + char *next; + char *end; + int len; + status_t status; + int nesting = 0; + char *cplus_demangled; + char *return_value; + + /* Create a dyn_string to hold the demangled name. */ + demangled = dyn_string_new (0); + + /* Attempt the demangling. */ + status = cp_demangle ((char *) mangled, demangled, DMGL_JAVA); + + if (STATUS_NO_ERROR (status)) + /* Demangling succeeded. */ + { + /* Grab the demangled result from the dyn_string. 
*/ + cplus_demangled = dyn_string_release (demangled); + } + else if (status == STATUS_ALLOCATION_FAILED) + { + vg_assert (0); + /* + fprintf (stderr, "Memory allocation failed.\n"); + abort (); + */ + } + else + /* Demangling failed. */ + { + dyn_string_delete (demangled); + return NULL; + } + + len = VG_(strlen) (cplus_demangled); + next = cplus_demangled; + end = next + len; + demangled = NULL; + + /* Replace occurances of JArray with TYPE[]. */ + while (next < end) + { + char *open_str = VG_(strstr) (next, "JArray<"); + char *close_str = NULL; + if (nesting > 0) + close_str = VG_(strchr) (next, '>'); + + if (open_str != NULL && (close_str == NULL || close_str > open_str)) + { + ++nesting; + + if (!demangled) + demangled = dyn_string_new(len); + + /* Copy prepending symbols, if any. */ + if (open_str > next) + { + open_str[0] = 0; + dyn_string_append_cstr (demangled, next); + } + next = open_str + 7; + } + else if (close_str != NULL) + { + --nesting; + + /* Copy prepending type symbol, if any. Squash any spurious + whitespace. */ + if (close_str > next && next[0] != ' ') + { + close_str[0] = 0; + dyn_string_append_cstr (demangled, next); + } + dyn_string_append_cstr (demangled, "[]"); + next = close_str + 1; + } + else + { + /* There are no more arrays. Copy the rest of the symbol, or + simply return the original symbol if no changes were made. */ + if (next == cplus_demangled) + return cplus_demangled; + + dyn_string_append_cstr (demangled, next); + next = end; + } + } + + free (cplus_demangled); + + return_value = dyn_string_release (demangled); + return return_value; +} + +#endif /* IN_LIBGCC2 */ + + +/* Demangle NAME in the G++ V3 ABI demangling style, and return either + zero, indicating that some error occurred, or a demangling_t + holding the results. 
*/ +static demangling_t +demangle_v3_with_details (name) + const char *name; +{ + demangling_t dm; + status_t status; + + if (VG_(strncmp) (name, "_Z", 2)) + return 0; + + dm = demangling_new (name, DMGL_GNU_V3); + if (dm == NULL) + { + vg_assert (0); + /* + fprintf (stderr, "Memory allocation failed.\n"); + abort (); + */ + } + + status = result_push (dm); + if (! STATUS_NO_ERROR (status)) + { + demangling_delete (dm); + vg_assert (0); + /* + fprintf (stderr, "%s\n", status); + abort (); + */ + } + + status = demangle_mangled_name (dm); + if (STATUS_NO_ERROR (status)) + return dm; + + demangling_delete (dm); + return 0; +} + + +/* Return non-zero iff NAME is the mangled form of a constructor name + in the G++ V3 ABI demangling style. Specifically, return: + - '1' if NAME is a complete object constructor, + - '2' if NAME is a base object constructor, or + - '3' if NAME is a complete object allocating constructor. */ +/* +enum gnu_v3_ctor_kinds +is_gnu_v3_mangled_ctor (name) + const char *name; +{ + demangling_t dm = demangle_v3_with_details (name); + + if (dm) + { + enum gnu_v3_ctor_kinds result = dm->is_constructor; + demangling_delete (dm); + return result; + } + else + return 0; +} +*/ + + +/* Return non-zero iff NAME is the mangled form of a destructor name + in the G++ V3 ABI demangling style. Specifically, return: + - '0' if NAME is a deleting destructor, + - '1' if NAME is a complete object destructor, or + - '2' if NAME is a base object destructor. */ +/* +enum gnu_v3_dtor_kinds +is_gnu_v3_mangled_dtor (name) + const char *name; +{ + demangling_t dm = demangle_v3_with_details (name); + + if (dm) + { + enum gnu_v3_dtor_kinds result = dm->is_destructor; + demangling_delete (dm); + return result; + } + else + return 0; +} +*/ + +#ifdef STANDALONE_DEMANGLER + +#include "getopt.h" + +static void print_usage + PARAMS ((FILE* fp, int exit_value)); + +/* Non-zero if CHAR is a character than can occur in a mangled name. 
*/ +#define is_mangled_char(CHAR) \ + (IS_ALPHA (CHAR) || IS_DIGIT (CHAR) \ + || (CHAR) == '_' || (CHAR) == '.' || (CHAR) == '$') + +/* The name of this program, as invoked. */ +const char* program_name; + +/* Prints usage summary to FP and then exits with EXIT_VALUE. */ + +static void +print_usage (fp, exit_value) + FILE* fp; + int exit_value; +{ + fprintf (fp, "Usage: %s [options] [names ...]\n", program_name); + fprintf (fp, "Options:\n"); + fprintf (fp, " -h,--help Display this message.\n"); + fprintf (fp, " -s,--strict Demangle standard names only.\n"); + fprintf (fp, " -v,--verbose Produce verbose demanglings.\n"); + fprintf (fp, "If names are provided, they are demangled. Otherwise filters standard input.\n"); + + exit (exit_value); +} + +/* Option specification for getopt_long. */ +static const struct option long_options[] = +{ + { "help", no_argument, NULL, 'h' }, + { "strict", no_argument, NULL, 's' }, + { "verbose", no_argument, NULL, 'v' }, + { NULL, no_argument, NULL, 0 }, +}; + +/* Main entry for a demangling filter executable. It will demangle + its command line arguments, if any. If none are provided, it will + filter stdin to stdout, replacing any recognized mangled C++ names + with their demangled equivalents. */ + +int +main (argc, argv) + int argc; + char *argv[]; +{ + status_t status; + int i; + int opt_char; + + /* Use the program name of this program, as invoked. */ + program_name = argv[0]; + + /* Parse options. */ + do + { + opt_char = getopt_long (argc, argv, "hsv", long_options, NULL); + switch (opt_char) + { + case '?': /* Unrecognized option. */ + print_usage (stderr, 1); + break; + + case 'h': + print_usage (stdout, 0); + break; + + case 's': + flag_strict = 1; + break; + + case 'v': + flag_verbose = 1; + break; + } + } + while (opt_char != -1); + + if (optind == argc) + /* No command line arguments were provided. Filter stdin. 
*/ + { + dyn_string_t mangled = dyn_string_new (3); + dyn_string_t demangled = dyn_string_new (0); + status_t status; + + /* Read all of input. */ + while (!feof (stdin)) + { + char c = getchar (); + + /* The first character of a mangled name is an underscore. */ + if (feof (stdin)) + break; + if (c != '_') + { + /* It's not a mangled name. Print the character and go + on. */ + putchar (c); + continue; + } + c = getchar (); + + /* The second character of a mangled name is a capital `Z'. */ + if (feof (stdin)) + break; + if (c != 'Z') + { + /* It's not a mangled name. Print the previous + underscore, the `Z', and go on. */ + putchar ('_'); + putchar (c); + continue; + } + + /* Start keeping track of the candidate mangled name. */ + dyn_string_append_char (mangled, '_'); + dyn_string_append_char (mangled, 'Z'); + + /* Pile characters into mangled until we hit one that can't + occur in a mangled name. */ + c = getchar (); + while (!feof (stdin) && is_mangled_char (c)) + { + dyn_string_append_char (mangled, c); + if (feof (stdin)) + break; + c = getchar (); + } + + /* Attempt to demangle the name. */ + status = cp_demangle (dyn_string_buf (mangled), demangled, 0); + + /* If the demangling succeeded, great! Print out the + demangled version. */ + if (STATUS_NO_ERROR (status)) + fputs (dyn_string_buf (demangled), stdout); + /* Abort on allocation failures. */ + else if (status == STATUS_ALLOCATION_FAILED) + { + fprintf (stderr, "Memory allocation failed.\n"); + abort (); + } + /* Otherwise, it might not have been a mangled name. Just + print out the original text. */ + else + fputs (dyn_string_buf (mangled), stdout); + + /* If we haven't hit EOF yet, we've read one character that + can't occur in a mangled name, so print it out. */ + if (!feof (stdin)) + putchar (c); + + /* Clear the candidate mangled name, to start afresh next + time we hit a `_Z'. 
*/ + dyn_string_clear (mangled); + } + + dyn_string_delete (mangled); + dyn_string_delete (demangled); + } + else + /* Demangle command line arguments. */ + { + dyn_string_t result = dyn_string_new (0); + + /* Loop over command line arguments. */ + for (i = optind; i < argc; ++i) + { + /* Attempt to demangle. */ + status = cp_demangle (argv[i], result, 0); + + /* If it worked, print the demangled name. */ + if (STATUS_NO_ERROR (status)) + printf ("%s\n", dyn_string_buf (result)); + /* Abort on allocaiton failures. */ + else if (status == STATUS_ALLOCATION_FAILED) + { + fprintf (stderr, "Memory allocation failed.\n"); + abort (); + } + /* If not, print the error message to stderr instead. */ + else + fprintf (stderr, "%s\n", status); + } + dyn_string_delete (result); + } + + return 0; +} + +#endif /* STANDALONE_DEMANGLER */ diff --git a/coregrind/demangle/cplus-dem.c b/coregrind/demangle/cplus-dem.c new file mode 100644 index 000000000..56c326139 --- /dev/null +++ b/coregrind/demangle/cplus-dem.c @@ -0,0 +1,5264 @@ +/* Demangler for GNU C++ + Copyright 1989, 1991, 1994, 1995, 1996, 1997, 1998, 1999, + 2000, 2001 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.uucp) + Rewritten by Fred Fish (fnf@cygnus.com) for ARM and Lucid demangling + Modified by Satish Pai (pai@apollo.hp.com) for HP demangling + +This file is part of the libiberty library. +Libiberty is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public +License as published by the Free Software Foundation; either +version 2 of the License, or (at your option) any later version. + +Libiberty is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. 
+ +You should have received a copy of the GNU Library General Public +License along with libiberty; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* This file exports two functions; cplus_mangle_opname and cplus_demangle. + + This file imports xmalloc and xrealloc, which are like malloc and + realloc except that they generate a fatal error if there is no + available memory. */ + +/* This file lives in both GCC and libiberty. When making changes, please + try not to break either. */ + +#define __NO_STRING_INLINES + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "safe-ctype.h" +#include "vg_include.h" + +#include +#include +#include + +#ifdef HAVE_STDLIB_H +#include +#else +char * malloc (); +char * realloc (); +#endif + +#include +#include "dyn-string.h" +#undef CURRENT_DEMANGLING_STYLE +#define CURRENT_DEMANGLING_STYLE work->options + +/*#include "libiberty.h"*/ + +static char *ada_demangle PARAMS ((const char *, int)); + +#define min(X,Y) (((X) < (Y)) ? (X) : (Y)) + +/* A value at least one greater than the maximum number of characters + that will be output when using the `%d' format with `printf'. 
*/ +#define INTBUF_SIZE 32 + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0])) +#endif + +#ifndef STANDALONE +#define xstrdup(ptr) VG_(strdup)(VG_AR_DEMANGLE, ptr) +#define free(ptr) VG_(free)(VG_AR_DEMANGLE, ptr) +#define xmalloc(size) VG_(malloc)(VG_AR_DEMANGLE, size) +#define xrealloc(ptr, size) VG_(realloc)(VG_AR_DEMANGLE, ptr, size) +#define abort() vg_assert(0) +#undef strstr +#define strstr VG_(strstr) +#define sprintf VG_(sprintf) +#define strncpy VG_(strncpy) +#define strncat VG_(strncat) +#define strchr VG_(strchr) +#define strpbrk VG_(strpbrk) +#endif + +extern void fancy_abort PARAMS ((void)) ATTRIBUTE_NORETURN; + +/* In order to allow a single demangler executable to demangle strings + using various common values of CPLUS_MARKER, as well as any specific + one set at compile time, we maintain a string containing all the + commonly used ones, and check to see if the marker we are looking for + is in that string. CPLUS_MARKER is usually '$' on systems where the + assembler can deal with that. Where the assembler can't, it's usually + '.' (but on many systems '.' is used for other things). We put the + current defined CPLUS_MARKER first (which defaults to '$'), followed + by the next most common value, followed by an explicit '$' in case + the value of CPLUS_MARKER is not '$'. + + We could avoid this if we could just get g++ to tell us what the actual + cplus marker character is as part of the debug information, perhaps by + ensuring that it is the character that terminates the gcc_compiled + marker symbol (FIXME). 
*/ + +#if !defined (CPLUS_MARKER) +#define CPLUS_MARKER '$' +#endif + +enum demangling_styles current_demangling_style = auto_demangling; + +static char cplus_markers[] = { CPLUS_MARKER, '.', '$', '\0' }; + +static char char_str[2] = { '\000', '\000' }; + +/* +void +set_cplus_marker_for_demangling (ch) + int ch; +{ + cplus_markers[0] = ch; +} +*/ + +typedef struct string /* Beware: these aren't required to be */ +{ /* '\0' terminated. */ + char *b; /* pointer to start of string */ + char *p; /* pointer after last character */ + char *e; /* pointer after end of allocated space */ +} string; + +/* Stuff that is shared between sub-routines. + Using a shared structure allows cplus_demangle to be reentrant. */ + +struct work_stuff +{ + int options; + char **typevec; + char **ktypevec; + char **btypevec; + int numk; + int numb; + int ksize; + int bsize; + int ntypes; + int typevec_size; + int constructor; + int destructor; + int static_type; /* A static member function */ + int temp_start; /* index in demangled to start of template args */ + int type_quals; /* The type qualifiers. */ + int dllimported; /* Symbol imported from a PE DLL */ + char **tmpl_argvec; /* Template function arguments. */ + int ntmpl_args; /* The number of template function arguments. */ + int forgetting_types; /* Nonzero if we are not remembering the types + we see. */ + string* previous_argument; /* The last function argument demangled. */ + int nrepeats; /* The number of times to repeat the previous + argument. 
*/ +}; + +#define PRINT_ANSI_QUALIFIERS (work -> options & DMGL_ANSI) +#define PRINT_ARG_TYPES (work -> options & DMGL_PARAMS) + +static const struct optable +{ + const char *const in; + const char *const out; + const int flags; +} optable[] = { + {"nw", " new", DMGL_ANSI}, /* new (1.92, ansi) */ + {"dl", " delete", DMGL_ANSI}, /* new (1.92, ansi) */ + {"new", " new", 0}, /* old (1.91, and 1.x) */ + {"delete", " delete", 0}, /* old (1.91, and 1.x) */ + {"vn", " new []", DMGL_ANSI}, /* GNU, pending ansi */ + {"vd", " delete []", DMGL_ANSI}, /* GNU, pending ansi */ + {"as", "=", DMGL_ANSI}, /* ansi */ + {"ne", "!=", DMGL_ANSI}, /* old, ansi */ + {"eq", "==", DMGL_ANSI}, /* old, ansi */ + {"ge", ">=", DMGL_ANSI}, /* old, ansi */ + {"gt", ">", DMGL_ANSI}, /* old, ansi */ + {"le", "<=", DMGL_ANSI}, /* old, ansi */ + {"lt", "<", DMGL_ANSI}, /* old, ansi */ + {"plus", "+", 0}, /* old */ + {"pl", "+", DMGL_ANSI}, /* ansi */ + {"apl", "+=", DMGL_ANSI}, /* ansi */ + {"minus", "-", 0}, /* old */ + {"mi", "-", DMGL_ANSI}, /* ansi */ + {"ami", "-=", DMGL_ANSI}, /* ansi */ + {"mult", "*", 0}, /* old */ + {"ml", "*", DMGL_ANSI}, /* ansi */ + {"amu", "*=", DMGL_ANSI}, /* ansi (ARM/Lucid) */ + {"aml", "*=", DMGL_ANSI}, /* ansi (GNU/g++) */ + {"convert", "+", 0}, /* old (unary +) */ + {"negate", "-", 0}, /* old (unary -) */ + {"trunc_mod", "%", 0}, /* old */ + {"md", "%", DMGL_ANSI}, /* ansi */ + {"amd", "%=", DMGL_ANSI}, /* ansi */ + {"trunc_div", "/", 0}, /* old */ + {"dv", "/", DMGL_ANSI}, /* ansi */ + {"adv", "/=", DMGL_ANSI}, /* ansi */ + {"truth_andif", "&&", 0}, /* old */ + {"aa", "&&", DMGL_ANSI}, /* ansi */ + {"truth_orif", "||", 0}, /* old */ + {"oo", "||", DMGL_ANSI}, /* ansi */ + {"truth_not", "!", 0}, /* old */ + {"nt", "!", DMGL_ANSI}, /* ansi */ + {"postincrement","++", 0}, /* old */ + {"pp", "++", DMGL_ANSI}, /* ansi */ + {"postdecrement","--", 0}, /* old */ + {"mm", "--", DMGL_ANSI}, /* ansi */ + {"bit_ior", "|", 0}, /* old */ + {"or", "|", DMGL_ANSI}, /* ansi */ + 
{"aor", "|=", DMGL_ANSI}, /* ansi */ + {"bit_xor", "^", 0}, /* old */ + {"er", "^", DMGL_ANSI}, /* ansi */ + {"aer", "^=", DMGL_ANSI}, /* ansi */ + {"bit_and", "&", 0}, /* old */ + {"ad", "&", DMGL_ANSI}, /* ansi */ + {"aad", "&=", DMGL_ANSI}, /* ansi */ + {"bit_not", "~", 0}, /* old */ + {"co", "~", DMGL_ANSI}, /* ansi */ + {"call", "()", 0}, /* old */ + {"cl", "()", DMGL_ANSI}, /* ansi */ + {"alshift", "<<", 0}, /* old */ + {"ls", "<<", DMGL_ANSI}, /* ansi */ + {"als", "<<=", DMGL_ANSI}, /* ansi */ + {"arshift", ">>", 0}, /* old */ + {"rs", ">>", DMGL_ANSI}, /* ansi */ + {"ars", ">>=", DMGL_ANSI}, /* ansi */ + {"component", "->", 0}, /* old */ + {"pt", "->", DMGL_ANSI}, /* ansi; Lucid C++ form */ + {"rf", "->", DMGL_ANSI}, /* ansi; ARM/GNU form */ + {"indirect", "*", 0}, /* old */ + {"method_call", "->()", 0}, /* old */ + {"addr", "&", 0}, /* old (unary &) */ + {"array", "[]", 0}, /* old */ + {"vc", "[]", DMGL_ANSI}, /* ansi */ + {"compound", ", ", 0}, /* old */ + {"cm", ", ", DMGL_ANSI}, /* ansi */ + {"cond", "?:", 0}, /* old */ + {"cn", "?:", DMGL_ANSI}, /* pseudo-ansi */ + {"max", ">?", 0}, /* old */ + {"mx", ">?", DMGL_ANSI}, /* pseudo-ansi */ + {"min", "*", DMGL_ANSI}, /* ansi */ + {"sz", "sizeof ", DMGL_ANSI} /* pseudo-ansi */ +}; + +/* These values are used to indicate the various type varieties. + They are all non-zero so that they can be used as `success' + values. 
*/ +typedef enum type_kind_t +{ + tk_none, + tk_pointer, + tk_reference, + tk_integral, + tk_bool, + tk_char, + tk_real +} type_kind_t; + +const struct demangler_engine libiberty_demanglers[] = +{ + { + NO_DEMANGLING_STYLE_STRING, + no_demangling, + "Demangling disabled" + } + , + { + AUTO_DEMANGLING_STYLE_STRING, + auto_demangling, + "Automatic selection based on executable" + } + , + { + GNU_DEMANGLING_STYLE_STRING, + gnu_demangling, + "GNU (g++) style demangling" + } + , + { + LUCID_DEMANGLING_STYLE_STRING, + lucid_demangling, + "Lucid (lcc) style demangling" + } + , + { + ARM_DEMANGLING_STYLE_STRING, + arm_demangling, + "ARM style demangling" + } + , + { + HP_DEMANGLING_STYLE_STRING, + hp_demangling, + "HP (aCC) style demangling" + } + , + { + EDG_DEMANGLING_STYLE_STRING, + edg_demangling, + "EDG style demangling" + } + , + { + GNU_V3_DEMANGLING_STYLE_STRING, + gnu_v3_demangling, + "GNU (g++) V3 ABI-style demangling" + } + , + { + JAVA_DEMANGLING_STYLE_STRING, + java_demangling, + "Java style demangling" + } + , + { + GNAT_DEMANGLING_STYLE_STRING, + gnat_demangling, + "GNAT style demangling" + } + , + { + NULL, unknown_demangling, NULL + } +}; + +#define STRING_EMPTY(str) ((str) -> b == (str) -> p) +#define PREPEND_BLANK(str) {if (!STRING_EMPTY(str)) \ + string_prepend(str, " ");} +#define APPEND_BLANK(str) {if (!STRING_EMPTY(str)) \ + string_append(str, " ");} +#define LEN_STRING(str) ( (STRING_EMPTY(str))?0:((str)->p - (str)->b)) + +/* The scope separator appropriate for the language being demangled. */ + +#define SCOPE_STRING(work) ((work->options & DMGL_JAVA) ? "." 
: "::") + +#define ARM_VTABLE_STRING "__vtbl__" /* Lucid/ARM virtual table prefix */ +#define ARM_VTABLE_STRLEN 8 /* strlen (ARM_VTABLE_STRING) */ + +/* Prototypes for local functions */ + +static void +delete_work_stuff PARAMS ((struct work_stuff *)); + +static void +delete_non_B_K_work_stuff PARAMS ((struct work_stuff *)); + +static char * +mop_up PARAMS ((struct work_stuff *, string *, int)); + +static void +squangle_mop_up PARAMS ((struct work_stuff *)); + +static void +work_stuff_copy_to_from PARAMS ((struct work_stuff *, struct work_stuff *)); + +#if 0 +static int +demangle_method_args PARAMS ((struct work_stuff *, const char **, string *)); +#endif + +static char * +internal_cplus_demangle PARAMS ((struct work_stuff *, const char *)); + +static int +demangle_template_template_parm PARAMS ((struct work_stuff *work, + const char **, string *)); + +static int +demangle_template PARAMS ((struct work_stuff *work, const char **, string *, + string *, int, int)); + +static int +arm_pt PARAMS ((struct work_stuff *, const char *, int, const char **, + const char **)); + +static int +demangle_class_name PARAMS ((struct work_stuff *, const char **, string *)); + +static int +demangle_qualified PARAMS ((struct work_stuff *, const char **, string *, + int, int)); + +static int +demangle_class PARAMS ((struct work_stuff *, const char **, string *)); + +static int +demangle_fund_type PARAMS ((struct work_stuff *, const char **, string *)); + +static int +demangle_signature PARAMS ((struct work_stuff *, const char **, string *)); + +static int +demangle_prefix PARAMS ((struct work_stuff *, const char **, string *)); + +static int +gnu_special PARAMS ((struct work_stuff *, const char **, string *)); + +static int +arm_special PARAMS ((const char **, string *)); + +static void +string_need PARAMS ((string *, int)); + +static void +string_delete PARAMS ((string *)); + +static void +string_init PARAMS ((string *)); + +static void +string_clear PARAMS ((string *)); + +#if 0 
+static int +string_empty PARAMS ((string *)); +#endif + +static void +string_append PARAMS ((string *, const char *)); + +static void +string_appends PARAMS ((string *, string *)); + +static void +string_appendn PARAMS ((string *, const char *, int)); + +static void +string_prepend PARAMS ((string *, const char *)); + +static void +string_prependn PARAMS ((string *, const char *, int)); + +static void +string_append_template_idx PARAMS ((string *, int)); + +static int +get_count PARAMS ((const char **, int *)); + +static int +consume_count PARAMS ((const char **)); + +static int +consume_count_with_underscores PARAMS ((const char**)); + +static int +demangle_args PARAMS ((struct work_stuff *, const char **, string *)); + +static int +demangle_nested_args PARAMS ((struct work_stuff*, const char**, string*)); + +static int +do_type PARAMS ((struct work_stuff *, const char **, string *)); + +static int +do_arg PARAMS ((struct work_stuff *, const char **, string *)); + +static void +demangle_function_name PARAMS ((struct work_stuff *, const char **, string *, + const char *)); + +static int +iterate_demangle_function PARAMS ((struct work_stuff *, + const char **, string *, const char *)); + +static void +remember_type PARAMS ((struct work_stuff *, const char *, int)); + +static void +remember_Btype PARAMS ((struct work_stuff *, const char *, int, int)); + +static int +register_Btype PARAMS ((struct work_stuff *)); + +static void +remember_Ktype PARAMS ((struct work_stuff *, const char *, int)); + +static void +forget_types PARAMS ((struct work_stuff *)); + +static void +forget_B_and_K_types PARAMS ((struct work_stuff *)); + +static void +string_prepends PARAMS ((string *, string *)); + +static int +demangle_template_value_parm PARAMS ((struct work_stuff*, const char**, + string*, type_kind_t)); + +static int +do_hpacc_template_const_value PARAMS ((struct work_stuff *, const char **, string *)); + +static int +do_hpacc_template_literal PARAMS ((struct work_stuff *, 
const char **, string *)); + +static int +snarf_numeric_literal PARAMS ((const char **, string *)); + +/* There is a TYPE_QUAL value for each type qualifier. They can be + combined by bitwise-or to form the complete set of qualifiers for a + type. */ + +#define TYPE_UNQUALIFIED 0x0 +#define TYPE_QUAL_CONST 0x1 +#define TYPE_QUAL_VOLATILE 0x2 +#define TYPE_QUAL_RESTRICT 0x4 + +static int +code_for_qualifier PARAMS ((int)); + +static const char* +qualifier_string PARAMS ((int)); + +static const char* +demangle_qualifier PARAMS ((int)); + +static int +demangle_expression PARAMS ((struct work_stuff *, const char **, string *, + type_kind_t)); + +static int +demangle_integral_value PARAMS ((struct work_stuff *, const char **, + string *)); + +static int +demangle_real_value PARAMS ((struct work_stuff *, const char **, string *)); + +static void +demangle_arm_hp_template PARAMS ((struct work_stuff *, const char **, int, + string *)); + +static void +recursively_demangle PARAMS ((struct work_stuff *, const char **, string *, + int)); + +static void +grow_vect PARAMS ((void **, size_t *, size_t, int)); + +/* Translate count to integer, consuming tokens in the process. + Conversion terminates on the first non-digit character. + + Trying to consume something that isn't a count results in no + consumption of input and a return of -1. + + Overflow consumes the rest of the digits, and returns -1. */ + +static int +consume_count (type) + const char **type; +{ + int count = 0; + + if (! ISDIGIT ((unsigned char)**type)) + return -1; + + while (ISDIGIT ((unsigned char)**type)) + { + count *= 10; + + /* Check for overflow. + We assume that count is represented using two's-complement; + no power of two is divisible by ten, so if an overflow occurs + when multiplying by ten, the result will not be a multiple of + ten. 
*/ + if ((count % 10) != 0) + { + while (ISDIGIT ((unsigned char) **type)) + (*type)++; + return -1; + } + + count += **type - '0'; + (*type)++; + } + + if (count < 0) + count = -1; + + return (count); +} + + +/* Like consume_count, but for counts that are preceded and followed + by '_' if they are greater than 10. Also, -1 is returned for + failure, since 0 can be a valid value. */ + +static int +consume_count_with_underscores (mangled) + const char **mangled; +{ + int idx; + + if (**mangled == '_') + { + (*mangled)++; + if (!ISDIGIT ((unsigned char)**mangled)) + return -1; + + idx = consume_count (mangled); + if (**mangled != '_') + /* The trailing underscore was missing. */ + return -1; + + (*mangled)++; + } + else + { + if (**mangled < '0' || **mangled > '9') + return -1; + + idx = **mangled - '0'; + (*mangled)++; + } + + return idx; +} + +/* C is the code for a type-qualifier. Return the TYPE_QUAL + corresponding to this qualifier. */ + +static int +code_for_qualifier (c) + int c; +{ + switch (c) + { + case 'C': + return TYPE_QUAL_CONST; + + case 'V': + return TYPE_QUAL_VOLATILE; + + case 'u': + return TYPE_QUAL_RESTRICT; + + default: + break; + } + + /* C was an invalid qualifier. */ + abort (); +} + +/* Return the string corresponding to the qualifiers given by + TYPE_QUALS. 
*/ + +static const char* +qualifier_string (type_quals) + int type_quals; +{ + switch (type_quals) + { + case TYPE_UNQUALIFIED: + return ""; + + case TYPE_QUAL_CONST: + return "const"; + + case TYPE_QUAL_VOLATILE: + return "volatile"; + + case TYPE_QUAL_RESTRICT: + return "__restrict"; + + case TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE: + return "const volatile"; + + case TYPE_QUAL_CONST | TYPE_QUAL_RESTRICT: + return "const __restrict"; + + case TYPE_QUAL_VOLATILE | TYPE_QUAL_RESTRICT: + return "volatile __restrict"; + + case TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE | TYPE_QUAL_RESTRICT: + return "const volatile __restrict"; + + default: + break; + } + + /* TYPE_QUALS was an invalid qualifier set. */ + abort (); +} + +/* C is the code for a type-qualifier. Return the string + corresponding to this qualifier. This function should only be + called with a valid qualifier code. */ + +static const char* +demangle_qualifier (c) + int c; +{ + return qualifier_string (code_for_qualifier (c)); +} + +#if 0 +int +cplus_demangle_opname (opname, result, options) + const char *opname; + char *result; + int options; +{ + int len, len1, ret; + string type; + struct work_stuff work[1]; + const char *tem; + + len = strlen(opname); + result[0] = '\0'; + ret = 0; + memset ((char *) work, 0, sizeof (work)); + work->options = options; + + if (opname[0] == '_' && opname[1] == '_' + && opname[2] == 'o' && opname[3] == 'p') + { + /* ANSI. */ + /* type conversion operator. */ + tem = opname + 4; + if (do_type (work, &tem, &type)) + { + strcat (result, "operator "); + strncat (result, type.b, type.p - type.b); + string_delete (&type); + ret = 1; + } + } + else if (opname[0] == '_' && opname[1] == '_' + && ISLOWER((unsigned char)opname[2]) + && ISLOWER((unsigned char)opname[3])) + { + if (opname[4] == '\0') + { + /* Operator. 
*/ + size_t i; + for (i = 0; i < ARRAY_SIZE (optable); i++) + { + if (strlen (optable[i].in) == 2 + && memcmp (optable[i].in, opname + 2, 2) == 0) + { + strcat (result, "operator"); + strcat (result, optable[i].out); + ret = 1; + break; + } + } + } + else + { + if (opname[2] == 'a' && opname[5] == '\0') + { + /* Assignment. */ + size_t i; + for (i = 0; i < ARRAY_SIZE (optable); i++) + { + if (strlen (optable[i].in) == 3 + && memcmp (optable[i].in, opname + 2, 3) == 0) + { + strcat (result, "operator"); + strcat (result, optable[i].out); + ret = 1; + break; + } + } + } + } + } + else if (len >= 3 + && opname[0] == 'o' + && opname[1] == 'p' + && strchr (cplus_markers, opname[2]) != NULL) + { + /* see if it's an assignment expression */ + if (len >= 10 /* op$assign_ */ + && memcmp (opname + 3, "assign_", 7) == 0) + { + size_t i; + for (i = 0; i < ARRAY_SIZE (optable); i++) + { + len1 = len - 10; + if ((int) strlen (optable[i].in) == len1 + && memcmp (optable[i].in, opname + 10, len1) == 0) + { + strcat (result, "operator"); + strcat (result, optable[i].out); + strcat (result, "="); + ret = 1; + break; + } + } + } + else + { + size_t i; + for (i = 0; i < ARRAY_SIZE (optable); i++) + { + len1 = len - 3; + if ((int) strlen (optable[i].in) == len1 + && memcmp (optable[i].in, opname + 3, len1) == 0) + { + strcat (result, "operator"); + strcat (result, optable[i].out); + ret = 1; + break; + } + } + } + } + else if (len >= 5 && memcmp (opname, "type", 4) == 0 + && strchr (cplus_markers, opname[4]) != NULL) + { + /* type conversion operator */ + tem = opname + 5; + if (do_type (work, &tem, &type)) + { + strcat (result, "operator "); + strncat (result, type.b, type.p - type.b); + string_delete (&type); + ret = 1; + } + } + squangle_mop_up (work); + return ret; + +} +#endif /* 0 */ + +/* Takes operator name as e.g. "++" and returns mangled + operator name (e.g. "postincrement_expr"), or NULL if not found. 
+ + If OPTIONS & DMGL_ANSI == 1, return the ANSI name; + if OPTIONS & DMGL_ANSI == 0, return the old GNU name. */ + +/* +const char * +cplus_mangle_opname (opname, options) + const char *opname; + int options; +{ + size_t i; + int len; + + len = strlen (opname); + for (i = 0; i < ARRAY_SIZE (optable); i++) + { + if ((int) strlen (optable[i].out) == len + && (options & DMGL_ANSI) == (optable[i].flags & DMGL_ANSI) + && memcmp (optable[i].out, opname, len) == 0) + return optable[i].in; + } + return (0); +} +*/ + +/* Add a routine to set the demangling style to be sure it is valid and + allow for any demangler initialization that maybe necessary. */ + +/* +enum demangling_styles +cplus_demangle_set_style (style) + enum demangling_styles style; +{ + const struct demangler_engine *demangler = libiberty_demanglers; + + for (; demangler->demangling_style != unknown_demangling; ++demangler) + if (style == demangler->demangling_style) + { + current_demangling_style = style; + return current_demangling_style; + } + + return unknown_demangling; +} +*/ + +/* Do string name to style translation */ + +/* +enum demangling_styles +cplus_demangle_name_to_style (name) + const char *name; +{ + const struct demangler_engine *demangler = libiberty_demanglers; + + for (; demangler->demangling_style != unknown_demangling; ++demangler) + if (strcmp (name, demangler->demangling_style_name) == 0) + return demangler->demangling_style; + + return unknown_demangling; +} +*/ + +/* char *cplus_demangle (const char *mangled, int options) + + If MANGLED is a mangled function name produced by GNU C++, then + a pointer to a @code{malloc}ed string giving a C++ representation + of the name will be returned; otherwise NULL will be returned. + It is the caller's responsibility to free the string which + is returned. + + The OPTIONS arg may contain one or more of the following bits: + + DMGL_ANSI ANSI qualifiers such as `const' and `void' are + included. + DMGL_PARAMS Function parameters are included. 
+ + For example, + + cplus_demangle ("foo__1Ai", DMGL_PARAMS) => "A::foo(int)" + cplus_demangle ("foo__1Ai", DMGL_PARAMS | DMGL_ANSI) => "A::foo(int)" + cplus_demangle ("foo__1Ai", 0) => "A::foo" + + cplus_demangle ("foo__1Afe", DMGL_PARAMS) => "A::foo(float,...)" + cplus_demangle ("foo__1Afe", DMGL_PARAMS | DMGL_ANSI)=> "A::foo(float,...)" + cplus_demangle ("foo__1Afe", 0) => "A::foo" + + Note that any leading underscores, or other such characters prepended by + the compilation system, are presumed to have already been stripped from + MANGLED. */ + +char * +VG_(cplus_demangle) (mangled, options) + const char *mangled; + int options; +{ + char *ret; + struct work_stuff work[1]; + + if (current_demangling_style == no_demangling) + return xstrdup (mangled); + + memset ((char *) work, 0, sizeof (work)); + work->options = options; + if ((work->options & DMGL_STYLE_MASK) == 0) + work->options |= (int) current_demangling_style & DMGL_STYLE_MASK; + + /* The V3 ABI demangling is implemented elsewhere. */ + if (GNU_V3_DEMANGLING || AUTO_DEMANGLING) + { + ret = VG_(cplus_demangle_v3) (mangled/*, work->options*/); + if (ret || GNU_V3_DEMANGLING) + return ret; + } + + if (JAVA_DEMANGLING) + { + ret = VG_(java_demangle_v3) (mangled); + if (ret) + return ret; + } + + if (GNAT_DEMANGLING) + return ada_demangle(mangled,options); + + ret = internal_cplus_demangle (work, mangled); + squangle_mop_up (work); + return (ret); +} + + +/* Assuming *OLD_VECT points to an array of *SIZE objects of size + ELEMENT_SIZE, grow it to contain at least MIN_SIZE objects, + updating *OLD_VECT and *SIZE as necessary. */ + +static void +grow_vect (old_vect, size, min_size, element_size) + void **old_vect; + size_t *size; + size_t min_size; + int element_size; +{ + if (*size < min_size) + { + *size *= 2; + if (*size < min_size) + *size = min_size; + *old_vect = xrealloc (*old_vect, *size * element_size); + } +} + +/* Demangle ada names: + 1. Discard final __{DIGIT}+ or ${DIGIT}+ + 2. 
Convert other instances of embedded "__" to `.'. + 3. Discard leading _ada_. + 4. Remove everything after first ___ if it is followed by 'X'. + 5. Put symbols that should be suppressed in <...> brackets. + The resulting string is valid until the next call of ada_demangle. */ + +static char * +ada_demangle (mangled, option) + const char *mangled; + int option ATTRIBUTE_UNUSED; +{ + int i, j; + int len0; + const char* p; + char *demangled = NULL; + int at_start_name; + int changed; + char *demangling_buffer = NULL; + size_t demangling_buffer_size = 0; + + changed = 0; + + if (strncmp (mangled, "_ada_", 5) == 0) + { + mangled += 5; + changed = 1; + } + + if (mangled[0] == '_' || mangled[0] == '<') + goto Suppress; + + p = strstr (mangled, "___"); + if (p == NULL) + len0 = strlen (mangled); + else + { + if (p[3] == 'X') + { + len0 = p - mangled; + changed = 1; + } + else + goto Suppress; + } + + /* Make demangled big enough for possible expansion by operator name. */ + grow_vect ((void **) &(demangling_buffer), + &demangling_buffer_size, 2 * len0 + 1, + sizeof (char)); + demangled = demangling_buffer; + + if (ISDIGIT ((unsigned char) mangled[len0 - 1])) { + for (i = len0 - 2; i >= 0 && ISDIGIT ((unsigned char) mangled[i]); i -= 1) + ; + if (i > 1 && mangled[i] == '_' && mangled[i - 1] == '_') + { + len0 = i - 1; + changed = 1; + } + else if (mangled[i] == '$') + { + len0 = i; + changed = 1; + } + } + + for (i = 0, j = 0; i < len0 && ! ISALPHA ((unsigned char)mangled[i]); + i += 1, j += 1) + demangled[j] = mangled[i]; + + at_start_name = 1; + while (i < len0) + { + at_start_name = 0; + + if (i < len0 - 2 && mangled[i] == '_' && mangled[i + 1] == '_') + { + demangled[j] = '.'; + changed = at_start_name = 1; + i += 2; j += 1; + } + else + { + demangled[j] = mangled[i]; + i += 1; j += 1; + } + } + demangled[j] = '\000'; + + for (i = 0; demangled[i] != '\0'; i += 1) + if (ISUPPER ((unsigned char)demangled[i]) || demangled[i] == ' ') + goto Suppress; + + if (! 
changed) + return NULL; + else + return demangled; + + Suppress: + grow_vect ((void **) &(demangling_buffer), + &demangling_buffer_size, strlen (mangled) + 3, + sizeof (char)); + demangled = demangling_buffer; + if (mangled[0] == '<') + strcpy (demangled, mangled); + else + sprintf (demangled, "<%s>", mangled); + + return demangled; +} + +/* This function performs most of what cplus_demangle use to do, but + to be able to demangle a name with a B, K or n code, we need to + have a longer term memory of what types have been seen. The original + now intializes and cleans up the squangle code info, while internal + calls go directly to this routine to avoid resetting that info. */ + +static char * +internal_cplus_demangle (work, mangled) + struct work_stuff *work; + const char *mangled; +{ + + string decl; + int success = 0; + char *demangled = NULL; + int s1, s2, s3, s4; + s1 = work->constructor; + s2 = work->destructor; + s3 = work->static_type; + s4 = work->type_quals; + work->constructor = work->destructor = 0; + work->type_quals = TYPE_UNQUALIFIED; + work->dllimported = 0; + + if ((mangled != NULL) && (*mangled != '\0')) + { + string_init (&decl); + + /* First check to see if gnu style demangling is active and if the + string to be demangled contains a CPLUS_MARKER. If so, attempt to + recognize one of the gnu special forms rather than looking for a + standard prefix. In particular, don't worry about whether there + is a "__" string in the mangled string. Consider "_$_5__foo" for + example. 
*/ + + if ((AUTO_DEMANGLING || GNU_DEMANGLING)) + { + success = gnu_special (work, &mangled, &decl); + } + if (!success) + { + success = demangle_prefix (work, &mangled, &decl); + } + if (success && (*mangled != '\0')) + { + success = demangle_signature (work, &mangled, &decl); + } + if (work->constructor == 2) + { + string_prepend (&decl, "global constructors keyed to "); + work->constructor = 0; + } + else if (work->destructor == 2) + { + string_prepend (&decl, "global destructors keyed to "); + work->destructor = 0; + } + else if (work->dllimported == 1) + { + string_prepend (&decl, "import stub for "); + work->dllimported = 0; + } + demangled = mop_up (work, &decl, success); + } + work->constructor = s1; + work->destructor = s2; + work->static_type = s3; + work->type_quals = s4; + return demangled; +} + + +/* Clear out and squangling related storage */ +static void +squangle_mop_up (work) + struct work_stuff *work; +{ + /* clean up the B and K type mangling types. */ + forget_B_and_K_types (work); + if (work -> btypevec != NULL) + { + free ((char *) work -> btypevec); + } + if (work -> ktypevec != NULL) + { + free ((char *) work -> ktypevec); + } +} + + +/* Copy the work state and storage. */ + +static void +work_stuff_copy_to_from (to, from) + struct work_stuff *to; + struct work_stuff *from; +{ + int i; + + delete_work_stuff (to); + + /* Shallow-copy scalars. */ + memcpy (to, from, sizeof (*to)); + + /* Deep-copy dynamic storage. 
*/ + if (from->typevec_size) + to->typevec + = (char **) xmalloc (from->typevec_size * sizeof (to->typevec[0])); + + for (i = 0; i < from->ntypes; i++) + { + int len = strlen (from->typevec[i]) + 1; + + to->typevec[i] = xmalloc (len); + memcpy (to->typevec[i], from->typevec[i], len); + } + + if (from->ksize) + to->ktypevec + = (char **) xmalloc (from->ksize * sizeof (to->ktypevec[0])); + + for (i = 0; i < from->numk; i++) + { + int len = strlen (from->ktypevec[i]) + 1; + + to->ktypevec[i] = xmalloc (len); + memcpy (to->ktypevec[i], from->ktypevec[i], len); + } + + if (from->bsize) + to->btypevec + = (char **) xmalloc (from->bsize * sizeof (to->btypevec[0])); + + for (i = 0; i < from->numb; i++) + { + int len = strlen (from->btypevec[i]) + 1; + + to->btypevec[i] = xmalloc (len); + memcpy (to->btypevec[i], from->btypevec[i], len); + } + + if (from->ntmpl_args) + to->tmpl_argvec + = xmalloc (from->ntmpl_args * sizeof (to->tmpl_argvec[0])); + + for (i = 0; i < from->ntmpl_args; i++) + { + int len = strlen (from->tmpl_argvec[i]) + 1; + + to->tmpl_argvec[i] = xmalloc (len); + memcpy (to->tmpl_argvec[i], from->tmpl_argvec[i], len); + } + + if (from->previous_argument) + { + to->previous_argument = (string*) xmalloc (sizeof (string)); + string_init (to->previous_argument); + string_appends (to->previous_argument, from->previous_argument); + } +} + + +/* Delete dynamic stuff in work_stuff that is not to be re-used. */ + +static void +delete_non_B_K_work_stuff (work) + struct work_stuff *work; +{ + /* Discard the remembered types, if any. 
*/ + + forget_types (work); + if (work -> typevec != NULL) + { + free ((char *) work -> typevec); + work -> typevec = NULL; + work -> typevec_size = 0; + } + if (work->tmpl_argvec) + { + int i; + + for (i = 0; i < work->ntmpl_args; i++) + if (work->tmpl_argvec[i]) + free ((char*) work->tmpl_argvec[i]); + + free ((char*) work->tmpl_argvec); + work->tmpl_argvec = NULL; + } + if (work->previous_argument) + { + string_delete (work->previous_argument); + free ((char*) work->previous_argument); + work->previous_argument = NULL; + } +} + + +/* Delete all dynamic storage in work_stuff. */ +static void +delete_work_stuff (work) + struct work_stuff *work; +{ + delete_non_B_K_work_stuff (work); + squangle_mop_up (work); +} + + +/* Clear out any mangled storage */ + +static char * +mop_up (work, declp, success) + struct work_stuff *work; + string *declp; + int success; +{ + char *demangled = NULL; + + delete_non_B_K_work_stuff (work); + + /* If demangling was successful, ensure that the demangled string is null + terminated and return it. Otherwise, free the demangling decl. */ + + if (!success) + { + string_delete (declp); + } + else + { + string_appendn (declp, "", 1); + demangled = declp->b; + } + return (demangled); +} + +/* + +LOCAL FUNCTION + + demangle_signature -- demangle the signature part of a mangled name + +SYNOPSIS + + static int + demangle_signature (struct work_stuff *work, const char **mangled, + string *declp); + +DESCRIPTION + + Consume and demangle the signature portion of the mangled name. + + DECLP is the string where demangled output is being built. At + entry it contains the demangled root name from the mangled name + prefix. I.E. either a demangled operator name or the root function + name. In some special cases, it may contain nothing. + + *MANGLED points to the current unconsumed location in the mangled + name. As tokens are consumed and demangling is performed, the + pointer is updated to continuously point at the next token to + be consumed. 
/* Demangle the signature portion of *MANGLED into DECLP (see the LOCAL
   FUNCTION comment above).  Demangling GNU style mangled names is nasty
   because there is no explicit token that marks the start of the
   outermost function argument list.  Returns nonzero on success.  */

static int
demangle_signature (work, mangled, declp)
     struct work_stuff *work;
     const char **mangled;
     string *declp;
{
  int success = 1;
  int func_done = 0;          /* set once the argument list is consumed */
  int expect_func = 0;        /* next tokens should be function args */
  int expect_return_type = 0; /* template fn: a '_'-prefixed return type follows */
  const char *oldmangled = NULL;
  string trawname;
  string tname;

  while (success && (**mangled != '\0'))
    {
      switch (**mangled)
	{
	case 'Q':
	  /* Qualified (nested) name.  */
	  oldmangled = *mangled;
	  success = demangle_qualified (work, mangled, declp, 1, 0);
	  if (success)
	    remember_type (work, oldmangled, *mangled - oldmangled);
	  if (AUTO_DEMANGLING || GNU_DEMANGLING)
	    expect_func = 1;
	  oldmangled = NULL;
	  break;

	case 'K':
	  /* Squangled qualified name (back-referenced).  */
	  oldmangled = *mangled;
	  success = demangle_qualified (work, mangled, declp, 1, 0);
	  if (AUTO_DEMANGLING || GNU_DEMANGLING)
	    {
	      expect_func = 1;
	    }
	  oldmangled = NULL;
	  break;

	case 'S':
	  /* Static member function */
	  if (oldmangled == NULL)
	    {
	      oldmangled = *mangled;
	    }
	  (*mangled)++;
	  work -> static_type = 1;
	  break;

	case 'C':
	case 'V':
	case 'u':
	  work->type_quals |= code_for_qualifier (**mangled);

	  /* a qualified member function */
	  if (oldmangled == NULL)
	    oldmangled = *mangled;
	  (*mangled)++;
	  break;

	case 'L':
	  /* Local class name follows after "Lnnn_" */
	  if (HP_DEMANGLING)
	    {
	      while (**mangled && (**mangled != '_'))
		(*mangled)++;
	      if (!**mangled)
		success = 0;
	      else
		(*mangled)++;
	    }
	  else
	    success = 0;
	  break;

	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	  /* A length-prefixed class name.  */
	  if (oldmangled == NULL)
	    {
	      oldmangled = *mangled;
	    }
          work->temp_start = -1; /* uppermost call to demangle_class */
	  success = demangle_class (work, mangled, declp);
	  if (success)
	    {
	      remember_type (work, oldmangled, *mangled - oldmangled);
	    }
	  if (AUTO_DEMANGLING || GNU_DEMANGLING || EDG_DEMANGLING)
	    {
	      /* EDG and others will have the "F", so we let the loop cycle
		 if we are looking at one. */
	      if (**mangled != 'F')
		 expect_func = 1;
	    }
	  oldmangled = NULL;
	  break;

	case 'B':
	  /* Back reference to a previously seen type.  */
	  {
	    string s;
	    success = do_type (work, mangled, &s);
	    if (success)
	      {
		string_append (&s, SCOPE_STRING (work));
		string_prepends (declp, &s);
	      }
	    oldmangled = NULL;
	    expect_func = 1;
	  }
	  break;

	case 'F':
	  /* Function */
	  /* ARM/HP style demangling includes a specific 'F' character after
	     the class name.  For GNU style, it is just implied.  So we can
	     safely just consume any 'F' at this point and be compatible
	     with either style.  */

	  oldmangled = NULL;
	  func_done = 1;
	  (*mangled)++;

	  /* For lucid/ARM/HP style we have to forget any types we might
	     have remembered up to this point, since they were not argument
	     types.  GNU style considers all types seen as available for
	     back references.  See comment in demangle_args() */

	  if (LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING)
	    {
	      forget_types (work);
	    }
	  success = demangle_args (work, mangled, declp);
	  /* After picking off the function args, we expect to either
	     find the function return type (preceded by an '_') or the
	     end of the string. */
	  if (success && (AUTO_DEMANGLING || EDG_DEMANGLING) && **mangled == '_')
	    {
	      ++(*mangled);
	      /* At this level, we do not care about the return type. */
	      success = do_type (work, mangled, &tname);
	      string_delete (&tname);
	    }

	  break;

	case 't':
	  /* G++ Template */
	  string_init(&trawname);
	  string_init(&tname);
	  if (oldmangled == NULL)
	    {
	      oldmangled = *mangled;
	    }
	  success = demangle_template (work, mangled, &tname,
				       &trawname, 1, 1);
	  if (success)
	    {
	      remember_type (work, oldmangled, *mangled - oldmangled);
	    }
	  string_append (&tname, SCOPE_STRING (work));

	  string_prepends(declp, &tname);
	  if (work -> destructor & 1)
	    {
	      string_prepend (&trawname, "~");
	      string_appends (declp, &trawname);
	      work->destructor -= 1;
	    }
	  if ((work->constructor & 1) || (work->destructor & 1))
	    {
	      string_appends (declp, &trawname);
	      work->constructor -= 1;
	    }
	  string_delete(&trawname);
	  string_delete(&tname);
	  oldmangled = NULL;
	  expect_func = 1;
	  break;

	case '_':
	  if ((AUTO_DEMANGLING || GNU_DEMANGLING) && expect_return_type)
	    {
	      /* Read the return type. */
	      string return_type;
	      string_init (&return_type);

	      (*mangled)++;
	      success = do_type (work, mangled, &return_type);
	      APPEND_BLANK (&return_type);

	      string_prepends (declp, &return_type);
	      string_delete (&return_type);
	      break;
	    }
	  else
	    /* At the outermost level, we cannot have a return type specified,
	       so if we run into another '_' at this point we are dealing with
	       a mangled name that is either bogus, or has been mangled by
	       some algorithm we don't know how to deal with.  So just
	       reject the entire demangling.  */
            /* However, "_nnn" is an expected suffix for alternate entry point
               numbered nnn for a function, with HP aCC, so skip over that
               without reporting failure. pai/1997-09-04 */
            if (HP_DEMANGLING)
              {
                (*mangled)++;
                while (**mangled && ISDIGIT ((unsigned char)**mangled))
                  (*mangled)++;
              }
            else
              success = 0;
	  break;

	case 'H':
	  if (AUTO_DEMANGLING || GNU_DEMANGLING)
	    {
	      /* A G++ template function.  Read the template arguments. */
	      success = demangle_template (work, mangled, declp, 0, 0,
					   0);
	      if (!(work->constructor & 1))
		expect_return_type = 1;
	      (*mangled)++;
	      break;
	    }
	  else
	    /* fall through */
	    {;}

	default:
	  if (AUTO_DEMANGLING || GNU_DEMANGLING)
	    {
	      /* Assume we have stumbled onto the first outermost function
		 argument token, and start processing args. */
	      func_done = 1;
	      success = demangle_args (work, mangled, declp);
	    }
	  else
	    {
	      /* Non-GNU demanglers use a specific token to mark the start
		 of the outermost function argument tokens.  Typically 'F',
		 for ARM/HP-demangling, for example.  So if we find something
		 we are not prepared for, it must be an error. */
	      success = 0;
	    }
	  break;
	}
      /*
	if (AUTO_DEMANGLING || GNU_DEMANGLING)
      */
      {
	if (success && expect_func)
	  {
	    func_done = 1;
	      if (LUCID_DEMANGLING || ARM_DEMANGLING || EDG_DEMANGLING)
		{
		  forget_types (work);
		}
	    success = demangle_args (work, mangled, declp);
	    /* Since template include the mangling of their return types,
	       we must set expect_func to 0 so that we don't try do
	       demangle more arguments the next time we get here.  */
	    expect_func = 0;
	  }
      }
    }
  if (success && !func_done)
    {
      if (AUTO_DEMANGLING || GNU_DEMANGLING)
	{
	  /* With GNU style demangling, bar__3foo is 'foo::bar(void)', and
	     bar__3fooi is 'foo::bar(int)'.  We get here when we find the
	     first case, and need to ensure that the '(void)' gets added to
	     the current declp.  Note that with ARM/HP, the first case
	     represents the name of a static data member 'foo::bar',
	     which is in the current declp, so we leave it alone.  */
	  success = demangle_args (work, mangled, declp);
	}
    }
  if (success && PRINT_ARG_TYPES)
    {
      if (work->static_type)
	string_append (declp, " static");
      if (work->type_quals != TYPE_UNQUALIFIED)
	{
	  APPEND_BLANK (declp);
	  string_append (declp, qualifier_string (work->type_quals));
	}
    }

  return (success);
}

#if 0

static int
demangle_method_args (work, mangled, declp)
     struct work_stuff *work;
     const char **mangled;
     string *declp;
{
  int success = 0;

  if (work -> static_type)
    {
      string_append (declp, *mangled + 1);
      *mangled += strlen (*mangled);
      success = 1;
    }
  else
    {
      success = demangle_args (work, mangled, declp);
    }
  return (success);
}

#endif

/* Demangle a template-template parameter ("template <...> class") into
   TNAME.  Returns nonzero on success.  */

static int
demangle_template_template_parm (work, mangled, tname)
     struct work_stuff *work;
     const char **mangled;
     string *tname;
{
  int i;
  int r;
  int need_comma = 0;
  int success = 1;
  string temp;

  string_append (tname, "template <");
  /* get size of template parameter list */
  if (get_count (mangled, &r))
    {
      for (i = 0; i < r; i++)
	{
	  if (need_comma)
	    {
	      string_append (tname, ", ");
	    }

	    /* Z for type parameters */
	  if (**mangled == 'Z')
	    {
	      (*mangled)++;
	      string_append (tname, "class");
	    }
	      /* z for template parameters */
	  else if (**mangled == 'z')
	    {
	      (*mangled)++;
	      success =
		demangle_template_template_parm (work, mangled, tname);
	      if (!success)
		{
		  break;
		}
	    }
	  else
	    {
	      /* temp is initialized in do_type */
	      success = do_type (work, mangled, &temp);
	      if (success)
		{
		  string_appends (tname, &temp);
		}
	      string_delete(&temp);
	      if (!success)
		{
		  break;
		}
	    }
	  need_comma = 1;
	}

    }
  if (tname->p[-1] == '>')
    string_append (tname, " ");
  string_append (tname, "> class");
  return (success);
}

/* Demangle an expression template argument (delimited by 'E' ... 'W')
   into S, joining operands with demangled operator names from optable.
   TK is the kind of each operand value.  Returns nonzero on success.  */

static int
demangle_expression (work, mangled, s, tk)
     struct work_stuff *work;
     const char** mangled;
     string* s;
     type_kind_t tk;
{
  int need_operator = 0;
  int success;

  success = 1;
  string_appendn (s, "(", 1);
  (*mangled)++;
  while (success && **mangled != 'W' && **mangled != '\0')
    {
      if (need_operator)
	{
	  size_t i;
	  size_t len;

	  success = 0;

	  len = strlen (*mangled);

	  /* Look for the longest-prefix match among the operator table.  */
	  for (i = 0; i < ARRAY_SIZE (optable); ++i)
	    {
	      size_t l = strlen (optable[i].in);

	      if (l <= len
		  && memcmp (optable[i].in, *mangled, l) == 0)
		{
		  string_appendn (s, " ", 1);
		  string_append (s, optable[i].out);
		  string_appendn (s, " ", 1);
		  success = 1;
		  (*mangled) += l;
		  break;
		}
	    }

	  if (!success)
	    break;
	}
      else
	need_operator = 1;

      success = demangle_template_value_parm (work, mangled, s, tk);
    }

  /* The expression must be terminated by 'W'.  */
  if (**mangled != 'W')
    success = 0;
  else
    {
      string_appendn (s, ")", 1);
      (*mangled)++;
    }

  return success;
}

/* Demangle an integral template argument value into S.  Returns nonzero
   on success.  */

static int
demangle_integral_value (work, mangled, s)
     struct work_stuff *work;
     const char** mangled;
     string* s;
{
  int success;

  if (**mangled == 'E')
    success = demangle_expression (work, mangled, s, tk_integral);
  else if (**mangled == 'Q' || **mangled == 'K')
    success = demangle_qualified (work, mangled, s, 0, 1);
  else
    {
      int value;

      /* By default, we let the number decide whether we shall consume an
	 underscore.  */
      int consume_following_underscore = 0;
      int leave_following_underscore = 0;

      success = 0;

      /* Negative numbers are indicated with a leading `m'.  */
      if (**mangled == 'm')
	{
	  string_appendn (s, "-", 1);
	  (*mangled)++;
	}
      else if (mangled[0][0] == '_' && mangled[0][1] == 'm')
	{
	  /* Since consume_count_with_underscores does not handle the
	     `m'-prefix we must do it here, using consume_count and
	     adjusting underscores: we have to consume the underscore
	     matching the prepended one.  */
	  consume_following_underscore = 1;
	  string_appendn (s, "-", 1);
	  (*mangled) += 2;
	}
      else if (**mangled == '_')
	{
	  /* Do not consume a following underscore;
	     consume_following_underscore will consume what should be
	     consumed.  */
	  leave_following_underscore = 1;
	}

      /* We must call consume_count if we expect to remove a trailing
	 underscore, since consume_count_with_underscores expects
	 the leading underscore (that we consumed) if it is to handle
	 multi-digit numbers.  */
      if (consume_following_underscore)
	value = consume_count (mangled);
      else
	value = consume_count_with_underscores (mangled);

      if (value != -1)
	{
	  char buf[INTBUF_SIZE];
	  sprintf (buf, "%d", value);
	  string_append (s, buf);

	  /* Numbers not otherwise delimited, might have an underscore
	     appended as a delimeter, which we should skip.

	     ??? This used to always remove a following underscore, which
	     is wrong.  If other (arbitrary) cases are followed by an
	     underscore, we need to do something more radical.  */

	  if ((value > 9 || consume_following_underscore)
	      && ! leave_following_underscore
	      && **mangled == '_')
	    (*mangled)++;

	  /* All is well.  */
	  success = 1;
	}
    }

  return success;
}
/* Demangle the real (floating-point) value in MANGLED into S: optional
   'm' sign, digits, optional '.' fraction and 'e' exponent.  Always
   returns 1 (an 'E' prefix delegates to demangle_expression).  */

static int
demangle_real_value (work, mangled, s)
     struct work_stuff *work;
     const char **mangled;
     string* s;
{
  if (**mangled == 'E')
    return demangle_expression (work, mangled, s, tk_real);

  if (**mangled == 'm')
    {
      string_appendn (s, "-", 1);
      (*mangled)++;
    }
  while (ISDIGIT ((unsigned char)**mangled))
    {
      string_appendn (s, *mangled, 1);
      (*mangled)++;
    }
  if (**mangled == '.') /* fraction */
    {
      string_appendn (s, ".", 1);
      (*mangled)++;
      while (ISDIGIT ((unsigned char)**mangled))
	{
	  string_appendn (s, *mangled, 1);
	  (*mangled)++;
	}
    }
  if (**mangled == 'e') /* exponent */
    {
      string_appendn (s, "e", 1);
      (*mangled)++;
      while (ISDIGIT ((unsigned char)**mangled))
	{
	  string_appendn (s, *mangled, 1);
	  (*mangled)++;
	}
    }

  return 1;
}

/* Demangle one template argument value of kind TK into S.  NOTE(review):
   some failure paths below return -1 rather than 0; callers test with
   `if (!success)', so -1 reads as success there — verify intent before
   changing.  */

static int
demangle_template_value_parm (work, mangled, s, tk)
     struct work_stuff *work;
     const char **mangled;
     string* s;
     type_kind_t tk;
{
  int success = 1;

  if (**mangled == 'Y')
    {
      /* The next argument is a template parameter. */
      int idx;

      (*mangled)++;
      idx = consume_count_with_underscores (mangled);
      if (idx == -1
	  || (work->tmpl_argvec && idx >= work->ntmpl_args)
	  || consume_count_with_underscores (mangled) == -1)
	return -1;
      if (work->tmpl_argvec)
	string_append (s, work->tmpl_argvec[idx]);
      else
	string_append_template_idx (s, idx);
    }
  else if (tk == tk_integral)
    success = demangle_integral_value (work, mangled, s);
  else if (tk == tk_char)
    {
      char tmp[2];
      int val;
      if (**mangled == 'm')
	{
	  string_appendn (s, "-", 1);
	  (*mangled)++;
	}
      string_appendn (s, "'", 1);
      val = consume_count(mangled);
      if (val <= 0)
	success = 0;
      else
	{
	  tmp[0] = (char)val;
	  tmp[1] = '\0';
	  string_appendn (s, &tmp[0], 1);
	  string_appendn (s, "'", 1);
	}
    }
  else if (tk == tk_bool)
    {
      int val = consume_count (mangled);
      if (val == 0)
	string_appendn (s, "false", 5);
      else if (val == 1)
	string_appendn (s, "true", 4);
      else
	success = 0;
    }
  else if (tk == tk_real)
    success = demangle_real_value (work, mangled, s);
  else if (tk == tk_pointer || tk == tk_reference)
    {
      if (**mangled == 'Q')
	success = demangle_qualified (work, mangled, s,
				      /*isfuncname=*/0,
				      /*append=*/1);
      else
	{
	  int symbol_len = consume_count (mangled);
	  if (symbol_len == -1)
	    return -1;
	  if (symbol_len == 0)
	    string_appendn (s, "0", 1);
	  else
	    {
	      char *p = xmalloc (symbol_len + 1), *q;
	      strncpy (p, *mangled, symbol_len);
	      p [symbol_len] = '\0';
	      /* We use cplus_demangle here, rather than
		 internal_cplus_demangle, because the name of the entity
		 mangled here does not make use of any of the squangling
		 or type-code information we have built up thus far; it is
		 mangled independently.  */
	      q = VG_(cplus_demangle) (p, work->options);
	      if (tk == tk_pointer)
		string_appendn (s, "&", 1);
	      /* FIXME: Pointer-to-member constants should get a
		 qualifying class name here.  */
	      if (q)
		{
		  string_append (s, q);
		  free (q);
		}
	      else
		string_append (s, p);
	      free (p);
	    }
	  *mangled += symbol_len;
	}
    }

  return success;
}

/* Demangle the template name in MANGLED.  The full name of the
   template (e.g., S) is placed in TNAME.  The name without the
   template parameters (e.g. S) is placed in TRAWNAME if TRAWNAME is
   non-NULL.  If IS_TYPE is nonzero, this template is a type template,
   not a function template.  If both IS_TYPE and REMEMBER are nonzero,
   the template is remembered in the list of back-referenceable
   types.  */

static int
demangle_template (work, mangled, tname, trawname, is_type, remember)
     struct work_stuff *work;
     const char **mangled;
     string *tname;
     string *trawname;
     int is_type;
     int remember;
{
  int i;
  int r;
  int need_comma = 0;
  int success = 0;
  const char *start;
  int is_java_array = 0;
  string temp;
  int bindex = 0;

  (*mangled)++;
  if (is_type)
    {
      if (remember)
	bindex = register_Btype (work);
      start = *mangled;
      /* get template name */
      if (**mangled == 'z')
	{
	  int idx;
	  (*mangled)++;
	  (*mangled)++;

	  idx = consume_count_with_underscores (mangled);
	  if (idx == -1
	      || (work->tmpl_argvec && idx >= work->ntmpl_args)
	      || consume_count_with_underscores (mangled) == -1)
	    return (0);

	  if (work->tmpl_argvec)
	    {
	      string_append (tname, work->tmpl_argvec[idx]);
	      if (trawname)
		string_append (trawname, work->tmpl_argvec[idx]);
	    }
	  else
	    {
	      string_append_template_idx (tname, idx);
	      if (trawname)
		string_append_template_idx (trawname, idx);
	    }
	}
      else
	{
	  if ((r = consume_count (mangled)) <= 0
	      || (int) strlen (*mangled) < r)
	    {
	      return (0);
	    }
	  is_java_array = (work -> options & DMGL_JAVA)
	    && strncmp (*mangled, "JArray1Z", 8) == 0;
	  if (!
is_java_array) + { + string_appendn (tname, *mangled, r); + } + if (trawname) + string_appendn (trawname, *mangled, r); + *mangled += r; + } + } + if (!is_java_array) + string_append (tname, "<"); + /* get size of template parameter list */ + if (!get_count (mangled, &r)) + { + return (0); + } + if (!is_type) + { + /* Create an array for saving the template argument values. */ + work->tmpl_argvec = (char**) xmalloc (r * sizeof (char *)); + work->ntmpl_args = r; + for (i = 0; i < r; i++) + work->tmpl_argvec[i] = 0; + } + for (i = 0; i < r; i++) + { + if (need_comma) + { + string_append (tname, ", "); + } + /* Z for type parameters */ + if (**mangled == 'Z') + { + (*mangled)++; + /* temp is initialized in do_type */ + success = do_type (work, mangled, &temp); + if (success) + { + string_appends (tname, &temp); + + if (!is_type) + { + /* Save the template argument. */ + int len = temp.p - temp.b; + work->tmpl_argvec[i] = xmalloc (len + 1); + memcpy (work->tmpl_argvec[i], temp.b, len); + work->tmpl_argvec[i][len] = '\0'; + } + } + string_delete(&temp); + if (!success) + { + break; + } + } + /* z for template parameters */ + else if (**mangled == 'z') + { + int r2; + (*mangled)++; + success = demangle_template_template_parm (work, mangled, tname); + + if (success + && (r2 = consume_count (mangled)) > 0 + && (int) strlen (*mangled) >= r2) + { + string_append (tname, " "); + string_appendn (tname, *mangled, r2); + if (!is_type) + { + /* Save the template argument. 
*/ + int len = r2; + work->tmpl_argvec[i] = xmalloc (len + 1); + memcpy (work->tmpl_argvec[i], *mangled, len); + work->tmpl_argvec[i][len] = '\0'; + } + *mangled += r2; + } + if (!success) + { + break; + } + } + else + { + string param; + string* s; + + /* otherwise, value parameter */ + + /* temp is initialized in do_type */ + success = do_type (work, mangled, &temp); + string_delete(&temp); + if (!success) + break; + + if (!is_type) + { + s = ¶m; + string_init (s); + } + else + s = tname; + + success = demangle_template_value_parm (work, mangled, s, + (type_kind_t) success); + + if (!success) + { + if (!is_type) + string_delete (s); + success = 0; + break; + } + + if (!is_type) + { + int len = s->p - s->b; + work->tmpl_argvec[i] = xmalloc (len + 1); + memcpy (work->tmpl_argvec[i], s->b, len); + work->tmpl_argvec[i][len] = '\0'; + + string_appends (tname, s); + string_delete (s); + } + } + need_comma = 1; + } + if (is_java_array) + { + string_append (tname, "[]"); + } + else + { + if (tname->p[-1] == '>') + string_append (tname, " "); + string_append (tname, ">"); + } + + if (is_type && remember) + remember_Btype (work, tname->b, LEN_STRING (tname), bindex); + + /* + if (work -> static_type) + { + string_append (declp, *mangled + 1); + *mangled += strlen (*mangled); + success = 1; + } + else + { + success = demangle_args (work, mangled, declp); + } + } + */ + return (success); +} + +static int +arm_pt (work, mangled, n, anchor, args) + struct work_stuff *work; + const char *mangled; + int n; + const char **anchor, **args; +{ + /* Check if ARM template with "__pt__" in it ("parameterized type") */ + /* Allow HP also here, because HP's cfront compiler follows ARM to some extent */ + if ((ARM_DEMANGLING || HP_DEMANGLING) && (*anchor = strstr (mangled, "__pt__"))) + { + int len; + *args = *anchor + 6; + len = consume_count (args); + if (len == -1) + return 0; + if (*args + len == mangled + n && **args == '_') + { + ++*args; + return 1; + } + } + if (AUTO_DEMANGLING || 
EDG_DEMANGLING) + { + if ((*anchor = strstr (mangled, "__tm__")) + || (*anchor = strstr (mangled, "__ps__")) + || (*anchor = strstr (mangled, "__pt__"))) + { + int len; + *args = *anchor + 6; + len = consume_count (args); + if (len == -1) + return 0; + if (*args + len == mangled + n && **args == '_') + { + ++*args; + return 1; + } + } + else if ((*anchor = strstr (mangled, "__S"))) + { + int len; + *args = *anchor + 3; + len = consume_count (args); + if (len == -1) + return 0; + if (*args + len == mangled + n && **args == '_') + { + ++*args; + return 1; + } + } + } + + return 0; +} + +static void +demangle_arm_hp_template (work, mangled, n, declp) + struct work_stuff *work; + const char **mangled; + int n; + string *declp; +{ + const char *p; + const char *args; + const char *e = *mangled + n; + string arg; + + /* Check for HP aCC template spec: classXt1t2 where t1, t2 are + template args */ + if (HP_DEMANGLING && ((*mangled)[n] == 'X')) + { + char *start_spec_args = NULL; + + /* First check for and omit template specialization pseudo-arguments, + such as in "Spec<#1,#1.*>" */ + start_spec_args = strchr (*mangled, '<'); + if (start_spec_args && (start_spec_args - *mangled < n)) + string_appendn (declp, *mangled, start_spec_args - *mangled); + else + string_appendn (declp, *mangled, n); + (*mangled) += n + 1; + string_init (&arg); + if (work->temp_start == -1) /* non-recursive call */ + work->temp_start = declp->p - declp->b; + string_append (declp, "<"); + while (1) + { + string_clear (&arg); + switch (**mangled) + { + case 'T': + /* 'T' signals a type parameter */ + (*mangled)++; + if (!do_type (work, mangled, &arg)) + goto hpacc_template_args_done; + break; + + case 'U': + case 'S': + /* 'U' or 'S' signals an integral value */ + if (!do_hpacc_template_const_value (work, mangled, &arg)) + goto hpacc_template_args_done; + break; + + case 'A': + /* 'A' signals a named constant expression (literal) */ + if (!do_hpacc_template_literal (work, mangled, &arg)) + goto 
hpacc_template_args_done; + break; + + default: + /* Today, 1997-09-03, we have only the above types + of template parameters */ + /* FIXME: maybe this should fail and return null */ + goto hpacc_template_args_done; + } + string_appends (declp, &arg); + /* Check if we're at the end of template args. + 0 if at end of static member of template class, + _ if done with template args for a function */ + if ((**mangled == '\000') || (**mangled == '_')) + break; + else + string_append (declp, ","); + } + hpacc_template_args_done: + string_append (declp, ">"); + string_delete (&arg); + if (**mangled == '_') + (*mangled)++; + return; + } + /* ARM template? (Also handles HP cfront extensions) */ + else if (arm_pt (work, *mangled, n, &p, &args)) + { + string type_str; + + string_init (&arg); + string_appendn (declp, *mangled, p - *mangled); + if (work->temp_start == -1) /* non-recursive call */ + work->temp_start = declp->p - declp->b; + string_append (declp, "<"); + /* should do error checking here */ + while (args < e) { + string_clear (&arg); + + /* Check for type or literal here */ + switch (*args) + { + /* HP cfront extensions to ARM for template args */ + /* spec: Xt1Lv1 where t1 is a type, v1 is a literal value */ + /* FIXME: We handle only numeric literals for HP cfront */ + case 'X': + /* A typed constant value follows */ + args++; + if (!do_type (work, &args, &type_str)) + goto cfront_template_args_done; + string_append (&arg, "("); + string_appends (&arg, &type_str); + string_append (&arg, ")"); + if (*args != 'L') + goto cfront_template_args_done; + args++; + /* Now snarf a literal value following 'L' */ + if (!snarf_numeric_literal (&args, &arg)) + goto cfront_template_args_done; + break; + + case 'L': + /* Snarf a literal following 'L' */ + args++; + if (!snarf_numeric_literal (&args, &arg)) + goto cfront_template_args_done; + break; + default: + /* Not handling other HP cfront stuff */ + if (!do_type (work, &args, &arg)) + goto cfront_template_args_done; + } + 
string_appends (declp, &arg); + string_append (declp, ","); + } + cfront_template_args_done: + string_delete (&arg); + if (args >= e) + --declp->p; /* remove extra comma */ + string_append (declp, ">"); + } + else if (n>10 && strncmp (*mangled, "_GLOBAL_", 8) == 0 + && (*mangled)[9] == 'N' + && (*mangled)[8] == (*mangled)[10] + && strchr (cplus_markers, (*mangled)[8])) + { + /* A member of the anonymous namespace. */ + string_append (declp, "{anonymous}"); + } + else + { + if (work->temp_start == -1) /* non-recursive call only */ + work->temp_start = 0; /* disable in recursive calls */ + string_appendn (declp, *mangled, n); + } + *mangled += n; +} + +/* Extract a class name, possibly a template with arguments, from the + mangled string; qualifiers, local class indicators, etc. have + already been dealt with */ + +static int +demangle_class_name (work, mangled, declp) + struct work_stuff *work; + const char **mangled; + string *declp; +{ + int n; + int success = 0; + + n = consume_count (mangled); + if (n == -1) + return 0; + if ((int) strlen (*mangled) >= n) + { + demangle_arm_hp_template (work, mangled, n, declp); + success = 1; + } + + return (success); +} + +/* + +LOCAL FUNCTION + + demangle_class -- demangle a mangled class sequence + +SYNOPSIS + + static int + demangle_class (struct work_stuff *work, const char **mangled, + string *declp) + +DESCRIPTION + + DECLP points to the buffer into which demangling is being done. + + *MANGLED points to the current token to be demangled. On input, + it points to a mangled class (I.E. "3foo", "13verylongclass", etc.) + On exit, it points to the next token after the mangled class on + success, or the first unconsumed token on failure. + + If the CONSTRUCTOR or DESTRUCTOR flags are set in WORK, then + we are demangling a constructor or destructor. In this case + we prepend "class::class" or "class::~class" to DECLP. + + Otherwise, we prepend "class::" to the current DECLP. 
+ + Reset the constructor/destructor flags once they have been + "consumed". This allows demangle_class to be called later during + the same demangling, to do normal class demangling. + + Returns 1 if demangling is successful, 0 otherwise. + +*/ + +static int +demangle_class (work, mangled, declp) + struct work_stuff *work; + const char **mangled; + string *declp; +{ + int success = 0; + int btype; + string class_name; + char *save_class_name_end = 0; + + string_init (&class_name); + btype = register_Btype (work); + if (demangle_class_name (work, mangled, &class_name)) + { + save_class_name_end = class_name.p; + if ((work->constructor & 1) || (work->destructor & 1)) + { + /* adjust so we don't include template args */ + if (work->temp_start && (work->temp_start != -1)) + { + class_name.p = class_name.b + work->temp_start; + } + string_prepends (declp, &class_name); + if (work -> destructor & 1) + { + string_prepend (declp, "~"); + work -> destructor -= 1; + } + else + { + work -> constructor -= 1; + } + } + class_name.p = save_class_name_end; + remember_Ktype (work, class_name.b, LEN_STRING(&class_name)); + remember_Btype (work, class_name.b, LEN_STRING(&class_name), btype); + string_prepend (declp, SCOPE_STRING (work)); + string_prepends (declp, &class_name); + success = 1; + } + string_delete (&class_name); + return (success); +} + + +/* Called when there's a "__" in the mangled name, with `scan' pointing to + the rightmost guess. + + Find the correct "__"-sequence where the function name ends and the + signature starts, which is ambiguous with GNU mangling. + Call demangle_signature here, so we can make sure we found the right + one; *mangled will be consumed so caller will not make further calls to + demangle_signature. 
*/ + +static int +iterate_demangle_function (work, mangled, declp, scan) + struct work_stuff *work; + const char **mangled; + string *declp; + const char *scan; +{ + const char *mangle_init = *mangled; + int success = 0; + string decl_init; + struct work_stuff work_init; + + if (*(scan + 2) == '\0') + return 0; + + /* Do not iterate for some demangling modes, or if there's only one + "__"-sequence. This is the normal case. */ + if (ARM_DEMANGLING || LUCID_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING + || strstr (scan + 2, "__") == NULL) + { + demangle_function_name (work, mangled, declp, scan); + return 1; + } + + /* Save state so we can restart if the guess at the correct "__" was + wrong. */ + string_init (&decl_init); + string_appends (&decl_init, declp); + memset (&work_init, 0, sizeof work_init); + work_stuff_copy_to_from (&work_init, work); + + /* Iterate over occurrences of __, allowing names and types to have a + "__" sequence in them. We must start with the first (not the last) + occurrence, since "__" most often occur between independent mangled + parts, hence starting at the last occurrence inside a signature + might get us a "successful" demangling of the signature. */ + + while (scan[2]) + { + demangle_function_name (work, mangled, declp, scan); + success = demangle_signature (work, mangled, declp); + if (success) + break; + + /* Reset demangle state for the next round. */ + *mangled = mangle_init; + string_clear (declp); + string_appends (declp, &decl_init); + work_stuff_copy_to_from (work, &work_init); + + /* Leave this underscore-sequence. */ + scan += 2; + + /* Scan for the next "__" sequence. */ + while (*scan && (scan[0] != '_' || scan[1] != '_')) + scan++; + + /* Move to last "__" in this sequence. */ + while (*scan && *scan == '_') + scan++; + scan -= 2; + } + + /* Delete saved state. 
*/ + delete_work_stuff (&work_init); + string_delete (&decl_init); + + return success; +} + +/* + +LOCAL FUNCTION + + demangle_prefix -- consume the mangled name prefix and find signature + +SYNOPSIS + + static int + demangle_prefix (struct work_stuff *work, const char **mangled, + string *declp); + +DESCRIPTION + + Consume and demangle the prefix of the mangled name. + While processing the function name root, arrange to call + demangle_signature if the root is ambiguous. + + DECLP points to the string buffer into which demangled output is + placed. On entry, the buffer is empty. On exit it contains + the root function name, the demangled operator name, or in some + special cases either nothing or the completely demangled result. + + MANGLED points to the current pointer into the mangled name. As each + token of the mangled name is consumed, it is updated. Upon entry + the current mangled name pointer points to the first character of + the mangled name. Upon exit, it should point to the first character + of the signature if demangling was successful, or to the first + unconsumed character if demangling of the prefix was unsuccessful. + + Returns 1 on success, 0 otherwise. + */ + +static int +demangle_prefix (work, mangled, declp) + struct work_stuff *work; + const char **mangled; + string *declp; +{ + int success = 1; + const char *scan; + int i; + + if (strlen(*mangled) > 6 + && (strncmp(*mangled, "_imp__", 6) == 0 + || strncmp(*mangled, "__imp_", 6) == 0)) + { + /* it's a symbol imported from a PE dynamic library. Check for both + new style prefix _imp__ and legacy __imp_ used by older versions + of dlltool. 
*/ + (*mangled) += 6; + work->dllimported = 1; + } + else if (strlen(*mangled) >= 11 && strncmp(*mangled, "_GLOBAL_", 8) == 0) + { + char *marker = strchr (cplus_markers, (*mangled)[8]); + if (marker != NULL && *marker == (*mangled)[10]) + { + if ((*mangled)[9] == 'D') + { + /* it's a GNU global destructor to be executed at program exit */ + (*mangled) += 11; + work->destructor = 2; + if (gnu_special (work, mangled, declp)) + return success; + } + else if ((*mangled)[9] == 'I') + { + /* it's a GNU global constructor to be executed at program init */ + (*mangled) += 11; + work->constructor = 2; + if (gnu_special (work, mangled, declp)) + return success; + } + } + } + else if ((ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING) && strncmp(*mangled, "__std__", 7) == 0) + { + /* it's a ARM global destructor to be executed at program exit */ + (*mangled) += 7; + work->destructor = 2; + } + else if ((ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING) && strncmp(*mangled, "__sti__", 7) == 0) + { + /* it's a ARM global constructor to be executed at program initial */ + (*mangled) += 7; + work->constructor = 2; + } + + /* This block of code is a reduction in strength time optimization + of: + scan = strstr (*mangled, "__"); */ + + { + scan = *mangled; + + do { + scan = strchr (scan, '_'); + } while (scan != NULL && *++scan != '_'); + + if (scan != NULL) --scan; + } + + if (scan != NULL) + { + /* We found a sequence of two or more '_', ensure that we start at + the last pair in the sequence. 
*/ + /* i = strspn (scan, "_"); */ + i = 0; + while (scan[i] == '_') i++; + if (i > 2) + { + scan += (i - 2); + } + } + + if (scan == NULL) + { + success = 0; + } + else if (work -> static_type) + { + if (!ISDIGIT ((unsigned char)scan[0]) && (scan[0] != 't')) + { + success = 0; + } + } + else if ((scan == *mangled) + && (ISDIGIT ((unsigned char)scan[2]) || (scan[2] == 'Q') + || (scan[2] == 't') || (scan[2] == 'K') || (scan[2] == 'H'))) + { + /* The ARM says nothing about the mangling of local variables. + But cfront mangles local variables by prepending __ + to them. As an extension to ARM demangling we handle this case. */ + if ((LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING) + && ISDIGIT ((unsigned char)scan[2])) + { + *mangled = scan + 2; + consume_count (mangled); + string_append (declp, *mangled); + *mangled += strlen (*mangled); + success = 1; + } + else + { + /* A GNU style constructor starts with __[0-9Qt]. But cfront uses + names like __Q2_3foo3bar for nested type names. So don't accept + this style of constructor for cfront demangling. A GNU + style member-template constructor starts with 'H'. */ + if (!(LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING)) + work -> constructor += 1; + *mangled = scan + 2; + } + } + else if (ARM_DEMANGLING && scan[2] == 'p' && scan[3] == 't') + { + /* Cfront-style parameterized type. Handled later as a signature. */ + success = 1; + + /* ARM template? */ + demangle_arm_hp_template (work, mangled, strlen (*mangled), declp); + } + else if (EDG_DEMANGLING && ((scan[2] == 't' && scan[3] == 'm') + || (scan[2] == 'p' && scan[3] == 's') + || (scan[2] == 'p' && scan[3] == 't'))) + { + /* EDG-style parameterized type. Handled later as a signature. */ + success = 1; + + /* EDG template? */ + demangle_arm_hp_template (work, mangled, strlen (*mangled), declp); + } + else if ((scan == *mangled) && !ISDIGIT ((unsigned char)scan[2]) + && (scan[2] != 't')) + { + /* Mangled name starts with "__". 
Skip over any leading '_' characters, + then find the next "__" that separates the prefix from the signature. + */ + if (!(ARM_DEMANGLING || LUCID_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING) + || (arm_special (mangled, declp) == 0)) + { + while (*scan == '_') + { + scan++; + } + if ((scan = strstr (scan, "__")) == NULL || (*(scan + 2) == '\0')) + { + /* No separator (I.E. "__not_mangled"), or empty signature + (I.E. "__not_mangled_either__") */ + success = 0; + } + else + return iterate_demangle_function (work, mangled, declp, scan); + } + } + else if (*(scan + 2) != '\0') + { + /* Mangled name does not start with "__" but does have one somewhere + in there with non empty stuff after it. Looks like a global + function name. Iterate over all "__":s until the right + one is found. */ + return iterate_demangle_function (work, mangled, declp, scan); + } + else + { + /* Doesn't look like a mangled name */ + success = 0; + } + + if (!success && (work->constructor == 2 || work->destructor == 2)) + { + string_append (declp, *mangled); + *mangled += strlen (*mangled); + success = 1; + } + return (success); +} + +/* + +LOCAL FUNCTION + + gnu_special -- special handling of gnu mangled strings + +SYNOPSIS + + static int + gnu_special (struct work_stuff *work, const char **mangled, + string *declp); + + +DESCRIPTION + + Process some special GNU style mangling forms that don't fit + the normal pattern. 
For example: + + _$_3foo (destructor for class foo) + _vt$foo (foo virtual table) + _vt$foo$bar (foo::bar virtual table) + __vt_foo (foo virtual table, new style with thunks) + _3foo$varname (static data member) + _Q22rs2tu$vw (static data member) + __t6vector1Zii (constructor with template) + __thunk_4__$_7ostream (virtual function thunk) + */ + +static int +gnu_special (work, mangled, declp) + struct work_stuff *work; + const char **mangled; + string *declp; +{ + int n; + int success = 1; + const char *p; + + if ((*mangled)[0] == '_' + && strchr (cplus_markers, (*mangled)[1]) != NULL + && (*mangled)[2] == '_') + { + /* Found a GNU style destructor, get past "__" */ + (*mangled) += 3; + work -> destructor += 1; + } + else if ((*mangled)[0] == '_' + && (((*mangled)[1] == '_' + && (*mangled)[2] == 'v' + && (*mangled)[3] == 't' + && (*mangled)[4] == '_') + || ((*mangled)[1] == 'v' + && (*mangled)[2] == 't' + && strchr (cplus_markers, (*mangled)[3]) != NULL))) + { + /* Found a GNU style virtual table, get past "_vt" + and create the decl. Note that we consume the entire mangled + input string, which means that demangle_signature has no work + to do. */ + if ((*mangled)[2] == 'v') + (*mangled) += 5; /* New style, with thunks: "__vt_" */ + else + (*mangled) += 4; /* Old style, no thunks: "_vt" */ + while (**mangled != '\0') + { + switch (**mangled) + { + case 'Q': + case 'K': + success = demangle_qualified (work, mangled, declp, 0, 1); + break; + case 't': + success = demangle_template (work, mangled, declp, 0, 1, + 1); + break; + default: + if (ISDIGIT((unsigned char)*mangled[0])) + { + n = consume_count(mangled); + /* We may be seeing a too-large size, or else a + "." indicating a static local symbol. In + any case, declare victory and move on; *don't* try + to use n to allocate. 
*/ + if (n > (int) strlen (*mangled)) + { + success = 1; + break; + } + } + else + { + /*n = strcspn (*mangled, cplus_markers);*/ + const char *check = *mangled; + n = 0; + while (*check) + if (strchr (cplus_markers, *check++) == NULL) + n++; + else + break; + } + string_appendn (declp, *mangled, n); + (*mangled) += n; + } + + p = strpbrk (*mangled, cplus_markers); + if (success && ((p == NULL) || (p == *mangled))) + { + if (p != NULL) + { + string_append (declp, SCOPE_STRING (work)); + (*mangled)++; + } + } + else + { + success = 0; + break; + } + } + if (success) + string_append (declp, " virtual table"); + } + else if ((*mangled)[0] == '_' + && (strchr("0123456789Qt", (*mangled)[1]) != NULL) + && (p = strpbrk (*mangled, cplus_markers)) != NULL) + { + /* static data member, "_3foo$varname" for example */ + (*mangled)++; + switch (**mangled) + { + case 'Q': + case 'K': + success = demangle_qualified (work, mangled, declp, 0, 1); + break; + case 't': + success = demangle_template (work, mangled, declp, 0, 1, 1); + break; + default: + n = consume_count (mangled); + if (n < 0 || n > (long) strlen (*mangled)) + { + success = 0; + break; + } + + if (n > 10 && strncmp (*mangled, "_GLOBAL_", 8) == 0 + && (*mangled)[9] == 'N' + && (*mangled)[8] == (*mangled)[10] + && strchr (cplus_markers, (*mangled)[8])) + { + /* A member of the anonymous namespace. There's information + about what identifier or filename it was keyed to, but + it's just there to make the mangled name unique; we just + step over it. */ + string_append (declp, "{anonymous}"); + (*mangled) += n; + + /* Now p points to the marker before the N, so we need to + update it to the first marker after what we consumed. */ + p = strpbrk (*mangled, cplus_markers); + break; + } + + string_appendn (declp, *mangled, n); + (*mangled) += n; + } + if (success && (p == *mangled)) + { + /* Consumed everything up to the cplus_marker, append the + variable name. 
*/ + (*mangled)++; + string_append (declp, SCOPE_STRING (work)); + n = strlen (*mangled); + string_appendn (declp, *mangled, n); + (*mangled) += n; + } + else + { + success = 0; + } + } + else if (strncmp (*mangled, "__thunk_", 8) == 0) + { + int delta; + + (*mangled) += 8; + delta = consume_count (mangled); + if (delta == -1) + success = 0; + else + { + char *method = internal_cplus_demangle (work, ++*mangled); + + if (method) + { + char buf[50]; + sprintf (buf, "virtual function thunk (delta:%d) for ", -delta); + string_append (declp, buf); + string_append (declp, method); + free (method); + n = strlen (*mangled); + (*mangled) += n; + } + else + { + success = 0; + } + } + } + else if (strncmp (*mangled, "__t", 3) == 0 + && ((*mangled)[3] == 'i' || (*mangled)[3] == 'f')) + { + p = (*mangled)[3] == 'i' ? " type_info node" : " type_info function"; + (*mangled) += 4; + switch (**mangled) + { + case 'Q': + case 'K': + success = demangle_qualified (work, mangled, declp, 0, 1); + break; + case 't': + success = demangle_template (work, mangled, declp, 0, 1, 1); + break; + default: + success = do_type (work, mangled, declp); + break; + } + if (success && **mangled != '\0') + success = 0; + if (success) + string_append (declp, p); + } + else + { + success = 0; + } + return (success); +} + +static void +recursively_demangle(work, mangled, result, namelength) + struct work_stuff *work; + const char **mangled; + string *result; + int namelength; +{ + char * recurse = (char *)NULL; + char * recurse_dem = (char *)NULL; + + recurse = (char *) xmalloc (namelength + 1); + memcpy (recurse, *mangled, namelength); + recurse[namelength] = '\000'; + + recurse_dem = VG_(cplus_demangle) (recurse, work->options); + + if (recurse_dem) + { + string_append (result, recurse_dem); + free (recurse_dem); + } + else + { + string_appendn (result, *mangled, namelength); + } + free (recurse); + *mangled += namelength; +} + +/* + +LOCAL FUNCTION + + arm_special -- special handling of ARM/lucid 
mangled strings + +SYNOPSIS + + static int + arm_special (const char **mangled, + string *declp); + + +DESCRIPTION + + Process some special ARM style mangling forms that don't fit + the normal pattern. For example: + + __vtbl__3foo (foo virtual table) + __vtbl__3foo__3bar (bar::foo virtual table) + + */ + +static int +arm_special (mangled, declp) + const char **mangled; + string *declp; +{ + int n; + int success = 1; + const char *scan; + + if (strncmp (*mangled, ARM_VTABLE_STRING, ARM_VTABLE_STRLEN) == 0) + { + /* Found a ARM style virtual table, get past ARM_VTABLE_STRING + and create the decl. Note that we consume the entire mangled + input string, which means that demangle_signature has no work + to do. */ + scan = *mangled + ARM_VTABLE_STRLEN; + while (*scan != '\0') /* first check it can be demangled */ + { + n = consume_count (&scan); + if (n == -1) + { + return (0); /* no good */ + } + scan += n; + if (scan[0] == '_' && scan[1] == '_') + { + scan += 2; + } + } + (*mangled) += ARM_VTABLE_STRLEN; + while (**mangled != '\0') + { + n = consume_count (mangled); + if (n == -1 + || n > (long) strlen (*mangled)) + return 0; + string_prependn (declp, *mangled, n); + (*mangled) += n; + if ((*mangled)[0] == '_' && (*mangled)[1] == '_') + { + string_prepend (declp, "::"); + (*mangled) += 2; + } + } + string_append (declp, " virtual table"); + } + else + { + success = 0; + } + return (success); +} + +/* + +LOCAL FUNCTION + + demangle_qualified -- demangle 'Q' qualified name strings + +SYNOPSIS + + static int + demangle_qualified (struct work_stuff *, const char *mangled, + string *result, int isfuncname, int append); + +DESCRIPTION + + Demangle a qualified name, such as "Q25Outer5Inner" which is + the mangled form of "Outer::Inner". The demangled output is + prepended or appended to the result string according to the + state of the append flag. 
+ + If isfuncname is nonzero, then the qualified name we are building + is going to be used as a member function name, so if it is a + constructor or destructor function, append an appropriate + constructor or destructor name. I.E. for the above example, + the result for use as a constructor is "Outer::Inner::Inner" + and the result for use as a destructor is "Outer::Inner::~Inner". + +BUGS + + Numeric conversion is ASCII dependent (FIXME). + + */ + +static int +demangle_qualified (work, mangled, result, isfuncname, append) + struct work_stuff *work; + const char **mangled; + string *result; + int isfuncname; + int append; +{ + int qualifiers = 0; + int success = 1; + string temp; + string last_name; + int bindex = register_Btype (work); + + /* We only make use of ISFUNCNAME if the entity is a constructor or + destructor. */ + isfuncname = (isfuncname + && ((work->constructor & 1) || (work->destructor & 1))); + + string_init (&temp); + string_init (&last_name); + + if ((*mangled)[0] == 'K') + { + /* Squangling qualified name reuse */ + int idx; + (*mangled)++; + idx = consume_count_with_underscores (mangled); + if (idx == -1 || idx >= work -> numk) + success = 0; + else + string_append (&temp, work -> ktypevec[idx]); + } + else + switch ((*mangled)[1]) + { + case '_': + /* GNU mangled name with more than 9 classes. The count is preceded + by an underscore (to distinguish it from the <= 9 case) and followed + by an underscore. */ + (*mangled)++; + qualifiers = consume_count_with_underscores (mangled); + if (qualifiers == -1) + success = 0; + break; + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + /* The count is in a single digit. */ + qualifiers = (*mangled)[1] - '0'; + + /* If there is an underscore after the digit, skip it. This is + said to be for ARM-qualified names, but the ARM makes no + mention of such an underscore. Perhaps cfront uses one. 
*/ + if ((*mangled)[2] == '_') + { + (*mangled)++; + } + (*mangled) += 2; + break; + + case '0': + default: + success = 0; + } + + if (!success) + { + string_delete (&last_name); + string_delete (&temp); + return success; + } + + /* Pick off the names and collect them in the temp buffer in the order + in which they are found, separated by '::'. */ + + while (qualifiers-- > 0) + { + int remember_K = 1; + string_clear (&last_name); + + if (*mangled[0] == '_') + (*mangled)++; + + if (*mangled[0] == 't') + { + /* Here we always append to TEMP since we will want to use + the template name without the template parameters as a + constructor or destructor name. The appropriate + (parameter-less) value is returned by demangle_template + in LAST_NAME. We do not remember the template type here, + in order to match the G++ mangling algorithm. */ + success = demangle_template(work, mangled, &temp, + &last_name, 1, 0); + if (!success) + break; + } + else if (*mangled[0] == 'K') + { + int idx; + (*mangled)++; + idx = consume_count_with_underscores (mangled); + if (idx == -1 || idx >= work->numk) + success = 0; + else + string_append (&temp, work->ktypevec[idx]); + remember_K = 0; + + if (!success) break; + } + else + { + if (EDG_DEMANGLING) + { + int namelength; + /* Now recursively demangle the qualifier + * This is necessary to deal with templates in + * mangling styles like EDG */ + namelength = consume_count (mangled); + if (namelength == -1) + { + success = 0; + break; + } + recursively_demangle(work, mangled, &temp, namelength); + } + else + { + string temp_last_name; + string_init (&temp_last_name); + success = do_type (work, mangled, &temp_last_name); + if (!success) + { + string_delete (&temp_last_name); + break; + } + string_appends (&temp, &temp_last_name); + string_appends (&last_name, &temp_last_name); + string_delete (&temp_last_name); + } + } + + if (remember_K) + remember_Ktype (work, temp.b, LEN_STRING (&temp)); + + if (qualifiers > 0) + string_append (&temp, 
SCOPE_STRING (work)); + } + + remember_Btype (work, temp.b, LEN_STRING (&temp), bindex); + + /* If we are using the result as a function name, we need to append + the appropriate '::' separated constructor or destructor name. + We do this here because this is the most convenient place, where + we already have a pointer to the name and the length of the name. */ + + if (isfuncname) + { + string_append (&temp, SCOPE_STRING (work)); + if (work -> destructor & 1) + string_append (&temp, "~"); + string_appends (&temp, &last_name); + } + + /* Now either prepend the temp buffer to the result, or append it, + depending upon the state of the append flag. */ + + if (append) + string_appends (result, &temp); + else + { + if (!STRING_EMPTY (result)) + string_append (&temp, SCOPE_STRING (work)); + string_prepends (result, &temp); + } + + string_delete (&last_name); + string_delete (&temp); + return (success); +} + +/* + +LOCAL FUNCTION + + get_count -- convert an ascii count to integer, consuming tokens + +SYNOPSIS + + static int + get_count (const char **type, int *count) + +DESCRIPTION + + Assume that *type points at a count in a mangled name; set + *count to its value, and set *type to the next character after + the count. There are some weird rules in effect here. + + If *type does not point at a string of digits, return zero. + + If *type points at a string of digits followed by an + underscore, set *count to their value as an integer, advance + *type to point *after the underscore, and return 1. + + If *type points at a string of digits not followed by an + underscore, consume only the first digit. Set *count to its + value as an integer, leave *type pointing after that digit, + and return 1. 
+ + The excuse for this odd behavior: in the ARM and HP demangling + styles, a type can be followed by a repeat count of the form + `Nxy', where: + + `x' is a single digit specifying how many additional copies + of the type to append to the argument list, and + + `y' is one or more digits, specifying the zero-based index of + the first repeated argument in the list. Yes, as you're + unmangling the name you can figure this out yourself, but + it's there anyway. + + So, for example, in `bar__3fooFPiN51', the first argument is a + pointer to an integer (`Pi'), and then the next five arguments + are the same (`N5'), and the first repeat is the function's + second argument (`1'). +*/ + +static int +get_count (type, count) + const char **type; + int *count; +{ + const char *p; + int n; + + if (!ISDIGIT ((unsigned char)**type)) + return (0); + else + { + *count = **type - '0'; + (*type)++; + if (ISDIGIT ((unsigned char)**type)) + { + p = *type; + n = *count; + do + { + n *= 10; + n += *p - '0'; + p++; + } + while (ISDIGIT ((unsigned char)*p)); + if (*p == '_') + { + *type = p + 1; + *count = n; + } + } + } + return (1); +} + +/* RESULT will be initialised here; it will be freed on failure. The + value returned is really a type_kind_t. */ + +static int +do_type (work, mangled, result) + struct work_stuff *work; + const char **mangled; + string *result; +{ + int n; + int done; + int success; + string decl; + const char *remembered_type; + int type_quals; + string btype; + type_kind_t tk = tk_none; + + string_init (&btype); + string_init (&decl); + string_init (result); + + done = 0; + success = 1; + while (success && !done) + { + int member; + switch (**mangled) + { + + /* A pointer type */ + case 'P': + case 'p': + (*mangled)++; + if (! 
(work -> options & DMGL_JAVA)) + string_prepend (&decl, "*"); + if (tk == tk_none) + tk = tk_pointer; + break; + + /* A reference type */ + case 'R': + (*mangled)++; + string_prepend (&decl, "&"); + if (tk == tk_none) + tk = tk_reference; + break; + + /* An array */ + case 'A': + { + ++(*mangled); + if (!STRING_EMPTY (&decl) + && (decl.b[0] == '*' || decl.b[0] == '&')) + { + string_prepend (&decl, "("); + string_append (&decl, ")"); + } + string_append (&decl, "["); + if (**mangled != '_') + success = demangle_template_value_parm (work, mangled, &decl, + tk_integral); + if (**mangled == '_') + ++(*mangled); + string_append (&decl, "]"); + break; + } + + /* A back reference to a previously seen type */ + case 'T': + (*mangled)++; + if (!get_count (mangled, &n) || n >= work -> ntypes) + { + success = 0; + } + else + { + remembered_type = work -> typevec[n]; + mangled = &remembered_type; + } + break; + + /* A function */ + case 'F': + (*mangled)++; + if (!STRING_EMPTY (&decl) + && (decl.b[0] == '*' || decl.b[0] == '&')) + { + string_prepend (&decl, "("); + string_append (&decl, ")"); + } + /* After picking off the function args, we expect to either find the + function return type (preceded by an '_') or the end of the + string. */ + if (!demangle_nested_args (work, mangled, &decl) + || (**mangled != '_' && **mangled != '\0')) + { + success = 0; + break; + } + if (success && (**mangled == '_')) + (*mangled)++; + break; + + case 'M': + case 'O': + { + type_quals = TYPE_UNQUALIFIED; + + member = **mangled == 'M'; + (*mangled)++; + + string_append (&decl, ")"); + + /* We don't need to prepend `::' for a qualified name; + demangle_qualified will do that for us. 
*/ + if (**mangled != 'Q') + string_prepend (&decl, SCOPE_STRING (work)); + + if (ISDIGIT ((unsigned char)**mangled)) + { + n = consume_count (mangled); + if (n == -1 + || (int) strlen (*mangled) < n) + { + success = 0; + break; + } + string_prependn (&decl, *mangled, n); + *mangled += n; + } + else if (**mangled == 'X' || **mangled == 'Y') + { + string temp; + do_type (work, mangled, &temp); + string_prepends (&decl, &temp); + } + else if (**mangled == 't') + { + string temp; + string_init (&temp); + success = demangle_template (work, mangled, &temp, + NULL, 1, 1); + if (success) + { + string_prependn (&decl, temp.b, temp.p - temp.b); + string_clear (&temp); + } + else + break; + } + else if (**mangled == 'Q') + { + success = demangle_qualified (work, mangled, &decl, + /*isfuncnam=*/0, + /*append=*/0); + if (!success) + break; + } + else + { + success = 0; + break; + } + + string_prepend (&decl, "("); + if (member) + { + switch (**mangled) + { + case 'C': + case 'V': + case 'u': + type_quals |= code_for_qualifier (**mangled); + (*mangled)++; + break; + + default: + break; + } + + if (*(*mangled)++ != 'F') + { + success = 0; + break; + } + } + if ((member && !demangle_nested_args (work, mangled, &decl)) + || **mangled != '_') + { + success = 0; + break; + } + (*mangled)++; + if (! PRINT_ANSI_QUALIFIERS) + { + break; + } + if (type_quals != TYPE_UNQUALIFIED) + { + APPEND_BLANK (&decl); + string_append (&decl, qualifier_string (type_quals)); + } + break; + } + case 'G': + (*mangled)++; + break; + + case 'C': + case 'V': + case 'u': + if (PRINT_ANSI_QUALIFIERS) + { + if (!STRING_EMPTY (&decl)) + string_prepend (&decl, " "); + + string_prepend (&decl, demangle_qualifier (**mangled)); + } + (*mangled)++; + break; + /* + } + */ + + /* fall through */ + default: + done = 1; + break; + } + } + + if (success) switch (**mangled) + { + /* A qualified name, such as "Outer::Inner". 
*/ + case 'Q': + case 'K': + { + success = demangle_qualified (work, mangled, result, 0, 1); + break; + } + + /* A back reference to a previously seen squangled type */ + case 'B': + (*mangled)++; + if (!get_count (mangled, &n) || n >= work -> numb) + success = 0; + else + string_append (result, work->btypevec[n]); + break; + + case 'X': + case 'Y': + /* A template parm. We substitute the corresponding argument. */ + { + int idx; + + (*mangled)++; + idx = consume_count_with_underscores (mangled); + + if (idx == -1 + || (work->tmpl_argvec && idx >= work->ntmpl_args) + || consume_count_with_underscores (mangled) == -1) + { + success = 0; + break; + } + + if (work->tmpl_argvec) + string_append (result, work->tmpl_argvec[idx]); + else + string_append_template_idx (result, idx); + + success = 1; + } + break; + + default: + success = demangle_fund_type (work, mangled, result); + if (tk == tk_none) + tk = (type_kind_t) success; + break; + } + + if (success) + { + if (!STRING_EMPTY (&decl)) + { + string_append (result, " "); + string_appends (result, &decl); + } + } + else + string_delete (result); + string_delete (&decl); + + if (success) + /* Assume an integral type, if we're not sure. */ + return (int) ((tk == tk_none) ? tk_integral : tk); + else + return 0; +} + +/* Given a pointer to a type string that represents a fundamental type + argument (int, long, unsigned int, etc) in TYPE, a pointer to the + string in which the demangled output is being built in RESULT, and + the WORK structure, decode the types and add them to the result. + + For example: + + "Ci" => "const int" + "Sl" => "signed long" + "CUs" => "const unsigned short" + + The value returned is really a type_kind_t. 
*/ + +static int +demangle_fund_type (work, mangled, result) + struct work_stuff *work; + const char **mangled; + string *result; +{ + int done = 0; + int success = 1; + char buf[10]; + unsigned int dec = 0; + string btype; + type_kind_t tk = tk_integral; + + string_init (&btype); + + /* First pick off any type qualifiers. There can be more than one. */ + + while (!done) + { + switch (**mangled) + { + case 'C': + case 'V': + case 'u': + if (PRINT_ANSI_QUALIFIERS) + { + if (!STRING_EMPTY (result)) + string_prepend (result, " "); + string_prepend (result, demangle_qualifier (**mangled)); + } + (*mangled)++; + break; + case 'U': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "unsigned"); + break; + case 'S': /* signed char only */ + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "signed"); + break; + case 'J': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "__complex"); + break; + default: + done = 1; + break; + } + } + + /* Now pick off the fundamental type. There can be only one. 
*/ + + switch (**mangled) + { + case '\0': + case '_': + break; + case 'v': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "void"); + break; + case 'x': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "long long"); + break; + case 'l': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "long"); + break; + case 'i': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "int"); + break; + case 's': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "short"); + break; + case 'b': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "bool"); + tk = tk_bool; + break; + case 'c': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "char"); + tk = tk_char; + break; + case 'w': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "wchar_t"); + tk = tk_char; + break; + case 'r': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "long double"); + tk = tk_real; + break; + case 'd': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "double"); + tk = tk_real; + break; + case 'f': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "float"); + tk = tk_real; + break; + case 'G': + (*mangled)++; + if (!ISDIGIT ((unsigned char)**mangled)) + { + success = 0; + break; + } + case 'I': + (*mangled)++; + if (**mangled == '_') + { + int i; + (*mangled)++; + for (i = 0; + i < (long) sizeof (buf) - 1 && **mangled && **mangled != '_'; + (*mangled)++, i++) + buf[i] = **mangled; + if (**mangled != '_') + { + success = 0; + break; + } + buf[i] = '\0'; + (*mangled)++; + } + else + { + strncpy (buf, *mangled, 2); + buf[2] = '\0'; + *mangled += min (strlen (*mangled), 2); + } + /*sscanf (buf, "%x", &dec); + sprintf (buf, "int%u_t", dec);*/ + sprintf (buf, "i_xx_t"); + APPEND_BLANK (result); + string_append (result, buf); + break; + + /* fall through */ + /* An explicit type, such as "6mytype" or "7integer" */ + case '0': + case '1': + 
case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + int bindex = register_Btype (work); + string loc_btype; + string_init (&loc_btype); + if (demangle_class_name (work, mangled, &loc_btype)) { + remember_Btype (work, loc_btype.b, LEN_STRING (&loc_btype), bindex); + APPEND_BLANK (result); + string_appends (result, &loc_btype); + } + else + success = 0; + string_delete (&loc_btype); + break; + } + case 't': + { + success = demangle_template (work, mangled, &btype, 0, 1, 1); + string_appends (result, &btype); + break; + } + default: + success = 0; + break; + } + + string_delete (&btype); + + return success ? ((int) tk) : 0; +} + + +/* Handle a template's value parameter for HP aCC (extension from ARM) + **mangled points to 'S' or 'U' */ + +static int +do_hpacc_template_const_value (work, mangled, result) + struct work_stuff *work ATTRIBUTE_UNUSED; + const char **mangled; + string *result; +{ + int unsigned_const; + + if (**mangled != 'U' && **mangled != 'S') + return 0; + + unsigned_const = (**mangled == 'U'); + + (*mangled)++; + + switch (**mangled) + { + case 'N': + string_append (result, "-"); + /* fall through */ + case 'P': + (*mangled)++; + break; + case 'M': + /* special case for -2^31 */ + string_append (result, "-2147483648"); + (*mangled)++; + return 1; + default: + return 0; + } + + /* We have to be looking at an integer now */ + if (!(ISDIGIT ((unsigned char)**mangled))) + return 0; + + /* We only deal with integral values for template + parameters -- so it's OK to look only for digits */ + while (ISDIGIT ((unsigned char)**mangled)) + { + char_str[0] = **mangled; + string_append (result, char_str); + (*mangled)++; + } + + if (unsigned_const) + string_append (result, "U"); + + /* FIXME? Some day we may have 64-bit (or larger :-) ) constants + with L or LL suffixes. 
pai/1997-09-03 */ + + return 1; /* success */ +} + +/* Handle a template's literal parameter for HP aCC (extension from ARM) + **mangled is pointing to the 'A' */ + +static int +do_hpacc_template_literal (work, mangled, result) + struct work_stuff *work; + const char **mangled; + string *result; +{ + int literal_len = 0; + char * recurse; + char * recurse_dem; + + if (**mangled != 'A') + return 0; + + (*mangled)++; + + literal_len = consume_count (mangled); + + if (literal_len <= 0) + return 0; + + /* Literal parameters are names of arrays, functions, etc. and the + canonical representation uses the address operator */ + string_append (result, "&"); + + /* Now recursively demangle the literal name */ + recurse = (char *) xmalloc (literal_len + 1); + memcpy (recurse, *mangled, literal_len); + recurse[literal_len] = '\000'; + + recurse_dem = VG_(cplus_demangle) (recurse, work->options); + + if (recurse_dem) + { + string_append (result, recurse_dem); + free (recurse_dem); + } + else + { + string_appendn (result, *mangled, literal_len); + } + (*mangled) += literal_len; + free (recurse); + + return 1; +} + +static int +snarf_numeric_literal (args, arg) + const char ** args; + string * arg; +{ + if (**args == '-') + { + char_str[0] = '-'; + string_append (arg, char_str); + (*args)++; + } + else if (**args == '+') + (*args)++; + + if (!ISDIGIT ((unsigned char)**args)) + return 0; + + while (ISDIGIT ((unsigned char)**args)) + { + char_str[0] = **args; + string_append (arg, char_str); + (*args)++; + } + + return 1; +} + +/* Demangle the next argument, given by MANGLED into RESULT, which + *should be an uninitialized* string. It will be initialized here, + and free'd should anything go wrong. */ + +static int +do_arg (work, mangled, result) + struct work_stuff *work; + const char **mangled; + string *result; +{ + /* Remember where we started so that we can record the type, for + non-squangling type remembering. 
*/ + const char *start = *mangled; + string temp_result; + + string_init (result); + string_init (&temp_result); + + if (work->nrepeats > 0) + { + --work->nrepeats; + + if (work->previous_argument == 0) + return 0; + + /* We want to reissue the previous type in this argument list. */ + string_appends (result, work->previous_argument); + return 1; + } + + if (**mangled == 'n') + { + /* A squangling-style repeat. */ + (*mangled)++; + work->nrepeats = consume_count(mangled); + + if (work->nrepeats <= 0) + /* This was not a repeat count after all. */ + return 0; + + if (work->nrepeats > 9) + { + if (**mangled != '_') + /* The repeat count should be followed by an '_' in this + case. */ + return 0; + else + (*mangled)++; + } + + /* Now, the repeat is all set up. */ + return do_arg (work, mangled, result); + } + + /* Save the result in WORK->previous_argument so that we can find it + if it's repeated. Note that saving START is not good enough: we + do not want to add additional types to the back-referenceable + type vector when processing a repeated type. 
*/ + if (work->previous_argument) + string_clear (work->previous_argument); + else + { + work->previous_argument = (string*) xmalloc (sizeof (string)); + string_init (work->previous_argument); + } + + if (!do_type (work, mangled, &temp_result)) + { + string_delete (&temp_result); + return 0; + } + string_appends (work->previous_argument, &temp_result); + string_delete (&temp_result); + + string_appends (result, work->previous_argument); + + remember_type (work, start, *mangled - start); + return 1; +} + +static void +remember_type (work, start, len) + struct work_stuff *work; + const char *start; + int len; +{ + char *tem; + + if (work->forgetting_types) + return; + + if (work -> ntypes >= work -> typevec_size) + { + if (work -> typevec_size == 0) + { + work -> typevec_size = 3; + work -> typevec + = (char **) xmalloc (sizeof (char *) * work -> typevec_size); + } + else + { + work -> typevec_size *= 2; + work -> typevec + = (char **) xrealloc ((char *)work -> typevec, + sizeof (char *) * work -> typevec_size); + } + } + tem = xmalloc (len + 1); + memcpy (tem, start, len); + tem[len] = '\0'; + work -> typevec[work -> ntypes++] = tem; +} + + +/* Remember a K type class qualifier. */ +static void +remember_Ktype (work, start, len) + struct work_stuff *work; + const char *start; + int len; +{ + char *tem; + + if (work -> numk >= work -> ksize) + { + if (work -> ksize == 0) + { + work -> ksize = 5; + work -> ktypevec + = (char **) xmalloc (sizeof (char *) * work -> ksize); + } + else + { + work -> ksize *= 2; + work -> ktypevec + = (char **) xrealloc ((char *)work -> ktypevec, + sizeof (char *) * work -> ksize); + } + } + tem = xmalloc (len + 1); + memcpy (tem, start, len); + tem[len] = '\0'; + work -> ktypevec[work -> numk++] = tem; +} + +/* Register a B code, and get an index for it. 
B codes are registered + as they are seen, rather than as they are completed, so map > + registers map > as B0, and temp as B1 */ + +static int +register_Btype (work) + struct work_stuff *work; +{ + int ret; + + if (work -> numb >= work -> bsize) + { + if (work -> bsize == 0) + { + work -> bsize = 5; + work -> btypevec + = (char **) xmalloc (sizeof (char *) * work -> bsize); + } + else + { + work -> bsize *= 2; + work -> btypevec + = (char **) xrealloc ((char *)work -> btypevec, + sizeof (char *) * work -> bsize); + } + } + ret = work -> numb++; + work -> btypevec[ret] = NULL; + return(ret); +} + +/* Store a value into a previously registered B code type. */ + +static void +remember_Btype (work, start, len, ind) + struct work_stuff *work; + const char *start; + int len, ind; +{ + char *tem; + + tem = xmalloc (len + 1); + memcpy (tem, start, len); + tem[len] = '\0'; + work -> btypevec[ind] = tem; +} + +/* Lose all the info related to B and K type codes. */ +static void +forget_B_and_K_types (work) + struct work_stuff *work; +{ + int i; + + while (work -> numk > 0) + { + i = --(work -> numk); + if (work -> ktypevec[i] != NULL) + { + free (work -> ktypevec[i]); + work -> ktypevec[i] = NULL; + } + } + + while (work -> numb > 0) + { + i = --(work -> numb); + if (work -> btypevec[i] != NULL) + { + free (work -> btypevec[i]); + work -> btypevec[i] = NULL; + } + } +} +/* Forget the remembered types, but not the type vector itself. */ + +static void +forget_types (work) + struct work_stuff *work; +{ + int i; + + while (work -> ntypes > 0) + { + i = --(work -> ntypes); + if (work -> typevec[i] != NULL) + { + free (work -> typevec[i]); + work -> typevec[i] = NULL; + } + } +} + +/* Process the argument list part of the signature, after any class spec + has been consumed, as well as the first 'F' character (if any). For + example: + + "__als__3fooRT0" => process "RT0" + "complexfunc5__FPFPc_PFl_i" => process "PFPc_PFl_i" + + DECLP must be already initialised, usually non-empty. 
It won't be freed + on failure. + + Note that g++ differs significantly from ARM and lucid style mangling + with regards to references to previously seen types. For example, given + the source fragment: + + class foo { + public: + foo::foo (int, foo &ia, int, foo &ib, int, foo &ic); + }; + + foo::foo (int, foo &ia, int, foo &ib, int, foo &ic) { ia = ib = ic; } + void foo (int, foo &ia, int, foo &ib, int, foo &ic) { ia = ib = ic; } + + g++ produces the names: + + __3fooiRT0iT2iT2 + foo__FiR3fooiT1iT1 + + while lcc (and presumably other ARM style compilers as well) produces: + + foo__FiR3fooT1T2T1T2 + __ct__3fooFiR3fooT1T2T1T2 + + Note that g++ bases its type numbers starting at zero and counts all + previously seen types, while lucid/ARM bases its type numbers starting + at one and only considers types after it has seen the 'F' character + indicating the start of the function args. For lucid/ARM style, we + account for this difference by discarding any previously seen types when + we see the 'F' character, and subtracting one from the type number + reference. + + */ + +static int +demangle_args (work, mangled, declp) + struct work_stuff *work; + const char **mangled; + string *declp; +{ + string arg; + int need_comma = 0; + int r; + int t; + const char *tem; + char temptype; + + if (PRINT_ARG_TYPES) + { + string_append (declp, "("); + if (**mangled == '\0') + { + string_append (declp, "void"); + } + } + + while ((**mangled != '_' && **mangled != '\0' && **mangled != 'e') + || work->nrepeats > 0) + { + if ((**mangled == 'N') || (**mangled == 'T')) + { + temptype = *(*mangled)++; + + if (temptype == 'N') + { + if (!get_count (mangled, &r)) + { + return (0); + } + } + else + { + r = 1; + } + if ((HP_DEMANGLING || ARM_DEMANGLING || EDG_DEMANGLING) && work -> ntypes >= 10) + { + /* If we have 10 or more types we might have more than a 1 digit + index so we'll have to consume the whole count here. 
This + will lose if the next thing is a type name preceded by a + count but it's impossible to demangle that case properly + anyway. Eg if we already have 12 types is T12Pc "(..., type1, + Pc, ...)" or "(..., type12, char *, ...)" */ + if ((t = consume_count(mangled)) <= 0) + { + return (0); + } + } + else + { + if (!get_count (mangled, &t)) + { + return (0); + } + } + if (LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING) + { + t--; + } + /* Validate the type index. Protect against illegal indices from + malformed type strings. */ + if ((t < 0) || (t >= work -> ntypes)) + { + return (0); + } + while (work->nrepeats > 0 || --r >= 0) + { + tem = work -> typevec[t]; + if (need_comma && PRINT_ARG_TYPES) + { + string_append (declp, ", "); + } + if (!do_arg (work, &tem, &arg)) + { + return (0); + } + if (PRINT_ARG_TYPES) + { + string_appends (declp, &arg); + } + string_delete (&arg); + need_comma = 1; + } + } + else + { + if (need_comma && PRINT_ARG_TYPES) + string_append (declp, ", "); + if (!do_arg (work, mangled, &arg)) + { + string_delete (&arg); + return (0); + } + if (PRINT_ARG_TYPES) + string_appends (declp, &arg); + string_delete (&arg); + need_comma = 1; + } + } + + if (**mangled == 'e') + { + (*mangled)++; + if (PRINT_ARG_TYPES) + { + if (need_comma) + { + string_append (declp, ","); + } + string_append (declp, "..."); + } + } + + if (PRINT_ARG_TYPES) + { + string_append (declp, ")"); + } + return (1); +} + +/* Like demangle_args, but for demangling the argument lists of function + and method pointers or references, not top-level declarations. */ + +static int +demangle_nested_args (work, mangled, declp) + struct work_stuff *work; + const char **mangled; + string *declp; +{ + string* saved_previous_argument; + int result; + int saved_nrepeats; + + /* The G++ name-mangling algorithm does not remember types on nested + argument lists, unless -fsquangling is used, and in that case the + type vector updated by remember_type is not used. 
So, we turn + off remembering of types here. */ + ++work->forgetting_types; + + /* For the repeat codes used with -fsquangling, we must keep track of + the last argument. */ + saved_previous_argument = work->previous_argument; + saved_nrepeats = work->nrepeats; + work->previous_argument = 0; + work->nrepeats = 0; + + /* Actually demangle the arguments. */ + result = demangle_args (work, mangled, declp); + + /* Restore the previous_argument field. */ + if (work->previous_argument) + { + string_delete (work->previous_argument); + free ((char*) work->previous_argument); + } + work->previous_argument = saved_previous_argument; + --work->forgetting_types; + work->nrepeats = saved_nrepeats; + + return result; +} + +static void +demangle_function_name (work, mangled, declp, scan) + struct work_stuff *work; + const char **mangled; + string *declp; + const char *scan; +{ + size_t i; + string type; + const char *tem; + + string_appendn (declp, (*mangled), scan - (*mangled)); + string_need (declp, 1); + *(declp -> p) = '\0'; + + /* Consume the function name, including the "__" separating the name + from the signature. We are guaranteed that SCAN points to the + separator. */ + + (*mangled) = scan + 2; + /* We may be looking at an instantiation of a template function: + foo__Xt1t2_Ft3t4, where t1, t2, ... are template arguments and a + following _F marks the start of the function arguments. Handle + the template arguments first. */ + + if (HP_DEMANGLING && (**mangled == 'X')) + { + demangle_arm_hp_template (work, mangled, 0, declp); + /* This leaves MANGLED pointing to the 'F' marking func args */ + } + + if (LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING) + { + + /* See if we have an ARM style constructor or destructor operator. + If so, then just record it, clear the decl, and return. + We can't build the actual constructor/destructor decl until later, + when we recover the class name from the signature. 
*/ + + if (strcmp (declp -> b, "__ct") == 0) + { + work -> constructor += 1; + string_clear (declp); + return; + } + else if (strcmp (declp -> b, "__dt") == 0) + { + work -> destructor += 1; + string_clear (declp); + return; + } + } + + if (declp->p - declp->b >= 3 + && declp->b[0] == 'o' + && declp->b[1] == 'p' + && strchr (cplus_markers, declp->b[2]) != NULL) + { + /* see if it's an assignment expression */ + if (declp->p - declp->b >= 10 /* op$assign_ */ + && memcmp (declp->b + 3, "assign_", 7) == 0) + { + for (i = 0; i < ARRAY_SIZE (optable); i++) + { + int len = declp->p - declp->b - 10; + if ((int) strlen (optable[i].in) == len + && memcmp (optable[i].in, declp->b + 10, len) == 0) + { + string_clear (declp); + string_append (declp, "operator"); + string_append (declp, optable[i].out); + string_append (declp, "="); + break; + } + } + } + else + { + for (i = 0; i < ARRAY_SIZE (optable); i++) + { + int len = declp->p - declp->b - 3; + if ((int) strlen (optable[i].in) == len + && memcmp (optable[i].in, declp->b + 3, len) == 0) + { + string_clear (declp); + string_append (declp, "operator"); + string_append (declp, optable[i].out); + break; + } + } + } + } + else if (declp->p - declp->b >= 5 && memcmp (declp->b, "type", 4) == 0 + && strchr (cplus_markers, declp->b[4]) != NULL) + { + /* type conversion operator */ + tem = declp->b + 5; + if (do_type (work, &tem, &type)) + { + string_clear (declp); + string_append (declp, "operator "); + string_appends (declp, &type); + string_delete (&type); + } + } + else if (declp->b[0] == '_' && declp->b[1] == '_' + && declp->b[2] == 'o' && declp->b[3] == 'p') + { + /* ANSI. */ + /* type conversion operator. 
*/ + tem = declp->b + 4; + if (do_type (work, &tem, &type)) + { + string_clear (declp); + string_append (declp, "operator "); + string_appends (declp, &type); + string_delete (&type); + } + } + else if (declp->b[0] == '_' && declp->b[1] == '_' + && ISLOWER((unsigned char)declp->b[2]) + && ISLOWER((unsigned char)declp->b[3])) + { + if (declp->b[4] == '\0') + { + /* Operator. */ + for (i = 0; i < ARRAY_SIZE (optable); i++) + { + if (strlen (optable[i].in) == 2 + && memcmp (optable[i].in, declp->b + 2, 2) == 0) + { + string_clear (declp); + string_append (declp, "operator"); + string_append (declp, optable[i].out); + break; + } + } + } + else + { + if (declp->b[2] == 'a' && declp->b[5] == '\0') + { + /* Assignment. */ + for (i = 0; i < ARRAY_SIZE (optable); i++) + { + if (strlen (optable[i].in) == 3 + && memcmp (optable[i].in, declp->b + 2, 3) == 0) + { + string_clear (declp); + string_append (declp, "operator"); + string_append (declp, optable[i].out); + break; + } + } + } + } + } +} + +/* a mini string-handling package */ + +static void +string_need (s, n) + string *s; + int n; +{ + int tem; + + if (s->b == NULL) + { + if (n < 32) + { + n = 32; + } + s->p = s->b = xmalloc (n); + s->e = s->b + n; + } + else if (s->e - s->p < n) + { + tem = s->p - s->b; + n += tem; + n *= 2; + s->b = xrealloc (s->b, n); + s->p = s->b + tem; + s->e = s->b + n; + } +} + +static void +string_delete (s) + string *s; +{ + if (s->b != NULL) + { + free (s->b); + s->b = s->e = s->p = NULL; + } +} + +static void +string_init (s) + string *s; +{ + s->b = s->p = s->e = NULL; +} + +static void +string_clear (s) + string *s; +{ + s->p = s->b; +} + +#if 0 + +static int +string_empty (s) + string *s; +{ + return (s->b == s->p); +} + +#endif + +static void +string_append (p, s) + string *p; + const char *s; +{ + int n; + if (s == NULL || *s == '\0') + return; + n = strlen (s); + string_need (p, n); + memcpy (p->p, s, n); + p->p += n; +} + +static void +string_appends (p, s) + string *p, *s; +{ + int 
n; + + if (s->b != s->p) + { + n = s->p - s->b; + string_need (p, n); + memcpy (p->p, s->b, n); + p->p += n; + } +} + +static void +string_appendn (p, s, n) + string *p; + const char *s; + int n; +{ + if (n != 0) + { + string_need (p, n); + memcpy (p->p, s, n); + p->p += n; + } +} + +static void +string_prepend (p, s) + string *p; + const char *s; +{ + if (s != NULL && *s != '\0') + { + string_prependn (p, s, strlen (s)); + } +} + +static void +string_prepends (p, s) + string *p, *s; +{ + if (s->b != s->p) + { + string_prependn (p, s->b, s->p - s->b); + } +} + +static void +string_prependn (p, s, n) + string *p; + const char *s; + int n; +{ + char *q; + + if (n != 0) + { + string_need (p, n); + for (q = p->p - 1; q >= p->b; q--) + { + q[n] = q[0]; + } + memcpy (p->b, s, n); + p->p += n; + } +} + +static void +string_append_template_idx (s, idx) + string *s; + int idx; +{ + char buf[INTBUF_SIZE + 1 /* 'T' */]; + sprintf(buf, "T%d", idx); + string_append (s, buf); +} + +/* To generate a standalone demangler program for testing purposes, + just compile and link this file with -DMAIN and libiberty.a. When + run, it demangles each command line arg, or each stdin string, and + prints the result on stdout. */ + +#ifdef MAIN + +#include "getopt.h" + +static const char *program_name; +static const char *program_version = VERSION; +static int flags = DMGL_PARAMS | DMGL_ANSI | DMGL_VERBOSE; + +static void demangle_it PARAMS ((char *)); +static void usage PARAMS ((FILE *, int)) ATTRIBUTE_NORETURN; +static void fatal PARAMS ((const char *)) ATTRIBUTE_NORETURN; +static void print_demangler_list PARAMS ((FILE *)); + +static void +demangle_it (mangled_name) + char *mangled_name; +{ + char *result; + + /* For command line args, also try to demangle type encodings. 
*/ + result = cplus_demangle (mangled_name, flags | DMGL_TYPES); + if (result == NULL) + { + printf ("%s\n", mangled_name); + } + else + { + printf ("%s\n", result); + free (result); + } +} + +static void +print_demangler_list (stream) + FILE *stream; +{ + const struct demangler_engine *demangler; + + fprintf (stream, "{%s", libiberty_demanglers->demangling_style_name); + + for (demangler = libiberty_demanglers + 1; + demangler->demangling_style != unknown_demangling; + ++demangler) + fprintf (stream, ",%s", demangler->demangling_style_name); + + fprintf (stream, "}"); +} + +static void +usage (stream, status) + FILE *stream; + int status; +{ + fprintf (stream, "\ +Usage: %s [-_] [-n] [--strip-underscores] [--no-strip-underscores] \n", + program_name); + + fprintf (stream, "\ + [-s "); + print_demangler_list (stream); + fprintf (stream, "]\n"); + + fprintf (stream, "\ + [--format "); + print_demangler_list (stream); + fprintf (stream, "]\n"); + + fprintf (stream, "\ + [--help] [--version] [arg...]\n"); + exit (status); +} + +#define MBUF_SIZE 32767 +char mbuffer[MBUF_SIZE]; + +/* Defined in the automatically-generated underscore.c. */ +extern int prepends_underscore; + +int strip_underscore = 0; + +static const struct option long_options[] = { + {"strip-underscores", no_argument, 0, '_'}, + {"format", required_argument, 0, 's'}, + {"help", no_argument, 0, 'h'}, + {"no-strip-underscores", no_argument, 0, 'n'}, + {"version", no_argument, 0, 'v'}, + {0, no_argument, 0, 0} +}; + +/* More 'friendly' abort that prints the line and file. + config.h can #define abort fancy_abort if you like that sort of thing. 
*/ + +void +fancy_abort () +{ + fatal ("Internal gcc abort."); +} + + +static const char * +standard_symbol_characters PARAMS ((void)); + +static const char * +hp_symbol_characters PARAMS ((void)); + +static const char * +gnu_v3_symbol_characters PARAMS ((void)); + +/* Return the string of non-alnum characters that may occur + as a valid symbol component, in the standard assembler symbol + syntax. */ + +static const char * +standard_symbol_characters () +{ + return "_$."; +} + + +/* Return the string of non-alnum characters that may occur + as a valid symbol name component in an HP object file. + + Note that, since HP's compiler generates object code straight from + C++ source, without going through an assembler, its mangled + identifiers can use all sorts of characters that no assembler would + tolerate, so the alphabet this function creates is a little odd. + Here are some sample mangled identifiers offered by HP: + + typeid*__XT24AddressIndExpClassMember_ + [Vftptr]key:__dt__32OrdinaryCompareIndExpClassMemberFv + __ct__Q2_9Elf64_Dyn18{unnamed.union.#1}Fv + + This still seems really weird to me, since nowhere else in this + file is there anything to recognize curly brackets, parens, etc. + I've talked with Srikanth , and he assures me + this is right, but I still strongly suspect that there's a + misunderstanding here. + + If we decide it's better for c++filt to use HP's assembler syntax + to scrape identifiers out of its input, here's the definition of + the symbol name syntax from the HP assembler manual: + + Symbols are composed of uppercase and lowercase letters, decimal + digits, dollar symbol, period (.), ampersand (&), pound sign(#) and + underscore (_). A symbol can begin with a letter, digit underscore or + dollar sign. If a symbol begins with a digit, it must contain a + non-digit character. + + So have fun. 
*/ +static const char * +hp_symbol_characters () +{ + return "_$.<>#,*&[]:(){}"; +} + + +/* Return the string of non-alnum characters that may occur + as a valid symbol component in the GNU C++ V3 ABI mangling + scheme. */ + +static const char * +gnu_v3_symbol_characters () +{ + return "_$."; +} + + +extern int main PARAMS ((int, char **)); + +int +main (argc, argv) + int argc; + char **argv; +{ + char *result; + int c; + const char *valid_symbols; + enum demangling_styles style = auto_demangling; + + program_name = argv[0]; + + strip_underscore = prepends_underscore; + + while ((c = getopt_long (argc, argv, "_ns:", long_options, (int *) 0)) != EOF) + { + switch (c) + { + case '?': + usage (stderr, 1); + break; + case 'h': + usage (stdout, 0); + case 'n': + strip_underscore = 0; + break; + case 'v': + printf ("GNU %s (C++ demangler), version %s\n", program_name, program_version); + return (0); + case '_': + strip_underscore = 1; + break; + case 's': + { + style = cplus_demangle_name_to_style (optarg); + if (style == unknown_demangling) + { + fprintf (stderr, "%s: unknown demangling style `%s'\n", + program_name, optarg); + return (1); + } + else + cplus_demangle_set_style (style); + } + break; + } + } + + if (optind < argc) + { + for ( ; optind < argc; optind++) + { + demangle_it (argv[optind]); + } + } + else + { + switch (current_demangling_style) + { + case gnu_demangling: + case lucid_demangling: + case arm_demangling: + case java_demangling: + case edg_demangling: + case gnat_demangling: + case auto_demangling: + valid_symbols = standard_symbol_characters (); + break; + case hp_demangling: + valid_symbols = hp_symbol_characters (); + break; + case gnu_v3_demangling: + valid_symbols = gnu_v3_symbol_characters (); + break; + default: + /* Folks should explicitly indicate the appropriate alphabet for + each demangling. Providing a default would allow the + question to go unconsidered. 
*/ + abort (); + } + + for (;;) + { + int i = 0; + c = getchar (); + /* Try to read a label. */ + while (c != EOF && (ISALNUM (c) || strchr (valid_symbols, c))) + { + if (i >= MBUF_SIZE-1) + break; + mbuffer[i++] = c; + c = getchar (); + } + if (i > 0) + { + int skip_first = 0; + + if (mbuffer[0] == '.' || mbuffer[0] == '$') + ++skip_first; + if (strip_underscore && mbuffer[skip_first] == '_') + ++skip_first; + + if (skip_first > i) + skip_first = i; + + mbuffer[i] = 0; + flags |= (int) style; + result = cplus_demangle (mbuffer + skip_first, flags); + if (result) + { + if (mbuffer[0] == '.') + putc ('.', stdout); + fputs (result, stdout); + free (result); + } + else + fputs (mbuffer, stdout); + + fflush (stdout); + } + if (c == EOF) + break; + putchar (c); + fflush (stdout); + } + } + + return (0); +} + +static void +fatal (str) + const char *str; +{ + fprintf (stderr, "%s: %s\n", program_name, str); + exit (1); +} + +PTR +xmalloc (size) + size_t size; +{ + register PTR value = (PTR) malloc (size); + if (value == 0) + fatal ("virtual memory exhausted"); + return value; +} + +PTR +xrealloc (ptr, size) + PTR ptr; + size_t size; +{ + register PTR value = (PTR) realloc (ptr, size); + if (value == 0) + fatal ("virtual memory exhausted"); + return value; +} +#endif /* main */ diff --git a/coregrind/demangle/demangle.h b/coregrind/demangle/demangle.h new file mode 100644 index 000000000..238ae3398 --- /dev/null +++ b/coregrind/demangle/demangle.h @@ -0,0 +1,177 @@ +/* Defs for interface to demanglers. + Copyright 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001 + Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + + +#if !defined (DEMANGLE_H) +#define DEMANGLE_H + +#include + +#define current_demangling_style VG_(current_demangling_style) + +/* Options passed to cplus_demangle (in 2nd parameter). */ + +#define DMGL_NO_OPTS 0 /* For readability... */ +#define DMGL_PARAMS (1 << 0) /* Include function args */ +#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ +#define DMGL_JAVA (1 << 2) /* Demangle as Java rather than C++. */ + +#define DMGL_AUTO (1 << 8) +#define DMGL_GNU (1 << 9) +#define DMGL_LUCID (1 << 10) +#define DMGL_ARM (1 << 11) +#define DMGL_HP (1 << 12) /* For the HP aCC compiler; + same as ARM except for + template arguments, etc. */ +#define DMGL_EDG (1 << 13) +#define DMGL_GNU_V3 (1 << 14) +#define DMGL_GNAT (1 << 15) + +/* If none of these are set, use 'current_demangling_style' as the default. */ +#define DMGL_STYLE_MASK (DMGL_AUTO|DMGL_GNU|DMGL_LUCID|DMGL_ARM|DMGL_HP|DMGL_EDG|DMGL_GNU_V3|DMGL_JAVA|DMGL_GNAT) + +/* Enumeration of possible demangling styles. + + Lucid and ARM styles are still kept logically distinct, even though + they now both behave identically. The resulting style is actual the + union of both. I.E. either style recognizes both "__pt__" and "__rf__" + for operator "->", even though the first is lucid style and the second + is ARM style. (FIXME?) 
*/ + +extern enum demangling_styles +{ + no_demangling = -1, + unknown_demangling = 0, + auto_demangling = DMGL_AUTO, + gnu_demangling = DMGL_GNU, + lucid_demangling = DMGL_LUCID, + arm_demangling = DMGL_ARM, + hp_demangling = DMGL_HP, + edg_demangling = DMGL_EDG, + gnu_v3_demangling = DMGL_GNU_V3, + java_demangling = DMGL_JAVA, + gnat_demangling = DMGL_GNAT +} current_demangling_style; + +/* Define string names for the various demangling styles. */ + +#define NO_DEMANGLING_STYLE_STRING "none" +#define AUTO_DEMANGLING_STYLE_STRING "auto" +#define GNU_DEMANGLING_STYLE_STRING "gnu" +#define LUCID_DEMANGLING_STYLE_STRING "lucid" +#define ARM_DEMANGLING_STYLE_STRING "arm" +#define HP_DEMANGLING_STYLE_STRING "hp" +#define EDG_DEMANGLING_STYLE_STRING "edg" +#define GNU_V3_DEMANGLING_STYLE_STRING "gnu-v3" +#define JAVA_DEMANGLING_STYLE_STRING "java" +#define GNAT_DEMANGLING_STYLE_STRING "gnat" + +/* Some macros to test what demangling style is active. */ + +#define CURRENT_DEMANGLING_STYLE current_demangling_style +#define AUTO_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_AUTO) +#define GNU_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_GNU) +#define LUCID_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_LUCID) +#define ARM_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_ARM) +#define HP_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_HP) +#define EDG_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_EDG) +#define GNU_V3_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_GNU_V3) +#define JAVA_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_JAVA) +#define GNAT_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_GNAT) + +/* Provide information about the available demangle styles. This code is + pulled from gdb into libiberty because it is useful to binutils also. 
*/ + +extern const struct demangler_engine +{ + const char *const demangling_style_name; + const enum demangling_styles demangling_style; + const char *const demangling_style_doc; +} libiberty_demanglers[]; + +extern char * +VG_(cplus_demangle) PARAMS ((const char *mangled, int options)); + +/* +extern int +cplus_demangle_opname PARAMS ((const char *opname, char *result, int options)); +*/ + +/* +extern const char * +cplus_mangle_opname PARAMS ((const char *opname, int options)); +*/ + +/* Note: This sets global state. FIXME if you care about multi-threading. */ + +/* +extern void +set_cplus_marker_for_demangling PARAMS ((int ch)); +*/ + +/* +extern enum demangling_styles +cplus_demangle_set_style PARAMS ((enum demangling_styles style)); +*/ + +/* +extern enum demangling_styles +cplus_demangle_name_to_style PARAMS ((const char *name)); +*/ + +/* V3 ABI demangling entry points, defined in cp-demangle.c. */ +extern char* +VG_(cplus_demangle_v3) PARAMS ((const char* mangled)); + +extern char* +VG_(java_demangle_v3) PARAMS ((const char* mangled)); + + +enum gnu_v3_ctor_kinds { + gnu_v3_complete_object_ctor = 1, + gnu_v3_base_object_ctor, + gnu_v3_complete_object_allocating_ctor +}; + +/* Return non-zero iff NAME is the mangled form of a constructor name + in the G++ V3 ABI demangling style. Specifically, return an `enum + gnu_v3_ctor_kinds' value indicating what kind of constructor + it is. */ +/* +extern enum gnu_v3_ctor_kinds + is_gnu_v3_mangled_ctor PARAMS ((const char *name)); +*/ + + +enum gnu_v3_dtor_kinds { + gnu_v3_deleting_dtor = 1, + gnu_v3_complete_object_dtor, + gnu_v3_base_object_dtor +}; + +/* Return non-zero iff NAME is the mangled form of a destructor name + in the G++ V3 ABI demangling style. Specifically, return an `enum + gnu_v3_dtor_kinds' value, indicating what kind of destructor + it is. 
*/ +/* +extern enum gnu_v3_dtor_kinds + is_gnu_v3_mangled_dtor PARAMS ((const char *name)); +*/ + +#endif /* DEMANGLE_H */ diff --git a/coregrind/demangle/dyn-string.c b/coregrind/demangle/dyn-string.c new file mode 100644 index 000000000..aaa7e3631 --- /dev/null +++ b/coregrind/demangle/dyn-string.c @@ -0,0 +1,439 @@ +/* An abstract string datatype. + Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. + Contributed by Mark Mitchell (mark@markmitchell.com). + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_STRING_H +#include +#endif + +#ifdef HAVE_STDLIB_H +#include +#endif + +#include "vg_include.h" +#include "ansidecl.h" +#include "dyn-string.h" + +#ifndef STANDALONE +#define malloc(s) VG_(malloc)(VG_AR_DEMANGLE, s) +#define free(p) VG_(free)(VG_AR_DEMANGLE, p) +#define realloc(p,s) VG_(realloc)(VG_AR_DEMANGLE, p, s) +#endif + +/* If this file is being compiled for inclusion in the C++ runtime + library, as part of the demangler implementation, we don't want to + abort if an allocation fails. Instead, percolate an error code up + through the call chain. */ + +#ifdef IN_LIBGCC2 +#define RETURN_ON_ALLOCATION_FAILURE +#endif + +/* Performs in-place initialization of a dyn_string struct. 
This + function can be used with a dyn_string struct on the stack or + embedded in another object. The contents of of the string itself + are still dynamically allocated. The string initially is capable + of holding at least SPACE characeters, including the terminating + NUL. If SPACE is 0, it will silently be increated to 1. + + If RETURN_ON_ALLOCATION_FAILURE is defined and memory allocation + fails, returns 0. Otherwise returns 1. */ + +int +dyn_string_init (ds_struct_ptr, space) + struct dyn_string *ds_struct_ptr; + int space; +{ + /* We need at least one byte in which to store the terminating NUL. */ + if (space == 0) + space = 1; + +#ifdef RETURN_ON_ALLOCATION_FAILURE + ds_struct_ptr->s = (char *) malloc (space); + if (ds_struct_ptr->s == NULL) + return 0; +#else + ds_struct_ptr->s = (char *) malloc (space); +#endif + ds_struct_ptr->allocated = space; + ds_struct_ptr->length = 0; + ds_struct_ptr->s[0] = '\0'; + + return 1; +} + +/* Create a new dynamic string capable of holding at least SPACE + characters, including the terminating NUL. If SPACE is 0, it will + be silently increased to 1. If RETURN_ON_ALLOCATION_FAILURE is + defined and memory allocation fails, returns NULL. Otherwise + returns the newly allocated string. */ + +dyn_string_t +dyn_string_new (space) + int space; +{ + dyn_string_t result; +#ifdef RETURN_ON_ALLOCATION_FAILURE + result = (dyn_string_t) malloc (sizeof (struct dyn_string)); + if (result == NULL) + return NULL; + if (!dyn_string_init (result, space)) + { + free (result); + return NULL; + } +#else + result = (dyn_string_t) malloc (sizeof (struct dyn_string)); + dyn_string_init (result, space); +#endif + return result; +} + +/* Free the memory used by DS. */ + +void +dyn_string_delete (ds) + dyn_string_t ds; +{ + free (ds->s); + free (ds); +} + +/* Returns the contents of DS in a buffer allocated with malloc. It + is the caller's responsibility to deallocate the buffer using free. + DS is then set to the empty string. 
Deletes DS itself. */ + +char* +dyn_string_release (ds) + dyn_string_t ds; +{ + /* Store the old buffer. */ + char* result = ds->s; + /* The buffer is no longer owned by DS. */ + ds->s = NULL; + /* Delete DS. */ + free (ds); + /* Return the old buffer. */ + return result; +} + +/* Increase the capacity of DS so it can hold at least SPACE + characters, plus the terminating NUL. This function will not (at + present) reduce the capacity of DS. Returns DS on success. + + If RETURN_ON_ALLOCATION_FAILURE is defined and a memory allocation + operation fails, deletes DS and returns NULL. */ + +dyn_string_t +dyn_string_resize (ds, space) + dyn_string_t ds; + int space; +{ + int new_allocated = ds->allocated; + + /* Increase SPACE to hold the NUL termination. */ + ++space; + + /* Increase allocation by factors of two. */ + while (space > new_allocated) + new_allocated *= 2; + + if (new_allocated != ds->allocated) + { + ds->allocated = new_allocated; + /* We actually need more space. */ +#ifdef RETURN_ON_ALLOCATION_FAILURE + ds->s = (char *) realloc (ds->s, ds->allocated); + if (ds->s == NULL) + { + free (ds); + return NULL; + } +#else + ds->s = (char *) realloc (ds->s, ds->allocated); +#endif + } + + return ds; +} + +/* Sets the contents of DS to the empty string. */ + +void +dyn_string_clear (ds) + dyn_string_t ds; +{ + /* A dyn_string always has room for at least the NUL terminator. */ + ds->s[0] = '\0'; + ds->length = 0; +} + +/* Makes the contents of DEST the same as the contents of SRC. DEST + and SRC must be distinct. Returns 1 on success. On failure, if + RETURN_ON_ALLOCATION_FAILURE, deletes DEST and returns 0. */ + +int +dyn_string_copy (dest, src) + dyn_string_t dest; + dyn_string_t src; +{ + if (dest == src) + VG_(panic) ("dyn_string_copy: src==dest"); + + /* Make room in DEST. */ + if (dyn_string_resize (dest, src->length) == NULL) + return 0; + /* Copy DEST into SRC. */ + VG_(strcpy) (dest->s, src->s); + /* Update the size of DEST. 
*/ + dest->length = src->length; + return 1; +} + +/* Copies SRC, a NUL-terminated string, into DEST. Returns 1 on + success. On failure, if RETURN_ON_ALLOCATION_FAILURE, deletes DEST + and returns 0. */ + +int +dyn_string_copy_cstr (dest, src) + dyn_string_t dest; + const char *src; +{ + int length = VG_(strlen) (src); + /* Make room in DEST. */ + if (dyn_string_resize (dest, length) == NULL) + return 0; + /* Copy DEST into SRC. */ + VG_(strcpy) (dest->s, src); + /* Update the size of DEST. */ + dest->length = length; + return 1; +} + +/* Inserts SRC at the beginning of DEST. DEST is expanded as + necessary. SRC and DEST must be distinct. Returns 1 on success. + On failure, if RETURN_ON_ALLOCATION_FAILURE, deletes DEST and + returns 0. */ + +int +dyn_string_prepend (dest, src) + dyn_string_t dest; + dyn_string_t src; +{ + return dyn_string_insert (dest, 0, src); +} + +/* Inserts SRC, a NUL-terminated string, at the beginning of DEST. + DEST is expanded as necessary. Returns 1 on success. On failure, + if RETURN_ON_ALLOCATION_FAILURE, deletes DEST and returns 0. */ + +int +dyn_string_prepend_cstr (dest, src) + dyn_string_t dest; + const char *src; +{ + return dyn_string_insert_cstr (dest, 0, src); +} + +/* Inserts SRC into DEST starting at position POS. DEST is expanded + as necessary. SRC and DEST must be distinct. Returns 1 on + success. On failure, if RETURN_ON_ALLOCATION_FAILURE, deletes DEST + and returns 0. */ + +int +dyn_string_insert (dest, pos, src) + dyn_string_t dest; + int pos; + dyn_string_t src; +{ + int i; + + if (src == dest) + VG_(panic)( "dyn_string_insert: src==dest" ); + + if (dyn_string_resize (dest, dest->length + src->length) == NULL) + return 0; + /* Make room for the insertion. Be sure to copy the NUL. */ + for (i = dest->length; i >= pos; --i) + dest->s[i + src->length] = dest->s[i]; + /* Splice in the new stuff. */ + VG_(strncpy) (dest->s + pos, src->s, src->length); + /* Compute the new length. 
*/ + dest->length += src->length; + return 1; +} + +/* Inserts SRC, a NUL-terminated string, into DEST starting at + position POS. DEST is expanded as necessary. Returns 1 on + success. On failure, RETURN_ON_ALLOCATION_FAILURE, deletes DEST + and returns 0. */ + +int +dyn_string_insert_cstr (dest, pos, src) + dyn_string_t dest; + int pos; + const char *src; +{ + int i; + int length = VG_(strlen) (src); + + if (dyn_string_resize (dest, dest->length + length) == NULL) + return 0; + /* Make room for the insertion. Be sure to copy the NUL. */ + for (i = dest->length; i >= pos; --i) + dest->s[i + length] = dest->s[i]; + /* Splice in the new stuff. */ + VG_(strncpy) (dest->s + pos, src, length); + /* Compute the new length. */ + dest->length += length; + return 1; +} + +/* Inserts character C into DEST starting at position POS. DEST is + expanded as necessary. Returns 1 on success. On failure, + RETURN_ON_ALLOCATION_FAILURE, deletes DEST and returns 0. */ + +int +dyn_string_insert_char (dest, pos, c) + dyn_string_t dest; + int pos; + int c; +{ + int i; + + if (dyn_string_resize (dest, dest->length + 1) == NULL) + return 0; + /* Make room for the insertion. Be sure to copy the NUL. */ + for (i = dest->length; i >= pos; --i) + dest->s[i + 1] = dest->s[i]; + /* Add the new character. */ + dest->s[pos] = c; + /* Compute the new length. */ + ++dest->length; + return 1; +} + +/* Append S to DS, resizing DS if necessary. Returns 1 on success. + On failure, if RETURN_ON_ALLOCATION_FAILURE, deletes DEST and + returns 0. */ + +int +dyn_string_append (dest, s) + dyn_string_t dest; + dyn_string_t s; +{ + if (dyn_string_resize (dest, dest->length + s->length) == 0) + return 0; + VG_(strcpy) (dest->s + dest->length, s->s); + dest->length += s->length; + return 1; +} + +/* Append the NUL-terminated string S to DS, resizing DS if necessary. + Returns 1 on success. On failure, if RETURN_ON_ALLOCATION_FAILURE, + deletes DEST and returns 0. 
*/ + +int +dyn_string_append_cstr (dest, s) + dyn_string_t dest; + const char *s; +{ + int len = VG_(strlen) (s); + + /* The new length is the old length plus the size of our string, plus + one for the null at the end. */ + if (dyn_string_resize (dest, dest->length + len) == NULL) + return 0; + VG_(strcpy) (dest->s + dest->length, s); + dest->length += len; + return 1; +} + +/* Appends C to the end of DEST. Returns 1 on success. On failiure, + if RETURN_ON_ALLOCATION_FAILURE, deletes DEST and returns 0. */ + +int +dyn_string_append_char (dest, c) + dyn_string_t dest; + int c; +{ + /* Make room for the extra character. */ + if (dyn_string_resize (dest, dest->length + 1) == NULL) + return 0; + /* Append the character; it will overwrite the old NUL. */ + dest->s[dest->length] = c; + /* Add a new NUL at the end. */ + dest->s[dest->length + 1] = '\0'; + /* Update the length. */ + ++(dest->length); + return 1; +} + +/* Sets the contents of DEST to the substring of SRC starting at START + and ending before END. START must be less than or equal to END, + and both must be between zero and the length of SRC, inclusive. + Returns 1 on success. On failure, if RETURN_ON_ALLOCATION_FAILURE, + deletes DEST and returns 0. */ + +int +dyn_string_substring (dest, src, start, end) + dyn_string_t dest; + dyn_string_t src; + int start; + int end; +{ + int i; + int length = end - start; + + /* + vg_assert (start > end || start > src->length || end > src->length); + */ + + /* Make room for the substring. */ + if (dyn_string_resize (dest, length) == NULL) + return 0; + /* Copy the characters in the substring, */ + for (i = length; --i >= 0; ) + dest->s[i] = src->s[start + i]; + /* NUL-terimate the result. */ + dest->s[length] = '\0'; + /* Record the length of the substring. */ + dest->length = length; + + return 1; +} + +/* Returns non-zero if DS1 and DS2 have the same contents. 
*/ + +int +dyn_string_eq (ds1, ds2) + dyn_string_t ds1; + dyn_string_t ds2; +{ + /* If DS1 and DS2 have different lengths, they must not be the same. */ + if (ds1->length != ds2->length) + return 0; + else + return !VG_(strcmp) (ds1->s, ds2->s); +} diff --git a/coregrind/demangle/dyn-string.h b/coregrind/demangle/dyn-string.h new file mode 100644 index 000000000..9615cd64e --- /dev/null +++ b/coregrind/demangle/dyn-string.h @@ -0,0 +1,96 @@ +/* An abstract string datatype. + Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. + Contributed by Mark Mitchell (mark@markmitchell.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ +#ifndef __DYN_STRING_H +#define __DYN_STRING_H + + +typedef struct dyn_string +{ + int allocated; /* The amount of space allocated for the string. */ + int length; /* The actual length of the string. */ + char *s; /* The string itself, NUL-terminated. */ +}* dyn_string_t; + +/* The length STR, in bytes, not including the terminating NUL. */ +#define dyn_string_length(STR) \ + ((STR)->length) + +/* The NTBS in which the contents of STR are stored. */ +#define dyn_string_buf(STR) \ + ((STR)->s) + +/* Compare DS1 to DS2 with strcmp. 
*/ +#define dyn_string_compare(DS1, DS2) \ + (VG_(strcmp) ((DS1)->s, (DS2)->s)) + + +/* dyn_string functions are used in the demangling implementation + included in the G++ runtime library. To prevent collisions with + names in user programs, the functions that are used in the + demangler are given implementation-reserved names. */ + +#if 1 /* def IN_LIBGCC2 */ + +#define dyn_string_init VG_(__cxa_dyn_string_init) +#define dyn_string_new VG_(__cxa_dyn_string_new) +#define dyn_string_delete VG_(__cxa_dyn_string_delete) +#define dyn_string_release VG_(__cxa_dyn_string_release) +#define dyn_string_resize VG_(__cxa_dyn_string_resize) +#define dyn_string_clear VG_(__cxa_dyn_string_clear) +#define dyn_string_copy VG_(__cxa_dyn_string_copy) +#define dyn_string_copy_cstr VG_(__cxa_dyn_string_copy_cstr) +#define dyn_string_prepend VG_(__cxa_dyn_string_prepend) +#define dyn_string_prepend_cstr VG_(__cxa_dyn_string_prepend_cstr) +#define dyn_string_insert VG_(__cxa_dyn_string_insert) +#define dyn_string_insert_cstr VG_(__cxa_dyn_string_insert_cstr) +#define dyn_string_insert_char VG_(__cxa_dyn_string_insert_char) +#define dyn_string_append VG_(__cxa_dyn_string_append) +#define dyn_string_append_cstr VG_(__cxa_dyn_string_append_cstr) +#define dyn_string_append_char VG_(__cxa_dyn_string_append_char) +#define dyn_string_substring VG_(__cxa_dyn_string_substring) +#define dyn_string_eq VG_(__cxa_dyn_string_eq) + +#endif /* IN_LIBGCC2 */ + + +extern int dyn_string_init PARAMS ((struct dyn_string *, int)); +extern dyn_string_t dyn_string_new PARAMS ((int)); +extern void dyn_string_delete PARAMS ((dyn_string_t)); +extern char *dyn_string_release PARAMS ((dyn_string_t)); +extern dyn_string_t dyn_string_resize PARAMS ((dyn_string_t, int)); +extern void dyn_string_clear PARAMS ((dyn_string_t)); +extern int dyn_string_copy PARAMS ((dyn_string_t, dyn_string_t)); +extern int dyn_string_copy_cstr PARAMS ((dyn_string_t, const char *)); +extern int dyn_string_prepend PARAMS ((dyn_string_t, 
dyn_string_t)); +extern int dyn_string_prepend_cstr PARAMS ((dyn_string_t, const char *)); +extern int dyn_string_insert PARAMS ((dyn_string_t, int, + dyn_string_t)); +extern int dyn_string_insert_cstr PARAMS ((dyn_string_t, int, + const char *)); +extern int dyn_string_insert_char PARAMS ((dyn_string_t, int, int)); +extern int dyn_string_append PARAMS ((dyn_string_t, dyn_string_t)); +extern int dyn_string_append_cstr PARAMS ((dyn_string_t, const char *)); +extern int dyn_string_append_char PARAMS ((dyn_string_t, int)); +extern int dyn_string_substring PARAMS ((dyn_string_t, + dyn_string_t, int, int)); +extern int dyn_string_eq PARAMS ((dyn_string_t, dyn_string_t)); + +#endif diff --git a/coregrind/demangle/safe-ctype.c b/coregrind/demangle/safe-ctype.c new file mode 100644 index 000000000..0c2be3ed7 --- /dev/null +++ b/coregrind/demangle/safe-ctype.c @@ -0,0 +1,163 @@ +/* replacement macros. + + Copyright (C) 2000 Free Software Foundation, Inc. + Contributed by Zack Weinberg . + +This file is part of the libiberty library. +Libiberty is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public +License as published by the Free Software Foundation; either +version 2 of the License, or (at your option) any later version. + +Libiberty is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with libiberty; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* This is a compatible replacement of the standard C library's + with the following properties: + + - Implements all isxxx() macros required by C99. 
+ - Also implements some character classes useful when + parsing C-like languages. + - Does not change behavior depending on the current locale. + - Behaves properly for all values in the range of a signed or + unsigned char. */ + +#include "ansidecl.h" +#include +#include /* for EOF */ + +/* Shorthand */ +#define bl _sch_isblank +#define cn _sch_iscntrl +#define di _sch_isdigit +#define is _sch_isidst +#define lo _sch_islower +#define nv _sch_isnvsp +#define pn _sch_ispunct +#define pr _sch_isprint +#define sp _sch_isspace +#define up _sch_isupper +#define vs _sch_isvsp +#define xd _sch_isxdigit + +/* Masks. */ +#define L lo|is |pr /* lower case letter */ +#define XL lo|is|xd|pr /* lowercase hex digit */ +#define U up|is |pr /* upper case letter */ +#define XU up|is|xd|pr /* uppercase hex digit */ +#define D di |xd|pr /* decimal digit */ +#define P pn |pr /* punctuation */ +#define _ pn|is |pr /* underscore */ + +#define C cn /* control character */ +#define Z nv |cn /* NUL */ +#define M nv|sp |cn /* cursor movement: \f \v */ +#define V vs|sp |cn /* vertical space: \r \n */ +#define T nv|sp|bl|cn /* tab */ +#define S nv|sp|bl|pr /* space */ + +/* Are we ASCII? */ +#if '\n' == 0x0A && ' ' == 0x20 && '0' == 0x30 \ + && 'A' == 0x41 && 'a' == 0x61 && '!' == 0x21 \ + && EOF == -1 + +const unsigned short _sch_istable[256] = +{ + Z, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */ + C, T, V, M, M, V, C, C, /* BS HT LF VT FF CR SO SI */ + C, C, C, C, C, C, C, C, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */ + C, C, C, C, C, C, C, C, /* CAN EM SUB ESC FS GS RS US */ + S, P, P, P, P, P, P, P, /* SP ! " # $ % & ' */ + P, P, P, P, P, P, P, P, /* ( ) * + , - . / */ + D, D, D, D, D, D, D, D, /* 0 1 2 3 4 5 6 7 */ + D, D, P, P, P, P, P, P, /* 8 9 : ; < = > ? 
*/ + P, XU, XU, XU, XU, XU, XU, U, /* @ A B C D E F G */ + U, U, U, U, U, U, U, U, /* H I J K L M N O */ + U, U, U, U, U, U, U, U, /* P Q R S T U V W */ + U, U, U, P, P, P, P, _, /* X Y Z [ \ ] ^ _ */ + P, XL, XL, XL, XL, XL, XL, L, /* ` a b c d e f g */ + L, L, L, L, L, L, L, L, /* h i j k l m n o */ + L, L, L, L, L, L, L, L, /* p q r s t u v w */ + L, L, L, P, P, P, P, C, /* x y z { | } ~ DEL */ + + /* high half of unsigned char is locale-specific, so all tests are + false in "C" locale */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +const unsigned char _sch_tolower[256] = +{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, + + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + + 91, 92, 93, 94, 95, 96, + + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + + 123,124,125,126,127, + + 128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143, + 144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159, + 160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175, + 176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191, + + 192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207, + 208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223, + 224,225,226,227, 228,229,230,231, 
232,233,234,235, 236,237,238,239, + 240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255, +}; + +const unsigned char _sch_toupper[256] = +{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, + + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + + 91, 92, 93, 94, 95, 96, + + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + + 123,124,125,126,127, + + 128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143, + 144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159, + 160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175, + 176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191, + + 192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207, + 208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223, + 224,225,226,227, 228,229,230,231, 232,233,234,235, 236,237,238,239, + 240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255, +}; + +#else + #error "Unsupported host character set" +#endif /* not ASCII */ diff --git a/coregrind/demangle/safe-ctype.h b/coregrind/demangle/safe-ctype.h new file mode 100644 index 000000000..b2ad8490b --- /dev/null +++ b/coregrind/demangle/safe-ctype.h @@ -0,0 +1,103 @@ +/* replacement macros. + + Copyright (C) 2000, 2001 Free Software Foundation, Inc. + Contributed by Zack Weinberg . + +This file is part of the libiberty library. +Libiberty is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public +License as published by the Free Software Foundation; either +version 2 of the License, or (at your option) any later version. 
+ +Libiberty is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with libiberty; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* This is a compatible replacement of the standard C library's + with the following properties: + + - Implements all isxxx() macros required by C99. + - Also implements some character classes useful when + parsing C-like languages. + - Does not change behavior depending on the current locale. + - Behaves properly for all values in the range of a signed or + unsigned char. + + To avoid conflicts, this header defines the isxxx functions in upper + case, e.g. ISALPHA not isalpha. */ + +#ifndef SAFE_CTYPE_H +#define SAFE_CTYPE_H + +#ifdef isalpha + #error "safe-ctype.h and ctype.h may not be used simultaneously" +#else + +/* Categories. */ + +enum { + /* In C99 */ + _sch_isblank = 0x0001, /* space \t */ + _sch_iscntrl = 0x0002, /* nonprinting characters */ + _sch_isdigit = 0x0004, /* 0-9 */ + _sch_islower = 0x0008, /* a-z */ + _sch_isprint = 0x0010, /* any printing character including ' ' */ + _sch_ispunct = 0x0020, /* all punctuation */ + _sch_isspace = 0x0040, /* space \t \n \r \f \v */ + _sch_isupper = 0x0080, /* A-Z */ + _sch_isxdigit = 0x0100, /* 0-9A-Fa-f */ + + /* Extra categories useful to cpplib. */ + _sch_isidst = 0x0200, /* A-Za-z_ */ + _sch_isvsp = 0x0400, /* \n \r */ + _sch_isnvsp = 0x0800, /* space \t \f \v \0 */ + + /* Combinations of the above. 
*/ + _sch_isalpha = _sch_isupper|_sch_islower, /* A-Za-z */ + _sch_isalnum = _sch_isalpha|_sch_isdigit, /* A-Za-z0-9 */ + _sch_isidnum = _sch_isidst|_sch_isdigit, /* A-Za-z0-9_ */ + _sch_isgraph = _sch_isalnum|_sch_ispunct, /* isprint and not space */ + _sch_iscppsp = _sch_isvsp|_sch_isnvsp, /* isspace + \0 */ + _sch_isbasic = _sch_isprint|_sch_iscppsp /* basic charset of ISO C + (plus ` and @) */ +}; + +/* Character classification. */ +extern const unsigned short _sch_istable[256]; + +#define _sch_test(c, bit) (_sch_istable[(c) & 0xff] & (unsigned short)(bit)) + +#define ISALPHA(c) _sch_test(c, _sch_isalpha) +#define ISALNUM(c) _sch_test(c, _sch_isalnum) +#define ISBLANK(c) _sch_test(c, _sch_isblank) +#define ISCNTRL(c) _sch_test(c, _sch_iscntrl) +#define ISDIGIT(c) _sch_test(c, _sch_isdigit) +#define ISGRAPH(c) _sch_test(c, _sch_isgraph) +#define ISLOWER(c) _sch_test(c, _sch_islower) +#define ISPRINT(c) _sch_test(c, _sch_isprint) +#define ISPUNCT(c) _sch_test(c, _sch_ispunct) +#define ISSPACE(c) _sch_test(c, _sch_isspace) +#define ISUPPER(c) _sch_test(c, _sch_isupper) +#define ISXDIGIT(c) _sch_test(c, _sch_isxdigit) + +#define ISIDNUM(c) _sch_test(c, _sch_isidnum) +#define ISIDST(c) _sch_test(c, _sch_isidst) +#define IS_ISOBASIC(c) _sch_test(c, _sch_isbasic) +#define IS_VSPACE(c) _sch_test(c, _sch_isvsp) +#define IS_NVSPACE(c) _sch_test(c, _sch_isnvsp) +#define IS_SPACE_OR_NUL(c) _sch_test(c, _sch_iscppsp) + +/* Character transformation. 
*/ +extern const unsigned char _sch_toupper[256]; +extern const unsigned char _sch_tolower[256]; +#define TOUPPER(c) _sch_toupper[(c) & 0xff] +#define TOLOWER(c) _sch_tolower[(c) & 0xff] + +#endif /* no ctype.h */ +#endif /* SAFE_CTYPE_H */ diff --git a/coregrind/docs/Makefile.am b/coregrind/docs/Makefile.am new file mode 100644 index 000000000..e8a58fa18 --- /dev/null +++ b/coregrind/docs/Makefile.am @@ -0,0 +1,5 @@ +docdir = $(datadir)/doc/valgrind + +doc_DATA = index.html manual.html nav.html techdocs.html + +EXTRA_DIST = $(doc_DATA) diff --git a/coregrind/docs/index.html b/coregrind/docs/index.html new file mode 100644 index 000000000..111170256 --- /dev/null +++ b/coregrind/docs/index.html @@ -0,0 +1,26 @@ + + + + + + + + + + + Valgrind's user manual + + + + + + + <body> + <p>This page uses frames, but your browser doesn't support them.</p> + </body> + + + + diff --git a/coregrind/docs/manual.html b/coregrind/docs/manual.html new file mode 100644 index 000000000..1bcd02a81 --- /dev/null +++ b/coregrind/docs/manual.html @@ -0,0 +1,1753 @@ + + + + + + + +  +

Valgrind, snapshot 20020317

+ +
+jseward@acm.org
+
http://www.muraroa.demon.co.uk
+Copyright © 2000-2002 Julian Seward +

+Valgrind is licensed under the GNU General Public License, +version 2
+An open-source tool for finding memory-management problems in +Linux-x86 executables. +

+ +

+ +


+ +

Contents of this manual

+ +

Introduction

+ 1.1  What Valgrind is for
+ 1.2  What it does with your program + +

How to use it, and how to make sense + of the results

+ 2.1  Getting started
+ 2.2  The commentary
+ 2.3  Reporting of errors
+ 2.4  Suppressing errors
+ 2.5  Command-line flags
+ 2.6  Explanation of error messages
+ 2.7  Writing suppressions files
+ 2.8  Building and installing
+ 2.9  If you have problems
+ +

Details of the checking machinery

+ 3.1  Valid-value (V) bits
+ 3.2  Valid-address (A) bits
+ 3.3  Putting it all together
+ 3.4  Signals
+ 3.5  Memory leak detection
+ +

Limitations

+ +

How it works -- a rough overview

+ 5.1  Getting started
+ 5.2  The translation/instrumentation engine
+ 5.3  Tracking the status of memory
+ 5.4  System calls
+ 5.5  Signals
+ +

An example

+ +

The design and implementation of Valgrind

+ +
+ + +

1  Introduction

+ + +

1.1  What Valgrind is for

+ +Valgrind is a tool to help you find memory-management problems in your +programs. When a program is run under Valgrind's supervision, all +reads and writes of memory are checked, and calls to +malloc/new/free/delete are intercepted. As a result, Valgrind can +detect problems such as: +
    +
  • Use of uninitialised memory
  • +
  • Reading/writing memory after it has been free'd
  • +
  • Reading/writing off the end of malloc'd blocks
  • +
  • Reading/writing inappropriate areas on the stack
  • +
  • Memory leaks -- where pointers to malloc'd blocks are lost forever
  • +
+ +Problems like these can be difficult to find by other means, often +lying undetected for long periods, then causing occasional, +difficult-to-diagnose crashes. + +

+Valgrind is closely tied to details of the CPU, operating system and +to a lesser extent, compiler and basic C libraries. This makes it +difficult to make it portable, so I have chosen at the outset to +concentrate on what I believe to be a widely used platform: Red Hat +Linux 7.2, on x86s. I believe that it will work without significant +difficulty on other x86 GNU/Linux systems which use the 2.4 kernel and +GNU libc 2.2.X, for example SuSE 7.1 and Mandrake 8.0. Red Hat 6.2 is +also supported. It has worked in the past, and probably still does, +on RedHat 7.1 and 6.2. Note that I haven't compiled it on RedHat 7.1 +and 6.2 for a while, so they may no longer work now. +

+(Early Feb 02: after feedback from the KDE people it also works better +on other Linuxes). +

+At some point in the past, Valgrind has also worked on Red Hat 6.2 +(x86), thanks to the efforts of Rob Noble. + +

+Valgrind is licensed under the GNU General Public License, version +2. Read the file LICENSE in the source distribution for details. + + +

1.2  What it does with your program

+ +Valgrind is designed to be as non-intrusive as possible. It works +directly with existing executables. You don't need to recompile, +relink, or otherwise modify, the program to be checked. Simply place +the word valgrind at the start of the command line +normally used to run the program. So, for example, if you want to run +the command ls -l on Valgrind, simply issue the +command: valgrind ls -l. + +

Valgrind takes control of your program before it starts. Debugging +information is read from the executable and associated libraries, so +that error messages can be phrased in terms of source code +locations. Your program is then run on a synthetic x86 CPU which +checks every memory access. All detected errors are written to a +log. When the program finishes, Valgrind searches for and reports on +leaked memory. + +

You can run pretty much any dynamically linked ELF x86 executable using +Valgrind. Programs run 25 to 50 times slower, and take a lot more +memory, than they usually would. It works well enough to run large +programs. For example, the Konqueror web browser from the KDE Desktop +Environment, version 2.1.1, runs slowly but usably on Valgrind. + +

Valgrind simulates every single instruction your program executes. +Because of this, it finds errors not only in your application but also +in all supporting dynamically-linked (.so-format) libraries, including +the GNU C library, the X client libraries, Qt, if you work with KDE, and +so on. That often includes libraries, for example the GNU C library, +which contain memory access violations, but which you cannot or do not +want to fix. + +

Rather than swamping you with errors in which you are not +interested, Valgrind allows you to selectively suppress errors, by +recording them in a suppressions file which is read when Valgrind +starts up. As supplied, Valgrind comes with a suppressions file +designed to give reasonable behaviour on Red Hat 7.2 (also 7.1 and +6.2) when running text-only and simple X applications. + +

Section 6 shows an example of use. +

+


+ + +

2  How to use it, and how to make sense of the results

+ + +

2.1  Getting started

+ +First off, consider whether it might be beneficial to recompile your +application and supporting libraries with optimisation disabled and +debugging info enabled (the -g flag). You don't have to +do this, but doing so helps Valgrind produce more accurate and less +confusing error reports. Chances are you're set up like this already, +if you intended to debug your program with GNU gdb, or some other +debugger. + +

Then just run your application, but place the word +valgrind in front of your usual command-line invocation. +Note that you should run the real (machine-code) executable here. If +your application is started by, for example, a shell or perl script, +you'll need to modify it to invoke Valgrind on the real executables. +Running such scripts directly under Valgrind will result in you +getting error reports pertaining to /bin/sh, +/usr/bin/perl, or whatever interpreter you're using. +This almost certainly isn't what you want and can be hugely confusing. + +

2.2  The commentary

+ +Valgrind writes a commentary, detailing error reports and other +significant events. The commentary goes to standard output by +default. This may interfere with your program, so you can ask for it +to be directed elsewhere. + +

All lines in the commentary are of the following form:
+

+  ==12345== some-message-from-Valgrind
+
+

The 12345 is the process ID. This scheme makes it easy +to distinguish program output from Valgrind commentary, and also easy +to differentiate commentaries from different processes which have +become merged together, for whatever reason. + +

By default, Valgrind writes only essential messages to the commentary, +so as to avoid flooding you with information of secondary importance. +If you want more information about what is happening, re-run, passing +the -v flag to Valgrind. + + + +

2.3  Reporting of errors

+ +When Valgrind detects something bad happening in the program, an error +message is written to the commentary. For example:
+
+  ==25832== Invalid read of size 4
+  ==25832==    at 0x8048724: BandMatrix::ReSize(int, int, int) (bogon.cpp:45)
+  ==25832==    by 0x80487AF: main (bogon.cpp:66)
+  ==25832==    by 0x40371E5E: __libc_start_main (libc-start.c:129)
+  ==25832==    by 0x80485D1: (within /home/sewardj/newmat10/bogon)
+  ==25832==    Address 0xBFFFF74C is not stack'd, malloc'd or free'd
+
+ +

This message says that the program did an illegal 4-byte read of +address 0xBFFFF74C, which, as far as it can tell, is not a valid stack +address, nor corresponds to any currently malloc'd or free'd blocks. +The read is happening at line 45 of bogon.cpp, called +from line 66 of the same file, etc. For errors associated with an +identified malloc'd/free'd block, for example reading free'd memory, +Valgrind reports not only the location where the error happened, but +also where the associated block was malloc'd/free'd. + +

Valgrind remembers all error reports. When an error is detected, +it is compared against old reports, to see if it is a duplicate. If +so, the error is noted, but no further commentary is emitted. This +avoids you being swamped with bazillions of duplicate error reports. + +

If you want to know how many times each error occurred, run with +the -v option. When execution finishes, all the reports +are printed out, along with, and sorted by, their occurrence counts. +This makes it easy to see which errors have occurred most frequently. + +

Errors are reported before the associated operation actually +happens. For example, if your program decides to read from address +zero, Valgrind will emit a message to this effect, and the program +will then duly die with a segmentation fault. + +

In general, you should try and fix errors in the order that they +are reported. Not doing so can be confusing. For example, a program +which copies uninitialised values to several memory locations, and +later uses them, will generate several error messages. The first such +error message may well give the most direct clue to the root cause of +the problem. + + +

2.4  Suppressing errors

+ +Valgrind detects numerous problems in the base libraries, such as the +GNU C library, and the XFree86 client libraries, which come +pre-installed on your GNU/Linux system. You can't easily fix these, +but you don't want to see these errors (and yes, there are many!) So +Valgrind reads a list of errors to suppress at startup. By default +this file is redhat72.supp, located in the Valgrind +installation directory. + +

You can modify and add to the suppressions file at your leisure, or +write your own. Multiple suppression files are allowed. This is +useful if part of your project contains errors you can't or don't want +to fix, yet you don't want to continuously be reminded of them. + +

Each error to be suppressed is described very specifically, to +minimise the possibility that a suppression-directive inadvertently +suppresses a bunch of similar errors which you did want to see. The +suppression mechanism is designed to allow precise yet flexible +specification of errors to suppress. + +

If you use the -v flag, at the end of execution, Valgrind +prints out one line for each used suppression, giving its name and the +number of times it got used. Here's the suppressions used by a run of +ls -l: +

+  --27579-- supp: 1 socketcall.connect(serv_addr)/__libc_connect/__nscd_getgrgid_r
+  --27579-- supp: 1 socketcall.connect(serv_addr)/__libc_connect/__nscd_getpwuid_r
+  --27579-- supp: 6 strrchr/_dl_map_object_from_fd/_dl_map_object
+
+ + +

2.5  Command-line flags

+ +You invoke Valgrind like this: +
+  valgrind [options-for-Valgrind] your-prog [options for your-prog]
+
+ +

Valgrind's default settings succeed in giving reasonable behaviour +in most cases. Available options, in no particular order, are as +follows: +

    +
  • --help

  • + +
  • --version
    +

    The usual deal.


  • + +

  • -v --verbose
    +

    Be more verbose. Gives extra information on various aspects + of your program, such as: the shared objects loaded, the + suppressions used, the progress of the instrumentation engine, + and warnings about unusual behaviour. +


  • + +

  • -q --quiet
    +

    Run silently, and only print error messages. Useful if you + are running regression tests or have some other automated test + machinery. +


  • + +

  • --demangle=no
    + --demangle=yes [the default] +

    Disable/enable automatic demangling (decoding) of C++ names. + Enabled by default. When enabled, Valgrind will attempt to + translate encoded C++ procedure names back to something + approaching the original. The demangler handles symbols mangled + by g++ versions 2.X and 3.X. + +

    An important fact about demangling is that function + names mentioned in suppressions files should be in their mangled + form. Valgrind does not demangle function names when searching + for applicable suppressions, because to do otherwise would make + suppressions file contents dependent on the state of Valgrind's + demangling machinery, and would also be slow and pointless. +


  • + +

  • --num-callers=<number> [default=4]
    +

    By default, Valgrind shows four levels of function call names + to help you identify program locations. You can change that + number with this option. This can help in determining the + program's location in deeply-nested call chains. Note that errors + are commoned up using only the top three function locations (the + place in the current function, and that of its two immediate + callers). So this doesn't affect the total number of errors + reported. +

    + The maximum value for this is 50. Note that higher settings + will make Valgrind run a bit more slowly and take a bit more + memory, but can be useful when working with programs with + deeply-nested call chains. +


  • + +

  • --gdb-attach=no [the default]
    + --gdb-attach=yes +

    When enabled, Valgrind will pause after every error shown, + and print the line +
    + ---- Attach to GDB ? --- [Return/N/n/Y/y/C/c] ---- +

    + Pressing Ret, or N Ret + or n Ret, causes Valgrind not to + start GDB for this error. +

    + Y Ret + or y Ret causes Valgrind to + start GDB, for the program at this point. When you have + finished with GDB, quit from it, and the program will continue. + Trying to continue from inside GDB doesn't work. +

    + C Ret + or c Ret causes Valgrind not to + start GDB, and not to ask again. +

    + --gdb-attach=yes conflicts with + --trace-children=yes. You can't use them + together. Valgrind refuses to start up in this situation. +


  • + +

  • --partial-loads-ok=yes [the default]
    + --partial-loads-ok=no +

    Controls how Valgrind handles word (4-byte) loads from + addresses for which some bytes are addressable and others + are not. When yes (the default), such loads + do not elicit an address error. Instead, the loaded V bytes + corresponding to the illegal addresses indicate undefined, and + those corresponding to legal addresses are loaded from shadow + memory, as usual.

    + When no, loads from partially + invalid addresses are treated the same as loads from completely + invalid addresses: an illegal-address error is issued, + and the resulting V bytes indicate valid data. +


  • + +

  • --sloppy-malloc=no [the default]
    + --sloppy-malloc=yes +

    When enabled, all requests for malloc/calloc are rounded up + to a whole number of machine words -- in other words, made + divisible by 4. For example, a request for 17 bytes of space + would result in a 20-byte area being made available. This works + around bugs in sloppy libraries which assume that they can + safely rely on malloc/calloc requests being rounded up in this + fashion. Without the workaround, these libraries tend to + generate large numbers of errors when they access the ends of + these areas. Valgrind snapshots dated 17 Feb 2002 and later are + cleverer about this problem, and you should no longer need to + use this flag. +


  • + +

  • --trace-children=no [the default]
    + --trace-children=yes +

    When enabled, Valgrind will trace into child processes. This + is confusing and usually not what you want, so is disabled by + default.


  • + +

  • --freelist-vol=<number> [default: 1000000] +

    When the client program releases memory using free (in C) or + delete (C++), that memory is not immediately made available for + re-allocation. Instead it is marked inaccessible and placed in + a queue of freed blocks. The purpose is to delay the point at + which freed-up memory comes back into circulation. This + increases the chance that Valgrind will be able to detect + invalid accesses to blocks for some significant period of time + after they have been freed. +

    + This flag specifies the maximum total size, in bytes, of the + blocks in the queue. The default value is one million bytes. + Increasing this increases the total amount of memory used by + Valgrind but may detect invalid uses of freed blocks which would + otherwise go undetected.


  • + +

  • --logfile-fd=<number> [default: 2, stderr] +

    Specifies the file descriptor on which Valgrind communicates + all of its messages. The default, 2, is the standard error + channel. This may interfere with the client's own use of + stderr. To dump Valgrind's commentary in a file without using + stderr, something like the following works well (sh/bash + syntax):
    +    + valgrind --logfile-fd=9 my_prog 9> logfile
    + That is: tell Valgrind to send all output to file descriptor 9, + and ask the shell to route file descriptor 9 to "logfile". +


  • + +

  • --suppressions=<filename> [default: + /installation/directory/redhat72.supp]

    Specifies an extra + file from which to read descriptions of errors to suppress. You + may use as many extra suppressions files as you + like.


  • + +

  • --leak-check=no [default]
    + --leak-check=yes +

    When enabled, search for memory leaks when the client program + finishes. A memory leak means a malloc'd block, which has not + yet been free'd, but to which no pointer can be found. Such a + block can never be free'd by the program, since no pointer to it + exists. Leak checking is disabled by default + because it tends to generate dozens of error messages. +


  • + +

  • --show-reachable=no [default]
    + --show-reachable=yes

    When disabled, the memory + leak detector only shows blocks for which it cannot find a + pointer to at all, or it can only find a pointer to the middle + of. These blocks are prime candidates for memory leaks. When + enabled, the leak detector also reports on blocks which it could + find a pointer to. Your program could, at least in principle, + have freed such blocks before exit. Contrast this to blocks for + which no pointer, or only an interior pointer could be found: + they are more likely to indicate memory leaks, because + you do not actually have a pointer to the start of the block + which you can hand to free(), even if you wanted to. +


  • + +

  • --leak-resolution=low [default]
    + --leak-resolution=med
    + --leak-resolution=high +

    When doing leak checking, determines how willing Valgrind is + to consider different backtraces the same. When set to + low, the default, only the first two entries need + match. When med, four entries have to match. When + high, all entries need to match. +

    + For hardcore leak debugging, you probably want to use + --leak-resolution=high together with + --num-callers=40 or some such large number. Note + however that this can give an overwhelming amount of + information, which is why the defaults are 4 callers and + low-resolution matching. +

    + Note that the --leak-resolution= setting does not + affect Valgrind's ability to find leaks. It only changes how + the results are presented to you. +


  • + +

  • --workaround-gcc296-bugs=no [default]
    + --workaround-gcc296-bugs=yes

    When enabled, + assume that reads and writes some small distance below the stack + pointer %esp are due to bugs in gcc 2.96, and does + not report them. The "small distance" is 256 bytes by default. + Note that gcc 2.96 is the default compiler on some popular Linux + distributions (RedHat 7.X, Mandrake) and so you may well need to + use this flag. Do not use it if you do not have to, as it can + cause real errors to be overlooked. A better option is to use a + gcc/g++ which works properly; 2.95.3 seems to be a good choice. +

    + Unfortunately (27 Feb 02) it looks like g++ 3.0.4 is similarly + buggy, so you may need to issue this flag if you use 3.0.4. +


  • + +

  • --client-perms=no [default]
    + --client-perms=yes

    An experimental feature. +

    + When enabled, and when --instrument=yes (which is + the default), Valgrind honours client directives to set and + query address range permissions. This allows the client program + to tell Valgrind about changes in memory range permissions that + Valgrind would not otherwise know about, and so allows clients + to get Valgrind to do arbitrary custom checks. +

    + Clients need to include the header file valgrind.h + to make this work. The macros therein have the magical property + that they generate code in-line which Valgrind can spot. + However, the code does nothing when not run on Valgrind, so you + are not forced to run your program on Valgrind just because you + use the macros in this file. +

    + A brief description of the available macros: +

      +
    • VALGRIND_MAKE_NOACCESS, + VALGRIND_MAKE_WRITABLE and + VALGRIND_MAKE_READABLE. These mark address + ranges as completely inaccessible, accessible but containing + undefined data, and accessible and containing defined data, + respectively. Subsequent errors may have their faulting + addresses described in terms of these blocks. Returns a + "block handle". +

      +

    • VALGRIND_DISCARD: At some point you may want + Valgrind to stop reporting errors in terms of the blocks + defined by the previous three macros. To do this, the above + macros return a small-integer "block handle". You can pass + this block handle to VALGRIND_DISCARD. After + doing so, Valgrind will no longer be able to relate + addressing errors to the user-defined block associated with + the handle. The permissions settings associated with the + handle remain in place; this just affects how errors are + reported, not whether they are reported. Returns 1 for an + invalid handle and 0 for a valid handle (although passing + invalid handles is harmless). +

      +

    • VALGRIND_CHECK_NOACCESS, + VALGRIND_CHECK_WRITABLE and + VALGRIND_CHECK_READABLE: check immediately + whether or not the given address range has the relevant + property, and if not, print an error message. Also, for the + convenience of the client, returns zero if the relevant + property holds; otherwise, the returned value is the address + of the first byte for which the property is not true. +

      +

    • VALGRIND_CHECK_NOACCESS: a quick and easy way + to find out whether Valgrind thinks a particular variable + (lvalue, to be precise) is addressable and defined. Prints + an error message if not. Returns no value. +

      +

    • VALGRIND_MAKE_NOACCESS_STACK: a highly + experimental feature. Similarly to + VALGRIND_MAKE_NOACCESS, this marks an address + range as inaccessible, so that subsequent accesses to an + address in the range gives an error. However, this macro + does not return a block handle. Instead, all annotations + created like this are reviewed at each client + ret (subroutine return) instruction, and those + which now define an address range block the client's stack + pointer register (%esp) are automatically + deleted. +

      + In other words, this macro allows the client to tell + Valgrind about red-zones on its own stack. Valgrind + automatically discards this information when the stack + retreats past such blocks. Beware: hacky and flaky. +

    +
  • +

    + As of 17 March 02 (the time of writing this), there is a small + problem with all of these macros, which is that I haven't + figured out how to make them produce sensible (always-succeeds) + return values when the client is run on the real CPU or on + Valgrind without --client-perms=yes. So if you + write client code which depends on the return values, be aware + that it may misbehave when not run with full Valgrindification. + If you always ignore the return values you should always be + safe. I plan to fix this. +

+ +There are also some options for debugging Valgrind itself. You +shouldn't need to use them in the normal run of things. Nevertheless: + +
    + +
  • --single-step=no [default]
    + --single-step=yes +

    When enabled, each x86 insn is translated separately into + instrumented code. When disabled, translation is done on a + per-basic-block basis, giving much better translations.


  • +

    + +

  • --optimise=no
    + --optimise=yes [default] +

    When enabled, various improvements are applied to the + intermediate code, mainly aimed at allowing the simulated CPU's + registers to be cached in the real CPU's registers over several + simulated instructions.


  • +

    + +

  • --instrument=no
    + --instrument=yes [default] +

    When disabled, the translations don't actually contain any + instrumentation.


  • +

    + +

  • --cleanup=no
    + --cleanup=yes [default] +

    When enabled, various improvements are applied to the + post-instrumented intermediate code, aimed at removing redundant + value checks.


  • +

    + +

  • --trace-syscalls=no [default]
    + --trace-syscalls=yes +

    Enable/disable tracing of system call intercepts.


  • +

    + +

  • --trace-signals=no [default]
    + --trace-signals=yes +

    Enable/disable tracing of signal handling.


  • +

    + +

  • --trace-symtab=no [default]
    + --trace-symtab=yes +

    Enable/disable tracing of symbol table reading.


  • +

    + +

  • --trace-malloc=no [default]
    + --trace-malloc=yes +

    Enable/disable tracing of malloc/free (et al) intercepts. +


  • +

    + +

  • --stop-after=<number> + [default: infinity, more or less] +

    After <number> basic blocks have been executed, shut down + Valgrind and switch back to running the client on the real CPU. +


  • +

    + +

  • --dump-error=<number> + [default: inactive] +

    After the program has exited, show gory details of the + translation of the basic block containing the <number>'th + error context. When used with --single-step=yes, + can show the + exact x86 instruction causing an error.


  • +

    + +

  • --smc-check=none
    + --smc-check=some [default]
    + --smc-check=all +

    How carefully should Valgrind check for self-modifying code + writes, so that translations can be discarded?  When + "none", no writes are checked. When "some", only writes + resulting from moves from integer registers to memory are + checked. When "all", all memory writes are checked, even those + with which no sane program would generate code -- for + example, floating-point writes.

  • +
+ + + +

2.6  Explanation of error messages

+ +Despite considerable sophistication under the hood, Valgrind can only +really detect two kinds of errors, use of illegal addresses, and use +of undefined values. Nevertheless, this is enough to help you +discover all sorts of memory-management nasties in your code. This +section presents a quick summary of what error messages mean. The +precise behaviour of the error-checking machinery is described in +
Section 4. + + +

2.6.1  Illegal read / Illegal write errors

+For example: +
+  ==30975== Invalid read of size 4
+  ==30975==    at 0x40F6BBCC: (within /usr/lib/libpng.so.2.1.0.9)
+  ==30975==    by 0x40F6B804: (within /usr/lib/libpng.so.2.1.0.9)
+  ==30975==    by 0x40B07FF4: read_png_image__FP8QImageIO (kernel/qpngio.cpp:326)
+  ==30975==    by 0x40AC751B: QImageIO::read() (kernel/qimage.cpp:3621)
+  ==30975==    Address 0xBFFFF0E0 is not stack'd, malloc'd or free'd
+
+ +

This happens when your program reads or writes memory at a place +which Valgrind reckons it shouldn't. In this example, the program did +a 4-byte read at address 0xBFFFF0E0, somewhere within the +system-supplied library libpng.so.2.1.0.9, which was called from +somewhere else in the same library, called from line 326 of +qpngio.cpp, and so on. + +

Valgrind tries to establish what the illegal address might relate +to, since that's often useful. So, if it points into a block of +memory which has already been freed, you'll be informed of this, and +also where the block was free'd. Likewise, if it should turn out +to be just off the end of a malloc'd block, a common result of +off-by-one errors in array subscripting, you'll be informed of this +fact, and also where the block was malloc'd. + +

In this example, Valgrind can't identify the address. Actually the +address is on the stack, but, for some reason, this is not a valid +stack address -- it is below the stack pointer, %esp, and that isn't +allowed. + +

Note that Valgrind only tells you that your program is about to +access memory at an illegal address. It can't stop the access from +happening. So, if your program makes an access which normally would +result in a segmentation fault, your program will still suffer the same +fate -- but you will get a message from Valgrind immediately prior to +this. In this particular example, reading junk on the stack is +non-fatal, and the program stays alive. + + +

2.6.2  Use of uninitialised values

+For example: +
+  ==19146== Use of uninitialised CPU condition code
+  ==19146==    at 0x402DFA94: _IO_vfprintf (_itoa.h:49)
+  ==19146==    by 0x402E8476: _IO_printf (printf.c:36)
+  ==19146==    by 0x8048472: main (tests/manuel1.c:8)
+  ==19146==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+
+ +

An uninitialised-value use error is reported when your program uses +a value which hasn't been initialised -- in other words, is undefined. +Here, the undefined value is used somewhere inside the printf() +machinery of the C library. This error was reported when running the +following small program: +

+  int main()
+  {
+    int x;
+    printf ("x = %d\n", x);
+  }
+
+ +

It is important to understand that your program can copy around +junk (uninitialised) data to its heart's content. Valgrind observes +this and keeps track of the data, but does not complain. A complaint +is issued only when your program attempts to make use of uninitialised +data. In this example, x is uninitialised. Valgrind observes the +value being passed to _IO_printf and thence to +_IO_vfprintf, but makes no comment. However, +_IO_vfprintf has to examine the value of x +so it can turn it into the corresponding ASCII string, and it is at +this point that Valgrind complains. + +

Sources of uninitialised data tend to be: +

    +
  • Local variables in procedures which have not been initialised, + as in the example above.

  • + +

  • The contents of malloc'd blocks, before you write something + there. In C++, the new operator is a wrapper round malloc, so + if you create an object with new, its fields will be + uninitialised until you fill them in, which is only Right and + Proper.
  • +
+ + + +

2.6.3  Illegal frees

+For example: +
+  ==7593== Invalid free()
+  ==7593==    at 0x4004FFDF: free (ut_clientmalloc.c:577)
+  ==7593==    by 0x80484C7: main (tests/doublefree.c:10)
+  ==7593==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+  ==7593==    by 0x80483B1: (within tests/doublefree)
+  ==7593==    Address 0x3807F7B4 is 0 bytes inside a block of size 177 free'd
+  ==7593==    at 0x4004FFDF: free (ut_clientmalloc.c:577)
+  ==7593==    by 0x80484C7: main (tests/doublefree.c:10)
+  ==7593==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+  ==7593==    by 0x80483B1: (within tests/doublefree)
+
+

Valgrind keeps track of the blocks allocated by your program with +malloc/new, so it can know exactly whether or not the argument to +free/delete is legitimate. Here, this test program has +freed the same block twice. As with the illegal read/write errors, +Valgrind attempts to make sense of the address free'd. If, as +here, the address is one which has previously been freed, you will +be told that -- making duplicate frees of the same block easy to spot. + + +

2.6.4  Passing system call parameters with inadequate +read/write permissions

+ +Valgrind checks all parameters to system calls. If a system call +needs to read from a buffer provided by your program, Valgrind checks +that the entire buffer is addressible and has valid data, ie, it is +readable. And if the system call needs to write to a user-supplied +buffer, Valgrind checks that the buffer is addressible. After the +system call, Valgrind updates its administrative information to +precisely reflect any changes in memory permissions caused by the +system call. + +

Here's an example of a system call with an invalid parameter: +

+  #include <stdlib.h>
+  #include <unistd.h>
+  int main( void )
+  {
+    char* arr = malloc(10);
+    (void) write( 1 /* stdout */, arr, 10 );
+    return 0;
+  }
+
+ +

You get this complaint ... +

+  ==8230== Syscall param write(buf) lacks read permissions
+  ==8230==    at 0x4035E072: __libc_write
+  ==8230==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+  ==8230==    by 0x80483B1: (within tests/badwrite)
+  ==8230==    by <bogus frame pointer> ???
+  ==8230==    Address 0x3807E6D0 is 0 bytes inside a block of size 10 alloc'd
+  ==8230==    at 0x4004FEE6: malloc (ut_clientmalloc.c:539)
+  ==8230==    by 0x80484A0: main (tests/badwrite.c:6)
+  ==8230==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+  ==8230==    by 0x80483B1: (within tests/badwrite)
+
+ +

... because the program has tried to write uninitialised junk from +the malloc'd block to the standard output. + + +

2.6.5  Warning messages you might see

+ +Most of these only appear if you run in verbose mode (enabled by +-v): +
    +
  • More than 50 errors detected. Subsequent errors + will still be recorded, but in less detail than before. +
    + After 50 different errors have been shown, Valgrind becomes + more conservative about collecting them. It then requires only + the program counters in the top two stack frames to match when + deciding whether or not two errors are really the same one. + Prior to this point, the PCs in the top four frames are required + to match. This hack has the effect of slowing down the + appearance of new errors after the first 50. The 50 constant can + be changed by recompiling Valgrind. +

    +

  • More than 500 errors detected. I'm not reporting any more. + Final error counts may be inaccurate. Go fix your + program! +
    + After 500 different errors have been detected, Valgrind ignores + any more. It seems unlikely that collecting even more different + ones would be of practical help to anybody, and it avoids the + danger that Valgrind spends more and more of its time comparing + new errors against an ever-growing collection. As above, the 500 + number is a compile-time constant. +

    +

  • Warning: client exiting by calling exit(<number>). + Bye! +
    + Your program has called the exit system call, which + will immediately terminate the process. You'll get no exit-time + error summaries or leak checks. Note that this is not the same + as your program calling the ANSI C function exit() + -- that causes a normal, controlled shutdown of Valgrind. +

    +

  • Warning: client switching stacks? +
    + Valgrind spotted such a large change in the stack pointer, %esp, + that it guesses the client is switching to a different stack. + At this point it makes a kludgey guess where the base of the new + stack is, and sets memory permissions accordingly. You may get + many bogus error messages following this, if Valgrind guesses + wrong. At the moment "large change" is defined as a change of + more than 2000000 in the value of the %esp (stack pointer) + register. +

    +

  • Warning: client attempted to close Valgrind's logfile fd <number> + +
    + Valgrind doesn't allow the client + to close the logfile, because you'd never see any diagnostic + information after that point. If you see this message, + you may want to use the --logfile-fd=<number> + option to specify a different logfile file-descriptor number. +

    +

  • Warning: noted but unhandled ioctl <number> +
    + Valgrind observed a call to one of the vast family of + ioctl system calls, but did not modify its + memory status info (because I have not yet got round to it). + The call will still have gone through, but you may get spurious + errors after this as a result of the non-update of the memory info. +

    +

  • Warning: unblocking signal <number> due to + sigprocmask +
    + Really just a diagnostic from the signal simulation machinery. + This message will appear if your program handles a signal by + first longjmping out of the signal handler, + and then unblocking the signal with sigprocmask + -- a standard signal-handling idiom. +

    +

  • Warning: bad signal number <number> in __NR_sigaction. +
    + Probably indicates a bug in the signal simulation machinery. +

    +

  • Warning: set address range perms: large range <number> +
    + Diagnostic message, mostly for my benefit, to do with memory + permissions. +
+ + + +

2.7  Writing suppressions files

+ +A suppression file describes a bunch of errors which, for one reason +or another, you don't want Valgrind to tell you about. Usually the +reason is that the system libraries are buggy but unfixable, at least +within the scope of the current debugging session. Multiple +suppressions files are allowed. By default, Valgrind uses +linux24.supp in the directory where it is installed. + +

+You can ask to add suppressions from another file, by specifying +--suppressions=/path/to/file.supp. + +

Each suppression has the following components:
+

    + +
  • Its name. This merely gives a handy name to the suppression, by + which it is referred to in the summary of used suppressions + printed out when a program finishes. It's not important what + the name is; any identifying string will do. +

    + +

  • The nature of the error to suppress. Either: + Value1, + Value2, + Value4, + Value8 or + Value0, + meaning an uninitialised-value error when + using a value of 1, 2, 4 or 8 bytes, + or the CPU's condition codes, respectively. Or: + Addr1, + Addr2, + Addr4 or + Addr8, meaning an invalid address during a + memory access of 1, 2, 4 or 8 bytes respectively. Or + Param, + meaning an invalid system call parameter error. Or + Free, meaning an invalid or mismatching free.

  • +

    + +

  • The "immediate location" specification. For Value and Addr + errors, is either the name of the function in which the error + occurred, or, failing that, the full path to the .so file + containing the error location. For Param errors, is the name of + the offending system call parameter. For Free errors, is the + name of the function doing the freeing (eg, free, + __builtin_vec_delete, etc)

  • +

    + +

  • The caller of the above "immediate location". Again, either a + function or shared-object name.

  • +

    + +

  • Optionally, one or two extra calling-function or object names, + for greater precision.
  • +
+ +

+Locations may be either names of shared objects or wildcards matching +function names. They begin obj: and fun: +respectively. Function and object names to match against may use the +wildcard characters * and ?. + +A suppression only suppresses an error when the error matches all the +details in the suppression. Here's an example: +

+  {
+    __gconv_transform_ascii_internal/__mbrtowc/mbtowc
+    Value4
+    fun:__gconv_transform_ascii_internal
+    fun:__mbr*toc
+    fun:mbtowc
+  }
+
+ +

What it means is: suppress a use-of-uninitialised-value error, when +the data size is 4, when it occurs in the function +__gconv_transform_ascii_internal, when that is called +from any function of name matching __mbr*toc, +when that is called from +mbtowc. It doesn't apply under any other circumstances. +The string by which this suppression is identified to the user is +__gconv_transform_ascii_internal/__mbrtowc/mbtowc. + +

Another example: +

+  {
+    libX11.so.6.2/libX11.so.6.2/libXaw.so.7.0
+    Value4
+    obj:/usr/X11R6/lib/libX11.so.6.2
+    obj:/usr/X11R6/lib/libX11.so.6.2
+    obj:/usr/X11R6/lib/libXaw.so.7.0
+  }
+
+ +

Suppress any size 4 uninitialised-value error which occurs anywhere +in libX11.so.6.2, when called from anywhere in the same +library, when called from anywhere in libXaw.so.7.0. The +inexact specification of locations is regrettable, but is about all +you can hope for, given that the X11 libraries shipped with Red Hat +7.2 have had their symbol tables removed. + +

Note -- since the above two examples did not make it clear -- that +you can freely mix the obj: and fun: +styles of description within a single suppression record. + + + +

2.8  Building and installing

+At the moment, very rudimentary. + +

The tarball is set up for a standard Red Hat 7.1 (6.2) machine. To +build, just do "make". No configure script, no autoconf, no nothing. + +

The files needed for installation are: valgrind.so, valgrinq.so, +valgrind, VERSION, redhat72.supp (or redhat62.supp). You can copy +these to any directory you like. However, you then need to edit the +shell script "valgrind". On line 4, set the environment variable +VALGRIND to point to the directory you have copied the +installation into. + + + +

2.9  If you have problems

+Mail me (jseward@acm.org). + +

See Section 4 for the known limitations of +Valgrind, and for a list of programs which are known not to work on +it. + +

The translator/instrumentor has a lot of assertions in it. They +are permanently enabled, and I have no plans to disable them. If one +of these breaks, please mail me! + +

If you get an assertion failure on the expression +chunkSane(ch) in vg_free() in +vg_malloc.c, this may have happened because your program +wrote off the end of a malloc'd block, or before its beginning. +Valgrind should have emitted a proper message to that effect before +dying in this way. This is a known problem which I should fix. +

+ +


+ + +

3  Details of the checking machinery

+ +Read this section if you want to know, in detail, exactly what and how +Valgrind is checking. + + +

3.1  Valid-value (V) bits

+ +It is simplest to think of Valgrind implementing a synthetic Intel x86 +CPU which is identical to a real CPU, except for one crucial detail. +Every bit (literally) of data processed, stored and handled by the +real CPU has, in the synthetic CPU, an associated "valid-value" bit, +which says whether or not the accompanying bit has a legitimate value. +In the discussions which follow, this bit is referred to as the V +(valid-value) bit. + +

Each byte in the system therefore has 8 V bits which accompany +it wherever it goes. For example, when the CPU loads a word-size item +(4 bytes) from memory, it also loads the corresponding 32 V bits from +a bitmap which stores the V bits for the process' entire address +space. If the CPU should later write the whole or some part of that +value to memory at a different address, the relevant V bits will be +stored back in the V-bit bitmap. + +

In short, each bit in the system has an associated V bit, which +follows it around everywhere, even inside the CPU. Yes, the CPU's +(integer) registers have their own V bit vectors. + +

Copying values around does not cause Valgrind to check for, or +report on, errors. However, when a value is used in a way which might +conceivably affect the outcome of your program's computation, the +associated V bits are immediately checked. If any of these indicate +that the value is undefined, an error is reported. + +

Here's an (admittedly nonsensical) example: +

+  int i, j;
+  int a[10], b[10];
+  for (i = 0; i < 10; i++) {
+    j = a[i];
+    b[i] = j;
+  }
+
+ +

Valgrind emits no complaints about this, since it merely copies +uninitialised values from a[] into b[], and +doesn't use them in any way. However, if the loop is changed to +

+  for (i = 0; i < 10; i++) {
+    j += a[i];
+  }
+  if (j == 77) 
+     printf("hello there\n");
+
+then Valgrind will complain, at the if, that the +condition depends on uninitialised values. + +

Most low level operations, such as adds, cause Valgrind to +use the V bits for the operands to calculate the V bits for the +result. Even if the result is partially or wholly undefined, +it does not complain. + +

Checks on definedness only occur in two places: when a value is +used to generate a memory address, and where a control flow decision +needs to be made. Also, when a system call is detected, Valgrind +checks definedness of parameters as required. + +

If a check should detect undefinedness, an error message is +issued. The resulting value is subsequently regarded as well-defined. +To do otherwise would give long chains of error messages. In effect, +we say that undefined values are non-infectious. + +

This sounds overcomplicated. Why not just check all reads from +memory, and complain if an undefined value is loaded into a CPU register? +Well, that doesn't work well, because perfectly legitimate C programs routinely +copy uninitialised values around in memory, and we don't want endless complaints +about that. Here's the canonical example. Consider a struct +like this: +

+  struct S { int x; char c; };
+  struct S s1, s2;
+  s1.x = 42;
+  s1.c = 'z';
+  s2 = s1;
+
+ +

The question to ask is: how large is struct S, in +bytes? An int is 4 bytes and a char one byte, so perhaps a struct S +occupies 5 bytes? Wrong. All (non-toy) compilers I know of will +round the size of struct S up to a whole number of words, +in this case 8 bytes. Not doing this forces compilers to generate +truly appalling code for subscripting arrays of struct +S's. + +

So s1 occupies 8 bytes, yet only 5 of them will be initialised. +For the assignment s2 = s1, gcc generates code to copy +all 8 bytes wholesale into s2 without regard for their +meaning. If Valgrind simply checked values as they came out of +memory, it would yelp every time a structure assignment like this +happened. So the more complicated semantics described above is +necessary. This allows gcc to copy s1 into +s2 any way it likes, and a warning will only be emitted +if the uninitialised values are later used. + +

One final twist to this story. The above scheme allows garbage to +pass through the CPU's integer registers without complaint. It does +this by giving the integer registers V tags, passing these around in +the expected way. This is complicated and computationally expensive to +do, but is necessary. Valgrind is more simplistic about +floating-point loads and stores. In particular, V bits for data read +as a result of floating-point loads are checked at the load +instruction. So if your program uses the floating-point registers to +do memory-to-memory copies, you will get complaints about +uninitialised values. Fortunately, I have not yet encountered a +program which (ab)uses the floating-point registers in this way. + + +

3.2  Valid-address (A) bits

+ +Notice that the previous section describes how the validity of values +is established and maintained without having to say whether the +program does or does not have the right to access any particular +memory location. We now consider the latter issue. + +

As described above, every bit in memory or in the CPU has an +associated valid-value (V) bit. In addition, all bytes in memory, but +not in the CPU, have an associated valid-address (A) bit. This +indicates whether or not the program can legitimately read or write +that location. It does not give any indication of the validity of the +data at that location -- that's the job of the V bits -- only whether +or not the location may be accessed. + +

Every time your program reads or writes memory, Valgrind checks the +A bits associated with the address. If any of them indicate an +invalid address, an error is emitted. Note that the reads and writes +themselves do not change the A bits, only consult them. + +

So how do the A bits get set/cleared? Like this: + +

    +
  • When the program starts, all the global data areas are marked as + accessible.

  • +

    + +

  • When the program does malloc/new, the A bits for the exactly the + area allocated, and not a byte more, are marked as accessible. + Upon freeing the area the A bits are changed to indicate + inaccessibility.

  • +

    + +

  • When the stack pointer register (%esp) moves up or down, A bits + are set. The rule is that the area from %esp up to the base of + the stack is marked as accessible, and below %esp is + inaccessible. (If that sounds illogical, bear in mind that the + stack grows down, not up, on almost all Unix systems, including + GNU/Linux.) Tracking %esp like this has the useful side-effect + that the section of stack used by a function for local variables + etc is automatically marked accessible on function entry and + inaccessible on exit.

  • +

    + +

  • When doing system calls, A bits are changed appropriately. For + example, mmap() magically makes files appear in the process's + address space, so the A bits must be updated if mmap() + succeeds.

  • +
+ + + +

3.3  Putting it all together

+Valgrind's checking machinery can be summarised as follows: + +
    +
  • Each byte in memory has 8 associated V (valid-value) bits, + saying whether or not the byte has a defined value, and a single + A (valid-address) bit, saying whether or not the program + currently has the right to read/write that address.

  • +

    + +

  • When memory is read or written, the relevant A bits are + consulted. If they indicate an invalid address, Valgrind emits + an Invalid read or Invalid write error.

  • +

    + +

  • When memory is read into the CPU's integer registers, the + relevant V bits are fetched from memory and stored in the + simulated CPU. They are not consulted.

  • +

    + +

  • When an integer register is written out to memory, the V bits + for that register are written back to memory too.

  • +

    + +

  • When memory is read into the CPU's floating point registers, the + relevant V bits are read from memory and they are immediately + checked. If any are invalid, an uninitialised value error is + emitted. This precludes using the floating-point registers to + copy possibly-uninitialised memory, but simplifies Valgrind in + that it does not have to track the validity status of the + floating-point registers.

  • +

    + +

  • As a result, when a floating-point register is written to + memory, the associated V bits are set to indicate a valid + value.

  • +

    + +

  • When values in integer CPU registers are used to generate a + memory address, or to determine the outcome of a conditional + branch, the V bits for those values are checked, and an error + emitted if any of them are undefined.

  • +

    + +

  • When values in integer CPU registers are used for any other + purpose, Valgrind computes the V bits for the result, but does + not check them.

  • +

    + +

  • Once the V bits for a value in the CPU have been checked, they + are then set to indicate validity. This avoids long chains of + errors.

  • +

    + +

  • When values are loaded from memory, valgrind checks the A bits + for that location and issues an illegal-address warning if + needed. In that case, the V bits loaded are forced to indicate + Valid, despite the location being invalid. +

    + This apparently strange choice reduces the amount of confusing + information presented to the user. It avoids the + unpleasant phenomenon in which memory is read from a place which + is both unaddressible and contains invalid values, and, as a + result, you get not only an invalid-address (read/write) error, + but also a potentially large set of uninitialised-value errors, + one for every time the value is used. +

    + There is a hazy boundary case to do with multi-byte loads from + addresses which are partially valid and partially invalid. See the + description of the flag --partial-loads-ok for details. +


  • +
+ +Valgrind intercepts calls to malloc, calloc, realloc, valloc, +memalign, free, new and delete. The behaviour you get is: + +
    + +
  • malloc/new: the returned memory is marked as addressible but not + having valid values. This means you have to write on it before + you can read it.

  • +

    + +

  • calloc: returned memory is marked both addressible and valid, + since calloc() clears the area to zero.

  • +

    + +

  • realloc: if the new size is larger than the old, the new section + is addressible but invalid, as with malloc.

  • +

    + +

  • If the new size is smaller, the dropped-off section is marked as + unaddressible. You may only pass to realloc a pointer + previously issued to you by malloc/calloc/new/realloc.

  • +

    + +

  • free/delete: you may only pass to free a pointer previously + issued to you by malloc/calloc/new/realloc, or the value + NULL. Otherwise, Valgrind complains. If the pointer is indeed + valid, Valgrind marks the entire area it points at as + unaddressible, and places the block in the freed-blocks-queue. + The aim is to defer as long as possible reallocation of this + block. Until that happens, all attempts to access it will + elicit an invalid-address error, as you would hope.

  • +
+ + + + +

3.4  Signals

+ +Valgrind provides suitable handling of signals, so, provided you stick +to POSIX stuff, you should be ok. Basic sigaction() and sigprocmask() +are handled. Signal handlers may return in the normal way or do +longjmp(); both should work ok. As specified by POSIX, a signal is +blocked in its own handler. Default actions for signals should work +as before. Etc, etc. + +

Under the hood, dealing with signals is a real pain, and Valgrind's +simulation leaves much to be desired. If your program does +way-strange stuff with signals, bad things may happen. If so, let me +know. I don't promise to fix it, but I'd at least like to be aware of +it. + + + +

3.5  Memory leak detection

+ +Valgrind keeps track of all memory blocks issued in response to calls +to malloc/calloc/realloc/new. So when the program exits, it knows +which blocks are still outstanding -- have not been returned, in other +words. Ideally, you want your program to have no blocks still in use +at exit. But many programs do. + +

For each such block, Valgrind scans the entire address space of the +process, looking for pointers to the block. One of three situations +may result: + +

    +
  • A pointer to the start of the block is found. This usually + indicates programming sloppiness; since the block is still + pointed at, the programmer could, at least in principle, have free'd + it before program exit.

  • +

    + +

  • A pointer to the interior of the block is found. The pointer + might originally have pointed to the start and have been moved + along, or it might be entirely unrelated. Valgrind deems such a + block as "dubious", that is, possibly leaked, + because it's unclear whether or + not a pointer to it still exists.

  • +

    + +

  • The worst outcome is that no pointer to the block can be found. + The block is classified as "leaked", because the + programmer could not possibly have free'd it at program exit, + since no pointer to it exists. This might be a symptom of + having lost the pointer at some earlier point in the + program.
  • +
+ +Valgrind reports summaries about leaked and dubious blocks. +For each such block, it will also tell you where the block was +allocated. This should help you figure out why the pointer to it has +been lost. In general, you should attempt to ensure your programs do +not have any leaked or dubious blocks at exit. + +

The precise area of memory in which Valgrind searches for pointers +is: all naturally-aligned 4-byte words for which all A bits indicate +addressibility and all V bits indicate that the stored value is +actually valid. + +


+ + +
+

4  Limitations

+ +The following list of limitations seems depressingly long. However, +most programs actually work fine. + +

Valgrind will run x86-GNU/Linux ELF dynamically linked binaries, on +a kernel 2.4.X system, subject to the following constraints: + +

    +
  • No MMX, SSE, SSE2, 3DNow instructions. If the translator + encounters these, Valgrind will simply give up. It may be + possible to add support for them at a later time. Intel added a + few instructions such as "cmov" to the integer instruction set + on Pentium and later processors, and these are supported. + Nevertheless it's safest to think of Valgrind as implementing + the 486 instruction set.

  • +

    + +

  • Multithreaded programs are not supported, since I haven't yet + figured out how to do this. To be more specific, it is the + "clone" system call which is not supported. A program calls + "clone" to create threads. Valgrind will abort if this + happens.
  • +

    + +

  • Valgrind assumes that the floating point registers are not used + as intermediaries in memory-to-memory copies, so it immediately + checks V bits in floating-point loads/stores. If you want to + write code which copies around possibly-uninitialised values, + you must ensure these travel through the integer registers, not + the FPU.

  • +

    + +

  • If your program does its own memory management, rather than + using malloc/new/free/delete, it should still work, but + Valgrind's error checking won't be so effective.

  • +

    + +

  • Valgrind's signal simulation is not as robust as it could be. + Basic POSIX-compliant sigaction and sigprocmask functionality is + supplied, but it's conceivable that things could go badly awry + if you do weird things with signals. Workaround: don't. + Programs that do non-POSIX signal tricks are in any case + inherently unportable, so should be avoided if + possible.

  • +

    + +

  • I have no idea what happens if programs try to handle signals on + an alternate stack (sigaltstack). YMMV.

  • +

    + +

  • Programs which switch stacks are not well handled. Valgrind + does have support for this, but I don't have great faith in it. + It's difficult -- there's no cast-iron way to decide whether a + large change in %esp is as a result of the program switching + stacks, or merely allocating a large object temporarily on the + current stack -- yet Valgrind needs to handle the two situations + differently.

  • +

    + +

  • x86 instructions, and system calls, have been implemented on + demand. So it's possible, although unlikely, that a program + will fall over with a message to that effect. If this happens, + please mail me ALL the details printed out, so I can try and + implement the missing feature.

  • +

    + +

  • x86 floating point works correctly, but floating-point code may + run even more slowly than integer code, due to my simplistic + approach to FPU emulation.

  • +

    + +

  • You can't Valgrind-ize statically linked binaries. Valgrind + relies on the dynamic-link mechanism to gain control at + startup.

  • +

    + +

  • Memory consumption of your program is majorly increased whilst + running under Valgrind. This is due to the large amount of + administrative information maintained behind the scenes. Another + cause is that Valgrind dynamically translates the original + executable and never throws any translation away, except in + those rare cases where self-modifying code is detected. + Translated, instrumented code is 8-12 times larger than the + original (!) so you can easily end up with 15+ MB of + translations when running (eg) a web browser. There's not a lot + you can do about this -- use Valgrind on a fast machine with a lot + of memory and swap space. At some point I may implement a LRU + caching scheme for translations, so as to bound the maximum + amount of memory devoted to them, to say 8 or 16 MB.
  • +
+ + +Programs which are known not to work are: + +
    +
  • Netscape 4.76 works pretty well on some platforms -- quite + nicely on my AMD K6-III (400 MHz). I can surf, do mail, etc, no + problem. On other platforms it has been observed to crash + during startup. Despite much investigation I can't figure out + why.

  • +

    + +

  • kpackage (a KDE front end to rpm) dies because the CPUID + instruction is unimplemented. Easy to fix.

  • +

    + +

  • knode (a KDE newsreader) tries to do multithreaded things, and + fails.

  • +

    + +

  • emacs starts up but immediately concludes it is out of memory + and aborts. Emacs has its own memory-management scheme, but I + don't understand why this should interact so badly with + Valgrind.

  • +

    + +

  • Gimp and Gnome and GTK-based apps die early on because + of unimplemented system call wrappers. (I'm a KDE user :) + This wouldn't be hard to fix. +

  • +

    + +

  • As a consequence of me being a KDE user, almost all KDE apps + work ok -- except those which are multithreaded. +

  • +

    +

+ + +


+ + + +

5  How it works -- a rough overview

+Some gory details, for those with a passion for gory details. You +don't need to read this section if all you want to do is use Valgrind. + + +

5.1  Getting started

+ +Valgrind is compiled into a shared object, valgrind.so. The shell +script valgrind sets the LD_PRELOAD environment variable to point to +valgrind.so. This causes the .so to be loaded as an extra library to +any subsequently executed dynamically-linked ELF binary, viz, the +program you want to debug. + +

The dynamic linker allows each .so in the process image to have an +initialisation function which is run before main(). It also allows +each .so to have a finalisation function run after main() exits. + +

When valgrind.so's initialisation function is called by the dynamic +linker, the synthetic CPU starts up. The real CPU remains locked +in valgrind.so for the entire rest of the program, but the synthetic +CPU returns from the initialisation function. Startup of the program +now continues as usual -- the dynamic linker calls all the other .so's +initialisation routines, and eventually runs main(). This all runs on +the synthetic CPU, not the real one, but the client program cannot +tell the difference. + +

Eventually main() exits, so the synthetic CPU calls valgrind.so's +finalisation function. Valgrind detects this, and uses it as its cue +to exit. It prints summaries of all errors detected, possibly checks +for memory leaks, and then exits the finalisation routine, but now on +the real CPU. The synthetic CPU has now lost control -- permanently +-- so the program exits back to the OS on the real CPU, just as it +would have done anyway. + +

On entry, Valgrind switches stacks, so it runs on its own stack. +On exit, it switches back. This means that the client program +continues to run on its own stack, so we can switch back and forth +between running it on the simulated and real CPUs without difficulty. +This was an important design decision, because it makes it easy (well, +significantly less difficult) to debug the synthetic CPU. + + + +

5.2  The translation/instrumentation engine

+ +Valgrind does not directly run any of the original program's code. Only +instrumented translations are run. Valgrind maintains a translation +table, which allows it to find the translation quickly for any branch +target (code address). If no translation has yet been made, the +translator - a just-in-time translator - is summoned. This makes an +instrumented translation, which is added to the collection of +translations. Subsequent jumps to that address will use this +translation. + +

Valgrind can optionally check writes made by the application, to +see if they are writing an address contained within code which has +been translated. Such a write invalidates translations of code +bracketing the written address. Valgrind will discard the relevant +translations, which causes them to be re-made, if they are needed +again, reflecting the new updated data stored there. In this way, +self modifying code is supported. In practice I have not found any +Linux applications which use self-modifying-code. + +

The JITter translates basic blocks -- blocks of straight-line-code +-- as single entities. To minimise the considerable difficulties of +dealing with the x86 instruction set, x86 instructions are first +translated to a RISC-like intermediate code, similar to sparc code, +but with an infinite number of virtual integer registers. Initially +each insn is translated separately, and there is no attempt at +instrumentation. + +

The intermediate code is improved, mostly so as to try and cache +the simulated machine's registers in the real machine's registers over +several simulated instructions. This is often very effective. Also, +we try to remove redundant updates of the simulated machine's +condition-code register. + +

The intermediate code is then instrumented, giving more +intermediate code. There are a few extra intermediate-code operations +to support instrumentation; it is all refreshingly simple. After +instrumentation there is a cleanup pass to remove redundant value +checks. + +

This gives instrumented intermediate code which mentions arbitrary +numbers of virtual registers. A linear-scan register allocator is +used to assign real registers and possibly generate spill code. All +of this is still phrased in terms of the intermediate code. This +machinery is inspired by the work of Reuben Thomas (MITE). + +

Then, and only then, is the final x86 code emitted. The +intermediate code is carefully designed so that x86 code can be +generated from it without need for spare registers or other +inconveniences. + +

The translations are managed using a traditional LRU-based caching +scheme. The translation cache has a default size of about 14MB. + + + +

5.3  Tracking the status of memory

Each byte in the +process' address space has nine bits associated with it: one A bit and +eight V bits. The A and V bits for each byte are stored using a +sparse array, which flexibly and efficiently covers arbitrary parts of +the 32-bit address space without imposing significant space or +performance overheads for the parts of the address space never +visited. The scheme used, and speedup hacks, are described in detail +at the top of the source file vg_memory.c, so you should read that for +the gory details. + + + +

5.4 System calls

+All system calls are intercepted. The memory status map is consulted +before and updated after each call. It's all rather tiresome. See +vg_syscall_mem.c for details. + + + +

5.5  Signals

+All system calls to sigaction() and sigprocmask() are intercepted. If +the client program is trying to set a signal handler, Valgrind makes a +note of the handler address and which signal it is for. Valgrind then +arranges for the same signal to be delivered to its own handler. + +

When such a signal arrives, Valgrind's own handler catches it, and +notes the fact. At a convenient safe point in execution, Valgrind +builds a signal delivery frame on the client's stack and runs its +handler. If the handler longjmp()s, there is nothing more to be said. +If the handler returns, Valgrind notices this, zaps the delivery +frame, and carries on where it left off before delivering the signal. + +

The purpose of this nonsense is that setting signal handlers +essentially amounts to giving callback addresses to the Linux kernel. +We can't allow this to happen, because if it did, signal handlers +would run on the real CPU, not the simulated one. This means the +checking machinery would not operate during the handler run, and, +worse, memory permissions maps would not be updated, which could cause +spurious error reports once the handler had returned. + +

An even worse thing would happen if the signal handler longjmp'd +rather than returned: Valgrind would completely lose control of the +client program. + +

Upshot: we can't allow the client to install signal handlers +directly. Instead, Valgrind must catch, on behalf of the client, any +signal the client asks to catch, and must deliver it to the client on +the simulated CPU, not the real one. This involves considerable +gruesome fakery; see vg_signals.c for details. +

+ +


+ + +

6  Example

+This is the log for a run of a small program. The program is in fact +correct, and the reported error is as the result of a potentially serious +code generation bug in GNU g++ (snapshot 20010527). +
+sewardj@phoenix:~/newmat10$
+~/Valgrind-6/valgrind -v ./bogon 
+==25832== Valgrind 0.10, a memory error detector for x86 RedHat 7.1.
+==25832== Copyright (C) 2000-2001, and GNU GPL'd, by Julian Seward.
+==25832== Startup, with flags:
+==25832== --suppressions=/home/sewardj/Valgrind/redhat71.supp
+==25832== reading syms from /lib/ld-linux.so.2
+==25832== reading syms from /lib/libc.so.6
+==25832== reading syms from /mnt/pima/jrs/Inst/lib/libgcc_s.so.0
+==25832== reading syms from /lib/libm.so.6
+==25832== reading syms from /mnt/pima/jrs/Inst/lib/libstdc++.so.3
+==25832== reading syms from /home/sewardj/Valgrind/valgrind.so
+==25832== reading syms from /proc/self/exe
+==25832== loaded 5950 symbols, 142333 line number locations
+==25832== 
+==25832== Invalid read of size 4
+==25832==    at 0x8048724: _ZN10BandMatrix6ReSizeEiii (bogon.cpp:45)
+==25832==    by 0x80487AF: main (bogon.cpp:66)
+==25832==    by 0x40371E5E: __libc_start_main (libc-start.c:129)
+==25832==    by 0x80485D1: (within /home/sewardj/newmat10/bogon)
+==25832==    Address 0xBFFFF74C is not stack'd, malloc'd or free'd
+==25832==
+==25832== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+==25832== malloc/free: in use at exit: 0 bytes in 0 blocks.
+==25832== malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+==25832== For a detailed leak analysis, rerun with: --leak-check=yes
+==25832==
+==25832== exiting, did 1881 basic blocks, 0 misses.
+==25832== 223 translations, 3626 bytes in, 56801 bytes out.
+
+

The GCC folks fixed this about a week before gcc-3.0 shipped. +


+

+ + diff --git a/coregrind/docs/nav.html b/coregrind/docs/nav.html new file mode 100644 index 000000000..686ac2bde --- /dev/null +++ b/coregrind/docs/nav.html @@ -0,0 +1,68 @@ + + + Valgrind + + + + + +
+ Contents of this manual
+ 1 Introduction
+ 1.1 What Valgrind is for
+ 1.2 What it does with + your program +

+ 2 How to use it, and how to + make sense of the results
+ 2.1 Getting started
+ 2.2 The commentary
+ 2.3 Reporting of errors
+ 2.4 Suppressing errors
+ 2.5 Command-line flags
+ 2.6 Explanation of error messages
+ 2.7 Writing suppressions files
+ 2.8 Building and installing
+ 2.9 If you have problems +

+ 3 Details of the checking machinery
+ 3.1 Valid-value (V) bits
+ 3.2 Valid-address (A) bits
+ 3.3 Putting it all together
+ 3.4 Signals
+ 3.5 Memory leak detection +

+ 4 Limitations
+

+ 5 How it works -- a rough overview
+ 5.1 Getting started
+ 5.2 The translation/instrumentation engine
+ 5.3 Tracking the status of memory
+ 5.4 System calls
+ 5.5 Signals +

+ 6 An example
+

+ 7 The design and implementation of Valgrind
+ + + diff --git a/coregrind/docs/techdocs.html b/coregrind/docs/techdocs.html new file mode 100644 index 000000000..4044d4957 --- /dev/null +++ b/coregrind/docs/techdocs.html @@ -0,0 +1,2116 @@ + + + + The design and implementation of Valgrind + + + + +  +

The design and implementation of Valgrind

+ +
+Detailed technical notes for hackers, maintainers and the +overly-curious
+These notes pertain to snapshot 20020306
+

+jseward@acm.org
+
http://developer.kde.org/~sewardj
+http://www.muraroa.demon.co.uk
+Copyright © 2000-2002 Julian Seward +

+Valgrind is licensed under the GNU General Public License, +version 2
+An open-source tool for finding memory-management problems in +x86 GNU/Linux executables. +

+ +

+ + + + +


+ +

Introduction

+ +This document contains a detailed, highly-technical description of the +internals of Valgrind. This is not the user manual; if you are an +end-user of Valgrind, you do not want to read this. Conversely, if +you really are a hacker-type and want to know how it works, I assume +that you have read the user manual thoroughly. +

+You may need to read this document several times, and carefully. Some +important things, I only say once. + + +

History

+ +Valgrind came into public view in late Feb 2002. However, it has been +under contemplation for a very long time, perhaps seriously for about +five years. Somewhat over two years ago, I started working on the x86 +code generator for the Glasgow Haskell Compiler +(http://www.haskell.org/ghc), gaining familiarity with x86 internals +on the way. I then did Cacheprof (http://www.cacheprof.org), gaining +further x86 experience. Some time around Feb 2000 I started +experimenting with a user-space x86 interpreter for x86-Linux. This +worked, but it was clear that a JIT-based scheme would be necessary to +give reasonable performance for Valgrind. Design work for the JITter +started in earnest in Oct 2000, and by early 2001 I had an x86-to-x86 +dynamic translator which could run quite large programs. This +translator was in a sense pointless, since it did not do any +instrumentation or checking. + +

+Most of the rest of 2001 was taken up designing and implementing the +instrumentation scheme. The main difficulty, which consumed a lot +of effort, was to design a scheme which did not generate large numbers +of false uninitialised-value warnings. By late 2001 a satisfactory +scheme had been arrived at, and I started to test it on ever-larger +programs, with an eventual eye to making it work well enough so that +it was helpful to folks debugging the upcoming version 3 of KDE. I've +used KDE since before version 1.0, and wanted Valgrind to be an +indirect contribution to the KDE 3 development effort. At the start of +Feb 02 the kde-core-devel crew started using it, and gave a huge +amount of helpful feedback and patches in the space of three weeks. +Snapshot 20020306 is the result. + +

+In the best Unix tradition, or perhaps in the spirit of Fred Brooks' +depressing-but-completely-accurate epitaph "build one to throw away; +you will anyway", much of Valgrind is a second or third rendition of +the initial idea. The instrumentation machinery +(vg_translate.c, vg_memory.c) and core CPU +simulation (vg_to_ucode.c, vg_from_ucode.c) +have had three redesigns and rewrites; the register allocator, +low-level memory manager (vg_malloc2.c) and symbol table +reader (vg_symtab2.c) are on the second rewrite. In a +sense, this document serves to record some of the knowledge gained as +a result. + + +

Design overview

+ +Valgrind is compiled into a Linux shared object, +valgrind.so, and also a dummy one, +valgrinq.so, of which more later. The +valgrind shell script adds valgrind.so to +the LD_PRELOAD list of extra libraries to be +loaded with any dynamically linked library. This is a standard trick, +one which I assume the LD_PRELOAD mechanism was developed +to support. + +

+valgrind.so +is linked with the -z initfirst flag, which requests that +its initialisation code is run before that of any other object in the +executable image. When this happens, valgrind gains control. The +real CPU becomes "trapped" in valgrind.so and the +translations it generates. The synthetic CPU provided by Valgrind +does, however, return from this initialisation function. So the +normal startup actions, orchestrated by the dynamic linker +ld.so, continue as usual, except on the synthetic CPU, +not the real one. Eventually main is run and returns, +and then the finalisation code of the shared objects is run, +presumably in inverse order to which they were initialised. Remember, +this is still all happening on the simulated CPU. Eventually +valgrind.so's own finalisation code is called. It spots +this event, shuts down the simulated CPU, prints any error summaries +and/or does leak detection, and returns from the initialisation code +on the real CPU. At this point, in effect the real and synthetic CPUs +have merged back into one, Valgrind has lost control of the program, +and the program finally exit()s back to the kernel in the +usual way. + +

+The normal course of activity, once Valgrind has started up, is as +follows. Valgrind never runs any part of your program (usually +referred to as the "client"), not a single byte of it, directly. +Instead it uses function VG_(translate) to translate +basic blocks (BBs, straight-line sequences of code) into instrumented +translations, and those are run instead. The translations are stored +in the translation cache (TC), vg_tc, with the +translation table (TT), vg_tt supplying the +original-to-translation code address mapping. Auxiliary array +VG_(tt_fast) is used as a direct-map cache for fast +lookups in TT; it usually achieves a hit rate of around 98% and +facilitates an orig-to-trans lookup in 4 x86 insns, which is not bad. + +

+Function VG_(dispatch) in vg_dispatch.S is +the heart of the JIT dispatcher. Once a translated code address has +been found, it is executed simply by an x86 call +to the translation. At the end of the translation, the next +original code addr is loaded into %eax, and the +translation then does a ret, taking it back to the +dispatch loop, with, interestingly, zero branch mispredictions. +The address requested in %eax is looked up first in +VG_(tt_fast), and, if not found, by calling C helper +VG_(search_transtab). If there is still no translation +available, VG_(dispatch) exits back to the top-level +C dispatcher VG_(toploop), which arranges for +VG_(translate) to make a new translation. All fairly +unsurprising, really. There are various complexities described below. + +

+The translator, orchestrated by VG_(translate), is +complicated but entirely self-contained. It is described in great +detail in subsequent sections. Translations are stored in TC, with TT +tracking administrative information. The translations are subject to +an approximate LRU-based management scheme. With the current +settings, the TC can hold at most about 15MB of translations, and LRU +passes prune it to about 13.5MB. Given that the +orig-to-translation expansion ratio is about 13:1 to 14:1, this means +TC holds translations for more or less a megabyte of original code, +which generally comes to about 70000 basic blocks for C++ compiled +with optimisation on. Generating new translations is expensive, so it +is worth having a large TC to minimise the (capacity) miss rate. + +

+The dispatcher, VG_(dispatch), receives hints from +the translations which allow it to cheaply spot all control +transfers corresponding to x86 call and ret +instructions. It has to do this in order to spot some special events: +

    +
  • Calls to VG_(shutdown). This is Valgrind's cue to + exit. NOTE: actually this is done a different way; it should be + cleaned up. +

    +

  • Returns of system call handlers, to the return address + VG_(signalreturn_bogusRA). The signal simulator + needs to know when a signal handler is returning, so we spot + jumps (returns) to this address. +

    +

  • Calls to vg_trap_here. All malloc, + free, etc calls that the client program makes are + eventually routed to a call to vg_trap_here, + and Valgrind does its own special thing with these calls. + In effect this provides a trapdoor, by which Valgrind can + intercept certain calls on the simulated CPU, run the call as it + sees fit itself (on the real CPU), and return the result to + the simulated CPU, quite transparently to the client program. +
+Valgrind intercepts the client's malloc, +free, etc, +calls, so that it can store additional information. Each block +malloc'd by the client gives rise to a shadow block +in which Valgrind stores the call stack at the time of the +malloc +call. When the client calls free, Valgrind tries to +find the shadow block corresponding to the address passed to +free, and emits an error message if none can be found. +If it is found, the block is placed on the freed blocks queue +vg_freed_list, it is marked as inaccessible, and +its shadow block now records the call stack at the time of the +free call. Keeping free'd blocks in +this queue allows Valgrind to spot all (presumably invalid) accesses +to them. However, once the volume of blocks in the free queue +exceeds VG_(clo_freelist_vol), blocks are finally +removed from the queue. + +

+Keeping track of A and V bits (note: if you don't know what these are, +you haven't read the user guide carefully enough) for memory is done +in vg_memory.c. This implements a sparse array structure +which covers the entire 4G address space in a way which is reasonably +fast and reasonably space efficient. The 4G address space is divided +up into 64K sections, each covering 64Kb of address space. Given a +32-bit address, the top 16 bits are used to select one of the 65536 +entries in VG_(primary_map). The resulting "secondary" +(SecMap) holds A and V bits for the 64k of address space +chunk corresponding to the lower 16 bits of the address. + + +

Design decisions

+ +Some design decisions were motivated by the need to make Valgrind +debuggable. Imagine you are writing a CPU simulator. It works fairly +well. However, you run some large program, like Netscape, and after +tens of millions of instructions, it crashes. How can you figure out +where in your simulator the bug is? + +

+Valgrind's answer is: cheat. Valgrind is designed so that it is +possible to switch back to running the client program on the real +CPU at any point. Using the --stop-after= flag, you can +ask Valgrind to run just some number of basic blocks, and then +run the rest of the way on the real CPU. If you are searching for +a bug in the simulated CPU, you can use this to do a binary search, +which quickly leads you to the specific basic block which is +causing the problem. + +

+This is all very handy. It does constrain the design in certain +unimportant ways. Firstly, the layout of memory, when viewed from the +client's point of view, must be identical regardless of whether it is +running on the real or simulated CPU. This means that Valgrind can't +do pointer swizzling -- well, no great loss -- and it can't run on +the same stack as the client -- again, no great loss. +Valgrind operates on its own stack, VG_(stack), which +it switches to at startup, temporarily switching back to the client's +stack when doing system calls for the client. + +

+Valgrind also receives signals on its own stack, +VG_(sigstack), but for different gruesome reasons +discussed below. + +

+This nice clean switch-back-to-the-real-CPU-whenever-you-like story +is muddied by signals. Problem is that signals arrive at arbitrary +times and tend to slightly perturb the basic block count, with the +result that you can get close to the basic block causing a problem but +can't home in on it exactly. My kludgey hack is to define +SIGNAL_SIMULATION to 1 towards the bottom of +vg_syscall_mem.c, so that signal handlers are run on the +real CPU and don't change the BB counts. + +

+A second hole in the switch-back-to-real-CPU story is that Valgrind's +way of delivering signals to the client is different from that of the +kernel. Specifically, the layout of the signal delivery frame, and +the mechanism used to detect a sighandler returning, are different. +So you can't expect to make the transition inside a sighandler and +still have things working, but in practice that's not much of a +restriction. + +

+Valgrind's implementation of malloc, free, +etc, (in vg_clientmalloc.c, not the low-level stuff in +vg_malloc2.c) is somewhat complicated by the need to +handle switching back at arbitrary points. It does work tho. + + + +

Correctness

+ +There's only one of me, and I have a Real Life (tm) as well as hacking +Valgrind [allegedly :-]. That means I don't have time to waste +chasing endless bugs in Valgrind. My emphasis is therefore on doing +everything as simply as possible, with correctness, stability and +robustness being the number one priority, more important than +performance or functionality. As a result: +
    +
  • The code is absolutely loaded with assertions, and these are + permanently enabled. I have no plan to remove or disable + them later. Over the past couple of months, as valgrind has + become more widely used, they have shown their worth, pulling + up various bugs which would otherwise have appeared as + hard-to-find segmentation faults. +

    + I am of the view that it's acceptable to spend 5% of the total + running time of your valgrindified program doing assertion checks + and other internal sanity checks. +

    +

  • Aside from the assertions, valgrind contains various sets of + internal sanity checks, which get run at varying frequencies + during normal operation. VG_(do_sanity_checks) + runs every 1000 basic blocks, which means 500 to 2000 times/second + for typical machines at present. It checks that Valgrind hasn't + overrun its private stack, and does some simple checks on the + memory permissions maps. Once every 25 calls it does some more + extensive checks on those maps. Etc, etc. +

    + The following components also have sanity check code, which can + be enabled to aid debugging: +

      +
    • The low-level memory-manager + (VG_(mallocSanityCheckArena)). This does a + complete check of all blocks and chains in an arena, which + is very slow. Is not engaged by default. +

      +

    • The symbol table reader(s): various checks to ensure + uniqueness of mappings; see VG_(read_symbols) + for a start. Is permanently engaged. +

      +

    • The A and V bit tracking stuff in vg_memory.c. + This can be compiled with cpp symbol + VG_DEBUG_MEMORY defined, which removes all the + fast, optimised cases, and uses simple-but-slow fallbacks + instead. Not engaged by default. +

      +

    • Ditto VG_DEBUG_LEAKCHECK. +

      +

    • The JITter parses x86 basic blocks into sequences of + UCode instructions. It then sanity checks each one with + VG_(saneUInstr) and sanity checks the sequence + as a whole with VG_(saneUCodeBlock). This stuff + is engaged by default, and has caught some way-obscure bugs + in the simulated CPU machinery in its time. +

      +

    • The system call wrapper does + VG_(first_and_last_secondaries_look_plausible) after + every syscall; this is known to pick up bugs in the syscall + wrappers. Engaged by default. +

      +

    • The main dispatch loop, in VG_(dispatch), checks + that translations do not set %ebp to any value + different from VG_EBP_DISPATCH_CHECKED or + & VG_(baseBlock). In effect this test is free, + and is permanently engaged. +

      +

    • There are a couple of ifdefed-out consistency checks I + inserted whilst debugging the new register allocator, + vg_do_register_allocation. +
    +

    +

  • I try to avoid techniques, algorithms, mechanisms, etc, for which + I can supply neither a convincing argument that they are correct, + nor sanity-check code which might pick up bugs in my + implementation. I don't always succeed in this, but I try. + Basically the idea is: avoid techniques which are, in practice, + unverifiable, in some sense. When doing anything, always have in + mind: "how can I verify that this is correct?" +
+ +

+Some more specific things are: + +

    +
  • Valgrind runs in the same namespace as the client, at least from + ld.so's point of view, and it therefore absolutely + had better not export any symbol with a name which could clash + with that of the client or any of its libraries. Therefore, all + globally visible symbols exported from valgrind.so + are defined using the VG_ CPP macro. As you'll see + from vg_constants.h, this appends some arbitrary + prefix to the symbol, in order that it be, we hope, globally + unique. Currently the prefix is vgPlain_. For + convenience there are also VGM_, VGP_ + and VGOFF_. All locally defined symbols are declared + static and do not appear in the final shared object. +

    + To check this, I periodically do + nm valgrind.so | grep " T ", + which shows you all the globally exported text symbols. + They should all have an approved prefix, except for those like + malloc, free, etc, which we deliberately + want to shadow and take precedence over the same names exported + from glibc.so, so that valgrind can intercept those + calls easily. Similarly, nm valgrind.so | grep " D " + allows you to find any rogue data-segment symbol names. +

    +

  • Valgrind tries, and almost succeeds, in being completely + independent of all other shared objects, in particular of + glibc.so. For example, we have our own low-level + memory manager in vg_malloc2.c, which is a fairly + standard malloc/free scheme augmented with arenas, and + vg_mylibc.c exports reimplementations of various bits + and pieces you'd normally get from the C library. +

    + Why all the hassle? Because imagine the potential chaos of both + the simulated and real CPUs executing in glibc.so. + It just seems simpler and cleaner to be completely self-contained, + so that only the simulated CPU visits glibc.so. In + practice it's not much hassle anyway. Also, valgrind starts up + before glibc has a chance to initialise itself, and who knows what + difficulties that could lead to. Finally, glibc has definitions + for some types, specifically sigset_t, which conflict + (are different from) the Linux kernel's idea of same. When + Valgrind wants to fiddle around with signal stuff, it wants to + use the kernel's definitions, not glibc's definitions. So it's + simplest just to keep glibc out of the picture entirely. +

    + To find out which glibc symbols are used by Valgrind, reinstate + the link flags -nostdlib -Wl,-no-undefined. This + causes linking to fail, but will tell you what you depend on. + I have mostly, but not entirely, got rid of the glibc + dependencies; what remains is, IMO, fairly harmless. AFAIK the + current dependencies are: memset, + memcmp, stat, system, + sbrk, setjmp and longjmp. + +

    +

  • Similarly, valgrind should not really import any headers other + than the Linux kernel headers, since it knows of no API other than + the kernel interface to talk to. At the moment this is really not + in a good state, and vg_syscall_mem imports, via + vg_unsafe.h, a significant number of C-library + headers so as to know the sizes of various structs passed across + the kernel boundary. This is of course completely bogus, since + there is no guarantee that the C library's definitions of these + structs matches those of the kernel. I have started to sort this + out using vg_kerneliface.h, into which I had intended + to copy all kernel definitions which valgrind could need, but this + has not gotten very far. At the moment it mostly contains + definitions for sigset_t and struct + sigaction, since the kernel's definition for these really + does clash with glibc's. I plan to use a vki_ prefix + on all these types and constants, to denote the fact that they + pertain to Valgrind's Kernel Interface. +

    + Another advantage of having a vg_kerneliface.h file + is that it makes it simpler to interface to a different kernel. + One can, for example, easily imagine writing a new + vg_kerneliface.h for FreeBSD, or x86 NetBSD. + 

+ +

Current limitations

+ +No threads. I think fixing this is close to a research-grade problem. +

+No MMX. Fixing this should be relatively easy, using the same giant +trick used for x86 FPU instructions. See below. +

+Support for weird (non-POSIX) signal stuff is patchy. Does anybody +care? +

+ + + + +


+ +

The instrumenting JITter

+ +This really is the heart of the matter. We begin with various side +issues. + +

Run-time storage, and the use of host registers

+ +Valgrind translates client (original) basic blocks into instrumented +basic blocks, which live in the translation cache TC, until either the +client finishes or the translations are ejected from TC to make room +for newer ones. +

+Since it generates x86 code in memory, Valgrind has complete control +of the use of registers in the translations. Now pay attention. I +shall say this only once, and it is important you understand this. In +what follows I will refer to registers in the host (real) cpu using +their standard names, %eax, %edi, etc. I +refer to registers in the simulated CPU by capitalising them: +%EAX, %EDI, etc. These two sets of +registers usually bear no direct relationship to each other; there is +no fixed mapping between them. This naming scheme is used fairly +consistently in the comments in the sources. +

+Host registers, once things are up and running, are used as follows: +

    +
  • %esp, the real stack pointer, points + somewhere in Valgrind's private stack area, + VG_(stack) or, transiently, into its signal delivery + stack, VG_(sigstack). +

    +

  • %edi is used as a temporary in code generation; it + is almost always dead, except when used for the Left + value-tag operations. +

    +

  • %eax, %ebx, %ecx, + %edx and %esi are available to + Valgrind's register allocator. They are dead (carry unimportant + values) in between translations, and are live only in + translations. The one exception to this is %eax, + which, as mentioned far above, has a special significance to the + dispatch loop VG_(dispatch): when a translation + returns to the dispatch loop, %eax is expected to + contain the original-code-address of the next translation to run. + The register allocator is so good at minimising spill code that + using five regs and not having to save/restore %edi + actually gives better code than allocating to %edi + as well, but then having to push/pop it around special uses. +

    +

  • %ebp points permanently at + VG_(baseBlock). Valgrind's translations are + position-independent, partly because this is convenient, but also + because translations get moved around in TC as part of the LRUing + activity. All static entities which need to be referred to + from generated code, whether data or helper functions, are stored + starting at VG_(baseBlock) and are therefore reached + by indexing from %ebp. There is but one exception, + which is that by placing the value + VG_EBP_DISPATCH_CHECKED + in %ebp just before a return to the dispatcher, + the dispatcher is informed that the next address to run, + in %eax, requires special treatment. +

    +

  • The real machine's FPU state is pretty much unimportant, for + reasons which will become obvious. Ditto its %eflags + register. +
+ +

+The state of the simulated CPU is stored in memory, in +VG_(baseBlock), which is a block of 200 words IIRC. +Recall that %ebp points permanently at the start of this +block. Function vg_init_baseBlock decides what the +offsets of various entities in VG_(baseBlock) are to be, +and allocates word offsets for them. The code generator then emits +%ebp relative addresses to get at those things. The +sequence in which entities are allocated has been carefully chosen so +that the 32 most popular entities come first, because this means 8-bit +offsets can be used in the generated code. + +

+If I was clever, I could make %ebp point 32 words along +VG_(baseBlock), so that I'd have another 32 words of +short-form offsets available, but that's just complicated, and it's +not important -- the first 32 words take 99% (or whatever) of the +traffic. + +

+Currently, the sequence of stuff in VG_(baseBlock) is as +follows: +

    +
  • 9 words, holding the simulated integer registers, + %EAX .. %EDI, and the simulated flags, + %EFLAGS. +

    +

  • Another 9 words, holding the V bit "shadows" for the above 9 regs. +

    +

  • The addresses of various helper routines called from + generated code: + VG_(helper_value_check4_fail), + VG_(helper_value_check0_fail), + which register V-check failures, + VG_(helperc_STOREV4), + VG_(helperc_STOREV1), + VG_(helperc_LOADV4), + VG_(helperc_LOADV1), + which do stores and loads of V bits to/from the + sparse array which keeps track of V bits in memory, + and + VGM_(handle_esp_assignment), which messes with + memory addressibility resulting from changes in %ESP. +

    +

  • The simulated %EIP. +

    +

  • 24 spill words, for when the register allocator can't make it work + with 5 measly registers. +

    +

  • Addresses of helpers VG_(helperc_STOREV2), + VG_(helperc_LOADV2). These are here because 2-byte + loads and stores are relatively rare, so are placed above the + magic 32-word offset boundary. +

    +

  • For similar reasons, addresses of helper functions + VGM_(fpu_write_check) and + VGM_(fpu_read_check), which handle the A/V maps + testing and changes required by FPU writes/reads. +

    +

  • Some other boring helper addresses: + VG_(helper_value_check2_fail) and + VG_(helper_value_check1_fail). These are probably + never emitted now, and should be removed. +

    +

  • The entire state of the simulated FPU, which I believe to be + 108 bytes long. +

    +

  • Finally, the addresses of various other helper functions in + vg_helpers.S, which deal with rare situations which + are tedious or difficult to generate code in-line for. +
+ +

+As a general rule, the simulated machine's state lives permanently in +memory at VG_(baseBlock). However, the JITter does some +optimisations which allow the simulated integer registers to be +cached in real registers over multiple simulated instructions within +the same basic block. These are always flushed back into memory at +the end of every basic block, so that the in-memory state is +up-to-date between basic blocks. (This flushing is implied by the +statement above that the real machine's allocatable registers are +dead in between simulated blocks). + + +

Startup, shutdown, and system calls

+ +Getting into Valgrind (VG_(startup), called from +valgrind.so's initialisation section), really means +copying the real CPU's state into VG_(baseBlock), and +then installing our own stack pointer, etc, into the real CPU, and +then starting up the JITter. Exiting valgrind involves copying the +simulated state back to the real state. + 

+Unfortunately, there's a complication at startup time. Problem is +that at the point where we need to take a snapshot of the real CPU's +state, the offsets in VG_(baseBlock) are not set up yet, +because to do so would involve disrupting the real machine's state +significantly. The way round this is to dump the real machine's state +into a temporary, static block of memory, +VG_(m_state_static). We can then set up the +VG_(baseBlock) offsets at our leisure, and copy into it +from VG_(m_state_static) at some convenient later time. +This copying is done by +VG_(copy_m_state_static_to_baseBlock). + +

+On exit, the inverse transformation is (rather unnecessarily) used: +stuff in VG_(baseBlock) is copied to +VG_(m_state_static), and the assembly stub then copies +from VG_(m_state_static) into the real machine registers. + +

+Doing system calls on behalf of the client (vg_syscall.S) +is something of a half-way house. We have to make the world look +sufficiently like that which the client would normally have, to make +the syscall actually work properly, but we can't afford to lose +control. So the trick is to copy all of the client's state, except +its program counter, into the real CPU, do the system call, and +copy the state back out. Note that the client's state includes its +stack pointer register, so one effect of this partial restoration is +to cause the system call to be run on the client's stack, as it should +be. + 

+As ever there are complications. We have to save some of our own state +somewhere when restoring the client's state into the CPU, so that we +can keep going sensibly afterwards. In fact the only thing which is +important is our own stack pointer, but for paranoia reasons I save +and restore our own FPU state as well, even though that's probably +pointless. + +

+The complication on the above complication is, that for horrible +reasons to do with signals, we may have to handle a second client +system call whilst the client is blocked inside some other system +call (unbelievable!). That means there's two sets of places to +dump Valgrind's stack pointer and FPU state across the syscall, +and we decide which to use by consulting +VG_(syscall_depth), which is in turn maintained by +VG_(wrap_syscall). + + + +

Introduction to UCode

+ +UCode lies at the heart of the x86-to-x86 JITter. The basic premise +is that dealing with the x86 instruction set head-on is just too darn +complicated, so we do the traditional compiler-writer's trick and +translate it into a simpler, easier-to-deal-with form. + 

+In normal operation, translation proceeds through six stages, +coordinated by VG_(translate): +

    +
  1. Parsing of an x86 basic block into a sequence of UCode + instructions (VG_(disBB)). +

    +

  2. UCode optimisation (vg_improve), with the aim of + caching simulated registers in real registers over multiple + simulated instructions, and removing redundant simulated + %EFLAGS saving/restoring. +

    +

  3. UCode instrumentation (vg_instrument), which adds + value and address checking code. +

    +

  4. Post-instrumentation cleanup (vg_cleanup), removing + redundant value-check computations. +

    +

  5. Register allocation (vg_do_register_allocation), + which, note, is done on UCode. +

    +

  6. Emission of final instrumented x86 code + (VG_(emit_code)). +
+ +

+Notice how steps 2, 3, 4 and 5 are simple UCode-to-UCode +transformation passes, all on straight-line blocks of UCode (type +UCodeBlock). Steps 2 and 4 are optimisation passes and +can be disabled for debugging purposes, with +--optimise=no and --cleanup=no respectively. + +

+Valgrind can also run in a no-instrumentation mode, given +--instrument=no. This is useful for debugging the JITter +quickly without having to deal with the complexity of the +instrumentation mechanism too. In this mode, steps 3 and 4 are +omitted. + +

+These flags combine, so that --instrument=no together with +--optimise=no means only steps 1, 5 and 6 are used. +--single-step=yes causes each x86 instruction to be +treated as a single basic block. The translations are terrible but +this is sometimes instructive. + +

+The --stop-after=N flag switches back to the real CPU +after N basic blocks. It also re-JITs the final basic +block executed and prints the debugging info resulting, so this +gives you a way to get a quick snapshot of how a basic block looks as +it passes through the six stages mentioned above. If you want to +see full information for every block translated (probably not, but +still ...) find, in VG_(translate), the lines +
dis = True; +
dis = debugging_translation; +
+and comment out the second line. This will spew out debugging +junk faster than you can possibly imagine. + + + +

UCode operand tags: type Tag

+ +UCode is, more or less, a simple two-address RISC-like code. In +keeping with the x86 AT&T assembly syntax, generally speaking the +first operand is the source operand, and the second is the destination +operand, which is modified when the uinstr is notionally executed. + +

+UCode instructions have up to three operand fields, each of which has +a corresponding Tag describing it. Possible values for +the tag are: + +

    +
  • NoValue: indicates that the field is not in use. +

    +

  • Lit16: the field contains a 16-bit literal. +

    +

  • Literal: the field denotes a 32-bit literal, whose + value is stored in the lit32 field of the uinstr + itself. Since there is only one lit32 for the whole + uinstr, only one operand field may contain this tag. +

    +

  • SpillNo: the field contains a spill slot number, in + the range 0 to 23 inclusive, denoting one of the spill slots + contained inside VG_(baseBlock). Such tags only + exist after register allocation. +

    +

  • RealReg: the field contains a number in the range 0 + to 7 denoting an integer x86 ("real") register on the host. The + number is the Intel encoding for integer registers. Such tags + only exist after register allocation. +

    +

  • ArchReg: the field contains a number in the range 0 + to 7 denoting an integer x86 register on the simulated CPU. In + reality this means a reference to one of the first 8 words of + VG_(baseBlock). Such tags can exist at any point in + the translation process. +

    +

  • Last, but not least, TempReg. The field contains the + number of one of an infinite set of virtual (integer) + registers. TempRegs are used everywhere throughout + the translation process; you can have as many as you want. The + register allocator maps as many as it can into + RealRegs and turns the rest into + SpillNos, so TempRegs should not exist + after the register allocation phase. +

    + TempRegs are always 32 bits long, even if the data + they hold is logically shorter. In that case the upper unused + bits are required, and, I think, generally assumed, to be zero. + TempRegs holding V bits for quantities shorter than + 32 bits are expected to have ones in the unused places, since a + one denotes "undefined". +

+ + +

UCode instructions: type UInstr

+ +

+UCode was carefully designed to make it possible to do register +allocation on UCode and then translate the result into x86 code +without needing any extra registers ... well, that was the original +plan, anyway. Things have gotten a little more complicated since +then. In what follows, UCode instructions are referred to as uinstrs, +to distinguish them from x86 instructions. Uinstrs of course have +uopcodes which are (naturally) different from x86 opcodes. + +

+A uinstr (type UInstr) contains +various fields, not all of which are used by any one uopcode: +

    +
  • Three 16-bit operand fields, val1, val2 + and val3. +

    +

  • Three tag fields, tag1, tag2 + and tag3. Each of these has a value of type + Tag, + and they describe what the val1, val2 + and val3 fields contain. +

    +

  • A 32-bit literal field. +

    +

  • Two FlagSets, specifying which x86 condition codes are + read and written by the uinstr. +

    +

  • An opcode byte, containing a value of type Opcode. +

    +

  • A size field, indicating the data transfer size (1/2/4/8/10) in + cases where this makes sense, or zero otherwise. +

    +

  • A condition-code field, which, for jumps, holds a + value of type Condcode, indicating the condition + which applies. The encoding is as it is in the x86 insn stream, + except we add a 17th value CondAlways to indicate + an unconditional transfer. +

    +

  • Various 1-bit flags, indicating whether this insn pertains to an + x86 CALL or RET instruction, whether a widening is signed or not, + etc. +
+ +

+UOpcodes (type Opcode) are divided into two groups: those +necessary merely to express the functionality of the x86 code, and +extra uopcodes needed to express the instrumentation. The former +group contains: +

    +
  • GET and PUT, which move values from the + simulated CPU's integer registers (ArchRegs) into + TempRegs, and back. GETF and + PUTF do the corresponding thing for the simulated + %EFLAGS. There are no corresponding insns for the + FPU register stack, since we don't explicitly simulate its + registers. +

    +

  • LOAD and STORE, which, in RISC-like + fashion, are the only uinstrs able to interact with memory. +

    +

  • MOV and CMOV allow unconditional and + conditional moves of values between TempRegs. +

    +

  • ALU operations. Again in RISC-like fashion, these only operate on + TempRegs (before reg-alloc) or RealRegs + (after reg-alloc). These are: ADD, ADC, + AND, OR, XOR, + SUB, SBB, SHL, + SHR, SAR, ROL, + ROR, RCL, RCR, + NOT, NEG, INC, + DEC, BSWAP, CC2VAL and + WIDEN. WIDEN does signed or unsigned + value widening. CC2VAL is used to convert condition + codes into a value, zero or one. The rest are obvious. +

    + To allow for more efficient code generation, we bend slightly the + restriction at the start of the previous para: for + ADD, ADC, XOR, + SUB and SBB, we allow the first (source) + operand to also be an ArchReg, that is, one of the + simulated machine's registers. Also, many of these ALU ops allow + the source operand to be a literal. See + VG_(saneUInstr) for the final word on the allowable + forms of uinstrs. +

    +

  • LEA1 and LEA2 are not strictly + necessary, but facilitate better translations. They + record the fancy x86 addressing modes in a direct way, which + allows those amodes to be emitted back into the final + instruction stream more or less verbatim. +

    +

  • CALLM calls a machine-code helper, one of the methods + whose address is stored at some VG_(baseBlock) + offset. PUSH and POP move values + to/from TempReg to the real (Valgrind's) stack, and + CLEAR removes values from the stack. + CALLM_S and CALLM_E delimit the + boundaries of call setups and clearings, for the benefit of the + instrumentation passes. Getting this right is critical, and so + VG_(saneUCodeBlock) makes various checks on the use + of these uopcodes. +

    + It is important to understand that these uopcodes have nothing to + do with the x86 call, return, + push or pop instructions, and are not + used to implement them. Those guys turn into combinations of + GET, PUT, LOAD, + STORE, ADD, SUB, and + JMP. What these uopcodes support is calling of + helper functions such as VG_(helper_imul_32_64), + which do stuff which is too difficult or tedious to emit inline. +

    +

  • FPU, FPU_R and FPU_W. + Valgrind doesn't attempt to simulate the internal state of the + FPU at all. Consequently it only needs to be able to distinguish + FPU ops which read and write memory from those that don't, and + for those which do, it needs to know the effective address and + data transfer size. This is made easier because the x86 FP + instruction encoding is very regular, basically consisting of + 16 bits for a non-memory FPU insn and 11 (IIRC) bits + an address mode + for a memory FPU insn. So our FPU uinstr carries + the 16 bits in its val1 field. And + FPU_R and FPU_W carry 11 bits in that + field, together with the identity of a TempReg or + (later) RealReg which contains the address. +

    +

  • JIFZ is unique, in that it allows a control-flow + transfer which is not deemed to end a basic block. It causes a + jump to a literal (original) address if the specified argument + is zero. +

    +

  • Finally, INCEIP advances the simulated + %EIP by the specified literal amount. This supports + lazy %EIP updating, as described below. +
+ +

+Stages 1 and 2 of the 6-stage translation process mentioned above +deal purely with these uopcodes, and no others. They are +sufficient to express pretty much all the x86 32-bit protected-mode +instruction set, at +least everything understood by a pre-MMX original Pentium (P54C). + +

+ +Stages 3, 4, 5 and 6 also deal with the following extra +"instrumentation" uopcodes. They are used to express all the +definedness-tracking and -checking machinery which valgrind does. In +later sections we show how to create checking code for each of the +uopcodes above. Note that these instrumentation uopcodes, although +some appear complicated, have been carefully chosen so that +efficient x86 code can be generated for them. GNU superopt v2.5 did a +great job helping out here. Anyways, the uopcodes are as follows: + 

    +
  • GETV and PUTV are analogues to + GET and PUT above. They are identical + except that they move the V bits for the specified values back and + forth to TempRegs, rather than moving the values + themselves. +

    +

  • Similarly, LOADV and STOREV read and + write V bits from the synthesised shadow memory that Valgrind + maintains. In fact they do more than that, since they also do + address-validity checks, and emit complaints if the read/written + addresses are unaddressible. +

    +

  • TESTV, whose parameters are a TempReg + and a size, tests the V bits in the TempReg, at the + specified operation size (0/1/2/4 byte) and emits an error if any + of them indicate undefinedness. This is the only uopcode capable + of doing such tests. +

    +

  • SETV, whose parameters are also TempReg + and a size, makes the V bits in the TempReg indicate + definedness, at the specified operation size. This is usually + used to generate the correct V bits for a literal value, which is + of course fully defined. +

    +

  • GETVF and PUTVF are analogues to + GETF and PUTF. They move the single V + bit used to model definedness of %EFLAGS between its + home in VG_(baseBlock) and the specified + TempReg. +

    +

  • TAG1 denotes one of a family of unary operations on + TempRegs containing V bits. Similarly, + TAG2 denotes one in a family of binary operations on + V bits. +
+ +

+These 10 uopcodes are sufficient to express Valgrind's entire +definedness-checking semantics. In fact most of the interesting magic +is done by the TAG1 and TAG2 +suboperations. + +

+First, however, I need to explain about V-vector operation sizes. +There are 4 sizes: 1, 2 and 4, which operate on groups of 8, 16 and 32 +V bits at a time, supporting the usual 1, 2 and 4 byte x86 operations. +However there is also the mysterious size 0, which really means a +single V bit. Single V bits are used in various circumstances; in +particular, the definedness of %EFLAGS is modelled with a +single V bit. Now might be a good time to also point out that for +V bits, 1 means "undefined" and 0 means "defined". Similarly, for A +bits, 1 means "invalid address" and 0 means "valid address". This +seems counterintuitive (and so it is), but testing against zero on +x86s saves instructions compared to testing against all 1s, because +many ALU operations set the Z flag for free, so to speak. + +

+With that in mind, the tag ops are: + +

    +
  • (UNARY) Pessimising casts: VgT_PCast40, + VgT_PCast20, VgT_PCast10, + VgT_PCast01, VgT_PCast02 and + VgT_PCast04. A "pessimising cast" takes a V-bit + vector at one size, and creates a new one at another size, + pessimised in the sense that if any of the bits in the source + vector indicate undefinedness, then all the bits in the result + indicate undefinedness. In this case the casts are all to or from + a single V bit, so for example VgT_PCast40 is a + pessimising cast from 32 bits to 1, whereas + VgT_PCast04 simply copies the single source V bit + into all 32 bit positions in the result. Surprisingly, these ops + can all be implemented very efficiently. +

    + There are also the pessimising casts VgT_PCast14, + from 8 bits to 32, VgT_PCast12, from 8 bits to 16, + and VgT_PCast11, from 8 bits to 8. This last one + seems nonsensical, but in fact it isn't a no-op because, as + mentioned above, any undefined (1) bits in the source infect the + entire result. +

    +

  • (UNARY) Propagating undefinedness upwards in a word: + VgT_Left4, VgT_Left2 and + VgT_Left1. These are used to simulate the worst-case + effects of carry propagation in adds and subtracts. They return a + V vector identical to the original, except that if the original + contained any undefined bits, then it and all bits above it are + marked as undefined too. Hence the Left bit in the names. +

    +

  • (UNARY) Signed and unsigned value widening: + VgT_SWiden14, VgT_SWiden24, + VgT_SWiden12, VgT_ZWiden14, + VgT_ZWiden24 and VgT_ZWiden12. These + mimic the definedness effects of standard signed and unsigned + integer widening. Unsigned widening creates zero bits in the new + positions, so VgT_ZWiden* accordingly mark + those parts of their argument as defined. Signed widening copies + the sign bit into the new positions, so VgT_SWiden* + copies the definedness of the sign bit into the new positions. + Because 1 means undefined and 0 means defined, these operations + can (fascinatingly) be done by the same operations which they + mimic. Go figure. +

    +

  • (BINARY) Undefined-if-either-Undefined, + Defined-if-either-Defined: VgT_UifU4, + VgT_UifU2, VgT_UifU1, + VgT_UifU0, VgT_DifD4, + VgT_DifD2, VgT_DifD1. These do simple + bitwise operations on pairs of V-bit vectors, with + UifU giving undefined if either arg bit is + undefined, and DifD giving defined if either arg bit + is defined. Abstract interpretation junkies, if any make it this + far, may like to think of them as meets and joins (or is it joins + and meets) in the definedness lattices. +

    +

  • (BINARY; one value, one V bits) Generate argument improvement + terms for AND and OR: VgT_ImproveAND4_TQ, + VgT_ImproveAND2_TQ, VgT_ImproveAND1_TQ, + VgT_ImproveOR4_TQ, VgT_ImproveOR2_TQ, + VgT_ImproveOR1_TQ. These help out with AND and OR + operations. AND and OR have the inconvenient property that the + definedness of the result depends on the actual values of the + arguments as well as their definedness. At the bit level: +
    1 AND undefined = undefined, but +
    0 AND undefined = 0, and similarly +
    0 OR undefined = undefined, but +
    1 OR undefined = 1. +
    +

    + It turns out that gcc (quite legitimately) generates code which + relies on this fact, so we have to model it properly in order to + avoid flooding users with spurious value errors. The ultimate + definedness result of AND and OR is calculated using + UifU on the definedness of the arguments, but we + also DifD in some "improvement" terms which + take into account the above phenomena. +

    + ImproveAND takes as its first argument the actual + value of an argument to AND (the T) and the definedness of that + argument (the Q), and returns a V-bit vector which is defined (0) + for bits which have value 0 and are defined; this, when + DifD into the final result causes those bits to be + defined even if the corresponding bit in the other argument is undefined. +

    + The ImproveOR ops do the dual thing for OR + arguments. Note that XOR does not have this property that one + argument can make the other irrelevant, so there is no need for + such complexity for XOR. +

+ +

+That's all the tag ops. If you stare at this long enough, and then +run Valgrind and stare at the pre- and post-instrumented ucode, it +should be fairly obvious how the instrumentation machinery hangs +together. + +

+One point, if you do this: in order to make it easy to differentiate +TempRegs carrying values from TempRegs +carrying V bit vectors, Valgrind prints the former as (for example) +t28 and the latter as q28; the fact that +they carry the same number serves to indicate their relationship. +This is purely for the convenience of the human reader; the register +allocator and code generator don't regard them as different. + + +

Translation into UCode

+ +VG_(disBB) allocates a new UCodeBlock and +then uses disInstr to translate x86 instructions one at a +time into UCode, dumping the result in the UCodeBlock. +This goes on until a control-flow transfer instruction is encountered. + +

+Despite the large size of vg_to_ucode.c, this translation +is really very simple. Each x86 instruction is translated entirely +independently of its neighbours, merrily allocating new +TempRegs as it goes. The idea is to have a simple +translator -- in reality, no more than a macro-expander -- and the +resulting bad UCode translation is cleaned up by the UCode +optimisation phase which follows. To give you an idea of some x86 +instructions and their translations (this is a complete basic block, +as Valgrind sees it): +

+        0x40435A50:  incl %edx
+
+           0: GETL      %EDX, t0
+           1: INCL      t0  (-wOSZAP)
+           2: PUTL      t0, %EDX
+
+        0x40435A51:  movsbl (%edx),%eax
+
+           3: GETL      %EDX, t2
+           4: LDB       (t2), t2
+           5: WIDENL_Bs t2
+           6: PUTL      t2, %EAX
+
+        0x40435A54:  testb $0x20, 1(%ecx,%eax,2)
+
+           7: GETL      %EAX, t6
+           8: GETL      %ECX, t8
+           9: LEA2L     1(t8,t6,2), t4
+          10: LDB       (t4), t10
+          11: MOVB      $0x20, t12
+          12: ANDB      t12, t10  (-wOSZACP)
+          13: INCEIPo   $9
+
+        0x40435A59:  jnz-8 0x40435A50
+
+          14: Jnzo      $0x40435A50  (-rOSZACP)
+          15: JMPo      $0x40435A5B
+
+ +

+Notice how the block always ends with an unconditional jump to the +next block. This is a bit unnecessary, but makes many things simpler. + +

+Most x86 instructions turn into sequences of GET, +PUT, LEA1, LEA2, +LOAD and STORE. Some complicated ones +however rely on calling helper bits of code in +vg_helpers.S. The ucode instructions PUSH, +POP, CALL, CALLM_S and +CALLM_E support this. The calling convention is somewhat +ad-hoc and is not the C calling convention. The helper routines must +save all integer registers, and the flags, that they use. Args are +passed on the stack underneath the return address, as usual, and if +result(s) are to be returned, it (they) are either placed in dummy arg +slots created by the ucode PUSH sequence, or just +overwrite the incoming args. + +

+In order that the instrumentation mechanism can handle calls to these +helpers, VG_(saneUCodeBlock) enforces the following +restrictions on calls to helpers: + +

    +
  • Each CALL uinstr must be bracketed by a preceding + CALLM_S marker (dummy uinstr) and a trailing + CALLM_E marker. These markers are used by the + instrumentation mechanism later to establish the boundaries of the + PUSH, POP and CLEAR + sequences for the call. +

    +

  • PUSH, POP and CLEAR + may only appear inside sections bracketed by CALLM_S + and CALLM_E, and nowhere else. +

    +

  • In any such bracketed section, no two PUSH insns may + push the same TempReg. Dually, no two + POPs may pop the same TempReg. +

    +

  • Finally, although this is not checked, args should be removed from + the stack with CLEAR, rather than POPs + into a TempReg which is not subsequently used. This + is because the instrumentation mechanism assumes that all values + POPped from the stack are actually used. +
+ +Some of the translations may appear to have redundant +TempReg-to-TempReg moves. This helps the +next phase, UCode optimisation, to generate better code. + + + +

UCode optimisation

+ +UCode is then subjected to an improvement pass +(vg_improve()), which blurs the boundaries between the +translations of the original x86 instructions. It's pretty +straightforward. Three transformations are done: + +
    +
  • Redundant GET elimination. Actually, more general + than that -- eliminates redundant fetches of ArchRegs. In our + running example, uinstr 3 GETs %EDX into + t2 despite the fact that, by looking at the previous + uinstr, it is already in t0. The GET is + therefore removed, and t2 renamed to t0. + Assuming t0 is allocated to a host register, it means + the simulated %EDX will exist in a host CPU register + for more than one simulated x86 instruction, which seems to me to + be a highly desirable property. +

    + There is some mucking around to do with subregisters; + %AL vs %AH %AX vs + %EAX etc. I can't remember how it works, but in + general we are very conservative, and these tend to invalidate the + caching. +

    +

  • Redundant PUT elimination. This annuls + PUTs of values back to simulated CPU registers if a + later PUT would overwrite the earlier + PUT value, and there are no intervening reads of the + simulated register (ArchReg). +

    + As before, we are paranoid when faced with subregister references. + Also, PUTs of %ESP are never annulled, + because it is vital the instrumenter always has an up-to-date + %ESP value available, since %ESP changes + affect addressability of the memory around the simulated stack + pointer. +

    + The implication of the above paragraph is that the simulated + machine's registers are only lazily updated once the above two + optimisation phases have run, with the exception of + %ESP. TempRegs go dead at the end of + every basic block, from which it is inferable that any + TempReg caching a simulated CPU reg is flushed (back + into the relevant VG_(baseBlock) slot) at the end of + every basic block. The further implication is that the simulated + registers are only up-to-date in between basic blocks, and not + at arbitrary points inside basic blocks. And the consequence of + that is that we can only deliver signals to the client in between + basic blocks. None of this seems any problem in practice. +

    +

  • Finally there is a simple def-use thing for condition codes. If + an earlier uinstr writes the condition codes, and the next uinsn + along which actually cares about the condition codes writes the + same or larger set of them, but does not read any, the earlier + uinsn is marked as not writing any condition codes. This saves + a lot of redundant cond-code saving and restoring. +
+ +The effect of these transformations on our short block is rather +unexciting, and shown below. On longer basic blocks they can +dramatically improve code quality. + +
+at 3: delete GET, rename t2 to t0 in (4 .. 6)
+at 7: delete GET, rename t6 to t0 in (8 .. 9)
+at 1: annul flag write OSZAP due to later OSZACP
+
+Improved code:
+           0: GETL      %EDX, t0
+           1: INCL      t0
+           2: PUTL      t0, %EDX
+           4: LDB       (t0), t0
+           5: WIDENL_Bs t0
+           6: PUTL      t0, %EAX
+           8: GETL      %ECX, t8
+           9: LEA2L     1(t8,t0,2), t4
+          10: LDB       (t4), t10
+          11: MOVB      $0x20, t12
+          12: ANDB      t12, t10  (-wOSZACP)
+          13: INCEIPo   $9
+          14: Jnzo      $0x40435A50  (-rOSZACP)
+          15: JMPo      $0x40435A5B
+
+ +

UCode instrumentation

+ +Once you understand the meaning of the instrumentation uinstrs, +discussed in detail above, the instrumentation scheme is fairly +straightforward. Each uinstr is instrumented in isolation, and the +instrumentation uinstrs are placed before the original uinstr. +Our running example continues below. I have placed a blank line +after every original ucode, to make it easier to see which +instrumentation uinstrs correspond to which originals. + +

+As mentioned somewhere above, TempRegs carrying values +have names like t28, and each one has a shadow carrying +its V bits, with names like q28. This pairing aids in +reading instrumented ucode. + +

+One decision about all this is where to have "observation points", +that is, where to check that V bits are valid. I use a minimalistic +scheme, only checking where a failure of validity could cause the +original program to (seg)fault. So the use of values as memory +addresses causes a check, as do conditional jumps (these cause a check +on the definedness of the condition codes). And arguments +PUSHed for helper calls are checked, hence the weird +restrictions on helper call preambles described above. + +

+Another decision is that once a value is tested, it is thereafter +regarded as defined, so that we do not emit multiple undefined-value +errors for the same undefined value. That means that +TESTV uinstrs are always followed by SETV +on the same (shadow) TempRegs. Most of these +SETVs are redundant and are removed by the +post-instrumentation cleanup phase. + +

+The instrumentation for calling helper functions deserves further +comment. The definedness of results from a helper is modelled using +just one V bit. So, in short, we do pessimising casts of the +definedness of all the args, down to a single bit, and then +UifU these bits together. So this single V bit will say +"undefined" if any part of any arg is undefined. This V bit is then +pessimally cast back up to the result(s) sizes, as needed. If, by +seeing that all the args are got rid of with CLEAR and +none with POP, Valgrind sees that the result of the call +is not actually used, it immediately examines the result V bit with a +TESTV -- SETV pair. If it did not do this, +there would be no observation point to detect that some of the +args to the helper were undefined. Of course, if the helper's results +are indeed used, we don't do this, since the result usage will +presumably cause the result definedness to be checked at some suitable +future point. + +

+In general Valgrind tries to track definedness on a bit-for-bit basis, +but as the above para shows, for calls to helpers we throw in the +towel and approximate down to a single bit. This is because it's too +complex and difficult to track bit-level definedness through complex +ops such as integer multiply and divide, and in any case there is no +reasonable code fragments which attempt to (eg) multiply two +partially-defined values and end up with something meaningful, so +there seems little point in modelling multiplies, divides, etc, in +that level of detail. + +

+Integer loads and stores are instrumented with firstly a test of the +definedness of the address, followed by a LOADV or +STOREV respectively. These turn into calls to +(for example) VG_(helperc_LOADV4). These helpers do two +things: they perform an address-valid check, and they load or store V +bits from/to the relevant address in the (simulated V-bit) memory. + +

+FPU loads and stores are different. As above the definedness of the +address is first tested. However, the helper routine for FPU loads +(VGM_(fpu_read_check)) emits an error if either the +address is invalid or the referenced area contains undefined values. +It has to do this because we do not simulate the FPU at all, and so +cannot track definedness of values loaded into it from memory, so we +have to check them as soon as they are loaded into the FPU, ie, at +this point. We notionally assume that everything in the FPU is +defined. + +

+It follows therefore that FPU writes first check the definedness of +the address, then the validity of the address, and finally mark the +written bytes as well-defined. + +

+If anyone is inspired to extend Valgrind to MMX/SSE insns, I suggest +you use the same trick. It works provided that the FPU/MMX unit is +not merely used as a conduit to copy partially undefined data from +one place in memory to another. Unfortunately the integer CPU is used +like that (when copying C structs with holes, for example) and this is +the cause of much of the elaborateness of the instrumentation here +described. + +

+vg_instrument() in vg_translate.c actually +does the instrumentation. There are comments explaining how each +uinstr is handled, so we do not repeat that here. As explained +already, it is bit-accurate, except for calls to helper functions. +Unfortunately the x86 insns bt/bts/btc/btr are done by +helper fns, so bit-level accuracy is lost there. This should be fixed +by doing them inline; it will probably require adding a couple new +uinstrs. Also, left and right rotates through the carry flag (x86 +rcl and rcr) are approximated via a single +V bit; so far this has not caused anyone to complain. The +non-carry rotates, rol and ror, are much +more common and are done exactly. Re-visiting the instrumentation for +AND and OR, they seem rather verbose, and I wonder if it could be done +more concisely now. + +

+The lowercase o on many of the uopcodes in the running +example indicates that the size field is zero, usually meaning a +single-bit operation. + +

+Anyroads, the post-instrumented version of our running example looks +like this: + +

+Instrumented code:
+           0: GETVL     %EDX, q0
+           1: GETL      %EDX, t0
+
+           2: TAG1o     q0 = Left4 ( q0 )
+           3: INCL      t0
+
+           4: PUTVL     q0, %EDX
+           5: PUTL      t0, %EDX
+
+           6: TESTVL    q0
+           7: SETVL     q0
+           8: LOADVB    (t0), q0
+           9: LDB       (t0), t0
+
+          10: TAG1o     q0 = SWiden14 ( q0 )
+          11: WIDENL_Bs t0
+
+          12: PUTVL     q0, %EAX
+          13: PUTL      t0, %EAX
+
+          14: GETVL     %ECX, q8
+          15: GETL      %ECX, t8
+
+          16: MOVL      q0, q4
+          17: SHLL      $0x1, q4
+          18: TAG2o     q4 = UifU4 ( q8, q4 )
+          19: TAG1o     q4 = Left4 ( q4 )
+          20: LEA2L     1(t8,t0,2), t4
+
+          21: TESTVL    q4
+          22: SETVL     q4
+          23: LOADVB    (t4), q10
+          24: LDB       (t4), t10
+
+          25: SETVB     q12
+          26: MOVB      $0x20, t12
+
+          27: MOVL      q10, q14
+          28: TAG2o     q14 = ImproveAND1_TQ ( t10, q14 )
+          29: TAG2o     q10 = UifU1 ( q12, q10 )
+          30: TAG2o     q10 = DifD1 ( q14, q10 )
+          31: MOVL      q12, q14
+          32: TAG2o     q14 = ImproveAND1_TQ ( t12, q14 )
+          33: TAG2o     q10 = DifD1 ( q14, q10 )
+          34: MOVL      q10, q16
+          35: TAG1o     q16 = PCast10 ( q16 )
+          36: PUTVFo    q16
+          37: ANDB      t12, t10  (-wOSZACP)
+
+          38: INCEIPo   $9
+
+          39: GETVFo    q18
+          40: TESTVo    q18
+          41: SETVo     q18
+          42: Jnzo      $0x40435A50  (-rOSZACP)
+
+          43: JMPo      $0x40435A5B
+
+ + +

UCode post-instrumentation cleanup

+ +

+This pass, coordinated by vg_cleanup(), removes redundant +definedness computation created by the simplistic instrumentation +pass. It consists of two passes, +vg_propagate_definedness() followed by +vg_delete_redundant_SETVs. + +

+vg_propagate_definedness() is a simple +constant-propagation and constant-folding pass. It tries to determine +which TempRegs containing V bits will always indicate +"fully defined", and it propagates this information as far as it can, +and folds out as many operations as possible. For example, the +instrumentation for an ADD of a literal to a variable quantity will be +reduced down so that the definedness of the result is simply the +definedness of the variable quantity, since the literal is by +definition fully defined. + +

+vg_delete_redundant_SETVs removes SETVs on +shadow TempRegs for which the next action is a write. +I don't think there's anything else worth saying about this; it is +simple. Read the sources for details. + +

+So the cleaned-up running example looks like this. As above, I have +inserted line breaks after every original (non-instrumentation) uinstr +to aid readability. As with straightforward ucode optimisation, the +results in this block are undramatic because it is so short; longer +blocks benefit more because they have more redundancy which gets +eliminated. + + +

+at 29: delete UifU1 due to defd arg1
+at 32: change ImproveAND1_TQ to MOV due to defd arg2
+at 41: delete SETV
+at 31: delete MOV
+at 25: delete SETV
+at 22: delete SETV
+at 7: delete SETV
+
+           0: GETVL     %EDX, q0
+           1: GETL      %EDX, t0
+
+           2: TAG1o     q0 = Left4 ( q0 )
+           3: INCL      t0
+
+           4: PUTVL     q0, %EDX
+           5: PUTL      t0, %EDX
+
+           6: TESTVL    q0
+           8: LOADVB    (t0), q0
+           9: LDB       (t0), t0
+
+          10: TAG1o     q0 = SWiden14 ( q0 )
+          11: WIDENL_Bs t0
+
+          12: PUTVL     q0, %EAX
+          13: PUTL      t0, %EAX
+
+          14: GETVL     %ECX, q8
+          15: GETL      %ECX, t8
+
+          16: MOVL      q0, q4
+          17: SHLL      $0x1, q4
+          18: TAG2o     q4 = UifU4 ( q8, q4 )
+          19: TAG1o     q4 = Left4 ( q4 )
+          20: LEA2L     1(t8,t0,2), t4
+
+          21: TESTVL    q4
+          23: LOADVB    (t4), q10
+          24: LDB       (t4), t10
+
+          26: MOVB      $0x20, t12
+
+          27: MOVL      q10, q14
+          28: TAG2o     q14 = ImproveAND1_TQ ( t10, q14 )
+          30: TAG2o     q10 = DifD1 ( q14, q10 )
+          32: MOVL      t12, q14
+          33: TAG2o     q10 = DifD1 ( q14, q10 )
+          34: MOVL      q10, q16
+          35: TAG1o     q16 = PCast10 ( q16 )
+          36: PUTVFo    q16
+          37: ANDB      t12, t10  (-wOSZACP)
+
+          38: INCEIPo   $9
+          39: GETVFo    q18
+          40: TESTVo    q18
+          42: Jnzo      $0x40435A50  (-rOSZACP)
+
+          43: JMPo      $0x40435A5B
+
+ + +

Translation from UCode

+ +This is all very simple, even though vg_from_ucode.c +is a big file. Position-independent x86 code is generated into +a dynamically allocated array emitted_code; this is +doubled in size when it overflows. Eventually the array is handed +back to the caller of VG_(translate), who must copy +the result into TC and TT, and free the array. + +

+This file is structured into four layers of abstraction, which, +thankfully, are glued back together with extensive +__inline__ directives. From the bottom upwards: + +

    +
  • Address-mode emitters, emit_amode_regmem_reg et al. +

    +

  • Emitters for specific x86 instructions. There are quite a lot of + these, with names such as emit_movv_offregmem_reg. + The v suffix is Intel parlance for a 16/32 bit insn; + there are also b suffixes for 8 bit insns. +

    +

  • The next level up are the synth_* functions, which + synthesise possibly a sequence of raw x86 instructions to do some + simple task. Some of these are quite complex because they have to + work around Intel's silly restrictions on subregister naming. See + synth_nonshiftop_reg_reg for example. +

    +

  • Finally, at the top of the heap, we have + emitUInstr(), + which emits code for a single uinstr. +
+ +

+Some comments: +

    +
  • The hack for FPU instructions becomes apparent here. To do a + FPU ucode instruction, we load the simulated FPU's + state from its VG_(baseBlock) into the real FPU + using an x86 frstor insn, do the ucode + FPU insn on the real CPU, and write the updated FPU + state back into VG_(baseBlock) using an + fnsave instruction. This is pretty brutal, but is + simple and it works, and even seems tolerably efficient. There is + no attempt to cache the simulated FPU state in the real FPU over + multiple back-to-back ucode FPU instructions. +

    + FPU_R and FPU_W are also done this way, + with the minor complication that we need to patch in some + addressing mode bits so the resulting insn knows the effective + address to use. This is easy because of the regularity of the x86 + FPU instruction encodings. +

    +

  • An analogous trick is done with ucode insns which claim, in their + flags_r and flags_w fields, that they + read or write the simulated %EFLAGS. For such cases + we first copy the simulated %EFLAGS into the real + %eflags, then do the insn, then, if the insn says it + writes the flags, copy back to %EFLAGS. This is a + bit expensive, which is why the ucode optimisation pass goes to + some effort to remove redundant flag-update annotations. +
+ +

+And so ... that's the end of the documentation for the instrumenting +translator! It's really not that complex, because it's composed as a +sequence of simple(ish) self-contained transformations on +straight-line blocks of code. + + +

Top-level dispatch loop

+ +Urk. In VG_(toploop). This is basically boring and +unsurprising, not to mention fiddly and fragile. It needs to be +cleaned up. + +

+The only perhaps surprise is that the whole thing is run +on top of a setjmp-installed exception handler, because, +supposing a translation got a segfault, we have to bail out of the +Valgrind-supplied exception handler VG_(oursignalhandler) +and immediately start running the client's segfault handler, if it has +one. In particular we can't finish the current basic block and then +deliver the signal at some convenient future point, because signals +like SIGILL, SIGSEGV and SIGBUS mean that the faulting insn should not +simply be re-tried. (I'm sure there is a clearer way to explain this). + + +

Exceptions, creating new translations

+

Self-modifying code

+ +

Lazy updates of the simulated program counter

+ +Simulated %EIP is not updated after every simulated x86 +insn as this was regarded as too expensive. Instead ucode +INCEIP insns move it along as and when necessary. +Currently we don't allow it to fall more than 4 bytes behind reality +(see VG_(disBB) for the way this works). +

+Note that %EIP is always brought up to date by the inner +dispatch loop in VG_(dispatch), so that if the client +takes a fault we know at least which basic block this happened in. + + +

The translation cache and translation table

+ +

Signals

+ +Horrible, horrible. vg_signals.c. +Basically, since we have to intercept all system +calls anyway, we can see when the client tries to install a signal +handler. If it does so, we make a note of what the client asked to +happen, and ask the kernel to route the signal to our own signal +handler, VG_(oursignalhandler). This simply notes the +delivery of signals, and returns. + +

+Every 1000 basic blocks, we see if more signals have arrived. If so, +VG_(deliver_signals) builds signal delivery frames on the +client's stack, and allows their handlers to be run. Valgrind places +in these signal delivery frames a bogus return address, +VG_(signalreturn_bogusRA), and checks all jumps to see +if any jump to it. If so, this is a sign that a signal handler is +returning, and if so Valgrind removes the relevant signal frame from +the client's stack, restores, from the signal frame, the simulated +state before the signal was delivered, and allows the client to run +onwards. We have to do it this way because some signal handlers never +return, they just longjmp(), which nukes the signal +delivery frame. + +

+The Linux kernel has a different but equally horrible hack for +detecting signal handler returns. Discovering it is left as an +exercise for the reader. + + + +

Errors, error contexts, error reporting, suppressions

+

Client malloc/free

+

Low-level memory management

+

A and V bitmaps

+

Symbol table management

+

Dealing with system calls

+

Namespace management

+

GDB attaching

+

Non-dependence on glibc or anything else

+

The leak detector

+

Performance problems

+

Continuous sanity checking

+

Tracing, or not tracing, child processes

+

Assembly glue for syscalls

+ + +
+ +

Extensions

+ +Some comments about Stuff To Do. + +

Bugs

+ +Stephan Kulow and Marc Mutz report problems with kmail in KDE 3 CVS +(RC2 ish) when run on Valgrind. Stephan has it deadlocking; Marc has +it looping at startup. I can't repro either behaviour. Needs +repro-ing and fixing. + + +

Threads

+ +Doing a good job of thread support strikes me as almost a +research-level problem. The central issues are how to do fast cheap +locking of the VG_(primary_map) structure, whether or not +accesses to the individual secondary maps need locking, what +race-condition issues result, and whether the already-nasty mess that +is the signal simulator needs further hackery. + +

+I realise that threads are the most-frequently-requested feature, and +I am thinking about it all. If you have guru-level understanding of +fast mutual exclusion mechanisms and race conditions, I would be +interested in hearing from you. + + +

Verification suite

+ +Directory tests/ contains various ad-hoc tests for +Valgrind. However, there is no systematic verification or regression +suite, that, for example, exercises all the stuff in +vg_memory.c, to ensure that illegal memory accesses and +undefined value uses are detected as they should be. It would be good +to have such a suite. + + +

Porting to other platforms

+ +It would be great if Valgrind was ported to FreeBSD and x86 NetBSD, +and to x86 OpenBSD, if it's possible (doesn't OpenBSD use a.out-style +executables, not ELF ?) + +

+The main difficulties, for an x86-ELF platform, seem to be: + +

    +
  • You'd need to rewrite the /proc/self/maps parser + (vg_procselfmaps.c). + Easy. +

    +

  • You'd need to rewrite vg_syscall_mem.c, or, more + specifically, provide one for your OS. This is tedious, but you + can implement syscalls on demand, and the Linux kernel interface + is, for the most part, going to look very similar to the *BSD + interfaces, so it's really a copy-paste-and-modify-on-demand job. + As part of this, you'd need to supply a new + vg_kerneliface.h file. +

    +

  • You'd also need to change the syscall wrappers for Valgrind's + internal use, in vg_mylibc.c. +
+ +All in all, I think a port to x86-ELF *BSDs is not really very +difficult, and in some ways I would like to see it happen, because +that would force a more clear factoring of Valgrind into platform +dependent and independent pieces. Not to mention, *BSD folks also +deserve to use Valgrind just as much as the Linux crew do. + + +

+


+ +

Easy stuff which ought to be done

+ +

MMX instructions

+ +MMX insns should be supported, using the same trick as for FPU insns. +If the MMX registers are not used to copy uninitialised junk from one +place to another in memory, this means we don't have to actually +simulate the internal MMX unit state, so the FPU hack applies. This +should be fairly easy. + + + +

Fix stabs-info reader

+ +The machinery in vg_symtab2.c which reads "stabs" style +debugging info is pretty weak. It usually correctly translates +simulated program counter values into line numbers and procedure +names, but the file name is often completely wrong. I think the +logic used to parse "stabs" entries is weak. It should be fixed. +The simplest solution, IMO, is to copy either the logic or simply the +code out of GNU binutils which does this; since GDB can clearly get it +right, binutils (or GDB?) must have code to do this somewhere. + + + + + +

BT/BTC/BTS/BTR

+ +These are x86 instructions which test, complement, set, or reset, a +single bit in a word. At the moment they are both incorrectly +implemented and incorrectly instrumented. + +

+The incorrect instrumentation is due to use of helper functions. This +means we lose bit-level definedness tracking, which could wind up +giving spurious uninitialised-value use errors. The Right Thing to do +is to invent a couple of new UOpcodes, I think GET_BIT +and SET_BIT, which can be used to implement all 4 x86 +insns, get rid of the helpers, and give bit-accurate instrumentation +rules for the two new UOpcodes. + +

+I realised the other day that they are mis-implemented too. The x86 +insns take a bit-index and a register or memory location to access. +For registers the bit index clearly can only be in the range zero to +register-width minus 1, and I assumed the same applied to memory +locations too. But evidently not; for memory locations the index can +be arbitrary, and the processor will index arbitrarily into memory as +a result. This too should be fixed. Sigh. Presumably indexing +outside the immediate word is not actually used by any programs yet +tested on Valgrind, for otherwise they (presumably) would simply not +work at all. If you plan to hack on this, first check the Intel docs +to make sure my understanding is really correct. + + + +

Using PREFETCH instructions

+Here's a small but potentially interesting project for performance +junkies. Experiments with valgrind's code generator and optimiser(s) +suggest that reducing the number of instructions executed in the +translations and mem-check helpers gives disappointingly small +performance improvements. Perhaps this is because performance of +Valgrindified code is limited by cache misses. After all, each read +in the original program now gives rise to at least three reads, one +for the VG_(primary_map), one for the resulting +secondary, and the original. Not to mention, the instrumented +translations are 13 to 14 times larger than the originals. All in all +one would expect the memory system to be hammered to hell and then +some. + +

+So here's an idea. An x86 insn involving a read from memory, after +instrumentation, will turn into ucode of the following form: +

+    ... calculate effective addr, into ta and qa ...
+    TESTVL qa             -- is the addr defined?
+    LOADV (ta), qloaded   -- fetch V bits for the addr
+    LOAD  (ta), tloaded   -- do the original load
+
+At the point where the LOADV is done, we know the actual +address (ta) from which the real LOAD will +be done. We also know that the LOADV will take around +20 x86 insns to do. So it seems plausible that doing a prefetch of +ta just before the LOADV might just avoid a +miss at the LOAD point, and that might be a significant +performance win. + +

+Prefetch insns are notoriously temperamental, more often than not +making things worse rather than better, so this would require +considerable fiddling around. It's complicated because Intels and +AMDs have different prefetch insns with different semantics, so that +too needs to be taken into account. As a general rule, even placing +the prefetches before the LOADV insn is too near the +LOAD; the ideal distance is apparently circa 200 CPU +cycles. So it might be worth having another analysis/transformation +pass which pushes prefetches as far back as possible, hopefully +immediately after the effective address becomes available. + +

+Doing too many prefetches is also bad because they soak up bus +bandwidth / cpu resources, so some cleverness in deciding which loads +to prefetch and which to not might be helpful. One can imagine not +prefetching client-stack-relative (%EBP or +%ESP) accesses, since the stack in general tends to show +good locality anyway. + +

+There's quite a lot of experimentation to do here, but I think it +might make an interesting week's work for someone. + +

+As of 15-ish March 2002, I've started to experiment with this, using +the AMD prefetch/prefetchw insns. + + + +

User-defined permission ranges

+ +This is quite a large project -- perhaps a month's hacking for a +capable hacker to do a good job -- but it's potentially very +interesting. The outcome would be that Valgrind could detect a +whole class of bugs which it currently cannot. + +

+The presentation falls into two pieces. + +

+Part 1: user-defined address-range permission setting +

+ +Valgrind intercepts the client's malloc, +free, etc calls, watches system calls, and watches the +stack pointer move. This is currently the only way it knows about +which addresses are valid and which not. Sometimes the client program +knows extra information about its memory areas. For example, the +client could at some point know that all elements of an array are +out-of-date. We would like to be able to convey to Valgrind this +information that the array is now addressable-but-uninitialised, so +that Valgrind can then warn if elements are used before they get new +values. + +

+What I would like are some macros like this: +

+   VALGRIND_MAKE_NOACCESS(addr, len)
+   VALGRIND_MAKE_WRITABLE(addr, len)
+   VALGRIND_MAKE_READABLE(addr, len)
+
+and also, to check that memory is addressable/initialised, +
+   VALGRIND_CHECK_ADDRESSIBLE(addr, len)
+   VALGRIND_CHECK_INITIALISED(addr, len)
+
+ +

+I then include in my sources a header defining these macros, rebuild +my app, run under Valgrind, and get user-defined checks. + +

+Now here's a neat trick. It's a nuisance to have to re-link the app +with some new library which implements the above macros. So the idea +is to define the macros so that the resulting executable is still +completely stand-alone, and can be run without Valgrind, in which case +the macros do nothing, but when run on Valgrind, the Right Thing +happens. How to do this? The idea is for these macros to turn into a +piece of inline assembly code, which (1) has no effect when run on the +real CPU, (2) is easily spotted by Valgrind's JITter, and (3) no sane +person would ever write, which is important for avoiding false matches +in (2). So here's a suggestion: +

+   VALGRIND_MAKE_NOACCESS(addr, len)
+
+becomes (roughly speaking) +
+   movl addr, %eax
+   movl len,  %ebx
+   movl $1,   %ecx   -- 1 describes the action; MAKE_WRITABLE might be
+                     -- 2, etc
+   rorl $13, %ecx
+   rorl $19, %ecx
+   rorl $11, %eax
+   rorl $21, %eax
+
+The rotate sequences have no effect, and it's unlikely they would +appear for any other reason, but they define a unique byte-sequence +which the JITter can easily spot. Using the operand constraints +section at the end of a gcc inline-assembly statement, we can tell gcc +that the assembly fragment kills %eax, %ebx, +%ecx and the condition codes, so this fragment is made +harmless when not running on Valgrind, runs quickly when not on +Valgrind, and does not require any other library support. + + +

+Part 2: using it to detect interference between stack variables +

+ +Currently Valgrind cannot detect errors of the following form: +

+void fooble ( void )
+{
+   int a[10];
+   int b[10];
+   a[10] = 99;
+}
+
+Now imagine rewriting this as +
+void fooble ( void )
+{
+   int spacer0;
+   int a[10];
+   int spacer1;
+   int b[10];
+   int spacer2;
+   VALGRIND_MAKE_NOACCESS(&spacer0, sizeof(int));
+   VALGRIND_MAKE_NOACCESS(&spacer1, sizeof(int));
+   VALGRIND_MAKE_NOACCESS(&spacer2, sizeof(int));
+   a[10] = 99;
+}
+
+Now the invalid write is certain to hit spacer0 or +spacer1, so Valgrind will spot the error. + +

+There are two complications. + +

+The first is that we don't want to annotate sources by hand, so the +Right Thing to do is to write a C/C++ parser, annotator, prettyprinter +which does this automatically, and run it on post-CPP'd C/C++ source. +See http://www.cacheprof.org for an example of a system which +transparently inserts another phase into the gcc/g++ compilation +route. The parser/prettyprinter is probably not as hard as it sounds; +I would write it in Haskell, a powerful functional language well +suited to doing symbolic computation, with which I am intimately +familiar. There is already a C parser written in Haskell by someone in +the Haskell community, and that would probably be a good starting +point. + +

+The second complication is how to get rid of these +NOACCESS records inside Valgrind when the instrumented +function exits; after all, these refer to stack addresses and will +make no sense whatever when some other function happens to re-use the +same stack address range, probably shortly afterwards. I think I +would be inclined to define a special stack-specific macro +

+   VALGRIND_MAKE_NOACCESS_STACK(addr, len)
+
+which causes Valgrind to record the client's %ESP at the +time it is executed. Valgrind will then watch for changes in +%ESP and discard such records as soon as the protected +area is uncovered by an increase in %ESP. I hesitate +with this scheme only because it is potentially expensive, if there +are hundreds of such records, and considering that changes in +%ESP already require expensive messing with stack access +permissions. + +

+This is probably easier and more robust than for the instrumenter +program to try and spot all exit points for the procedure and place +suitable deallocation annotations there. Plus C++ procedures can +bomb out at any point if they get an exception, so spotting return +points at the source level just won't work at all. + +

+Although some work, it's all eminently doable, and it would make +Valgrind into an even-more-useful tool. + +

+Update: as of 17 March 2002, this (these hooks) are done. + + +

+ + diff --git a/coregrind/valgrind.in b/coregrind/valgrind.in new file mode 100755 index 000000000..f791c663a --- /dev/null +++ b/coregrind/valgrind.in @@ -0,0 +1,167 @@ +#!/bin/sh + +# Should point to the installation directory +prefix="@prefix@" +exec_prefix="@exec_prefix@" +VALGRIND="@libdir@/valgrind" + + +# Other stuff ... +version="@VERSION@" +emailto="jseward@acm.org" + +# The default name of the suppressions file +vgsupp="--suppressions=$VALGRIND/default.supp" + +# name we were invoked with +vgname=`echo $0 | sed 's,^.*/,,'` + +# Valgrind options +vgopts= + +# Prog and arg to run +argopts= + +# Show usage info? +dousage=0 + +# show version info? +doversion=0 + +# Collect up args for Valgrind +for arg +do + case "$arg" in +# options for the user + --help) dousage=1; break;; + --version) doversion=1; break;; + --logfile-fd=*) vgopts="$vgopts $arg"; shift;; + -v) vgopts="$vgopts $arg"; shift;; + --verbose) vgopts="$vgopts -v"; shift;; + -q) vgopts="$vgopts $arg"; shift;; + --quiet) vgopts="$vgopts $arg"; shift;; + --gdb-attach=no) vgopts="$vgopts $arg"; shift;; + --gdb-attach=yes) vgopts="$vgopts $arg"; shift;; + --demangle=no) vgopts="$vgopts $arg"; shift;; + --demangle=yes) vgopts="$vgopts $arg"; shift;; + --num-callers=*) vgopts="$vgopts $arg"; shift;; + --partial-loads-ok=no) vgopts="$vgopts $arg"; shift;; + --partial-loads-ok=yes) vgopts="$vgopts $arg"; shift;; + --leak-check=no) vgopts="$vgopts $arg"; shift;; + --leak-check=yes) vgopts="$vgopts $arg"; shift;; + --show-reachable=no) vgopts="$vgopts $arg"; shift;; + --show-reachable=yes) vgopts="$vgopts $arg"; shift;; + --leak-resolution=low) vgopts="$vgopts $arg"; shift;; + --leak-resolution=med) vgopts="$vgopts $arg"; shift;; + --leak-resolution=high) vgopts="$vgopts $arg"; shift;; + --sloppy-malloc=no) vgopts="$vgopts $arg"; shift;; + --sloppy-malloc=yes) vgopts="$vgopts $arg"; shift;; + --trace-children=no) vgopts="$vgopts $arg"; shift;; + --trace-children=yes) vgopts="$vgopts $arg"; shift;; + 
--workaround-gcc296-bugs=no) vgopts="$vgopts $arg"; shift;; + --workaround-gcc296-bugs=yes) vgopts="$vgopts $arg"; shift;; + --freelist-vol=*) vgopts="$vgopts $arg"; shift;; + --suppressions=*) vgopts="$vgopts $arg"; shift;; +# options for debugging Valgrind + --sanity-level=*) vgopts="$vgopts $arg"; shift;; + --single-step=yes) vgopts="$vgopts $arg"; shift;; + --single-step=no) vgopts="$vgopts $arg"; shift;; + --optimise=yes) vgopts="$vgopts $arg"; shift;; + --optimise=no) vgopts="$vgopts $arg"; shift;; + --instrument=yes) vgopts="$vgopts $arg"; shift;; + --instrument=no) vgopts="$vgopts $arg"; shift;; + --cleanup=yes) vgopts="$vgopts $arg"; shift;; + --cleanup=no) vgopts="$vgopts $arg"; shift;; + --client-perms=yes) vgopts="$vgopts $arg"; shift;; + --client-perms=no) vgopts="$vgopts $arg"; shift;; + --smc-check=none) vgopts="$vgopts $arg"; shift;; + --smc-check=some) vgopts="$vgopts $arg"; shift;; + --smc-check=all) vgopts="$vgopts $arg"; shift;; + --trace-syscalls=yes) vgopts="$vgopts $arg"; shift;; + --trace-syscalls=no) vgopts="$vgopts $arg"; shift;; + --trace-signals=yes) vgopts="$vgopts $arg"; shift;; + --trace-signals=no) vgopts="$vgopts $arg"; shift;; + --trace-symtab=yes) vgopts="$vgopts $arg"; shift;; + --trace-symtab=no) vgopts="$vgopts $arg"; shift;; + --trace-malloc=yes) vgopts="$vgopts $arg"; shift;; + --trace-malloc=no) vgopts="$vgopts $arg"; shift;; + --stop-after=*) vgopts="$vgopts $arg"; shift;; + --dump-error=*) vgopts="$vgopts $arg"; shift;; + -*) dousage=1; break;; + *) break;; + esac +done + +# Collect up the prog and args to run +for arg +do + case "$arg" in + *) argopts="$argopts $arg"; shift;; + esac +done + +if [ z"$doversion" = z1 ]; then + echo "valgrind-$version" + exit 1 +fi + +if [ z"$argopts" = z -o z"$dousage" = z1 ]; then + echo + echo "usage: $vgname [options] prog-and-args" + echo + echo " options for the user, with defaults in [ ], are:" + echo " --help show this message" + echo " --version show version" + echo " -q --quiet run 
silently; only print error msgs" + echo " -v --verbose be more verbose, incl counts of errors" + echo " --gdb-attach=no|yes start GDB when errors detected? [no]" + echo " --demangle=no|yes automatically demangle C++ names? [yes]" + echo " --num-callers= show callers in stack traces [4]" + echo " --partial-loads-ok=no|yes too hard to explain here; see manual [yes]" + echo " --leak-check=no|yes search for memory leaks at exit? [no]" + echo " --leak-resolution=low|med|high" + echo " amount of bt merging in leak check [low]" + echo " --show-reachable=no|yes show reachable blocks in leak check? [no]" + echo " --sloppy-malloc=no|yes round malloc sizes to next word? [no]" + echo " --trace-children=no|yes Valgrind-ise child processes? [no]" + echo " --logfile-fd= file descriptor for messages [2=stderr]" + echo " --freelist-vol= volume of freed blocks queue [1000000]" + echo " --workaround-gcc296-bugs=no|yes self explanatory [no]" + echo " --suppressions= suppress errors described in" + echo " suppressions file " + echo " --client-perms=no|yes handle client VG_MAKE_* requests? [no]" + echo + echo " options for debugging Valgrind itself are:" + echo " --sanity-level= level of sanity checking to do [1]" + echo " --single-step=no|yes translate each instr separately? [no]" + echo " --optimise=no|yes improve intermediate code? [yes]" + echo " --instrument=no|yes actually do memory checks? [yes]" + echo " --cleanup=no|yes improve after instrumentation? [yes]" + echo " --smc-check=none|some|all check writes for s-m-c? [some]" + echo " --trace-syscalls=no|yes show all system calls? [no]" + echo " --trace-signals=no|yes show signal handling details? [no]" + echo " --trace-symtab=no|yes show symbol table details? [no]" + echo " --trace-malloc=no|yes show client malloc details? 
[no]" + echo " --stop-after= switch to real CPU after executing" + echo " basic blocks [infinity]" + echo " --dump-error= show translation for basic block" + echo " associated with 'th" + echo " error context [0=don't show any]" + echo + echo " Extra options are read from env variable \$VALGRIND_OPTS" + echo + echo " Valgrind is Copyright (C) 2000-2002 Julian Seward" + echo " and licensed under the GNU General Public License, version 2." + echo " Bug reports, feedback, admiration, abuse, etc, to: $emailto." + echo + exit 1 +fi + + +VG_ARGS="$VALGRIND_OPTS $vgsupp $vgopts" +export VG_ARGS +LD_PRELOAD=$VALGRIND/valgrind.so:$LD_PRELOAD +export LD_PRELOAD +exec $argopts + + diff --git a/coregrind/vg_clientmalloc.c b/coregrind/vg_clientmalloc.c new file mode 100644 index 000000000..d2be752d0 --- /dev/null +++ b/coregrind/vg_clientmalloc.c @@ -0,0 +1,937 @@ + +/*--------------------------------------------------------------------*/ +/*--- An implementation of malloc/free for the client. ---*/ +/*--- vg_clientmalloc.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" + + +/*------------------------------------------------------------*/ +/*--- Defns ---*/ +/*------------------------------------------------------------*/ + +/* #define DEBUG_CLIENTMALLOC */ + +/* Holds malloc'd but not freed blocks. */ +#define VG_MALLOCLIST_NO(aa) (((UInt)(aa)) % VG_N_MALLOCLISTS) +static ShadowChunk* vg_malloclist[VG_N_MALLOCLISTS]; +static Bool vg_client_malloc_init_done = False; + +/* Holds blocks after freeing. */ +static ShadowChunk* vg_freed_list_start = NULL; +static ShadowChunk* vg_freed_list_end = NULL; +static Int vg_freed_list_volume = 0; + +/* Stats ... */ +static UInt vg_cmalloc_n_mallocs = 0; +static UInt vg_cmalloc_n_frees = 0; +static UInt vg_cmalloc_bs_mallocd = 0; + +static UInt vg_mlist_frees = 0; +static UInt vg_mlist_tries = 0; + + +/*------------------------------------------------------------*/ +/*--- Fns ---*/ +/*------------------------------------------------------------*/ + +/* Allocate a suitably-sized array, copy all the malloc-d block + shadows into it, and return both the array and the size of it. + This is used by the memory-leak detector. 
+*/ +ShadowChunk** VG_(get_malloc_shadows) ( /*OUT*/ UInt* n_shadows ) +{ + UInt i, scn; + ShadowChunk** arr; + ShadowChunk* sc; + *n_shadows = 0; + for (scn = 0; scn < VG_N_MALLOCLISTS; scn++) { + for (sc = vg_malloclist[scn]; sc != NULL; sc = sc->next) { + (*n_shadows)++; + } + } + if (*n_shadows == 0) return NULL; + + arr = VG_(malloc)( VG_AR_PRIVATE, + *n_shadows * sizeof(ShadowChunk*) ); + + i = 0; + for (scn = 0; scn < VG_N_MALLOCLISTS; scn++) { + for (sc = vg_malloclist[scn]; sc != NULL; sc = sc->next) { + arr[i++] = sc; + } + } + vg_assert(i == *n_shadows); + return arr; +} + +static void client_malloc_init ( void ) +{ + UInt ml_no; + if (vg_client_malloc_init_done) return; + for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) + vg_malloclist[ml_no] = NULL; + vg_client_malloc_init_done = True; +} + + +static __attribute__ ((unused)) + Int count_freelist ( void ) +{ + ShadowChunk* sc; + Int n = 0; + for (sc = vg_freed_list_start; sc != NULL; sc = sc->next) + n++; + return n; +} + +static __attribute__ ((unused)) + Int count_malloclists ( void ) +{ + ShadowChunk* sc; + UInt ml_no; + Int n = 0; + for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) + for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) + n++; + return n; +} + +static __attribute__ ((unused)) + void freelist_sanity ( void ) +{ + ShadowChunk* sc; + Int n = 0; + /* VG_(printf)("freelist sanity\n"); */ + for (sc = vg_freed_list_start; sc != NULL; sc = sc->next) + n += sc->size; + vg_assert(n == vg_freed_list_volume); +} + +/* Remove sc from malloc list # sc. It is an unchecked error for + sc not to be present in the list. 
+*/ +static void remove_from_malloclist ( UInt ml_no, ShadowChunk* sc ) +{ + ShadowChunk *sc1, *sc2; + if (sc == vg_malloclist[ml_no]) { + vg_malloclist[ml_no] = vg_malloclist[ml_no]->next; + } else { + sc1 = vg_malloclist[ml_no]; + vg_assert(sc1 != NULL); + sc2 = sc1->next; + while (sc2 != sc) { + vg_assert(sc2 != NULL); + sc1 = sc2; + sc2 = sc2->next; + } + vg_assert(sc1->next == sc); + vg_assert(sc2 == sc); + sc1->next = sc2->next; + } +} + + +/* Put a shadow chunk on the freed blocks queue, possibly freeing up + some of the oldest blocks in the queue at the same time. */ + +static void add_to_freed_queue ( ShadowChunk* sc ) +{ + ShadowChunk* sc1; + + /* Put it at the end of the freed list */ + if (vg_freed_list_end == NULL) { + vg_assert(vg_freed_list_start == NULL); + vg_freed_list_end = vg_freed_list_start = sc; + vg_freed_list_volume = sc->size; + } else { + vg_assert(vg_freed_list_end->next == NULL); + vg_freed_list_end->next = sc; + vg_freed_list_end = sc; + vg_freed_list_volume += sc->size; + } + sc->next = NULL; + + /* Release enough of the oldest blocks to bring the free queue + volume below vg_clo_freelist_vol. */ + + while (vg_freed_list_volume > VG_(clo_freelist_vol)) { + /* freelist_sanity(); */ + vg_assert(vg_freed_list_start != NULL); + vg_assert(vg_freed_list_end != NULL); + + sc1 = vg_freed_list_start; + vg_freed_list_volume -= sc1->size; + /* VG_(printf)("volume now %d\n", vg_freed_list_volume); */ + vg_assert(vg_freed_list_volume >= 0); + + if (vg_freed_list_start == vg_freed_list_end) { + vg_freed_list_start = vg_freed_list_end = NULL; + } else { + vg_freed_list_start = sc1->next; + } + sc1->next = NULL; /* just paranoia */ + VG_(free)(VG_AR_CLIENT, (void*)(sc1->data)); + VG_(free)(VG_AR_PRIVATE, sc1); + } +} + + +/* Allocate a user-chunk of size bytes. Also allocate its shadow + block, make the shadow block point at the user block. Put the + shadow chunk on the appropriate list, and set all memory + protections correctly. 
*/ + +static ShadowChunk* client_malloc_shadow ( UInt align, UInt size, + VgAllocKind kind ) +{ + ShadowChunk* sc; + Addr p; + UInt ml_no; + +# ifdef DEBUG_CLIENTMALLOC + VG_(printf)("[m %d, f %d (%d)] client_malloc_shadow ( al %d, sz %d )\n", + count_malloclists(), + count_freelist(), vg_freed_list_volume, + align, size ); +# endif + + if (align == 0) + p = (Addr)VG_(malloc)(VG_AR_CLIENT, size); + else + p = (Addr)VG_(malloc_aligned)(VG_AR_CLIENT, align, size); + + sc = VG_(malloc)(VG_AR_PRIVATE, sizeof(ShadowChunk)); + sc->where = VG_(get_ExeContext)(True); + sc->size = size; + sc->allockind = kind; + sc->data = p; + ml_no = VG_MALLOCLIST_NO(p); + sc->next = vg_malloclist[ml_no]; + vg_malloclist[ml_no] = sc; + + VGM_(make_writable)(p, size); + VGM_(make_noaccess)(p + size, + VG_AR_CLIENT_REDZONE_SZB); + VGM_(make_noaccess)(p - VG_AR_CLIENT_REDZONE_SZB, + VG_AR_CLIENT_REDZONE_SZB); + + return sc; +} + + +/* Allocate memory, noticing whether or not we are doing the full + instrumentation thing. 
*/ + +void* VG_(client_malloc) ( UInt size, UInt raw_alloc_kind ) +{ + ShadowChunk* sc; + VgAllocKind kind; + + VGP_PUSHCC(VgpCliMalloc); + client_malloc_init(); +# ifdef DEBUG_CLIENTMALLOC + VG_(printf)("[m %d, f %d (%d)] client_malloc ( %d, %x )\n", + count_malloclists(), + count_freelist(), vg_freed_list_volume, + size, raw_alloc_kind ); +# endif + if (!VG_(clo_instrument)) { + VGP_POPCC; + return VG_(malloc) ( VG_AR_CLIENT, size ); + } + switch (raw_alloc_kind) { + case 0x4002: kind = Vg_AllocNewVec; break; + case 0x4001: kind = Vg_AllocNew; break; + case 0x4000: /* malloc */ + case 6666: /* calloc */ + kind = Vg_AllocMalloc; break; + default: /* should not happen */ + /* therefore we make sure it doesn't -- JRS */ + VG_(panic)("VG_(client_malloc): raw_alloc_kind"); + break; /*NOTREACHED*/ + } + sc = client_malloc_shadow ( 0, size, kind ); + VGP_POPCC; + return (void*)(sc->data); +} + + +void* VG_(client_memalign) ( UInt align, UInt size ) +{ + ShadowChunk* sc; + VGP_PUSHCC(VgpCliMalloc); + client_malloc_init(); +# ifdef DEBUG_CLIENTMALLOC + VG_(printf)("[m %d, f %d (%d)] client_memalign ( al %d, sz %d )\n", + count_malloclists(), + count_freelist(), vg_freed_list_volume, + align, size ); +# endif + if (!VG_(clo_instrument)) { + VGP_POPCC; + return VG_(malloc_aligned) ( VG_AR_CLIENT, align, size ); + } + sc = client_malloc_shadow ( align, size, Vg_AllocMalloc ); + VGP_POPCC; + return (void*)(sc->data); +} + + +void VG_(client_free) ( void* ptrV, UInt raw_alloc_kind ) +{ + ShadowChunk* sc; + UInt ml_no; + VgAllocKind kind; + + VGP_PUSHCC(VgpCliMalloc); + client_malloc_init(); +# ifdef DEBUG_CLIENTMALLOC + VG_(printf)("[m %d, f %d (%d)] client_free ( %p, %x )\n", + count_malloclists(), + count_freelist(), vg_freed_list_volume, + ptrV, raw_alloc_kind ); +# endif + if (!VG_(clo_instrument)) { + VGP_POPCC; + VG_(free) ( VG_AR_CLIENT, ptrV ); + return; + } + + /* first, see if ptrV is one vg_client_malloc gave out. 
*/ + ml_no = VG_MALLOCLIST_NO(ptrV); + vg_mlist_frees++; + for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) { + vg_mlist_tries++; + if ((Addr)ptrV == sc->data) + break; + } + + if (sc == NULL) { + VG_(record_free_error) ( (Addr)ptrV ); + VGP_POPCC; + return; + } + + switch (raw_alloc_kind) { + case 0x5002: kind = Vg_AllocNewVec; break; + case 0x5001: kind = Vg_AllocNew; break; + case 0x5000: + default: + kind = Vg_AllocMalloc; + /* should only happen if bug in client code */ + break; + } + + /* check if its a matching free() / delete / delete [] */ + if (kind != sc->allockind) + VG_(record_freemismatch_error) ( (Addr) ptrV ); + + /* Remove the shadow chunk from the mallocd list. */ + remove_from_malloclist ( ml_no, sc ); + + /* Declare it inaccessible. */ + VGM_(make_noaccess) ( sc->data - VG_AR_CLIENT_REDZONE_SZB, + sc->size + 2*VG_AR_CLIENT_REDZONE_SZB ); + VGM_(make_noaccess) ( (Addr)sc, sizeof(ShadowChunk) ); + sc->where = VG_(get_ExeContext)(True); + + /* Put it out of harm's way for a while. */ + add_to_freed_queue ( sc ); + VGP_POPCC; +} + + + +void* VG_(client_calloc) ( UInt nmemb, UInt size1 ) +{ + ShadowChunk* sc; + Addr p; + UInt size, i, ml_no; + + VGP_PUSHCC(VgpCliMalloc); + client_malloc_init(); + +# ifdef DEBUG_CLIENTMALLOC + VG_(printf)("[m %d, f %d (%d)] client_calloc ( %d, %d )\n", + count_malloclists(), + count_freelist(), vg_freed_list_volume, + nmemb, size1 ); +# endif + + if (!VG_(clo_instrument)) { + VGP_POPCC; + return VG_(calloc) ( VG_AR_CLIENT, nmemb, size1 ); + } + + size = nmemb * size1; + p = (Addr)VG_(malloc)(VG_AR_CLIENT, size); + sc = VG_(malloc)(VG_AR_PRIVATE, sizeof(ShadowChunk)); + sc->where = VG_(get_ExeContext)(True); + sc->size = size; + sc->allockind = Vg_AllocMalloc; /* its a lie - but true. 
eat this :) */ + sc->data = p; + ml_no = VG_MALLOCLIST_NO(p); + sc->next = vg_malloclist[ml_no]; + vg_malloclist[ml_no] = sc; + + VGM_(make_readable)(p, size); + VGM_(make_noaccess)(p + size, + VG_AR_CLIENT_REDZONE_SZB); + VGM_(make_noaccess)(p - VG_AR_CLIENT_REDZONE_SZB, + VG_AR_CLIENT_REDZONE_SZB); + + for (i = 0; i < size; i++) ((UChar*)p)[i] = 0; + + VGP_POPCC; + return (void*)p; +} + + +void* VG_(client_realloc) ( void* ptrV, UInt size_new ) +{ + ShadowChunk *sc, *sc_new; + UInt i, ml_no; + + VGP_PUSHCC(VgpCliMalloc); + client_malloc_init(); + +# ifdef DEBUG_CLIENTMALLOC + VG_(printf)("[m %d, f %d (%d)] client_realloc ( %p, %d )\n", + count_malloclists(), + count_freelist(), vg_freed_list_volume, + ptrV, size_new ); +# endif + + if (!VG_(clo_instrument)) { + vg_assert(ptrV != NULL && size_new != 0); + VGP_POPCC; + return VG_(realloc) ( VG_AR_CLIENT, ptrV, size_new ); + } + + /* First try and find the block. */ + ml_no = VG_MALLOCLIST_NO(ptrV); + for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) { + if ((Addr)ptrV == sc->data) + break; + } + + if (sc == NULL) { + VG_(record_free_error) ( (Addr)ptrV ); + /* Perhaps we should keep going regardless. 
*/ + VGP_POPCC; + return NULL; + } + + if (sc->allockind != Vg_AllocMalloc) { + /* can not realloc a range that was allocated with new or new [] */ + VG_(record_freemismatch_error) ( (Addr)ptrV ); + /* but keep going anyway */ + } + + if (sc->size == size_new) { + /* size unchanged */ + VGP_POPCC; + return ptrV; + } + if (sc->size > size_new) { + /* new size is smaller */ + VGM_(make_noaccess)( sc->data + size_new, + sc->size - size_new ); + sc->size = size_new; + VGP_POPCC; + return ptrV; + } else { + /* new size is bigger */ + sc_new = client_malloc_shadow ( 0, size_new, Vg_AllocMalloc ); + for (i = 0; i < sc->size; i++) + ((UChar*)(sc_new->data))[i] = ((UChar*)(sc->data))[i]; + VGM_(copy_address_range_perms) ( + sc->data, sc_new->data, sc->size ); + remove_from_malloclist ( VG_MALLOCLIST_NO(sc->data), sc ); + VGM_(make_noaccess) ( sc->data - VG_AR_CLIENT_REDZONE_SZB, + sc->size + 2*VG_AR_CLIENT_REDZONE_SZB ); + VGM_(make_noaccess) ( (Addr)sc, sizeof(ShadowChunk) ); + add_to_freed_queue ( sc ); + VGP_POPCC; + return (void*)sc_new->data; + } +} + + +void VG_(clientmalloc_done) ( void ) +{ + UInt nblocks, nbytes, ml_no; + ShadowChunk* sc; + + client_malloc_init(); + + nblocks = nbytes = 0; + + for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) { + for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) { + nblocks ++; + nbytes += sc->size; + } + } + + if (VG_(clo_verbosity) == 0) + return; + + VG_(message)(Vg_UserMsg, + "malloc/free: in use at exit: %d bytes in %d blocks.", + nbytes, nblocks); + VG_(message)(Vg_UserMsg, + "malloc/free: %d allocs, %d frees, %d bytes allocated.", + vg_cmalloc_n_mallocs, + vg_cmalloc_n_frees, vg_cmalloc_bs_mallocd); + if (!VG_(clo_leak_check)) + VG_(message)(Vg_UserMsg, + "For a detailed leak analysis, rerun with: --leak-check=yes"); + if (0) + VG_(message)(Vg_DebugMsg, + "free search: %d tries, %d frees", + vg_mlist_tries, + vg_mlist_frees ); + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg, ""); +} + + +/* Describe an 
address as best you can, for error messages, + putting the result in ai. */ + +void VG_(describe_addr) ( Addr a, AddrInfo* ai ) +{ + ShadowChunk* sc; + UInt ml_no; + Bool ok; + + /* Perhaps it's a user-def'd block ? */ + ok = VG_(client_perm_maybe_describe)( a, ai ); + if (ok) + return; + /* Perhaps it's on the stack? */ + if (VG_(is_plausible_stack_addr)(a) + && a >= (Addr)VG_(baseBlock)[VGOFF_(m_esp)]) { + ai->akind = Stack; + return; + } + /* Search for a freed block which might bracket it. */ + for (sc = vg_freed_list_start; sc != NULL; sc = sc->next) { + if (sc->data - VG_AR_CLIENT_REDZONE_SZB <= a + && a < sc->data + sc->size + VG_AR_CLIENT_REDZONE_SZB) { + ai->akind = Freed; + ai->blksize = sc->size; + ai->rwoffset = (Int)(a) - (Int)(sc->data); + ai->lastchange = sc->where; + return; + } + } + /* Search for a mallocd block which might bracket it. */ + for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) { + for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) { + if (sc->data - VG_AR_CLIENT_REDZONE_SZB <= a + && a < sc->data + sc->size + VG_AR_CLIENT_REDZONE_SZB) { + ai->akind = Mallocd; + ai->blksize = sc->size; + ai->rwoffset = (Int)(a) - (Int)(sc->data); + ai->lastchange = sc->where; + return; + } + } + } + /* Clueless ... */ + ai->akind = Unknown; + return; +} + +/*------------------------------------------------------------*/ +/*--- Replace the C library versions with our own. Hairy. ---*/ +/*------------------------------------------------------------*/ + +/* Below are new versions of malloc, __builtin_new, free, + __builtin_delete, calloc and realloc. + + malloc, __builtin_new, free, __builtin_delete, calloc and realloc + can be entered either on the real CPU or the simulated one. If on + the real one, this is because the dynamic linker is running the + static initialisers for C++, before starting up Valgrind itself. + In this case it is safe to route calls through to + VG_(malloc)/vg_free, since that is self-initialising. 
+ + Once Valgrind is initialised, vg_running_on_simd_CPU becomes True. + The call needs to be transferred from the simulated CPU back to the + real one and routed to the vg_client_* functions. To do that, the + args are passed to vg_trap_here, which the simulator detects. The + bogus epilogue fn call is to guarantee that gcc doesn't tailcall + vg_trap_here, since that would cause the simulator's detection to + fail -- it only checks the targets of call transfers, not jumps. + And of course we have to be sure gcc won't inline either the + vg_trap_here or vg_bogus_epilogue. Ha ha ha. What a mess. +*/ + +/* Place afterwards to guarantee it won't get inlined ... */ +static UInt vg_trap_here_WRAPPER ( UInt arg1, UInt arg2, UInt what_to_do ); +static void vg_bogus_epilogue ( void ); + +/* ALL calls to malloc wind up here. */ +void* malloc ( UInt n ) +{ + if (VG_(clo_trace_malloc)) + VG_(printf)("malloc[simd=%d](%d)", + (UInt)VG_(running_on_simd_CPU), n ); + + if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; } + + vg_cmalloc_n_mallocs ++; + vg_cmalloc_bs_mallocd += n; + + if (VG_(running_on_simd_CPU)) { + UInt v = vg_trap_here_WRAPPER ( 0, n, 0x4000 ); + vg_bogus_epilogue(); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return (void*)v; + } else { + void* v = VG_(malloc)(VG_AR_CLIENT, n); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return (void*)v; + } +} + +void* __builtin_new ( UInt n ) +{ + if (VG_(clo_trace_malloc)) + VG_(printf)("__builtin_new[simd=%d](%d)", + (UInt)VG_(running_on_simd_CPU), n ); + + if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; } + + vg_cmalloc_n_mallocs++; + vg_cmalloc_bs_mallocd += n; + + if (VG_(running_on_simd_CPU)) { + UInt v = vg_trap_here_WRAPPER ( 0, n, 0x4001 ); + vg_bogus_epilogue(); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return (void*)v; + } else { + void* v = VG_(malloc)(VG_AR_CLIENT, n); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return v; + } +} + 
+void* __builtin_vec_new ( Int n ) +{ + if (VG_(clo_trace_malloc)) + VG_(printf)("__builtin_vec_new[simd=%d](%d)", + (UInt)VG_(running_on_simd_CPU), n ); + + if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; } + + vg_cmalloc_n_mallocs++; + vg_cmalloc_bs_mallocd += n; + + if (VG_(running_on_simd_CPU)) { + UInt v = vg_trap_here_WRAPPER ( 0, n, 0x4002 ); + vg_bogus_epilogue(); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return (void*)v; + } else { + void* v = VG_(malloc)(VG_AR_CLIENT, n); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return v; + } +} + +void free ( void* p ) +{ + if (VG_(clo_trace_malloc)) + VG_(printf)("free[simd=%d](%p)\n", + (UInt)VG_(running_on_simd_CPU), p ); + vg_cmalloc_n_frees ++; + + if (p == NULL) + return; + if (VG_(running_on_simd_CPU)) { + (void)vg_trap_here_WRAPPER ( 0, (UInt)p, 0x5000 ); + vg_bogus_epilogue(); + } else { + VG_(free)(VG_AR_CLIENT, p); + } +} + +void __builtin_delete ( void* p ) +{ + if (VG_(clo_trace_malloc)) + VG_(printf)("__builtin_delete[simd=%d](%p)\n", + (UInt)VG_(running_on_simd_CPU), p ); + vg_cmalloc_n_frees ++; + + if (p == NULL) + return; + if (VG_(running_on_simd_CPU)) { + (void)vg_trap_here_WRAPPER ( 0, (UInt)p, 0x5001 ); + vg_bogus_epilogue(); + } else { + VG_(free)(VG_AR_CLIENT, p); + } +} + +void __builtin_vec_delete ( void* p ) +{ + if (VG_(clo_trace_malloc)) + VG_(printf)("__builtin_vec_delete[simd=%d](%p)\n", + (UInt)VG_(running_on_simd_CPU), p ); + vg_cmalloc_n_frees ++; + + if (p == NULL) + return; + if (VG_(running_on_simd_CPU)) { + (void)vg_trap_here_WRAPPER ( 0, (UInt)p, 0x5002 ); + vg_bogus_epilogue(); + } else { + VG_(free)(VG_AR_CLIENT, p); + } +} + +void* calloc ( UInt nmemb, UInt size ) +{ + if (VG_(clo_trace_malloc)) + VG_(printf)("calloc[simd=%d](%d,%d)", + (UInt)VG_(running_on_simd_CPU), nmemb, size ); + vg_cmalloc_n_mallocs ++; + vg_cmalloc_bs_mallocd += size * nmemb; + + if (VG_(running_on_simd_CPU)) { + UInt v = vg_trap_here_WRAPPER ( nmemb, size, 
6666 ); + vg_bogus_epilogue(); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return (void*)v; + } else { + void* v = VG_(calloc)(VG_AR_CLIENT, nmemb, size); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return v; + } +} + +void* realloc ( void* ptrV, UInt new_size ) +{ + if (VG_(clo_trace_malloc)) + VG_(printf)("realloc[simd=%d](%p,%d)", + (UInt)VG_(running_on_simd_CPU), ptrV, new_size ); + + if (VG_(clo_sloppy_malloc)) + { while ((new_size % 4) > 0) new_size++; } + + vg_cmalloc_n_frees ++; + vg_cmalloc_n_mallocs ++; + vg_cmalloc_bs_mallocd += new_size; + + if (ptrV == NULL) + return malloc(new_size); + if (new_size == 0) { + free(ptrV); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = 0\n" ); + return NULL; + } + if (VG_(running_on_simd_CPU)) { + UInt v = vg_trap_here_WRAPPER ( (UInt)ptrV, new_size, 7777 ); + vg_bogus_epilogue(); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return (void*)v; + } else { + void* v = VG_(realloc)(VG_AR_CLIENT, ptrV, new_size); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return v; + } +} + +void* memalign ( Int alignment, Int n ) +{ + if (VG_(clo_trace_malloc)) + VG_(printf)("memalign[simd=%d](al %d, size %d)", + (UInt)VG_(running_on_simd_CPU), alignment, n ); + + if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; } + + vg_cmalloc_n_mallocs ++; + vg_cmalloc_bs_mallocd += n; + + if (VG_(running_on_simd_CPU)) { + UInt v = vg_trap_here_WRAPPER ( alignment, n, 8888 ); + vg_bogus_epilogue(); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return (void*)v; + } else { + void* v = VG_(malloc_aligned)(VG_AR_CLIENT, alignment, n); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return (void*)v; + } +} + +void* valloc ( Int size ) +{ + return memalign(VKI_BYTES_PER_PAGE, size); +} + + +/* Various compatibility wrapper functions, for glibc and libstdc++. 
*/ +void cfree ( void* p ) +{ + free ( p ); +} + +void* mallinfo ( void ) +{ + VG_(message)(Vg_UserMsg, + "Warning: incorrectly-handled call to mallinfo()"); + return NULL; +} + + + +int mallopt ( int cmd, int value ) +{ + /* In glibc-2.2.4, 1 denoted a successful return value for mallopt */ + return 1; +} + + +/* Bomb out if we get any of these. */ +void pvalloc ( void ) +{ VG_(panic)("call to pvalloc\n"); } + +void malloc_stats ( void ) +{ VG_(panic)("call to malloc_stats\n"); } +void malloc_usable_size ( void ) +{ VG_(panic)("call to malloc_usable_size\n"); } +void malloc_trim ( void ) +{ VG_(panic)("call to malloc_trim\n"); } +void malloc_get_state ( void ) +{ VG_(panic)("call to malloc_get_state\n"); } +void malloc_set_state ( void ) +{ VG_(panic)("call to malloc_set_state\n"); } + + +int __posix_memalign ( void **memptr, UInt alignment, UInt size ) +{ + void *mem; + + /* Test whether the SIZE argument is valid. It must be a power of + two multiple of sizeof (void *). */ + if (size % sizeof (void *) != 0 || (size & (size - 1)) != 0) + return 22 /*EINVAL*/; + + mem = memalign (alignment, size); + + if (mem != NULL) { + *memptr = mem; + return 0; + } + + return 12 /*ENOMEM*/; +} + + +/*------------------------------------------------------------*/ +/*--- Magic supporting hacks. ---*/ +/*------------------------------------------------------------*/ + +extern UInt VG_(trap_here) ( UInt arg1, UInt arg2, UInt what_to_do ); + +static +UInt vg_trap_here_WRAPPER ( UInt arg1, UInt arg2, UInt what_to_do ) +{ + /* The point of this idiocy is to make a plain, ordinary call to + vg_trap_here which vg_dispatch_when_CALL can spot. Left to + itself, with -fpic, gcc generates "call vg_trap_here@PLT" which + doesn't get spotted, for whatever reason. I guess I could check + _all_ control flow transfers, but that would be an undesirable + performance overhead. 
+ + If you compile without -fpic, gcc generates the obvious call + insn, so the wrappers below will work if they just call + vg_trap_here. But I don't want to rule out building with -fpic, + hence this hack. Sigh. + */ + UInt v; + +# define WHERE_TO VG_(trap_here) +# define STRINGIFY(xx) __STRING(xx) + + asm("# call to vg_trap_here\n" + "\t pushl %3\n" + "\t pushl %2\n" + "\t pushl %1\n" + "\t call " STRINGIFY(WHERE_TO) "\n" + "\t addl $12, %%esp\n" + "\t movl %%eax, %0\n" + : "=r" (v) + : "r" (arg1), "r" (arg2), "r" (what_to_do) + : "eax", "esp", "cc", "memory"); + return v; + +# undef WHERE_TO +# undef STRINGIFY +} + +/* Last, but not least ... */ +void vg_bogus_epilogue ( void ) +{ + /* Runs on simulated CPU only. */ +} + +UInt VG_(trap_here) ( UInt arg1, UInt arg2, UInt what_to_do ) +{ + /* Calls to this fn are detected in vg_dispatch.S and are handled + specially. So this fn should never be entered. */ + VG_(panic)("vg_trap_here called!"); + return 0; /*NOTREACHED*/ +} + + +/*--------------------------------------------------------------------*/ +/*--- end vg_clientmalloc.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_constants.h b/coregrind/vg_constants.h new file mode 100644 index 000000000..ef48ef009 --- /dev/null +++ b/coregrind/vg_constants.h @@ -0,0 +1,105 @@ + +/*--------------------------------------------------------------------*/ +/*--- A header file containing constants (for assembly code). ---*/ +/*--- vg_constants.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. 
+ + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#ifndef __VG_CONSTANTS_H +#define __VG_CONSTANTS_H + + +/* This file is included in all Valgrind source files, including + assembly ones. */ + +/* All symbols externally visible from valgrind.so are prefixed + as specified here. The prefix can be changed, so as to avoid + namespace conflict problems. +*/ +#define VGAPPEND(str1,str2) str1##str2 + +/* These macros should add different prefixes so the same base + name can safely be used across different macros. */ +#define VG_(str) VGAPPEND(vgPlain_,str) +#define VGM_(str) VGAPPEND(vgMem_,str) +#define VGP_(str) VGAPPEND(vgProf_,str) +#define VGOFF_(str) VGAPPEND(vgOff_,str) + +/* Reasons why the inner simulation loop might stop (i.e. why has + vg_dispatch_ctr reached zero? */ +#define VG_Y_SIGCHECK 0 /* signal check due */ +#define VG_Y_SMC 1 /* write to code detected */ +#define VG_Y_EXIT 2 /* natural or debug end to simulation */ +#define VG_Y_TRANSLATE 3 /* translation of vg_m_eip needed */ + +/* Check for pending signals every this-many jumps. 
Since this + happens in the region of once per millisecond, we also take the + opportunity to do a bit of quick sanity checking at the same time. + Look at the call sites of VG_(deliver_signals). */ +#define VG_SIGCHECK_INTERVAL 1000 + +/* A magic value that %ebp might be set to when returning to the + dispatcher. The only other legitimate value is to point to the + start of VG_(baseBlock). */ +#define VG_EBP_DISPATCH_CHECKED 17 + +/* Debugging hack for assembly code ... sigh. */ +#if 0 +#define OYNK(nnn) pushal; pushl $nnn; call VG_(oynk) ; addl $4,%esp; popal +#else +#define OYNK(nnn) +#endif + +#if 1 +#define OYNNK(nnn) pushal; pushl $nnn; call VG_(oynk) ; addl $4,%esp; popal +#else +#define OYNNK(nnn) +#endif + +/* Constants for the fast translation lookup cache. */ +#define VG_TT_FAST_BITS 15 +#define VG_TT_FAST_SIZE (1 << VG_TT_FAST_BITS) +#define VG_TT_FAST_MASK ((VG_TT_FAST_SIZE) - 1) + +/* Constants for the fast original-code-write check cache. */ + +/* Usually you want this to be zero. */ +#define VG_SMC_FASTCHECK_IN_C 0 + +#define VG_SMC_CACHE_BITS 19 +#define VG_SMC_CACHE_SIZE (1 << VG_SMC_CACHE_BITS) +#define VG_SMC_CACHE_MASK ((VG_SMC_CACHE_SIZE) - 1) + +#define VG_SMC_CACHE_SHIFT 6 + + +#endif /* ndef __VG_CONSTANTS_H */ + +/*--------------------------------------------------------------------*/ +/*--- end vg_constants.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_demangle.c b/coregrind/vg_demangle.c new file mode 100644 index 000000000..cb141f60a --- /dev/null +++ b/coregrind/vg_demangle.c @@ -0,0 +1,70 @@ + +/*--------------------------------------------------------------------*/ +/*--- Demangling of C++ mangled names. ---*/ +/*--- vg_demangle.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. 
+ + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" +#include "demangle.h" + +#define ADD_TO_RESULT(zzstr,zzn) \ +{ \ + Char* zz = (zzstr); \ + Int nn = (zzn); \ + Int ii; \ + for (ii = 0; ii < nn; ii++) { \ + result[n_result] = zz[ii]; \ + if (n_result < result_size-1) n_result++; \ + result[n_result] = 0; \ + } \ +} + +void VG_(demangle) ( Char* orig, Char* result, Int result_size ) +{ + Int n_result = 0; + Char* demangled = VG_(cplus_demangle) ( orig, DMGL_ANSI | DMGL_PARAMS ); + if (demangled) { + ADD_TO_RESULT(demangled, VG_(strlen)(demangled)); + VG_(free) (VG_AR_DEMANGLE, demangled); + } else { + ADD_TO_RESULT(orig, VG_(strlen)(orig)); + } + + /* Check that the demangler isn't leaking. */ + /* 15 Feb 02: if this assertion fails, this is not a disaster. + Comment it out, and let me know. (jseward@acm.org). 
*/ + vg_assert(VG_(is_empty_arena)(VG_AR_DEMANGLE)); + + /* VG_(show_all_arena_stats)(); */ +} + + +/*--------------------------------------------------------------------*/ +/*--- end vg_demangle.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_dispatch.S b/coregrind/vg_dispatch.S new file mode 100644 index 000000000..52231946e --- /dev/null +++ b/coregrind/vg_dispatch.S @@ -0,0 +1,379 @@ + +##--------------------------------------------------------------------## +##--- The core dispatch loop, for jumping to a code address. ---## +##--- vg_dispatch.S ---## +##--------------------------------------------------------------------## + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_constants.h" + + +/*------------------------------------------------------------*/ +/*--- The normal-case dispatch machinery. 
---*/ +/*------------------------------------------------------------*/ + +/* To transfer to an (original) code address, load it into %eax and + jump to vg_dispatch. This fragment of code tries to find the + address of the corresponding translation by searching the translation + table. If it fails, a new translation is made, added to the + translation table, and then jumped to. Almost all the hard + work is done by C routines; this code simply handles the + common case fast -- when the translation address is found in + the translation cache. + + At entry, %eax is the only live (real-machine) register; the + entire simulated state is tidily saved in vg_m_state. +*/ + + +/* The C world needs a way to get started simulating. So we provide + a function void vg_run_innerloop ( void ), which starts running + from vg_m_eip, and exits when the counter reaches zero. This loop + can also exit if vg_oursignalhandler() catches a non-resumable + signal, for example SIGSEGV. It then longjmp()s back past here. +*/ + +.globl VG_(run_innerloop) +VG_(run_innerloop): + #OYNK(1000) + # ----- entry point to VG_(run_innerloop) ----- + pushal + # Set up the baseBlock pointer + movl $VG_(baseBlock), %ebp + + # fetch m_eip into %eax + movl VGOFF_(m_eip), %esi + movl (%ebp, %esi, 4), %eax + + # fall thru to vg_dispatch + +.globl VG_(dispatch) +VG_(dispatch): + # %eax holds destination (original) address + # To signal any kind of interruption, set vg_dispatch_ctr + # to 1, and vg_interrupt_reason to the appropriate value + # before jumping here. + + # %ebp indicates further details of the control transfer + # requested to the address in %eax. The idea is that we + # want to check all jump targets to see if they are either + # VG_(signalreturn_bogusRA) or VG_(trap_here), both of which + # require special treatment. However, testing all branch + # targets is expensive, and anyway in most cases JITter knows + # that a jump cannot be to either of these two. 
We therefore + # adopt the following trick. + # + # If ebp == & VG_(baseBlock), which is what it started out as, + # this is a jump for which the JITter knows no check need be + # made. + # + # If it is ebp == VG_EBP_DISPATCH_CHECKED, we had better make + # the check. + # + # If %ebp has any other value, we panic. + # + # What the JITter assumes is that VG_(signalreturn_bogusRA) can + # only be arrived at from an x86 ret insn, and dually that + # VG_(trap_here) can only be arrived at from an x86 call insn. + # The net effect is that all call and return targets are checked + # but straightforward jumps are not. + # + # Thinks ... is this safe if the client happens to tailcall + # VG_(trap_here) ? I dont think that can happen -- if it did + # it would be a problem. + # + cmpl $VG_(baseBlock), %ebp + jnz dispatch_checked_maybe + +dispatch_unchecked: + # save the jump address at VG_(baseBlock)[VGOFF_(m_eip)], + # so that if this block takes a fault, we later know where we were. + movl VGOFF_(m_eip), %esi + movl %eax, (%ebp, %esi, 4) + + # do we require attention? + # this check has to be after the call/ret transfer checks, because + # we have to ensure that any control transfer following a syscall + # return is an ordinary transfer. By the time we get here, we have + # established that the next transfer, which might get delayed till + # after a syscall return, is an ordinary one. + # All a bit subtle ... + #OYNK(1001) + decl VG_(dispatch_ctr) + jz counter_is_zero + + #OYNK(1002) + # try a fast lookup in the translation cache + movl %eax, %ebx + andl $VG_TT_FAST_MASK, %ebx + # ebx = tt_fast index + movl VG_(tt_fast)(,%ebx,4), %ebx + # ebx points at a tt entry + # now compare target with the tte.orig_addr field (+0) + cmpl %eax, (%ebx) + jnz full_search + # Found a match. Set the tte.mru_epoch field (+8) + # and call the tte.trans_addr field (+4) + movl VG_(current_epoch), %ecx + movl %ecx, 8(%ebx) + call *4(%ebx) + jmp VG_(dispatch) + +full_search: + #no luck? 
try the full table search + pushl %eax + call VG_(search_transtab) + addl $4, %esp + + # %eax has trans addr or zero + cmpl $0, %eax + jz need_translation + # full table search also zeroes the tte.last_use field, + # so we dont have to do so here. + call *%eax + jmp VG_(dispatch) + +need_translation: + OYNK(1003) + movl $VG_Y_TRANSLATE, VG_(interrupt_reason) +counter_is_zero: + OYNK(1004) + popal + # ----- (the only) exit point from VG_(run_innerloop) ----- + # ----- unless of course vg_oursignalhandler longjmp()s + # ----- back through it, due to an unmanagable signal + ret + + +/* The normal way to get back to the translation loop is to put + the address of the next (original) address and return. + However, simulation of a RET insn requires a check as to whether + the next address is vg_signalreturn_bogusRA. If so, a signal + handler is returning, so we need to invoke our own mechanism to + deal with that, by calling vg_signal_returns(). This restores + the simulated machine state from the VgSigContext structure on + the stack, including the (simulated, of course) %eip saved when + the signal was delivered. We then arrange to jump to the + restored %eip. +*/ +dispatch_checked_maybe: + # Possibly a checked dispatch. Sanity check ... + cmpl $VG_EBP_DISPATCH_CHECKED, %ebp + jz dispatch_checked + # ebp has an invalid value ... crap out. + pushl $panic_msg_ebp + call VG_(panic) + # (never returns) + +dispatch_checked: + OYNK(2000) + # first off, restore %ebp -- since it is currently wrong + movl $VG_(baseBlock), %ebp + + # see if we need to mess with stack blocks + pushl %ebp + pushl %eax + call VG_(delete_client_stack_blocks_following_ESP_change) + popl %eax + popl %ebp + + # is this a signal return? + cmpl $VG_(signalreturn_bogusRA), %eax + jz dispatch_to_signalreturn_bogusRA + # should we intercept this call? + cmpl $VG_(trap_here), %eax + jz dispatch_to_trap_here + # ok, its not interesting. Handle the normal way. 
+ jmp dispatch_unchecked + +dispatch_to_signalreturn_bogusRA: + OYNK(2001) + pushal + call VG_(signal_returns) + popal + # %EIP will now point to the insn which should have followed + # the signal delivery. Jump to it. Since we no longer have any + # hint from the JITter about whether or not it is checkable, + # go via the conservative route. + movl VGOFF_(m_eip), %esi + movl (%ebp, %esi, 4), %eax + jmp dispatch_checked + + +/* Similarly, check CALL targets to see if it is the ultra-magical + vg_trap_here(), and, if so, act accordingly. See vg_clientmalloc.c. + Be careful not to get the real and simulated CPUs, + stacks and regs mixed up ... +*/ +dispatch_to_trap_here: + OYNK(111) + /* Considering the params to vg_trap_here(), we should have: + 12(%ESP) is what_to_do + 8(%ESP) is arg2 + 4(%ESP) is arg1 + 0(%ESP) is return address + */ + movl VGOFF_(m_esp), %esi + movl (%ebp, %esi, 4), %ebx + # %ebx now holds simulated %ESP + cmpl $0x4000, 12(%ebx) + jz handle_malloc + cmpl $0x4001, 12(%ebx) + jz handle_malloc + cmpl $0x4002, 12(%ebx) + jz handle_malloc + cmpl $0x5000, 12(%ebx) + jz handle_free + cmpl $0x5001, 12(%ebx) + jz handle_free + cmpl $0x5002, 12(%ebx) + jz handle_free + cmpl $6666, 12(%ebx) + jz handle_calloc + cmpl $7777, 12(%ebx) + jz handle_realloc + cmpl $8888, 12(%ebx) + jz handle_memalign + push $panic_msg_trap + call VG_(panic) + # vg_panic never returns + +handle_malloc: + # %ESP is in %ebx + pushl 12(%ebx) + pushl 8(%ebx) + call VG_(client_malloc) + addl $8, %esp + # returned value is in %eax + jmp save_eax_and_simulate_RET + +handle_free: + # %ESP is in %ebx + pushl 12(%ebx) + pushl 8(%ebx) + call VG_(client_free) + addl $8, %esp + jmp simulate_RET + +handle_calloc: + # %ESP is in %ebx + pushl 8(%ebx) + pushl 4(%ebx) + call VG_(client_calloc) + addl $8, %esp + # returned value is in %eax + jmp save_eax_and_simulate_RET + +handle_realloc: + # %ESP is in %ebx + pushl 8(%ebx) + pushl 4(%ebx) + call VG_(client_realloc) + addl $8, %esp + # returned 
value is in %eax + jmp save_eax_and_simulate_RET + +handle_memalign: + # %ESP is in %ebx + pushl 8(%ebx) + pushl 4(%ebx) + call VG_(client_memalign) + addl $8, %esp + # returned value is in %eax + jmp save_eax_and_simulate_RET + +save_eax_and_simulate_RET: + movl VGOFF_(m_eax), %esi + movl %eax, (%ebp, %esi, 4) # %eax -> %EAX + # set %EAX bits to VALID + movl VGOFF_(sh_eax), %esi + movl $0x0 /* All 32 bits VALID */, (%ebp, %esi, 4) + # fall thru ... +simulate_RET: + # standard return + movl VGOFF_(m_esp), %esi + movl (%ebp, %esi, 4), %ebx # %ESP -> %ebx + movl 0(%ebx), %eax # RA -> %eax + addl $4, %ebx # %ESP += 4 + movl %ebx, (%ebp, %esi, 4) # %ebx -> %ESP + jmp dispatch_checked # jump to %eax + +.data +panic_msg_trap: +.ascii "dispatch_to_trap_here: unknown what_to_do" +.byte 0 +panic_msg_ebp: +.ascii "vg_dispatch: %ebp has invalid value!" +.byte 0 +.text + + +/*------------------------------------------------------------*/ +/*--- A helper for delivering signals when the client is ---*/ +/*--- (presumably) blocked in a system call. ---*/ +/*------------------------------------------------------------*/ + +/* Returns, in %eax, the next orig_addr to run. + The caller needs to decide whether the returned orig_addr + requires special handling. + + extern Addr VG_(run_singleton_translation) ( Addr trans_addr ) +*/ + +/* should we take care to save the FPU state here? 
*/ + +.globl VG_(run_singleton_translation) +VG_(run_singleton_translation): + movl 4(%esp), %eax # eax = trans_addr + pushl %ebx + pushl %ecx + pushl %edx + pushl %esi + pushl %edi + pushl %ebp + + # set up ebp correctly for translations + movl $VG_(baseBlock), %ebp + + # run the translation + call *%eax + + # next orig_addr is correctly in %eax already + + popl %ebp + popl %edi + popl %esi + popl %edx + popl %ecx + popl %ebx + + ret + +##--------------------------------------------------------------------## +##--- end vg_dispatch.S ---## +##--------------------------------------------------------------------## diff --git a/coregrind/vg_errcontext.c b/coregrind/vg_errcontext.c new file mode 100644 index 000000000..42e09b53a --- /dev/null +++ b/coregrind/vg_errcontext.c @@ -0,0 +1,1070 @@ + +/*--------------------------------------------------------------------*/ +/*--- Management of error messages. vg_errcontext.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. 
+ + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" +#include "vg_constants.h" + + +/*------------------------------------------------------------*/ +/*--- Defns ---*/ +/*------------------------------------------------------------*/ + +/* Suppression is a type describing an error which we want to + suppress, ie, not show the user, usually because it is caused by a + problem in a library which we can't fix, replace or work around. + Suppressions are read from a file at startup time, specified by + vg_clo_suppressions, and placed in the vg_suppressions list. This + gives flexibility so that new suppressions can be added to the file + as and when needed. +*/ +typedef + enum { + /* Bad syscall params */ + Param, + /* Use of invalid values of given size */ + Value0, Value1, Value2, Value4, Value8, + /* Invalid read/write attempt at given size */ + Addr1, Addr2, Addr4, Addr8, + /* Invalid or mismatching free */ + FreeS + } + SuppressionKind; + + +/* For each caller specified for a suppression, record the nature of + the caller name. */ +typedef + enum { + /* Name is of an shared object file. */ + ObjName, + /* Name is of a function. */ + FunName + } + SuppressionLocTy; + + +/* A complete suppression record. */ +typedef + struct _Suppression { + struct _Suppression* next; + /* The number of times this error has been suppressed. */ + Int count; + /* The name by which the suppression is referred to. */ + Char* sname; + /* What kind of suppression. */ + SuppressionKind skind; + /* Name of syscall param if skind==Param */ + Char* param; + /* Name of fn where err occurs, and immediate caller (mandatory). */ + SuppressionLocTy caller0_ty; + Char* caller0; + SuppressionLocTy caller1_ty; + Char* caller1; + /* Optional extra callers. 
*/ + SuppressionLocTy caller2_ty; + Char* caller2; + SuppressionLocTy caller3_ty; + Char* caller3; + } + Suppression; + + +/* ErrContext is a type for recording just enough info to generate an + error report for an illegal memory access. The idea is that + (typically) the same few points in the program generate thousands + of illegal accesses, and we don't want to spew out a fresh error + message for each one. Instead, we use these structures to common + up duplicates. +*/ + +/* What kind of error it is. */ +typedef + enum { ValueErr, AddrErr, + ParamErr, UserErr, /* behaves like an anonymous ParamErr */ + FreeErr, FreeMismatchErr } + ErrKind; + +/* What kind of memory access is involved in the error? */ +typedef + enum { ReadAxs, WriteAxs, ExecAxs } + AxsKind; + +/* Top-level struct for recording errors. */ +typedef + struct _ErrContext { + /* ALL */ + struct _ErrContext* next; + /* ALL */ + /* NULL if unsuppressed; or ptr to suppression record. */ + Suppression* supp; + /* ALL */ + Int count; + /* ALL */ + ErrKind ekind; + /* ALL */ + ExeContext* where; + /* Addr */ + AxsKind axskind; + /* Addr, Value */ + Int size; + /* Addr, Free, Param, User */ + Addr addr; + /* Addr, Free, Param, User */ + AddrInfo addrinfo; + /* Param */ + Char* syscall_param; + /* Param, User */ + Bool isWriteableLack; + } + ErrContext; + +/* The list of error contexts found, both suppressed and unsuppressed. + Initially empty, and grows as errors are detected. */ +static ErrContext* vg_err_contexts = NULL; + +/* The list of suppression directives, as read from the specified + suppressions file. */ +static Suppression* vg_suppressions = NULL; + +/* Running count of unsuppressed errors detected. */ +static UInt vg_n_errs_found = 0; + +/* Running count of suppressed errors detected. */ +static UInt vg_n_errs_suppressed = 0; + +/* forwards ... 
*/ +static Suppression* is_suppressible_error ( ErrContext* ec ); + + +/*------------------------------------------------------------*/ +/*--- Helper fns ---*/ +/*------------------------------------------------------------*/ + + +static void clear_AddrInfo ( AddrInfo* ai ) +{ + ai->akind = Unknown; + ai->blksize = 0; + ai->rwoffset = 0; + ai->lastchange = NULL; +} + +static void clear_ErrContext ( ErrContext* ec ) +{ + ec->next = NULL; + ec->supp = NULL; + ec->count = 0; + ec->ekind = ValueErr; + ec->where = NULL; + ec->axskind = ReadAxs; + ec->size = 0; + ec->addr = 0; + clear_AddrInfo ( &ec->addrinfo ); + ec->syscall_param = NULL; + ec->isWriteableLack = False; +} + + +static __inline__ +Bool vg_eq_ExeContext ( Bool top_2_only, + ExeContext* e1, ExeContext* e2 ) +{ + /* Note that frames after the 4th are always ignored. */ + if (top_2_only) { + return VG_(eq_ExeContext_top2(e1, e2)); + } else { + return VG_(eq_ExeContext_top4(e1, e2)); + } +} + + +static Bool eq_AddrInfo ( Bool cheap_addr_cmp, + AddrInfo* ai1, AddrInfo* ai2 ) +{ + if (ai1->akind != ai2->akind) + return False; + if (ai1->akind == Freed || ai1->akind == Mallocd) { + if (ai1->blksize != ai2->blksize) + return False; + if (!vg_eq_ExeContext(cheap_addr_cmp, + ai1->lastchange, ai2->lastchange)) + return False; + } + return True; +} + +/* Compare error contexts, to detect duplicates. Note that if they + are otherwise the same, the faulting addrs and associated rwoffsets + are allowed to be different. 
*/ + +static Bool eq_ErrContext ( Bool cheap_addr_cmp, + ErrContext* e1, ErrContext* e2 ) +{ + if (e1->ekind != e2->ekind) + return False; + if (!vg_eq_ExeContext(cheap_addr_cmp, e1->where, e2->where)) + return False; + + switch (e1->ekind) { + case UserErr: + case ParamErr: + if (e1->isWriteableLack != e2->isWriteableLack) return False; + if (e1->ekind == ParamErr + && 0 != VG_(strcmp)(e1->syscall_param, e2->syscall_param)) + return False; + return True; + case FreeErr: + case FreeMismatchErr: + if (e1->addr != e2->addr) return False; + if (!eq_AddrInfo(cheap_addr_cmp, &e1->addrinfo, &e2->addrinfo)) + return False; + return True; + case AddrErr: + if (e1->axskind != e2->axskind) return False; + if (e1->size != e2->size) return False; + if (!eq_AddrInfo(cheap_addr_cmp, &e1->addrinfo, &e2->addrinfo)) + return False; + return True; + case ValueErr: + if (e1->size != e2->size) return False; + return True; + default: + VG_(panic)("eq_ErrContext"); + } +} + +static void pp_AddrInfo ( Addr a, AddrInfo* ai ) +{ + switch (ai->akind) { + case Stack: + VG_(message)(Vg_UserMsg, " Address 0x%x is on the stack", a); + break; + case Unknown: + VG_(message)(Vg_UserMsg, + " Address 0x%x is not stack'd, malloc'd or free'd", a); + break; + case Freed: case Mallocd: case UserG: case UserS: { + UInt delta; + UChar* relative; + if (ai->rwoffset < 0) { + delta = (UInt)(- ai->rwoffset); + relative = "before"; + } else if (ai->rwoffset >= ai->blksize) { + delta = ai->rwoffset - ai->blksize; + relative = "after"; + } else { + delta = ai->rwoffset; + relative = "inside"; + } + if (ai->akind == UserS) { + VG_(message)(Vg_UserMsg, + " Address 0x%x is %d bytes %s a %d-byte stack red-zone created", + a, delta, relative, + ai->blksize ); + } else { + VG_(message)(Vg_UserMsg, + " Address 0x%x is %d bytes %s a block of size %d %s", + a, delta, relative, + ai->blksize, + ai->akind==Mallocd ? "alloc'd" + : ai->akind==Freed ? 
"free'd" + : "client-defined"); + } + VG_(pp_ExeContext)(ai->lastchange); + break; + } + default: + VG_(panic)("pp_AddrInfo"); + } +} + +static void pp_ErrContext ( ErrContext* ec, Bool printCount ) +{ + if (printCount) + VG_(message)(Vg_UserMsg, "Observed %d times:", ec->count ); + switch (ec->ekind) { + case ValueErr: + if (ec->size == 0) { + VG_(message)(Vg_UserMsg, + "Use of uninitialised CPU condition code"); + } else { + VG_(message)(Vg_UserMsg, + "Use of uninitialised value of size %d", + ec->size); + } + VG_(pp_ExeContext)(ec->where); + break; + case AddrErr: + switch (ec->axskind) { + case ReadAxs: + VG_(message)(Vg_UserMsg, "Invalid read of size %d", + ec->size ); + break; + case WriteAxs: + VG_(message)(Vg_UserMsg, "Invalid write of size %d", + ec->size ); + break; + case ExecAxs: + VG_(message)(Vg_UserMsg, "Jump to the invalid address " + "stated on the next line"); + break; + default: + VG_(panic)("pp_ErrContext(axskind)"); + } + VG_(pp_ExeContext)(ec->where); + pp_AddrInfo(ec->addr, &ec->addrinfo); + break; + case FreeErr: + VG_(message)(Vg_UserMsg,"Invalid free() / delete / delete[]"); + /* fall through */ + case FreeMismatchErr: + if (ec->ekind == FreeMismatchErr) + VG_(message)(Vg_UserMsg, + "Mismatched free() / delete / delete []"); + VG_(pp_ExeContext)(ec->where); + pp_AddrInfo(ec->addr, &ec->addrinfo); + break; + case ParamErr: + if (ec->isWriteableLack) { + VG_(message)(Vg_UserMsg, + "Syscall param %s contains unaddressable byte(s)", + ec->syscall_param ); + } else { + VG_(message)(Vg_UserMsg, + "Syscall param %s contains uninitialised or " + "unaddressable byte(s)", + ec->syscall_param); + } + VG_(pp_ExeContext)(ec->where); + pp_AddrInfo(ec->addr, &ec->addrinfo); + break; + case UserErr: + if (ec->isWriteableLack) { + VG_(message)(Vg_UserMsg, + "Unaddressable byte(s) found during client check request"); + } else { + VG_(message)(Vg_UserMsg, + "Uninitialised or " + "unaddressable byte(s) found during client check request"); + } + 
VG_(pp_ExeContext)(ec->where); + pp_AddrInfo(ec->addr, &ec->addrinfo); + break; + default: + VG_(panic)("pp_ErrContext"); + } +} + + +/* Figure out if we want to attach for GDB for this error, possibly + by asking the user. */ +static +Bool vg_is_GDB_attach_requested ( void ) +{ + Char ch, ch2; + Int res; + + if (VG_(clo_GDB_attach) == False) + return False; + + VG_(message)(Vg_UserMsg, ""); + + again: + VG_(printf)( + "==%d== " + "---- Attach to GDB ? --- [Return/N/n/Y/y/C/c] ---- ", + VG_(getpid)() + ); + + res = VG_(read)(0 /*stdin*/, &ch, 1); + if (res != 1) goto ioerror; + /* res == 1 */ + if (ch == '\n') return False; + if (ch != 'N' && ch != 'n' && ch != 'Y' && ch != 'y' + && ch != 'C' && ch != 'c') goto again; + + res = VG_(read)(0 /*stdin*/, &ch2, 1); + if (res != 1) goto ioerror; + if (ch2 != '\n') goto again; + + /* No, don't want to attach. */ + if (ch == 'n' || ch == 'N') return False; + /* Yes, want to attach. */ + if (ch == 'y' || ch == 'Y') return True; + /* No, don't want to attach, and don't ask again either. */ + vg_assert(ch == 'c' || ch == 'C'); + + ioerror: + VG_(clo_GDB_attach) = False; + return False; +} + + +/* Top-level entry point to the error management subsystem. All + detected errors are notified here; this routine decides if/when the + user should see the error. */ +static void VG_(maybe_add_context) ( ErrContext* ec ) +{ + ErrContext* p; + ErrContext* p_prev; + Bool cheap_addr_cmp = False; + static Bool is_first_shown_context = True; + static Bool stopping_message = False; + static Bool slowdown_message = False; + static Int vg_n_errs_shown = 0; + + /* After M_VG_COLLECT_NO_ERRORS_AFTER different errors have been + found, just refuse to collect any more. */ + if (vg_n_errs_shown >= M_VG_COLLECT_NO_ERRORS_AFTER) { + if (!stopping_message) { + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, + "More than %d errors detected. 
I'm not reporting any more.", + M_VG_COLLECT_NO_ERRORS_AFTER); + VG_(message)(Vg_UserMsg, + "Final error counts may be inaccurate. Go fix your program!"); + VG_(message)(Vg_UserMsg, ""); + stopping_message = True; + } + return; + } + + /* After M_VG_COLLECT_ERRORS_SLOWLY_AFTER different errors have + been found, be much more conservative about collecting new + ones. */ + if (vg_n_errs_shown >= M_VG_COLLECT_ERRORS_SLOWLY_AFTER) { + cheap_addr_cmp = True; + if (!slowdown_message) { + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, + "More than %d errors detected. Subsequent errors", + M_VG_COLLECT_ERRORS_SLOWLY_AFTER); + VG_(message)(Vg_UserMsg, + "will still be recorded, but in less detail than before."); + slowdown_message = True; + } + } + + + /* First, see if we've got an error record matching this one. */ + p = vg_err_contexts; + p_prev = NULL; + while (p != NULL) { + if (eq_ErrContext(cheap_addr_cmp, p, ec)) { + /* Found it. */ + p->count++; + if (p->supp != NULL) { + /* Deal correctly with suppressed errors. */ + p->supp->count++; + vg_n_errs_suppressed++; + } else { + vg_n_errs_found++; + } + + /* Move p to the front of the list so that future searches + for it are faster. */ + if (p_prev != NULL) { + vg_assert(p_prev->next == p); + p_prev->next = p->next; + p->next = vg_err_contexts; + vg_err_contexts = p; + } + return; + } + p_prev = p; + p = p->next; + } + + /* Didn't see it. Copy and add. */ + + /* OK, we're really going to collect it. */ + + p = VG_(malloc)(VG_AR_ERRCTXT, sizeof(ErrContext)); + *p = *ec; + p->next = vg_err_contexts; + p->supp = is_suppressible_error(ec); + vg_err_contexts = p; + if (p->supp == NULL) { + vg_n_errs_found++; + if (!is_first_shown_context) + VG_(message)(Vg_UserMsg, ""); + pp_ErrContext(p, False); + is_first_shown_context = False; + vg_n_errs_shown++; + /* Perhaps we want a GDB attach at this point? 
*/ + if (vg_is_GDB_attach_requested()) { + VG_(swizzle_esp_then_start_GDB)(); + } + } else { + vg_n_errs_suppressed++; + p->supp->count++; + } +} + + + + +/*------------------------------------------------------------*/ +/*--- Exported fns ---*/ +/*------------------------------------------------------------*/ + +void VG_(record_value_error) ( Int size ) +{ + ErrContext ec; + clear_ErrContext( &ec ); + ec.count = 1; + ec.next = NULL; + ec.where = VG_(get_ExeContext)( False ); + ec.ekind = ValueErr; + ec.size = size; + VG_(maybe_add_context) ( &ec ); +} + +void VG_(record_address_error) ( Addr a, Int size, Bool isWrite ) +{ + ErrContext ec; + + /* If this is caused by an access immediately below %ESP, and the + user asks nicely, we just ignore it. */ + if (VG_(clo_workaround_gcc296_bugs) && VG_(is_just_below_ESP)(a)) + return; + + clear_ErrContext( &ec ); + ec.count = 1; + ec.next = NULL; + ec.where = VG_(get_ExeContext)( False ); + ec.ekind = AddrErr; + ec.axskind = isWrite ? WriteAxs : ReadAxs; + ec.size = size; + ec.addr = a; + VG_(describe_addr) ( a, &ec.addrinfo ); + VG_(maybe_add_context) ( &ec ); +} + +void VG_(record_jump_error) ( Addr a ) +{ + ErrContext ec; + clear_ErrContext( &ec ); + ec.count = 1; + ec.next = NULL; + ec.where = VG_(get_ExeContext)( False ); + ec.ekind = AddrErr; + ec.axskind = ExecAxs; + ec.addr = a; + VG_(describe_addr) ( a, &ec.addrinfo ); + VG_(maybe_add_context) ( &ec ); +} + +void VG_(record_free_error) ( Addr a ) +{ + ErrContext ec; + clear_ErrContext( &ec ); + ec.count = 1; + ec.next = NULL; + ec.where = VG_(get_ExeContext)( True ); + ec.ekind = FreeErr; + ec.addr = a; + VG_(describe_addr) ( a, &ec.addrinfo ); + VG_(maybe_add_context) ( &ec ); +} + +void VG_(record_freemismatch_error) ( Addr a ) +{ + ErrContext ec; + clear_ErrContext( &ec ); + ec.count = 1; + ec.next = NULL; + ec.where = VG_(get_ExeContext)( True ); + ec.ekind = FreeMismatchErr; + ec.addr = a; + VG_(describe_addr) ( a, &ec.addrinfo ); + VG_(maybe_add_context) ( 
&ec ); +} + +void VG_(record_param_err) ( Addr a, Bool isWriteLack, Char* msg ) +{ + ErrContext ec; + clear_ErrContext( &ec ); + ec.count = 1; + ec.next = NULL; + ec.where = VG_(get_ExeContext)( False ); + ec.ekind = ParamErr; + ec.addr = a; + VG_(describe_addr) ( a, &ec.addrinfo ); + ec.syscall_param = msg; + ec.isWriteableLack = isWriteLack; + VG_(maybe_add_context) ( &ec ); +} + + +void VG_(record_user_err) ( Addr a, Bool isWriteLack ) +{ + ErrContext ec; + clear_ErrContext( &ec ); + ec.count = 1; + ec.next = NULL; + ec.where = VG_(get_ExeContext)( False ); + ec.ekind = UserErr; + ec.addr = a; + VG_(describe_addr) ( a, &ec.addrinfo ); + ec.isWriteableLack = isWriteLack; + VG_(maybe_add_context) ( &ec ); +} + + +void VG_(show_all_errors) ( void ) +{ + Int i, n_min; + Int n_err_contexts, n_supp_contexts; + ErrContext *p, *p_min; + Suppression *su; + Bool any_supp; + + if (VG_(clo_verbosity) == 0) + return; + + n_err_contexts = 0; + for (p = vg_err_contexts; p != NULL; p = p->next) { + if (p->supp == NULL) + n_err_contexts++; + } + + n_supp_contexts = 0; + for (su = vg_suppressions; su != NULL; su = su->next) { + if (su->count > 0) + n_supp_contexts++; + } + + VG_(message)(Vg_UserMsg, + "ERROR SUMMARY: " + "%d errors from %d contexts (suppressed: %d from %d)", + vg_n_errs_found, n_err_contexts, + vg_n_errs_suppressed, n_supp_contexts ); + + if (VG_(clo_verbosity) <= 1) + return; + + /* Print the contexts in order of increasing error count. 
*/ + for (i = 0; i < n_err_contexts; i++) { + n_min = (1 << 30) - 1; + p_min = NULL; + for (p = vg_err_contexts; p != NULL; p = p->next) { + if (p->supp != NULL) continue; + if (p->count < n_min) { + n_min = p->count; + p_min = p; + } + } + if (p_min == NULL) VG_(panic)("pp_AllErrContexts"); + + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, "%d errors in context %d of %d:", + p_min->count, + i+1, n_err_contexts); + pp_ErrContext( p_min, False ); + + if ((i+1 == VG_(clo_dump_error))) { + VG_(translate) ( p_min->where->eips[0], NULL, NULL, NULL ); + } + + p_min->count = 1 << 30; + } + + if (n_supp_contexts > 0) + VG_(message)(Vg_DebugMsg, ""); + any_supp = False; + for (su = vg_suppressions; su != NULL; su = su->next) { + if (su->count > 0) { + any_supp = True; + VG_(message)(Vg_DebugMsg, "supp: %4d %s", su->count, + su->sname); + } + } + + if (n_err_contexts > 0) { + if (any_supp) + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, + "IN SUMMARY: " + "%d errors from %d contexts (suppressed: %d from %d)", + vg_n_errs_found, n_err_contexts, + vg_n_errs_suppressed, + n_supp_contexts ); + VG_(message)(Vg_UserMsg, ""); + } +} + +/*------------------------------------------------------------*/ +/*--- Standard suppressions ---*/ +/*------------------------------------------------------------*/ + +/* Get a non-blank, non-comment line of at most nBuf chars from fd. + Skips leading spaces on the line. Return True if EOF was hit instead. +*/ + +#define VG_ISSPACE(ch) (((ch)==' ') || ((ch)=='\n') || ((ch)=='\t')) + +static Bool getLine ( Int fd, Char* buf, Int nBuf ) +{ + Char ch; + Int n, i; + while (True) { + /* First, read until a non-blank char appears. */ + while (True) { + n = VG_(read)(fd, &ch, 1); + if (n == 1 && !VG_ISSPACE(ch)) break; + if (n == 0) return True; + } + + /* Now, read the line into buf. 
*/ + i = 0; + buf[i++] = ch; buf[i] = 0; + while (True) { + n = VG_(read)(fd, &ch, 1); + if (n == 0) return False; /* the next call will return True */ + if (ch == '\n') break; + if (i > 0 && i == nBuf-1) i--; + buf[i++] = ch; buf[i] = 0; + } + while (i > 1 && VG_ISSPACE(buf[i-1])) { + i--; buf[i] = 0; + }; + + /* VG_(printf)("The line is `%s'\n", buf); */ + /* Ok, we have a line. If a non-comment line, return. + If a comment line, start all over again. */ + if (buf[0] != '#') return False; + } +} + + +/* *p_caller contains the raw name of a caller, supposedly either + fun:some_function_name or + obj:some_object_name. + Set *p_ty accordingly and advance *p_caller over the descriptor + (fun: or obj:) part. + Returns False if failed. +*/ +static Bool setLocationTy ( Char** p_caller, SuppressionLocTy* p_ty ) +{ + if (VG_(strncmp)(*p_caller, "fun:", 4) == 0) { + (*p_caller) += 4; + *p_ty = FunName; + return True; + } + if (VG_(strncmp)(*p_caller, "obj:", 4) == 0) { + (*p_caller) += 4; + *p_ty = ObjName; + return True; + } + VG_(printf)("location should start with fun: or obj:\n"); + return False; +} + + +/* Read suppressions from the file specified in vg_clo_suppressions + and place them in the suppressions list. If there's any difficulty + doing this, just give up -- there's no point in trying to recover. 
+*/ +#define STREQ(s1,s2) (s1 != NULL && s2 != NULL \ + && VG_(strcmp)((s1),(s2))==0) + +static Char* copyStr ( Char* str ) +{ + Int n, i; + Char* str2; + n = VG_(strlen)(str); + str2 = VG_(malloc)(VG_AR_PRIVATE, n+1); + vg_assert(n > 0); + for (i = 0; i < n+1; i++) str2[i] = str[i]; + return str2; +} + +static void load_one_suppressions_file ( Char* filename ) +{ +# define N_BUF 200 + Int fd; + Bool eof; + Char buf[N_BUF+1]; + fd = VG_(open_read)( filename ); + if (fd == -1) { + VG_(message)(Vg_UserMsg, + "FATAL: can't open suppressions file `%s'", + filename ); + VG_(exit)(1); + } + + while (True) { + Suppression* supp; + supp = VG_(malloc)(VG_AR_PRIVATE, sizeof(Suppression)); + supp->count = 0; + supp->param = supp->caller0 = supp->caller1 + = supp->caller2 = supp->caller3 = NULL; + + eof = getLine ( fd, buf, N_BUF ); + if (eof) break; + + if (!STREQ(buf, "{")) goto syntax_error; + + eof = getLine ( fd, buf, N_BUF ); + if (eof || STREQ(buf, "}")) goto syntax_error; + supp->sname = copyStr(buf); + + eof = getLine ( fd, buf, N_BUF ); + if (eof) goto syntax_error; + else if (STREQ(buf, "Param")) supp->skind = Param; + else if (STREQ(buf, "Value0")) supp->skind = Value0; + else if (STREQ(buf, "Value1")) supp->skind = Value1; + else if (STREQ(buf, "Value2")) supp->skind = Value2; + else if (STREQ(buf, "Value4")) supp->skind = Value4; + else if (STREQ(buf, "Value8")) supp->skind = Value8; + else if (STREQ(buf, "Addr1")) supp->skind = Addr1; + else if (STREQ(buf, "Addr2")) supp->skind = Addr2; + else if (STREQ(buf, "Addr4")) supp->skind = Addr4; + else if (STREQ(buf, "Addr8")) supp->skind = Addr8; + else if (STREQ(buf, "Free")) supp->skind = FreeS; + else goto syntax_error; + + if (supp->skind == Param) { + eof = getLine ( fd, buf, N_BUF ); + if (eof) goto syntax_error; + supp->param = copyStr(buf); + } + + eof = getLine ( fd, buf, N_BUF ); + if (eof) goto syntax_error; + supp->caller0 = copyStr(buf); + if (!setLocationTy(&(supp->caller0), &(supp->caller0_ty))) + goto 
syntax_error; + + eof = getLine ( fd, buf, N_BUF ); + if (eof) goto syntax_error; + supp->caller1 = copyStr(buf); + if (!setLocationTy(&(supp->caller1), &(supp->caller1_ty))) + goto syntax_error; + + eof = getLine ( fd, buf, N_BUF ); + if (eof) goto syntax_error; + if (!STREQ(buf, "}")) { + supp->caller2 = copyStr(buf); + if (!setLocationTy(&(supp->caller2), &(supp->caller2_ty))) + goto syntax_error; + eof = getLine ( fd, buf, N_BUF ); + if (eof) goto syntax_error; + if (!STREQ(buf, "}")) { + supp->caller3 = copyStr(buf); + if (!setLocationTy(&(supp->caller3), &(supp->caller3_ty))) + goto syntax_error; + eof = getLine ( fd, buf, N_BUF ); + if (eof || !STREQ(buf, "}")) goto syntax_error; + } + } + + supp->next = vg_suppressions; + vg_suppressions = supp; + } + + VG_(close)(fd); + return; + + syntax_error: + if (eof) { + VG_(message)(Vg_UserMsg, + "FATAL: in suppressions file `%s': unexpected EOF", + filename ); + } else { + VG_(message)(Vg_UserMsg, + "FATAL: in suppressions file `%s': syntax error on: %s", + filename, buf ); + } + VG_(close)(fd); + VG_(message)(Vg_UserMsg, "exiting now."); + VG_(exit)(1); + +# undef N_BUF +} + + +void VG_(load_suppressions) ( void ) +{ + Int i; + vg_suppressions = NULL; + for (i = 0; i < VG_(clo_n_suppressions); i++) { + if (VG_(clo_verbosity) > 1) { + VG_(message)(Vg_UserMsg, "Reading suppressions file: %s", + VG_(clo_suppressions)[i] ); + } + load_one_suppressions_file( VG_(clo_suppressions)[i] ); + } +} + + +/* Does an error context match a suppression? ie is this a + suppressible error? If so, return a pointer to the Suppression + record, otherwise NULL. + Tries to minimise the number of calls to what_fn_is_this since they + are expensive. 
+*/ +static Suppression* is_suppressible_error ( ErrContext* ec ) +{ +# define STREQ(s1,s2) (s1 != NULL && s2 != NULL \ + && VG_(strcmp)((s1),(s2))==0) + + Char caller0_obj[M_VG_ERRTXT]; + Char caller0_fun[M_VG_ERRTXT]; + Char caller1_obj[M_VG_ERRTXT]; + Char caller1_fun[M_VG_ERRTXT]; + Char caller2_obj[M_VG_ERRTXT]; + Char caller2_fun[M_VG_ERRTXT]; + Char caller3_obj[M_VG_ERRTXT]; + Char caller3_fun[M_VG_ERRTXT]; + + Suppression* su; + Int su_size; + + /* vg_what_fn_or_object_is_this returns: + or + or + ??? + so the strings in the suppression file should match these. + */ + + /* Initialise these strs so they are always safe to compare, even + if what_fn_or_object_is_this doesn't write anything to them. */ + caller0_obj[0] = caller1_obj[0] = caller2_obj[0] = caller3_obj[0] = 0; + caller0_fun[0] = caller1_fun[0] = caller2_obj[0] = caller3_obj[0] = 0; + + VG_(what_obj_and_fun_is_this) + ( ec->where->eips[0], caller0_obj, M_VG_ERRTXT, + caller0_fun, M_VG_ERRTXT ); + VG_(what_obj_and_fun_is_this) + ( ec->where->eips[1], caller1_obj, M_VG_ERRTXT, + caller1_fun, M_VG_ERRTXT ); + + if (VG_(clo_backtrace_size) > 2) { + VG_(what_obj_and_fun_is_this) + ( ec->where->eips[2], caller2_obj, M_VG_ERRTXT, + caller2_fun, M_VG_ERRTXT ); + + if (VG_(clo_backtrace_size) > 3) { + VG_(what_obj_and_fun_is_this) + ( ec->where->eips[3], caller3_obj, M_VG_ERRTXT, + caller3_fun, M_VG_ERRTXT ); + } + } + + /* See if the error context matches any suppression. 
*/ + for (su = vg_suppressions; su != NULL; su = su->next) { + switch (su->skind) { + case FreeS: + case Param: case Value0: su_size = 0; break; + case Value1: case Addr1: su_size = 1; break; + case Value2: case Addr2: su_size = 2; break; + case Value4: case Addr4: su_size = 4; break; + case Value8: case Addr8: su_size = 8; break; + default: VG_(panic)("errcontext_matches_suppression"); + } + switch (su->skind) { + case Param: + if (ec->ekind != ParamErr) continue; + if (!STREQ(su->param, ec->syscall_param)) continue; + break; + case Value0: case Value1: case Value2: case Value4: case Value8: + if (ec->ekind != ValueErr) continue; + if (ec->size != su_size) continue; + break; + case Addr1: case Addr2: case Addr4: case Addr8: + if (ec->ekind != AddrErr) continue; + if (ec->size != su_size) continue; + break; + case FreeS: + if (ec->ekind != FreeErr && ec->ekind != FreeMismatchErr) continue; + break; + } + + switch (su->caller0_ty) { + case ObjName: if (!VG_(stringMatch)(su->caller0, + caller0_obj)) continue; + break; + case FunName: if (!VG_(stringMatch)(su->caller0, + caller0_fun)) continue; + break; + default: goto baaaad; + } + + switch (su->caller1_ty) { + case ObjName: if (!VG_(stringMatch)(su->caller1, + caller1_obj)) continue; + break; + case FunName: if (!VG_(stringMatch)(su->caller1, + caller1_fun)) continue; + break; + default: goto baaaad; + } + + if (VG_(clo_backtrace_size) > 2 && su->caller2 != NULL) { + switch (su->caller2_ty) { + case ObjName: if (!VG_(stringMatch)(su->caller2, + caller2_obj)) continue; + break; + case FunName: if (!VG_(stringMatch)(su->caller2, + caller2_fun)) continue; + break; + default: goto baaaad; + } + } + + if (VG_(clo_backtrace_size) > 3 && su->caller3 != NULL) { + switch (su->caller3_ty) { + case ObjName: if (!VG_(stringMatch)(su->caller3, + caller3_obj)) continue; + break; + case FunName: if (!VG_(stringMatch)(su->caller3, + caller3_fun)) continue; + break; + default: goto baaaad; + } + } + + return su; + } + + return NULL; 
+ + baaaad: + VG_(panic)("is_suppressible_error"); + +# undef STREQ +} + +/*--------------------------------------------------------------------*/ +/*--- end vg_errcontext.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_execontext.c b/coregrind/vg_execontext.c new file mode 100644 index 000000000..759345b7a --- /dev/null +++ b/coregrind/vg_execontext.c @@ -0,0 +1,259 @@ + +/*--------------------------------------------------------------------*/ +/*--- Storage, and equality on, execution contexts (backtraces). ---*/ +/*--- vg_execontext.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" +#include "vg_constants.h" + + +/*------------------------------------------------------------*/ +/*--- Low-level ExeContext storage. 
---*/ +/*------------------------------------------------------------*/ + +/* The idea is only to ever store any one context once, so as to save + space and make exact comparisons faster. */ + +static ExeContext* vg_ec_list[VG_N_EC_LISTS]; + +/* Stats only: the number of times the system was searched to locate a + context. */ +static UInt vg_ec_searchreqs; + +/* Stats only: the number of full context comparisons done. */ +static UInt vg_ec_searchcmps; + +/* Stats only: total number of stored contexts. */ +static UInt vg_ec_totstored; + +/* Number of 2, 4 and (fast) full cmps done. */ +static UInt vg_ec_cmp2s; +static UInt vg_ec_cmp4s; +static UInt vg_ec_cmpAlls; + + +/*------------------------------------------------------------*/ +/*--- Exported functions. ---*/ +/*------------------------------------------------------------*/ + + +/* Initialise this subsystem. */ +void VG_(init_ExeContext_storage) ( void ) +{ + Int i; + vg_ec_searchreqs = 0; + vg_ec_searchcmps = 0; + vg_ec_totstored = 0; + vg_ec_cmp2s = 0; + vg_ec_cmp4s = 0; + vg_ec_cmpAlls = 0; + for (i = 0; i < VG_N_EC_LISTS; i++) + vg_ec_list[i] = NULL; +} + + +/* Show stats. */ +void VG_(show_ExeContext_stats) ( void ) +{ + VG_(message)(Vg_DebugMsg, + "exectx: %d lists, %d contexts (avg %d per list)", + VG_N_EC_LISTS, vg_ec_totstored, + vg_ec_totstored / VG_N_EC_LISTS + ); + VG_(message)(Vg_DebugMsg, + "exectx: %d searches, %d full compares (%d per 1000)", + vg_ec_searchreqs, vg_ec_searchcmps, + vg_ec_searchreqs == 0 + ? 0 + : (UInt)( (((ULong)vg_ec_searchcmps) * 1000) + / ((ULong)vg_ec_searchreqs )) + ); + VG_(message)(Vg_DebugMsg, + "exectx: %d cmp2, %d cmp4, %d cmpAll", + vg_ec_cmp2s, vg_ec_cmp4s, vg_ec_cmpAlls + ); +} + + +/* Print an ExeContext. */ +void VG_(pp_ExeContext) ( ExeContext* e ) +{ + VG_(mini_stack_dump) ( e ); +} + + +/* Compare two ExeContexts, comparing all callers. */ +Bool VG_(eq_ExeContext_all) ( ExeContext* e1, ExeContext* e2 ) +{ + vg_ec_cmpAlls++; + /* Just do pointer comparison. 
    */
   if (e1 != e2) return False;
   return True;
}


/* Compare two ExeContexts, just comparing the top two callers. */
Bool VG_(eq_ExeContext_top2) ( ExeContext* e1, ExeContext* e2 )
{
   vg_ec_cmp2s++;
   if (e1->eips[0] != e2->eips[0]
       || e1->eips[1] != e2->eips[1]) return False;
   return True;
}


/* Compare two ExeContexts, just comparing the top four callers
   (or fewer, if the backtrace depth setting is below 4). */
Bool VG_(eq_ExeContext_top4) ( ExeContext* e1, ExeContext* e2 )
{
   vg_ec_cmp4s++;
   if (e1->eips[0] != e2->eips[0]
       || e1->eips[1] != e2->eips[1]) return False;

   if (VG_(clo_backtrace_size) < 3) return True;
   if (e1->eips[2] != e2->eips[2]) return False;

   if (VG_(clo_backtrace_size) < 4) return True;
   if (e1->eips[3] != e2->eips[3]) return False;

   return True;
}


/* This guy is the head honcho here.  Take a snapshot of the client's
   stack.  Search our collection of ExeContexts to see if we already
   have it, and if not, allocate a new one.  Either way, return a
   pointer to the context.  If there is a matching context we
   guarantee to not allocate a new one.  Thus we never store
   duplicates, and so exact equality can be quickly done as equality
   on the returned ExeContext* values themselves.  Inspired by Hugs's
   Text type.
*/
ExeContext* VG_(get_ExeContext) ( Bool skip_top_frame )
{
   Int         i;
   UInt        ebp;
   Addr        eips[VG_DEEPEST_BACKTRACE];
   Bool        same;
   UInt        hash;
   ExeContext* new_ec;
   ExeContext* list;

   VGP_PUSHCC(VgpExeContext);

   vg_assert(VG_(clo_backtrace_size) >= 2
             && VG_(clo_backtrace_size) <= VG_DEEPEST_BACKTRACE);

   /* First snaffle %EIPs from the client's stack into eips[0
      .. VG_(clo_backtrace_size)-1], putting zeroes in when the trail
      goes cold. */

   for (i = 0; i < VG_(clo_backtrace_size); i++)
      eips[i] = 0;

   /* Walk the saved-%ebp chain: [ebp+4] is the return address, [ebp]
      the caller's ebp.  NOTE(review): assumes the client maintains
      standard frame pointers; frames compiled without them will end
      the trail early. */
#  define GET_CALLER(lval)                                        \
   if (ebp != 0 && VGM_(check_readable)(ebp, 8, NULL)) {          \
      lval = ((UInt*)ebp)[1];  /* ret addr */                     \
      ebp = ((UInt*)ebp)[0];   /* old ebp */                      \
   } else {                                                       \
      lval = ebp = 0;                                             \
   }

   ebp = VG_(baseBlock)[VGOFF_(m_ebp)];

   if (skip_top_frame) {
      for (i = 0; i < VG_(clo_backtrace_size); i++)
         GET_CALLER(eips[i]);
   } else {
      /* Slot 0 is the current %eip itself; callers fill the rest. */
      eips[0] = VG_(baseBlock)[VGOFF_(m_eip)];
      for (i = 1; i < VG_(clo_backtrace_size); i++)
         GET_CALLER(eips[i]);
   }
#  undef GET_CALLER

   /* Now figure out if we've seen this one before.  First hash it so
      as to determine the list number. */

   hash = 0;
   for (i = 0; i < VG_(clo_backtrace_size); i++) {
      hash ^= (UInt)eips[i];
      hash = (hash << 29) | (hash >> 3);
   }
   hash = hash % VG_N_EC_LISTS;

   /* And (the expensive bit) look a matching entry in the list. */

   vg_ec_searchreqs++;

   list = vg_ec_list[hash];

   while (True) {
      if (list == NULL) break;
      vg_ec_searchcmps++;
      same = True;
      for (i = 0; i < VG_(clo_backtrace_size); i++) {
         if (list->eips[i] != eips[i]) {
            same = False;
            break;
         }
      }
      if (same) break;
      list = list->next;
   }

   if (list != NULL) {
      /* Yay!  We found it.  */
      VGP_POPCC;
      return list;
   }

   /* Bummer.  We have to allocate a new context record.
*/ + vg_ec_totstored++; + + new_ec + = VG_(malloc)( + VG_AR_EXECTXT, + sizeof(struct _ExeContextRec *) + + VG_(clo_backtrace_size) * sizeof(Addr) + ); + + for (i = 0; i < VG_(clo_backtrace_size); i++) + new_ec->eips[i] = eips[i]; + + new_ec->next = vg_ec_list[hash]; + vg_ec_list[hash] = new_ec; + + VGP_POPCC; + return new_ec; +} + + +/*--------------------------------------------------------------------*/ +/*--- end vg_execontext.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_from_ucode.c b/coregrind/vg_from_ucode.c new file mode 100644 index 000000000..4d0429355 --- /dev/null +++ b/coregrind/vg_from_ucode.c @@ -0,0 +1,2682 @@ + +/*--------------------------------------------------------------------*/ +/*--- The JITter: translate ucode back to x86 code. ---*/ +/*--- vg_from_ucode.c ---*/ +/*--------------------------------------------------------------------*/ +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. 
+*/ + +#include "vg_include.h" + + +/*------------------------------------------------------------*/ +/*--- Renamings of frequently-used global functions. ---*/ +/*------------------------------------------------------------*/ + +#define dis VG_(disassemble) +#define nameIReg VG_(nameOfIntReg) +#define nameISize VG_(nameOfIntSize) + + +/*------------------------------------------------------------*/ +/*--- Instruction emission -- turning final uinstrs back ---*/ +/*--- into x86 code. ---*/ +/*------------------------------------------------------------*/ + +/* [2001-07-08 This comment is now somewhat out of date.] + + This is straightforward but for one thing: to facilitate generating + code in a single pass, we generate position-independent code. To + do this, calls and jmps to fixed addresses must specify the address + by first loading it into a register, and jump to/call that + register. Fortunately, the only jump to a literal is the jump back + to vg_dispatch, and only %eax is live then, conveniently. Ucode + call insns may only have a register as target anyway, so there's no + need to do anything fancy for them. + + The emit_* routines constitute the lowest level of instruction + emission. They simply emit the sequence of bytes corresponding to + the relevant instruction, with no further ado. In particular there + is no checking about whether uses of byte registers makes sense, + nor whether shift insns have their first operand in %cl, etc. + + These issues are taken care of by the level above, the synth_* + routines. These detect impossible operand combinations and turn + them into sequences of legal instructions. Finally, emitUInstr is + phrased in terms of the synth_* abstraction layer. 
*/ + +static UChar* emitted_code; +static Int emitted_code_used; +static Int emitted_code_size; + +static void expandEmittedCode ( void ) +{ + Int i; + UChar* tmp = VG_(jitmalloc)(2 * emitted_code_size); + /* VG_(printf)("expand to %d\n", 2 * emitted_code_size); */ + for (i = 0; i < emitted_code_size; i++) + tmp[i] = emitted_code[i]; + VG_(jitfree)(emitted_code); + emitted_code = tmp; + emitted_code_size *= 2; +} + +static __inline__ void emitB ( UInt b ) +{ + if (dis) { + if (b < 16) VG_(printf)("0%x ", b); else VG_(printf)("%2x ", b); + } + if (emitted_code_used == emitted_code_size) + expandEmittedCode(); + + emitted_code[emitted_code_used] = (UChar)b; + emitted_code_used++; +} + +static __inline__ void emitW ( UInt l ) +{ + emitB ( (l) & 0x000000FF ); + emitB ( (l >> 8) & 0x000000FF ); +} + +static __inline__ void emitL ( UInt l ) +{ + emitB ( (l) & 0x000000FF ); + emitB ( (l >> 8) & 0x000000FF ); + emitB ( (l >> 16) & 0x000000FF ); + emitB ( (l >> 24) & 0x000000FF ); +} + +static __inline__ void newEmit ( void ) +{ + if (dis) + VG_(printf)("\t %4d: ", emitted_code_used ); +} + +/* Is this a callee-save register, in the normal C calling convention? 
*/
#define VG_CALLEE_SAVED(reg) (reg == R_EBX || reg == R_ESI || reg == R_EDI)


/*----------------------------------------------------*/
/*--- Addressing modes                             ---*/
/*----------------------------------------------------*/

/* Assemble a ModRM byte: mod (2 bits) | reg (3 bits) | r/m (3 bits). */
static __inline__ UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem )
{
   return ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7);
}

/* Assemble a SIB byte: scale (2 bits) | index (3 bits) | base (3 bits).
   Panics on any scale other than 1, 2, 4 or 8. */
static __inline__ UChar mkSIB ( Int scale, Int regindex, Int regbase )
{
   Int shift;
   switch (scale) {
      case 1: shift = 0; break;
      case 2: shift = 1; break;
      case 4: shift = 2; break;
      case 8: shift = 3; break;
      default: VG_(panic)( "mkSIB" );
   }
   return ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7);
}

/* Emit the amode ($ADDR), reg -- 32-bit absolute address
   (mod=0, r/m=5). */
static __inline__ void emit_amode_litmem_reg ( Addr addr, Int reg )
{
   /* ($ADDR), reg */
   emitB ( mkModRegRM(0, reg, 5) );
   emitL ( addr );
}

/* Emit the amode (regmem), reg.  %esp is refused (it would need a
   SIB); %ebp needs the mod=1/d8=0 form, since mod=0 with r/m=5 means
   absolute addressing instead. */
static __inline__ void emit_amode_regmem_reg ( Int regmem, Int reg )
{
   /* (regmem), reg */
   if (regmem == R_ESP)
      VG_(panic)("emit_amode_regmem_reg");
   if (regmem == R_EBP) {
      emitB ( mkModRegRM(1, reg, 5) );
      emitB ( 0x00 );
   } else {
      emitB( mkModRegRM(0, reg, regmem) );
   }
}

/* Emit the amode off(regmem), reg, choosing the short d8 form when
   the offset fits in a signed byte. */
static __inline__ void emit_amode_offregmem_reg ( Int off, Int regmem, Int reg )
{
   if (regmem == R_ESP)
      VG_(panic)("emit_amode_offregmem_reg(ESP)");
   if (off < -128 || off > 127) {
      /* Use a large offset */
      /* d32(regmem), reg */
      emitB ( mkModRegRM(2, reg, regmem) );
      emitL ( off );
   } else {
      /* d8(regmem), reg */
      emitB ( mkModRegRM(1, reg, regmem) );
      emitB ( off & 0xFF );
   }
}

/* Emit the amode off(regbase,regindex,scale), reg via a SIB byte,
   again preferring the d8 displacement form.  %esp cannot be an
   index register. */
static __inline__ void emit_amode_sib_reg ( Int off, Int scale, Int regbase,
                                            Int regindex, Int reg )
{
   if (regindex == R_ESP)
      VG_(panic)("emit_amode_sib_reg(ESP)");
   if (off < -128 || off > 127) {
      /* Use a 32-bit offset */
      emitB ( mkModRegRM(2, reg, 4) ); /* SIB with 32-bit displacement */
      emitB ( mkSIB( scale, regindex, regbase ) );
      emitL ( off );
   } else {
      /* Use an 8-bit offset */
      emitB ( mkModRegRM(1, reg, 4) ); /* SIB with 8-bit displacement */
      emitB ( mkSIB( scale, regindex, regbase ) );
      emitB ( off & 0xFF );
   }
}

/* Register-direct amode (mod = 3). */
static __inline__ void emit_amode_ereg_greg ( Int e_reg, Int g_reg )
{
   /* other_reg, reg */
   emitB ( mkModRegRM(3, g_reg, e_reg) );
}

/* Register-direct amode (mod = 3); same encoding as above, with the
   operands named from the other instruction form's viewpoint. */
static __inline__ void emit_amode_greg_ereg ( Int g_reg, Int e_reg )
{
   /* other_reg, reg */
   emitB ( mkModRegRM(3, g_reg, e_reg) );
}


/*----------------------------------------------------*/
/*--- Opcode translation                           ---*/
/*----------------------------------------------------*/

/* /digit opcode extension for Grp1 (two-operand arithmetic). */
static __inline__ Int mkGrp1opcode ( Opcode opc )
{
   switch (opc) {
      case ADD: return 0;
      case OR:  return 1;
      case ADC: return 2;
      case SBB: return 3;
      case AND: return 4;
      case SUB: return 5;
      case XOR: return 6;
      default: VG_(panic)("mkGrp1opcode");
   }
}

/* /digit opcode extension for Grp2 (shifts and rotates). */
static __inline__ Int mkGrp2opcode ( Opcode opc )
{
   switch (opc) {
      case ROL: return 0;
      case ROR: return 1;
      case RCL: return 2;
      case RCR: return 3;
      case SHL: return 4;
      case SHR: return 5;
      case SAR: return 7;
      default: VG_(panic)("mkGrp2opcode");
   }
}

/* /digit opcode extension for Grp3 (NOT/NEG). */
static __inline__ Int mkGrp3opcode ( Opcode opc )
{
   switch (opc) {
      case NOT: return 2;
      case NEG: return 3;
      default: VG_(panic)("mkGrp3opcode");
   }
}

/* /digit opcode extension for Grp4 (byte INC/DEC). */
static __inline__ Int mkGrp4opcode ( Opcode opc )
{
   switch (opc) {
      case INC: return 0;
      case DEC: return 1;
      default: VG_(panic)("mkGrp4opcode");
   }
}

/* /digit opcode extension for Grp5 (indirect CALL/JMP). */
static __inline__ Int mkGrp5opcode ( Opcode opc )
{
   switch (opc) {
      case CALLM: return 2;
      case JMP:   return 4;
      default: VG_(panic)("mkGrp5opcode");
   }
}

/* Base opcode byte in the primary map for two-operand arithmetic;
   variants (Eb/Gb, Ev/Gv, direction) are small offsets from this. */
static __inline__ UChar mkPrimaryOpcode ( Opcode opc )
{
   switch (opc) {
      case ADD: return 0x00;
      case ADC: return 0x10;
      case AND: return 0x20;
      case XOR: return 0x30;
      case OR:  return 0x08;
      case SBB: return 0x18;
      case SUB: return 0x28;
      default: VG_(panic)("mkPrimaryOpcode");
   }
}

/*----------------------------------------------------*/
/*--- v-size (4, or 2 with OSO) insn emitters      ---*/
/*----------------------------------------------------*/

/* mov off(areg), reg -- load; 0x66 prefix selects 16-bit operands. */
static void emit_movv_offregmem_reg ( Int sz, Int off, Int areg, Int reg )
{
   newEmit();
   if (sz == 2) emitB ( 0x66 );
   emitB ( 0x8B ); /* MOV Ev, Gv */
   emit_amode_offregmem_reg ( off, areg, reg );
   if (dis)
      VG_(printf)( "\n\t\tmov%c\t0x%x(%s), %s\n",
                   nameISize(sz), off, nameIReg(4,areg), nameIReg(sz,reg));
}

/* mov reg, off(areg) -- store. */
static void emit_movv_reg_offregmem ( Int sz, Int reg, Int off, Int areg )
{
   newEmit();
   if (sz == 2) emitB ( 0x66 );
   emitB ( 0x89 ); /* MOV Gv, Ev */
   emit_amode_offregmem_reg ( off, areg, reg );
   if (dis)
      VG_(printf)( "\n\t\tmov%c\t%s, 0x%x(%s)\n",
                   nameISize(sz), nameIReg(sz,reg), off, nameIReg(4,areg));
}

/* mov (reg1), reg2 -- load through a register, no displacement. */
static void emit_movv_regmem_reg ( Int sz, Int reg1, Int reg2 )
{
   newEmit();
   if (sz == 2) emitB ( 0x66 );
   emitB ( 0x8B ); /* MOV Ev, Gv */
   emit_amode_regmem_reg ( reg1, reg2 );
   if (dis)
      VG_(printf)( "\n\t\tmov%c\t(%s), %s\n",
                   nameISize(sz), nameIReg(4,reg1), nameIReg(sz,reg2));
}

/* mov reg1, (reg2) -- store through a register, no displacement. */
static void emit_movv_reg_regmem ( Int sz, Int reg1, Int reg2 )
{
   newEmit();
   if (sz == 2) emitB ( 0x66 );
   emitB ( 0x89 ); /* MOV Gv, Ev */
   emit_amode_regmem_reg ( reg2, reg1 );
   if (dis)
      VG_(printf)( "\n\t\tmov%c\t%s, (%s)\n",
                   nameISize(sz), nameIReg(sz,reg1), nameIReg(4,reg2));
}

/* mov reg1, reg2 -- register-to-register copy. */
static void emit_movv_reg_reg ( Int sz, Int reg1, Int reg2 )
{
   newEmit();
   if (sz == 2) emitB ( 0x66 );
   emitB ( 0x89 ); /* MOV Gv, Ev */
   emit_amode_ereg_greg ( reg2, reg1 );
   if (dis)
      VG_(printf)( "\n\t\tmov%c\t%s, %s\n",
                   nameISize(sz), nameIReg(sz,reg1), nameIReg(sz,reg2));
}

/* Grp1 op $lit, reg -- uses the sign-extended imm8 form (0x83) when
   the literal survives an 8->32 sign-extension round trip, else the
   full-immediate form (0x81). */
static void emit_nonshiftopv_lit_reg ( Int sz, Opcode opc,
                                       UInt lit, Int reg )
{
   newEmit();
   if (sz == 2) emitB ( 0x66 );
   if (lit == VG_(extend_s_8to32)(lit & 0x000000FF)) {
      /* short form OK */
      emitB ( 0x83 ); /* Grp1 Ib,Ev */
      emit_amode_ereg_greg ( reg, mkGrp1opcode(opc) );
      emitB ( lit & 0x000000FF );
   } else {
      emitB ( 0x81 ); /* Grp1 Iv,Ev */
      emit_amode_ereg_greg ( reg, mkGrp1opcode(opc) );
      if (sz == 2) emitW ( lit ); else emitL ( lit );
   }
   if (dis)
      VG_(printf)( "\n\t\t%s%c\t$0x%x, %s\n",
                   VG_(nameUOpcode)(False,opc), nameISize(sz),
                   lit, nameIReg(sz,reg));
}

/* Grp2 shift/rotate $lit, reg. */
static void emit_shiftopv_lit_reg ( Int sz, Opcode opc, UInt lit, Int reg )
{
   newEmit();
   if (sz == 2) emitB ( 0x66 );
   emitB ( 0xC1 ); /* Grp2 Ib,Ev */
   emit_amode_ereg_greg ( reg, mkGrp2opcode(opc) );
   emitB ( lit );
   if (dis)
      VG_(printf)( "\n\t\t%s%c\t$%d, %s\n",
                   VG_(nameUOpcode)(False,opc), nameISize(sz),
                   lit, nameIReg(sz,reg));
}

/* Grp2 shift/rotate %cl, 0(%esp) -- v-size operand on stack top. */
static void emit_shiftopv_cl_stack0 ( Int sz, Opcode opc )
{
   newEmit();
   if (sz == 2) emitB ( 0x66 );
   emitB ( 0xD3 ); /* Grp2 CL,Ev */
   emitB ( mkModRegRM ( 1, mkGrp2opcode(opc), 4 ) );
   emitB ( 0x24 ); /* a SIB, I think `d8(%esp)' */
   emitB ( 0x00 ); /* the d8 displacement */
   if (dis)
      VG_(printf)("\n\t\t%s%c %%cl, 0(%%esp)\n",
                  VG_(nameUOpcode)(False,opc), nameISize(sz) );
}

/* Grp2 shift/rotate %cl, 0(%esp) -- byte-sized variant. */
static void emit_shiftopb_cl_stack0 ( Opcode opc )
{
   newEmit();
   emitB ( 0xD2 ); /* Grp2 CL,Eb */
   emitB ( mkModRegRM ( 1, mkGrp2opcode(opc), 4 ) );
   emitB ( 0x24 ); /* a SIB, I think `d8(%esp)' */
   emitB ( 0x00 ); /* the d8 displacement */
   if (dis)
      VG_(printf)("\n\t\t%s%c %%cl, 0(%%esp)\n",
                  VG_(nameUOpcode)(False,opc), nameISize(1) );
}

/* Grp1-style op off(areg), reg -- memory source operand. */
static void emit_nonshiftopv_offregmem_reg ( Int sz, Opcode opc,
                                             Int off, Int areg, Int reg )
{
   newEmit();
   if (sz == 2) emitB ( 0x66 );
   emitB ( 3 + mkPrimaryOpcode(opc) ); /* op Ev, Gv */
   emit_amode_offregmem_reg ( off, areg, reg );
   if (dis)
      VG_(printf)( "\n\t\t%s%c\t0x%x(%s), %s\n",
                   VG_(nameUOpcode)(False,opc), nameISize(sz),
                   off, nameIReg(4,areg), nameIReg(sz,reg));
}

/* Grp1-style op reg1, reg2 -- register-register. */
static void emit_nonshiftopv_reg_reg ( Int sz, Opcode opc,
                                       Int reg1, Int reg2 )
{
   newEmit();
   if (sz == 2) emitB ( 0x66 );
#  if 0
   /* Perfectly correct, but the GNU assembler uses the other form.
      Therefore we too use the other form, to aid verification. */
   emitB ( 3 + mkPrimaryOpcode(opc) ); /* op Ev, Gv */
   emit_amode_ereg_greg ( reg1, reg2 );
#  else
   emitB ( 1 + mkPrimaryOpcode(opc) ); /* op Gv, Ev */
   emit_amode_greg_ereg ( reg1, reg2 );
#  endif
   if (dis)
      VG_(printf)( "\n\t\t%s%c\t%s, %s\n",
                   VG_(nameUOpcode)(False,opc), nameISize(sz),
                   nameIReg(sz,reg1), nameIReg(sz,reg2));
}

/* mov $lit, reg.  A zero literal is strength-reduced to xor reg,reg.
   NOTE(review): unlike mov, xor writes the condition codes -- this
   assumes %eflags are dead at every call site; confirm. */
static void emit_movv_lit_reg ( Int sz, UInt lit, Int reg )
{
   if (lit == 0) {
      emit_nonshiftopv_reg_reg ( sz, XOR, reg, reg );
      return;
   }
   newEmit();
   if (sz == 2) emitB ( 0x66 );
   emitB ( 0xB8+reg ); /* MOV imm, Gv */
   if (sz == 2) emitW ( lit ); else emitL ( lit );
   if (dis)
      VG_(printf)( "\n\t\tmov%c\t$0x%x, %s\n",
                   nameISize(sz), lit, nameIReg(sz,reg));
}

/* One-operand op on a register: NEG/NOT via Grp3 (0xF7 /digit),
   INC/DEC via their dedicated one-byte opcodes (0x40+r / 0x48+r). */
static void emit_unaryopv_reg ( Int sz, Opcode opc, Int reg )
{
   newEmit();
   if (sz == 2) emitB ( 0x66 );
   switch (opc) {
      case NEG:
         emitB ( 0xF7 );
         emit_amode_ereg_greg ( reg, mkGrp3opcode(NEG) );
         if (dis)
            VG_(printf)( "\n\t\tneg%c\t%s\n",
                         nameISize(sz), nameIReg(sz,reg));
         break;
      case NOT:
         emitB ( 0xF7 );
         emit_amode_ereg_greg ( reg, mkGrp3opcode(NOT) );
         if (dis)
            VG_(printf)( "\n\t\tnot%c\t%s\n",
                         nameISize(sz), nameIReg(sz,reg));
         break;
      case DEC:
         emitB ( 0x48 + reg );
         if (dis)
            VG_(printf)( "\n\t\tdec%c\t%s\n",
                         nameISize(sz), nameIReg(sz,reg));
         break;
      case INC:
         emitB ( 0x40 + reg );
         if (dis)
            VG_(printf)( "\n\t\tinc%c\t%s\n",
                         nameISize(sz), nameIReg(sz,reg));
         break;
      default:
         VG_(panic)("emit_unaryopv_reg");
   }
}

/* push reg (0x50+r); only 2- and 4-byte operand sizes are legal. */
static void emit_pushv_reg ( Int sz, Int reg )
{
   newEmit();
   if (sz == 2) {
      emitB ( 0x66 );
   } else {
      vg_assert(sz == 4);
   }
   emitB ( 0x50 + reg );
   if (dis)
      VG_(printf)("\n\t\tpush%c %s\n", nameISize(sz), nameIReg(sz,reg));
}

/* pop reg (0x58+r); only 2- and 4-byte operand sizes are legal. */
static void emit_popv_reg ( Int sz, Int reg )
{
   newEmit();
   if (sz == 2) {
      emitB ( 0x66 );
   } else {
      vg_assert(sz == 4);
   }
   emitB ( 0x58 + reg );
   if (dis)
      VG_(printf)("\n\t\tpop%c %s\n", nameISize(sz), nameIReg(sz,reg));
}

+static void emit_pushl_lit8 ( Int lit8 ) +{ + vg_assert(lit8 >= -128 && lit8 < 128); + newEmit(); + emitB ( 0x6A ); + emitB ( (UChar)((UInt)lit8) ); + if (dis) + VG_(printf)("\n\t\tpushl $%d\n", lit8 ); +} + +static void emit_pushl_lit32 ( UInt int32 ) +{ + newEmit(); + emitB ( 0x68 ); + emitL ( int32 ); + if (dis) + VG_(printf)("\n\t\tpushl $0x%x\n", int32 ); +} + +static void emit_cmpl_zero_reg ( Int reg ) +{ + newEmit(); + emitB ( 0x83 ); + emit_amode_ereg_greg ( reg, 7 /* Grp 3 opcode for CMP */ ); + emitB ( 0x00 ); + if (dis) + VG_(printf)("\n\t\tcmpl $0, %s\n", nameIReg(4,reg)); +} + +static void emit_swapl_reg_ECX ( Int reg ) +{ + newEmit(); + emitB ( 0x87 ); /* XCHG Gv,Ev */ + emit_amode_ereg_greg ( reg, R_ECX ); + if (dis) + VG_(printf)("\n\t\txchgl %%ecx, %s\n", nameIReg(4,reg)); +} + +static void emit_swapl_reg_EAX ( Int reg ) +{ + newEmit(); + emitB ( 0x90 + reg ); /* XCHG Gv,eAX */ + if (dis) + VG_(printf)("\n\t\txchgl %%eax, %s\n", nameIReg(4,reg)); +} + +static void emit_swapl_reg_reg ( Int reg1, Int reg2 ) +{ + newEmit(); + emitB ( 0x87 ); /* XCHG Gv,Ev */ + emit_amode_ereg_greg ( reg1, reg2 ); + if (dis) + VG_(printf)("\n\t\txchgl %s, %s\n", nameIReg(4,reg1), + nameIReg(4,reg2)); +} + +static void emit_bswapl_reg ( Int reg ) +{ + newEmit(); + emitB ( 0x0F ); + emitB ( 0xC8 + reg ); /* BSWAP r32 */ + if (dis) + VG_(printf)("\n\t\tbswapl %s\n", nameIReg(4,reg)); +} + +static void emit_movl_reg_reg ( Int regs, Int regd ) +{ + newEmit(); + emitB ( 0x89 ); /* MOV Gv,Ev */ + emit_amode_ereg_greg ( regd, regs ); + if (dis) + VG_(printf)("\n\t\tmovl %s, %s\n", nameIReg(4,regs), nameIReg(4,regd)); +} + +static void emit_testv_lit_reg ( Int sz, UInt lit, Int reg ) +{ + newEmit(); + if (sz == 2) { + emitB ( 0x66 ); + } else { + vg_assert(sz == 4); + } + emitB ( 0xF7 ); /* Grp3 Ev */ + emit_amode_ereg_greg ( reg, 0 /* Grp3 subopcode for TEST */ ); + if (sz == 2) emitW ( lit ); else emitL ( lit ); + if (dis) + VG_(printf)("\n\t\ttest%c $0x%x, %s\n", 
nameISize(sz), + lit, nameIReg(sz,reg)); +} + +static void emit_testv_lit_offregmem ( Int sz, UInt lit, Int off, Int reg ) +{ + newEmit(); + if (sz == 2) { + emitB ( 0x66 ); + } else { + vg_assert(sz == 4); + } + emitB ( 0xF7 ); /* Grp3 Ev */ + emit_amode_offregmem_reg ( off, reg, 0 /* Grp3 subopcode for TEST */ ); + if (sz == 2) emitW ( lit ); else emitL ( lit ); + if (dis) + VG_(printf)("\n\t\ttest%c $%d, 0x%x(%s)\n", + nameISize(sz), lit, off, nameIReg(4,reg) ); +} + +static void emit_movv_lit_offregmem ( Int sz, UInt lit, Int off, Int memreg ) +{ + newEmit(); + if (sz == 2) { + emitB ( 0x66 ); + } else { + vg_assert(sz == 4); + } + emitB ( 0xC7 ); /* Grp11 Ev */ + emit_amode_offregmem_reg ( off, memreg, 0 /* Grp11 subopcode for MOV */ ); + if (sz == 2) emitW ( lit ); else emitL ( lit ); + if (dis) + VG_(printf)( "\n\t\tmov%c\t$0x%x, 0x%x(%s)\n", + nameISize(sz), lit, off, nameIReg(4,memreg) ); +} + + +/*----------------------------------------------------*/ +/*--- b-size (1 byte) instruction emitters ---*/ +/*----------------------------------------------------*/ + +/* There is some doubt as to whether C6 (Grp 11) is in the + 486 insn set. ToDo: investigate. */ +static void emit_movb_lit_offregmem ( UInt lit, Int off, Int memreg ) +{ + newEmit(); + emitB ( 0xC6 ); /* Grp11 Eb */ + emit_amode_offregmem_reg ( off, memreg, 0 /* Grp11 subopcode for MOV */ ); + emitB ( lit ); + if (dis) + VG_(printf)( "\n\t\tmovb\t$0x%x, 0x%x(%s)\n", + lit, off, nameIReg(4,memreg) ); +} + +static void emit_nonshiftopb_offregmem_reg ( Opcode opc, + Int off, Int areg, Int reg ) +{ + newEmit(); + emitB ( 2 + mkPrimaryOpcode(opc) ); /* op Eb, Gb */ + emit_amode_offregmem_reg ( off, areg, reg ); + if (dis) + VG_(printf)( "\n\t\t%sb\t0x%x(%s), %s\n", + VG_(nameUOpcode)(False,opc), off, nameIReg(4,areg), + nameIReg(1,reg)); +} + +static void emit_movb_reg_offregmem ( Int reg, Int off, Int areg ) +{ + /* Could do better when reg == %al. 
*/ + newEmit(); + emitB ( 0x88 ); /* MOV G1, E1 */ + emit_amode_offregmem_reg ( off, areg, reg ); + if (dis) + VG_(printf)( "\n\t\tmovb\t%s, 0x%x(%s)\n", + nameIReg(1,reg), off, nameIReg(4,areg)); +} + +static void emit_nonshiftopb_reg_reg ( Opcode opc, Int reg1, Int reg2 ) +{ + newEmit(); + emitB ( 2 + mkPrimaryOpcode(opc) ); /* op Eb, Gb */ + emit_amode_ereg_greg ( reg1, reg2 ); + if (dis) + VG_(printf)( "\n\t\t%sb\t%s, %s\n", + VG_(nameUOpcode)(False,opc), + nameIReg(1,reg1), nameIReg(1,reg2)); +} + +static void emit_movb_reg_regmem ( Int reg1, Int reg2 ) +{ + newEmit(); + emitB ( 0x88 ); /* MOV G1, E1 */ + emit_amode_regmem_reg ( reg2, reg1 ); + if (dis) + VG_(printf)( "\n\t\tmovb\t%s, (%s)\n", nameIReg(1,reg1), + nameIReg(4,reg2)); +} + +static void emit_nonshiftopb_lit_reg ( Opcode opc, UInt lit, Int reg ) +{ + newEmit(); + emitB ( 0x80 ); /* Grp1 Ib,Eb */ + emit_amode_ereg_greg ( reg, mkGrp1opcode(opc) ); + emitB ( lit & 0x000000FF ); + if (dis) + VG_(printf)( "\n\t\t%sb\t$0x%x, %s\n", VG_(nameUOpcode)(False,opc), + lit, nameIReg(1,reg)); +} + +static void emit_shiftopb_lit_reg ( Opcode opc, UInt lit, Int reg ) +{ + newEmit(); + emitB ( 0xC0 ); /* Grp2 Ib,Eb */ + emit_amode_ereg_greg ( reg, mkGrp2opcode(opc) ); + emitB ( lit ); + if (dis) + VG_(printf)( "\n\t\t%sb\t$%d, %s\n", + VG_(nameUOpcode)(False,opc), + lit, nameIReg(1,reg)); +} + +static void emit_unaryopb_reg ( Opcode opc, Int reg ) +{ + newEmit(); + switch (opc) { + case INC: + emitB ( 0xFE ); + emit_amode_ereg_greg ( reg, mkGrp4opcode(INC) ); + if (dis) + VG_(printf)( "\n\t\tincb\t%s\n", nameIReg(1,reg)); + break; + case DEC: + emitB ( 0xFE ); + emit_amode_ereg_greg ( reg, mkGrp4opcode(DEC) ); + if (dis) + VG_(printf)( "\n\t\tdecb\t%s\n", nameIReg(1,reg)); + break; + case NOT: + emitB ( 0xF6 ); + emit_amode_ereg_greg ( reg, mkGrp3opcode(NOT) ); + if (dis) + VG_(printf)( "\n\t\tnotb\t%s\n", nameIReg(1,reg)); + break; + case NEG: + emitB ( 0xF6 ); + emit_amode_ereg_greg ( reg, mkGrp3opcode(NEG) ); + 
if (dis) + VG_(printf)( "\n\t\tnegb\t%s\n", nameIReg(1,reg)); + break; + default: + VG_(panic)("emit_unaryopb_reg"); + } +} + +static void emit_testb_lit_reg ( UInt lit, Int reg ) +{ + newEmit(); + emitB ( 0xF6 ); /* Grp3 Eb */ + emit_amode_ereg_greg ( reg, 0 /* Grp3 subopcode for TEST */ ); + emitB ( lit ); + if (dis) + VG_(printf)("\n\t\ttestb $0x%x, %s\n", lit, nameIReg(1,reg)); +} + + +/*----------------------------------------------------*/ +/*--- zero-extended load emitters ---*/ +/*----------------------------------------------------*/ + +static void emit_movzbl_offregmem_reg ( Int off, Int regmem, Int reg ) +{ + newEmit(); + emitB ( 0x0F ); emitB ( 0xB6 ); /* MOVZBL */ + emit_amode_offregmem_reg ( off, regmem, reg ); + if (dis) + VG_(printf)( "\n\t\tmovzbl\t0x%x(%s), %s\n", + off, nameIReg(4,regmem), nameIReg(4,reg)); +} + +static void emit_movzbl_regmem_reg ( Int reg1, Int reg2 ) +{ + newEmit(); + emitB ( 0x0F ); emitB ( 0xB6 ); /* MOVZBL */ + emit_amode_regmem_reg ( reg1, reg2 ); + if (dis) + VG_(printf)( "\n\t\tmovzbl\t(%s), %s\n", nameIReg(4,reg1), + nameIReg(4,reg2)); +} + +static void emit_movzwl_offregmem_reg ( Int off, Int areg, Int reg ) +{ + newEmit(); + emitB ( 0x0F ); emitB ( 0xB7 ); /* MOVZWL */ + emit_amode_offregmem_reg ( off, areg, reg ); + if (dis) + VG_(printf)( "\n\t\tmovzwl\t0x%x(%s), %s\n", + off, nameIReg(4,areg), nameIReg(4,reg)); +} + +static void emit_movzwl_regmem_reg ( Int reg1, Int reg2 ) +{ + newEmit(); + emitB ( 0x0F ); emitB ( 0xB7 ); /* MOVZWL */ + emit_amode_regmem_reg ( reg1, reg2 ); + if (dis) + VG_(printf)( "\n\t\tmovzwl\t(%s), %s\n", nameIReg(4,reg1), + nameIReg(4,reg2)); +} + +/*----------------------------------------------------*/ +/*--- FPU instruction emitters ---*/ +/*----------------------------------------------------*/ + +static void emit_get_fpu_state ( void ) +{ + Int off = 4 * VGOFF_(m_fpustate); + newEmit(); + emitB ( 0xDD ); emitB ( 0xA5 ); /* frstor d32(%ebp) */ + emitL ( off ); + if (dis) + 
VG_(printf)("\n\t\tfrstor\t%d(%%ebp)\n", off ); +} + +static void emit_put_fpu_state ( void ) +{ + Int off = 4 * VGOFF_(m_fpustate); + newEmit(); + emitB ( 0xDD ); emitB ( 0xB5 ); /* fnsave d32(%ebp) */ + emitL ( off ); + if (dis) + VG_(printf)("\n\t\tfnsave\t%d(%%ebp)\n", off ); +} + +static void emit_fpu_no_mem ( UChar first_byte, + UChar second_byte ) +{ + newEmit(); + emitB ( first_byte ); + emitB ( second_byte ); + if (dis) + VG_(printf)("\n\t\tfpu-0x%x:0x%x\n", + (UInt)first_byte, (UInt)second_byte ); +} + +static void emit_fpu_regmem ( UChar first_byte, + UChar second_byte_masked, + Int reg ) +{ + newEmit(); + emitB ( first_byte ); + emit_amode_regmem_reg ( reg, second_byte_masked >> 3 ); + if (dis) + VG_(printf)("\n\t\tfpu-0x%x:0x%x-(%s)\n", + (UInt)first_byte, (UInt)second_byte_masked, + nameIReg(4,reg) ); +} + + +/*----------------------------------------------------*/ +/*--- misc instruction emitters ---*/ +/*----------------------------------------------------*/ + +static void emit_call_reg ( Int reg ) +{ + newEmit(); + emitB ( 0xFF ); /* Grp5 */ + emit_amode_ereg_greg ( reg, mkGrp5opcode(CALLM) ); + if (dis) + VG_(printf)( "\n\t\tcall\t*%s\n", nameIReg(4,reg) ); +} + + +static void emit_call_star_EBP_off ( Int byte_off ) +{ + newEmit(); + if (byte_off < -128 || byte_off > 127) { + emitB ( 0xFF ); + emitB ( 0x95 ); + emitL ( byte_off ); + } else { + emitB ( 0xFF ); + emitB ( 0x55 ); + emitB ( byte_off ); + } + if (dis) + VG_(printf)( "\n\t\tcall * %d(%%ebp)\n", byte_off ); +} + + +static void emit_addlit8_offregmem ( Int lit8, Int regmem, Int off ) +{ + vg_assert(lit8 >= -128 && lit8 < 128); + newEmit(); + emitB ( 0x83 ); /* Grp1 Ib,Ev */ + emit_amode_offregmem_reg ( off, regmem, + 0 /* Grp1 subopcode for ADD */ ); + emitB ( lit8 & 0xFF ); + if (dis) + VG_(printf)( "\n\t\taddl $%d, %d(%s)\n", lit8, off, + nameIReg(4,regmem)); +} + + +static void emit_add_lit_to_esp ( Int lit ) +{ + if (lit < -128 || lit > 127) VG_(panic)("emit_add_lit_to_esp"); + 
newEmit(); + emitB ( 0x83 ); + emitB ( 0xC4 ); + emitB ( lit & 0xFF ); + if (dis) + VG_(printf)( "\n\t\taddl $%d, %%esp\n", lit ); +} + + +static void emit_movb_AL_zeroESPmem ( void ) +{ + /* movb %al, 0(%esp) */ + /* 88442400 movb %al, 0(%esp) */ + newEmit(); + emitB ( 0x88 ); + emitB ( 0x44 ); + emitB ( 0x24 ); + emitB ( 0x00 ); + if (dis) + VG_(printf)( "\n\t\tmovb %%al, 0(%%esp)\n" ); +} + +static void emit_movb_zeroESPmem_AL ( void ) +{ + /* movb 0(%esp), %al */ + /* 8A442400 movb 0(%esp), %al */ + newEmit(); + emitB ( 0x8A ); + emitB ( 0x44 ); + emitB ( 0x24 ); + emitB ( 0x00 ); + if (dis) + VG_(printf)( "\n\t\tmovb 0(%%esp), %%al\n" ); +} + + +/* Emit a jump short with an 8-bit signed offset. Note that the + offset is that which should be added to %eip once %eip has been + advanced over this insn. */ +static void emit_jcondshort_delta ( Condcode cond, Int delta ) +{ + vg_assert(delta >= -128 && delta <= 127); + newEmit(); + emitB ( 0x70 + (UInt)cond ); + emitB ( (UChar)delta ); + if (dis) + VG_(printf)( "\n\t\tj%s-8\t%%eip+%d\n", + VG_(nameCondcode)(cond), delta ); +} + +static void emit_get_eflags ( void ) +{ + Int off = 4 * VGOFF_(m_eflags); + vg_assert(off >= 0 && off < 128); + newEmit(); + emitB ( 0xFF ); /* PUSHL off(%ebp) */ + emitB ( 0x75 ); + emitB ( off ); + emitB ( 0x9D ); /* POPFL */ + if (dis) + VG_(printf)( "\n\t\tpushl %d(%%ebp) ; popfl\n", off ); +} + +static void emit_put_eflags ( void ) +{ + Int off = 4 * VGOFF_(m_eflags); + vg_assert(off >= 0 && off < 128); + newEmit(); + emitB ( 0x9C ); /* PUSHFL */ + emitB ( 0x8F ); /* POPL vg_m_state.m_eflags */ + emitB ( 0x45 ); + emitB ( off ); + if (dis) + VG_(printf)( "\n\t\tpushfl ; popl %d(%%ebp)\n", off ); +} + +static void emit_setb_reg ( Int reg, Condcode cond ) +{ + newEmit(); + emitB ( 0x0F ); emitB ( 0x90 + (UChar)cond ); + emit_amode_ereg_greg ( reg, 0 ); + if (dis) + VG_(printf)("\n\t\tset%s %s\n", + VG_(nameCondcode)(cond), nameIReg(1,reg)); +} + +static void emit_ret ( void ) +{ + 
newEmit(); + emitB ( 0xC3 ); /* RET */ + if (dis) + VG_(printf)("\n\t\tret\n"); +} + +static void emit_pushal ( void ) +{ + newEmit(); + emitB ( 0x60 ); /* PUSHAL */ + if (dis) + VG_(printf)("\n\t\tpushal\n"); +} + +static void emit_popal ( void ) +{ + newEmit(); + emitB ( 0x61 ); /* POPAL */ + if (dis) + VG_(printf)("\n\t\tpopal\n"); +} + +static void emit_lea_litreg_reg ( UInt lit, Int regmem, Int reg ) +{ + newEmit(); + emitB ( 0x8D ); /* LEA M,Gv */ + emit_amode_offregmem_reg ( (Int)lit, regmem, reg ); + if (dis) + VG_(printf)("\n\t\tleal 0x%x(%s), %s\n", + lit, nameIReg(4,regmem), nameIReg(4,reg) ); +} + +static void emit_lea_sib_reg ( UInt lit, Int scale, + Int regbase, Int regindex, Int reg ) +{ + newEmit(); + emitB ( 0x8D ); /* LEA M,Gv */ + emit_amode_sib_reg ( (Int)lit, scale, regbase, regindex, reg ); + if (dis) + VG_(printf)("\n\t\tleal 0x%x(%s,%s,%d), %s\n", + lit, nameIReg(4,regbase), + nameIReg(4,regindex), scale, + nameIReg(4,reg) ); +} + +static void emit_AMD_prefetch_reg ( Int reg ) +{ + newEmit(); + emitB ( 0x0F ); + emitB ( 0x0D ); + emit_amode_regmem_reg ( reg, 1 /* 0 is prefetch; 1 is prefetchw */ ); + if (dis) + VG_(printf)("\n\t\tamd-prefetch (%s)\n", nameIReg(4,reg) ); +} + +/*----------------------------------------------------*/ +/*--- Instruction synthesisers ---*/ +/*----------------------------------------------------*/ + +static Condcode invertCondition ( Condcode cond ) +{ + return (Condcode)(1 ^ (UInt)cond); +} + + +/* Synthesise a call to *baseBlock[offset], ie, + call * (4 x offset)(%ebp). +*/ +static void synth_call_baseBlock_method ( Bool ensure_shortform, + Int word_offset ) +{ + vg_assert(word_offset >= 0); + vg_assert(word_offset < VG_BASEBLOCK_WORDS); + if (ensure_shortform) + vg_assert(word_offset < 32); + emit_call_star_EBP_off ( 4 * word_offset ); +} + + +/* Jump to the next translation, by loading its original addr into + %eax and returning to the scheduler. 
Or, if is a RET transfer, + don't return; instead jump to vg_dispatch_when_RET, which checks + whether this is a signal handler returning, and takes suitable + evasive action. +*/ +static void synth_jmp_reg ( Int reg, + Bool is_ret_dispatch, + Bool is_call_dispatch ) +{ + if (reg != R_EAX) + emit_movv_reg_reg ( 4, reg, R_EAX ); + if (is_ret_dispatch || is_call_dispatch) { + /* The (hopefully) rare case. */ + vg_assert(!(is_ret_dispatch && is_call_dispatch)); + emit_movv_lit_reg ( 4, VG_EBP_DISPATCH_CHECKED, R_EBP ); + } + emit_ret(); +} + + +/* Same deal as synth_jmp_reg. */ +static void synth_jmp_lit ( Addr addr ) +{ + emit_movv_lit_reg ( 4, addr, R_EAX ); + emit_ret(); +} + + +/* Dispatch, but with a call-target check. */ +static void synth_jmp_lit_call_dispatch ( Addr addr ) +{ + emit_movv_lit_reg ( 4, addr, R_EAX ); + emit_movv_lit_reg ( 4, VG_EBP_DISPATCH_CHECKED, R_EBP ); + emit_ret(); +} + + +static void synth_jcond_lit ( Condcode cond, Addr addr ) +{ + /* Do the following: + get eflags + jmp short if not cond to xyxyxy + addr -> eax + ret + xyxyxy + + 2 0000 750C jnz xyxyxy + 3 0002 B877665544 movl $0x44556677, %eax + 4 0007 C3 ret + 5 0008 FFE3 jmp *%ebx + 6 xyxyxy: + */ + emit_get_eflags(); + emit_jcondshort_delta ( invertCondition(cond), 5+1 ); + synth_jmp_lit ( addr ); +} + + +static void synth_jmp_ifzero_reg_lit ( Int reg, Addr addr ) +{ + /* 0000 83FF00 cmpl $0, %edi + 0003 750A jnz next + 0005 B844332211 movl $0x11223344, %eax + 000a C3 ret + next: + */ + emit_cmpl_zero_reg ( reg ); + emit_jcondshort_delta ( CondNZ, 5+1 ); + synth_jmp_lit ( addr ); +} + + +static void synth_mov_lit_reg ( Int size, UInt lit, Int reg ) +{ + /* Load the zero-extended literal into reg, at size l, + regardless of the request size. 
*/ + emit_movv_lit_reg ( 4, lit, reg ); +} + + +static void synth_mov_regmem_reg ( Int size, Int reg1, Int reg2 ) +{ + switch (size) { + case 4: emit_movv_regmem_reg ( 4, reg1, reg2 ); break; + case 2: emit_movzwl_regmem_reg ( reg1, reg2 ); break; + case 1: emit_movzbl_regmem_reg ( reg1, reg2 ); break; + default: VG_(panic)("synth_mov_regmem_reg"); + } +} + + +static void synth_mov_offregmem_reg ( Int size, Int off, Int areg, Int reg ) +{ + switch (size) { + case 4: emit_movv_offregmem_reg ( 4, off, areg, reg ); break; + case 2: emit_movzwl_offregmem_reg ( off, areg, reg ); break; + case 1: emit_movzbl_offregmem_reg ( off, areg, reg ); break; + default: VG_(panic)("synth_mov_offregmem_reg"); + } +} + + +static void synth_mov_reg_offregmem ( Int size, Int reg, + Int off, Int areg ) +{ + switch (size) { + case 4: emit_movv_reg_offregmem ( 4, reg, off, areg ); break; + case 2: emit_movv_reg_offregmem ( 2, reg, off, areg ); break; + case 1: if (reg < 4) { + emit_movb_reg_offregmem ( reg, off, areg ); + } + else { + emit_swapl_reg_EAX ( reg ); + emit_movb_reg_offregmem ( R_AL, off, areg ); + emit_swapl_reg_EAX ( reg ); + } + break; + default: VG_(panic)("synth_mov_reg_offregmem"); + } +} + + +static void synth_mov_reg_memreg ( Int size, Int reg1, Int reg2 ) +{ + Int s1; + switch (size) { + case 4: emit_movv_reg_regmem ( 4, reg1, reg2 ); break; + case 2: emit_movv_reg_regmem ( 2, reg1, reg2 ); break; + case 1: if (reg1 < 4) { + emit_movb_reg_regmem ( reg1, reg2 ); + } + else { + /* Choose a swap reg which is < 4 and not reg1 or reg2. */ + for (s1 = 0; s1 == reg1 || s1 == reg2; s1++) ; + emit_swapl_reg_reg ( s1, reg1 ); + emit_movb_reg_regmem ( s1, reg2 ); + emit_swapl_reg_reg ( s1, reg1 ); + } + break; + default: VG_(panic)("synth_mov_reg_litmem"); + } +} + + +static void synth_unaryop_reg ( Bool upd_cc, + Opcode opcode, Int size, + Int reg ) +{ + /* NB! opcode is a uinstr opcode, not an x86 one! 
*/ + switch (size) { + case 4: if (upd_cc) emit_get_eflags(); + emit_unaryopv_reg ( 4, opcode, reg ); + if (upd_cc) emit_put_eflags(); + break; + case 2: if (upd_cc) emit_get_eflags(); + emit_unaryopv_reg ( 2, opcode, reg ); + if (upd_cc) emit_put_eflags(); + break; + case 1: if (reg < 4) { + if (upd_cc) emit_get_eflags(); + emit_unaryopb_reg ( opcode, reg ); + if (upd_cc) emit_put_eflags(); + } else { + emit_swapl_reg_EAX ( reg ); + if (upd_cc) emit_get_eflags(); + emit_unaryopb_reg ( opcode, R_AL ); + if (upd_cc) emit_put_eflags(); + emit_swapl_reg_EAX ( reg ); + } + break; + default: VG_(panic)("synth_unaryop_reg"); + } +} + + + +static void synth_nonshiftop_reg_reg ( Bool upd_cc, + Opcode opcode, Int size, + Int reg1, Int reg2 ) +{ + /* NB! opcode is a uinstr opcode, not an x86 one! */ + switch (size) { + case 4: if (upd_cc) emit_get_eflags(); + emit_nonshiftopv_reg_reg ( 4, opcode, reg1, reg2 ); + if (upd_cc) emit_put_eflags(); + break; + case 2: if (upd_cc) emit_get_eflags(); + emit_nonshiftopv_reg_reg ( 2, opcode, reg1, reg2 ); + if (upd_cc) emit_put_eflags(); + break; + case 1: { /* Horrible ... */ + Int s1, s2; + /* Choose s1 and s2 to be x86 regs which we can talk about the + lowest 8 bits, ie either %eax, %ebx, %ecx or %edx. Make + sure s1 != s2 and that neither of them equal either reg1 or + reg2. Then use them as temporaries to make things work. 
*/ + if (reg1 < 4 && reg2 < 4) { + if (upd_cc) emit_get_eflags(); + emit_nonshiftopb_reg_reg(opcode, reg1, reg2); + if (upd_cc) emit_put_eflags(); + break; + } + for (s1 = 0; s1 == reg1 || s1 == reg2; s1++) ; + if (reg1 >= 4 && reg2 < 4) { + emit_swapl_reg_reg ( reg1, s1 ); + if (upd_cc) emit_get_eflags(); + emit_nonshiftopb_reg_reg(opcode, s1, reg2); + if (upd_cc) emit_put_eflags(); + emit_swapl_reg_reg ( reg1, s1 ); + break; + } + for (s2 = 0; s2 == reg1 || s2 == reg2 || s2 == s1; s2++) ; + if (reg1 < 4 && reg2 >= 4) { + emit_swapl_reg_reg ( reg2, s2 ); + if (upd_cc) emit_get_eflags(); + emit_nonshiftopb_reg_reg(opcode, reg1, s2); + if (upd_cc) emit_put_eflags(); + emit_swapl_reg_reg ( reg2, s2 ); + break; + } + if (reg1 >= 4 && reg2 >= 4 && reg1 != reg2) { + emit_swapl_reg_reg ( reg1, s1 ); + emit_swapl_reg_reg ( reg2, s2 ); + if (upd_cc) emit_get_eflags(); + emit_nonshiftopb_reg_reg(opcode, s1, s2); + if (upd_cc) emit_put_eflags(); + emit_swapl_reg_reg ( reg1, s1 ); + emit_swapl_reg_reg ( reg2, s2 ); + break; + } + if (reg1 >= 4 && reg2 >= 4 && reg1 == reg2) { + emit_swapl_reg_reg ( reg1, s1 ); + if (upd_cc) emit_get_eflags(); + emit_nonshiftopb_reg_reg(opcode, s1, s1); + if (upd_cc) emit_put_eflags(); + emit_swapl_reg_reg ( reg1, s1 ); + break; + } + VG_(panic)("synth_nonshiftopb_reg_reg"); + } + default: VG_(panic)("synth_nonshiftop_reg_reg"); + } +} + + +static void synth_nonshiftop_offregmem_reg ( + Bool upd_cc, + Opcode opcode, Int size, + Int off, Int areg, Int reg ) +{ + switch (size) { + case 4: + if (upd_cc) emit_get_eflags(); + emit_nonshiftopv_offregmem_reg ( 4, opcode, off, areg, reg ); + if (upd_cc) emit_put_eflags(); + break; + case 2: + if (upd_cc) emit_get_eflags(); + emit_nonshiftopv_offregmem_reg ( 2, opcode, off, areg, reg ); + if (upd_cc) emit_put_eflags(); + break; + case 1: + if (reg < 4) { + if (upd_cc) emit_get_eflags(); + emit_nonshiftopb_offregmem_reg ( opcode, off, areg, reg ); + if (upd_cc) emit_put_eflags(); + } else { + 
emit_swapl_reg_EAX ( reg ); + if (upd_cc) emit_get_eflags(); + emit_nonshiftopb_offregmem_reg ( opcode, off, areg, R_AL ); + if (upd_cc) emit_put_eflags(); + emit_swapl_reg_EAX ( reg ); + } + break; + default: + VG_(panic)("synth_nonshiftop_litmem_reg"); + } +} + + +static void synth_nonshiftop_lit_reg ( Bool upd_cc, + Opcode opcode, Int size, + UInt lit, Int reg ) +{ + switch (size) { + case 4: if (upd_cc) emit_get_eflags(); + emit_nonshiftopv_lit_reg ( 4, opcode, lit, reg ); + if (upd_cc) emit_put_eflags(); + break; + case 2: if (upd_cc) emit_get_eflags(); + emit_nonshiftopv_lit_reg ( 2, opcode, lit, reg ); + if (upd_cc) emit_put_eflags(); + break; + case 1: if (reg < 4) { + if (upd_cc) emit_get_eflags(); + emit_nonshiftopb_lit_reg ( opcode, lit, reg ); + if (upd_cc) emit_put_eflags(); + } else { + emit_swapl_reg_EAX ( reg ); + if (upd_cc) emit_get_eflags(); + emit_nonshiftopb_lit_reg ( opcode, lit, R_AL ); + if (upd_cc) emit_put_eflags(); + emit_swapl_reg_EAX ( reg ); + } + break; + default: VG_(panic)("synth_nonshiftop_lit_reg"); + } +} + + +static void synth_push_reg ( Int size, Int reg ) +{ + switch (size) { + case 4: + emit_pushv_reg ( 4, reg ); + break; + case 2: + emit_pushv_reg ( 2, reg ); + break; + /* Pray that we don't have to generate this really cruddy bit of + code very often. Could do better, but can I be bothered? */ + case 1: + vg_assert(reg != R_ESP); /* duh */ + emit_add_lit_to_esp(-1); + if (reg != R_EAX) emit_swapl_reg_EAX ( reg ); + emit_movb_AL_zeroESPmem(); + if (reg != R_EAX) emit_swapl_reg_EAX ( reg ); + break; + default: + VG_(panic)("synth_push_reg"); + } +} + + +static void synth_pop_reg ( Int size, Int reg ) +{ + switch (size) { + case 4: + emit_popv_reg ( 4, reg ); + break; + case 2: + emit_popv_reg ( 2, reg ); + break; + case 1: + /* Same comment as above applies. 
*/ + vg_assert(reg != R_ESP); /* duh */ + if (reg != R_EAX) emit_swapl_reg_EAX ( reg ); + emit_movb_zeroESPmem_AL(); + if (reg != R_EAX) emit_swapl_reg_EAX ( reg ); + emit_add_lit_to_esp(1); + break; + default: VG_(panic)("synth_pop_reg"); + } +} + + +static void synth_shiftop_reg_reg ( Bool upd_cc, + Opcode opcode, Int size, + Int regs, Int regd ) +{ + synth_push_reg ( size, regd ); + if (regs != R_ECX) emit_swapl_reg_ECX ( regs ); + if (upd_cc) emit_get_eflags(); + switch (size) { + case 4: emit_shiftopv_cl_stack0 ( 4, opcode ); break; + case 2: emit_shiftopv_cl_stack0 ( 2, opcode ); break; + case 1: emit_shiftopb_cl_stack0 ( opcode ); break; + default: VG_(panic)("synth_shiftop_reg_reg"); + } + if (upd_cc) emit_put_eflags(); + if (regs != R_ECX) emit_swapl_reg_ECX ( regs ); + synth_pop_reg ( size, regd ); +} + + +static void synth_shiftop_lit_reg ( Bool upd_cc, + Opcode opcode, Int size, + UInt lit, Int reg ) +{ + switch (size) { + case 4: if (upd_cc) emit_get_eflags(); + emit_shiftopv_lit_reg ( 4, opcode, lit, reg ); + if (upd_cc) emit_put_eflags(); + break; + case 2: if (upd_cc) emit_get_eflags(); + emit_shiftopv_lit_reg ( 2, opcode, lit, reg ); + if (upd_cc) emit_put_eflags(); + break; + case 1: if (reg < 4) { + if (upd_cc) emit_get_eflags(); + emit_shiftopb_lit_reg ( opcode, lit, reg ); + if (upd_cc) emit_put_eflags(); + } else { + emit_swapl_reg_EAX ( reg ); + if (upd_cc) emit_get_eflags(); + emit_shiftopb_lit_reg ( opcode, lit, R_AL ); + if (upd_cc) emit_put_eflags(); + emit_swapl_reg_EAX ( reg ); + } + break; + default: VG_(panic)("synth_nonshiftop_lit_reg"); + } +} + + +static void synth_setb_reg ( Int reg, Condcode cond ) +{ + emit_get_eflags(); + if (reg < 4) { + emit_setb_reg ( reg, cond ); + } else { + emit_swapl_reg_EAX ( reg ); + emit_setb_reg ( R_AL, cond ); + emit_swapl_reg_EAX ( reg ); + } +} + + +static void synth_fpu_regmem ( UChar first_byte, + UChar second_byte_masked, + Int reg ) +{ + emit_get_fpu_state(); + emit_fpu_regmem ( first_byte, 
second_byte_masked, reg ); + emit_put_fpu_state(); +} + + +static void synth_fpu_no_mem ( UChar first_byte, + UChar second_byte ) +{ + emit_get_fpu_state(); + emit_fpu_no_mem ( first_byte, second_byte ); + emit_put_fpu_state(); +} + + +static void synth_movl_reg_reg ( Int src, Int dst ) +{ + emit_movl_reg_reg ( src, dst ); +} + +static void synth_cmovl_reg_reg ( Condcode cond, Int src, Int dst ) +{ + emit_get_eflags(); + emit_jcondshort_delta ( invertCondition(cond), + 2 /* length of the next insn */ ); + emit_movl_reg_reg ( src, dst ); +} + + +/* A word in memory containing a pointer to vg_helper_smc_check4. + Never changes. +*/ +static const Addr vg_helper_smc_check4_ADDR + = (Addr)&VG_(helper_smc_check4); + +static void synth_orig_code_write_check ( Int sz, Int reg ) +{ + UInt offset; + + /* + In this example, reg is %eax and sz == 8: + + -- check the first four bytes + 0087 89C5 movl %eax, %ebp + 0089 FF1544332211 call * 0x11223344 + + -- check the second four + 008f 89C5 movl %eax, %ebp + 0091 83C504 addl $4, %ebp + 0094 FF1544332211 call * 0x11223344 + + Because we can't call an absolute address (alas), the + address called is stored in memory at 0x11223344 in this + example, and it just contains the address of + vg_helper_smc_check4 -- which is where we really want + to get to. + */ + vg_assert(0); + + if (sz < 4) sz = 4; + + for (offset = 0; offset < sz; offset += 4) { + + emit_movl_reg_reg ( reg, R_EBP ); + + if (offset > 0) { + newEmit(); + emitB ( 0x83 ); emitB ( 0xC5 ); emitB ( offset ); + if (dis) VG_(printf)("\n"); + } + + newEmit(); + emitB ( 0xFF ); emitB ( 0x15 ); + emitL ( (Addr)&vg_helper_smc_check4_ADDR ); + if (dis) VG_(printf)("\n"); + } +} + + +/* Synthesise a minimal test (and which discards result) of reg32 + against lit. It's always safe do simply + emit_testv_lit_reg ( 4, lit, reg32 ) + but we try to do better when possible. 
+*/ +static void synth_minimal_test_lit_reg ( UInt lit, Int reg32 ) +{ + if ((lit & 0xFFFFFF00) == 0 && reg32 < 4) { + /* We can get away with a byte insn. */ + emit_testb_lit_reg ( lit, reg32 ); + } + else + if ((lit & 0xFFFF0000) == 0) { + /* Literal fits in 16 bits; do a word insn. */ + emit_testv_lit_reg ( 2, lit, reg32 ); + } + else { + /* Totally general ... */ + emit_testv_lit_reg ( 4, lit, reg32 ); + } +} + + +/*----------------------------------------------------*/ +/*--- Top level of the uinstr -> x86 translation. ---*/ +/*----------------------------------------------------*/ + +/* Return the byte offset from %ebp (ie, into baseBlock) + for the specified ArchReg or SpillNo. */ + +static Int spillOrArchOffset ( Int size, Tag tag, UInt value ) +{ + if (tag == SpillNo) { + vg_assert(size == 4); + vg_assert(value >= 0 && value < VG_MAX_SPILLSLOTS); + return 4 * (value + VGOFF_(spillslots)); + } + if (tag == ArchReg) { + switch (value) { + case R_EAX: return 4 * VGOFF_(m_eax); + case R_ECX: return 4 * VGOFF_(m_ecx); + case R_EDX: return 4 * VGOFF_(m_edx); + case R_EBX: return 4 * VGOFF_(m_ebx); + case R_ESP: + if (size == 1) return 4 * VGOFF_(m_eax) + 1; + else return 4 * VGOFF_(m_esp); + case R_EBP: + if (size == 1) return 4 * VGOFF_(m_ecx) + 1; + else return 4 * VGOFF_(m_ebp); + case R_ESI: + if (size == 1) return 4 * VGOFF_(m_edx) + 1; + else return 4 * VGOFF_(m_esi); + case R_EDI: + if (size == 1) return 4 * VGOFF_(m_ebx) + 1; + else return 4 * VGOFF_(m_edi); + } + } + VG_(panic)("spillOrArchOffset"); +} + + +static Int eflagsOffset ( void ) +{ + return 4 * VGOFF_(m_eflags); +} + + +static Int shadowOffset ( Int arch ) +{ + switch (arch) { + case R_EAX: return 4 * VGOFF_(sh_eax); + case R_ECX: return 4 * VGOFF_(sh_ecx); + case R_EDX: return 4 * VGOFF_(sh_edx); + case R_EBX: return 4 * VGOFF_(sh_ebx); + case R_ESP: return 4 * VGOFF_(sh_esp); + case R_EBP: return 4 * VGOFF_(sh_ebp); + case R_ESI: return 4 * VGOFF_(sh_esi); + case R_EDI: return 4 * 
VGOFF_(sh_edi); + default: VG_(panic)( "shadowOffset"); + } +} + + +static Int shadowFlagsOffset ( void ) +{ + return 4 * VGOFF_(sh_eflags); +} + + +static void synth_LOADV ( Int sz, Int a_reg, Int tv_reg ) +{ + Int i, j, helper_offw; + Int pushed[VG_MAX_REALREGS+2]; + Int n_pushed; + switch (sz) { + case 4: helper_offw = VGOFF_(helperc_LOADV4); break; + case 2: helper_offw = VGOFF_(helperc_LOADV2); break; + case 1: helper_offw = VGOFF_(helperc_LOADV1); break; + default: VG_(panic)("synth_LOADV"); + } + n_pushed = 0; + for (i = 0; i < VG_MAX_REALREGS; i++) { + j = VG_(rankToRealRegNo) ( i ); + if (VG_CALLEE_SAVED(j)) continue; + if (j == tv_reg || j == a_reg) continue; + emit_pushv_reg ( 4, j ); + pushed[n_pushed++] = j; + } + emit_pushv_reg ( 4, a_reg ); + pushed[n_pushed++] = a_reg; + vg_assert(n_pushed <= VG_MAX_REALREGS+1); + + synth_call_baseBlock_method ( False, helper_offw ); + /* Result is in %eax; we need to get it to tv_reg. */ + if (tv_reg != R_EAX) + emit_movv_reg_reg ( 4, R_EAX, tv_reg ); + + while (n_pushed > 0) { + n_pushed--; + if (pushed[n_pushed] == tv_reg) { + emit_add_lit_to_esp ( 4 ); + } else { + emit_popv_reg ( 4, pushed[n_pushed] ); + } + } +} + + +static void synth_STOREV ( Int sz, + Int tv_tag, Int tv_val, + Int a_reg ) +{ + Int i, j, helper_offw; + vg_assert(tv_tag == RealReg || tv_tag == Literal); + switch (sz) { + case 4: helper_offw = VGOFF_(helperc_STOREV4); break; + case 2: helper_offw = VGOFF_(helperc_STOREV2); break; + case 1: helper_offw = VGOFF_(helperc_STOREV1); break; + default: VG_(panic)("synth_STOREV"); + } + for (i = 0; i < VG_MAX_REALREGS; i++) { + j = VG_(rankToRealRegNo) ( i ); + if (VG_CALLEE_SAVED(j)) continue; + if ((tv_tag == RealReg && j == tv_val) || j == a_reg) continue; + emit_pushv_reg ( 4, j ); + } + if (tv_tag == RealReg) { + emit_pushv_reg ( 4, tv_val ); + } else { + if (tv_val == VG_(extend_s_8to32)(tv_val)) + emit_pushl_lit8 ( VG_(extend_s_8to32)(tv_val) ); + else + emit_pushl_lit32(tv_val); + } + 
emit_pushv_reg ( 4, a_reg ); + synth_call_baseBlock_method ( False, helper_offw ); + emit_popv_reg ( 4, a_reg ); + if (tv_tag == RealReg) { + emit_popv_reg ( 4, tv_val ); + } else { + emit_add_lit_to_esp ( 4 ); + } + for (i = VG_MAX_REALREGS-1; i >= 0; i--) { + j = VG_(rankToRealRegNo) ( i ); + if (VG_CALLEE_SAVED(j)) continue; + if ((tv_tag == RealReg && j == tv_val) || j == a_reg) continue; + emit_popv_reg ( 4, j ); + } +} + + +static void synth_WIDEN_signed ( Int sz_src, Int sz_dst, Int reg ) +{ + if (sz_src == 1 && sz_dst == 4) { + emit_shiftopv_lit_reg ( 4, SHL, 24, reg ); + emit_shiftopv_lit_reg ( 4, SAR, 24, reg ); + } + else if (sz_src == 2 && sz_dst == 4) { + emit_shiftopv_lit_reg ( 4, SHL, 16, reg ); + emit_shiftopv_lit_reg ( 4, SAR, 16, reg ); + } + else if (sz_src == 1 && sz_dst == 2) { + emit_shiftopv_lit_reg ( 2, SHL, 8, reg ); + emit_shiftopv_lit_reg ( 2, SAR, 8, reg ); + } + else + VG_(panic)("synth_WIDEN"); +} + + +static void synth_SETV ( Int sz, Int reg ) +{ + UInt val; + switch (sz) { + case 4: val = 0x00000000; break; + case 2: val = 0xFFFF0000; break; + case 1: val = 0xFFFFFF00; break; + case 0: val = 0xFFFFFFFE; break; + default: VG_(panic)("synth_SETV"); + } + emit_movv_lit_reg ( 4, val, reg ); +} + + +static void synth_TESTV ( Int sz, Int tag, Int val ) +{ + vg_assert(tag == ArchReg || tag == RealReg); + if (tag == ArchReg) { + switch (sz) { + case 4: + emit_testv_lit_offregmem ( + 4, 0xFFFFFFFF, shadowOffset(val), R_EBP ); + break; + case 2: + emit_testv_lit_offregmem ( + 4, 0x0000FFFF, shadowOffset(val), R_EBP ); + break; + case 1: + if (val < 4) { + emit_testv_lit_offregmem ( + 4, 0x000000FF, shadowOffset(val), R_EBP ); + } else { + emit_testv_lit_offregmem ( + 4, 0x0000FF00, shadowOffset(val-4), R_EBP ); + } + break; + case 0: + /* should never happen */ + default: + VG_(panic)("synth_TESTV(ArchReg)"); + } + } else { + switch (sz) { + case 4: + /* Works, but holds the entire 32-bit literal, hence + generating a 6-byte insn. 
We want to know if any bits + in the reg are set, but since this is for the full reg, + we might as well compare it against zero, which can be + done with a shorter insn. */ + /* synth_minimal_test_lit_reg ( 0xFFFFFFFF, val ); */ + emit_cmpl_zero_reg ( val ); + break; + case 2: + synth_minimal_test_lit_reg ( 0x0000FFFF, val ); + break; + case 1: + synth_minimal_test_lit_reg ( 0x000000FF, val ); + break; + case 0: + synth_minimal_test_lit_reg ( 0x00000001, val ); + break; + default: + VG_(panic)("synth_TESTV(RealReg)"); + } + } + emit_jcondshort_delta ( CondZ, 3 ); + synth_call_baseBlock_method ( + True, /* needed to guarantee that this insn is indeed 3 bytes long */ + (sz==4 ? VGOFF_(helper_value_check4_fail) + : (sz==2 ? VGOFF_(helper_value_check2_fail) + : sz == 1 ? VGOFF_(helper_value_check1_fail) + : VGOFF_(helper_value_check0_fail))) + ); +} + + +static void synth_GETV ( Int sz, Int arch, Int reg ) +{ + /* VG_(printf)("synth_GETV %d of Arch %s\n", sz, nameIReg(sz, arch)); */ + switch (sz) { + case 4: + emit_movv_offregmem_reg ( 4, shadowOffset(arch), R_EBP, reg ); + break; + case 2: + emit_movzwl_offregmem_reg ( shadowOffset(arch), R_EBP, reg ); + emit_nonshiftopv_lit_reg ( 4, OR, 0xFFFF0000, reg ); + break; + case 1: + if (arch < 4) { + emit_movzbl_offregmem_reg ( shadowOffset(arch), R_EBP, reg ); + } else { + emit_movzbl_offregmem_reg ( shadowOffset(arch-4)+1, R_EBP, reg ); + } + emit_nonshiftopv_lit_reg ( 4, OR, 0xFFFFFF00, reg ); + break; + default: + VG_(panic)("synth_GETV"); + } +} + + +static void synth_PUTV ( Int sz, Int srcTag, UInt lit_or_reg, Int arch ) +{ + if (srcTag == Literal) { + /* PUTV with a Literal is only ever used to set the corresponding + ArchReg to `all valid'. Should really be a kind of SETV. 
*/ + UInt lit = lit_or_reg; + switch (sz) { + case 4: + vg_assert(lit == 0x00000000); + emit_movv_lit_offregmem ( 4, 0x00000000, + shadowOffset(arch), R_EBP ); + break; + case 2: + vg_assert(lit == 0xFFFF0000); + emit_movv_lit_offregmem ( 2, 0x0000, + shadowOffset(arch), R_EBP ); + break; + case 1: + vg_assert(lit == 0xFFFFFF00); + if (arch < 4) { + emit_movb_lit_offregmem ( 0x00, + shadowOffset(arch), R_EBP ); + } else { + emit_movb_lit_offregmem ( 0x00, + shadowOffset(arch-4)+1, R_EBP ); + } + break; + default: + VG_(panic)("synth_PUTV(lit)"); + } + + } else { + + UInt reg; + vg_assert(srcTag == RealReg); + + if (sz == 1 && lit_or_reg >= 4) { + emit_swapl_reg_EAX ( lit_or_reg ); + reg = R_EAX; + } else { + reg = lit_or_reg; + } + + if (sz == 1) vg_assert(reg < 4); + + switch (sz) { + case 4: + emit_movv_reg_offregmem ( 4, reg, + shadowOffset(arch), R_EBP ); + break; + case 2: + emit_movv_reg_offregmem ( 2, reg, + shadowOffset(arch), R_EBP ); + break; + case 1: + if (arch < 4) { + emit_movb_reg_offregmem ( reg, + shadowOffset(arch), R_EBP ); + } else { + emit_movb_reg_offregmem ( reg, + shadowOffset(arch-4)+1, R_EBP ); + } + break; + default: + VG_(panic)("synth_PUTV(reg)"); + } + + if (sz == 1 && lit_or_reg >= 4) { + emit_swapl_reg_EAX ( lit_or_reg ); + } + } +} + + +static void synth_GETVF ( Int reg ) +{ + emit_movv_offregmem_reg ( 4, shadowFlagsOffset(), R_EBP, reg ); + /* paranoia only; should be unnecessary ... */ + /* emit_nonshiftopv_lit_reg ( 4, OR, 0xFFFFFFFE, reg ); */ +} + + +static void synth_PUTVF ( UInt reg ) +{ + emit_movv_reg_offregmem ( 4, reg, shadowFlagsOffset(), R_EBP ); +} + + +static void synth_handle_esp_assignment ( Int reg ) +{ + emit_pushal(); + emit_pushv_reg ( 4, reg ); + synth_call_baseBlock_method ( False, VGOFF_(handle_esp_assignment) ); + emit_add_lit_to_esp ( 4 ); + emit_popal(); +} + + +static void synth_fpu_mem_check_actions ( Bool isWrite, + Int size, Int a_reg ) +{ + Int helper_offw + = isWrite ? 
VGOFF_(fpu_write_check) + : VGOFF_(fpu_read_check); + emit_pushal(); + emit_pushl_lit8 ( size ); + emit_pushv_reg ( 4, a_reg ); + synth_call_baseBlock_method ( False, helper_offw ); + emit_add_lit_to_esp ( 8 ); + emit_popal(); +} + + +#if 0 +/* FixMe. Useful for debugging. */ +void VG_(oink) ( Int n ) +{ + VG_(printf)("OiNk(%d): ", n ); + VG_(show_reg_tags)( &VG_(m_shadow) ); +} + +static void synth_OINK ( Int n ) +{ + emit_pushal(); + emit_movv_lit_reg ( 4, n, R_EBP ); + emit_pushl_reg ( R_EBP ); + emit_movv_lit_reg ( 4, (Addr)&VG_(oink), R_EBP ); + emit_call_reg ( R_EBP ); + emit_add_lit_to_esp ( 4 ); + emit_popal(); +} +#endif + +static void synth_TAG1_op ( VgTagOp op, Int reg ) +{ + switch (op) { + + /* Scheme is + neg %reg -- CF = %reg==0 ? 0 : 1 + sbbl %reg, %reg -- %reg = -CF + or 0xFFFFFFFE, %reg -- invalidate all bits except lowest + */ + case VgT_PCast40: + emit_unaryopv_reg(4, NEG, reg); + emit_nonshiftopv_reg_reg(4, SBB, reg, reg); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFFFE, reg); + break; + case VgT_PCast20: + emit_unaryopv_reg(2, NEG, reg); + emit_nonshiftopv_reg_reg(4, SBB, reg, reg); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFFFE, reg); + break; + case VgT_PCast10: + if (reg >= 4) { + emit_swapl_reg_EAX(reg); + emit_unaryopb_reg(NEG, R_EAX); + emit_swapl_reg_EAX(reg); + } else { + emit_unaryopb_reg(NEG, reg); + } + emit_nonshiftopv_reg_reg(4, SBB, reg, reg); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFFFE, reg); + break; + + /* Scheme is + andl $1, %reg -- %reg is 0 or 1 + negl %reg -- %reg is 0 or 0xFFFFFFFF + and possibly an OR to invalidate unused bits. 
+ */ + case VgT_PCast04: + emit_nonshiftopv_lit_reg(4, AND, 0x00000001, reg); + emit_unaryopv_reg(4, NEG, reg); + break; + case VgT_PCast02: + emit_nonshiftopv_lit_reg(4, AND, 0x00000001, reg); + emit_unaryopv_reg(4, NEG, reg); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg); + break; + case VgT_PCast01: + emit_nonshiftopv_lit_reg(4, AND, 0x00000001, reg); + emit_unaryopv_reg(4, NEG, reg); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, reg); + break; + + /* Scheme is + shl $24, %reg -- make irrelevant bits disappear + negl %reg -- CF = %reg==0 ? 0 : 1 + sbbl %reg, %reg -- %reg = -CF + and possibly an OR to invalidate unused bits. + */ + case VgT_PCast14: + emit_shiftopv_lit_reg(4, SHL, 24, reg); + emit_unaryopv_reg(4, NEG, reg); + emit_nonshiftopv_reg_reg(4, SBB, reg, reg); + break; + case VgT_PCast12: + emit_shiftopv_lit_reg(4, SHL, 24, reg); + emit_unaryopv_reg(4, NEG, reg); + emit_nonshiftopv_reg_reg(4, SBB, reg, reg); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg); + break; + case VgT_PCast11: + emit_shiftopv_lit_reg(4, SHL, 24, reg); + emit_unaryopv_reg(4, NEG, reg); + emit_nonshiftopv_reg_reg(4, SBB, reg, reg); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, reg); + break; + + /* We steal %ebp (a non-allocable reg) as a temporary: + pushl %ebp + movl %reg, %ebp + negl %ebp + orl %ebp, %reg + popl %ebp + This sequence turns out to be correct regardless of the + operation width. + */ + case VgT_Left4: + case VgT_Left2: + case VgT_Left1: + vg_assert(reg != R_EDI); + emit_movv_reg_reg(4, reg, R_EDI); + emit_unaryopv_reg(4, NEG, R_EDI); + emit_nonshiftopv_reg_reg(4, OR, R_EDI, reg); + break; + + /* These are all fairly obvious; do the op and then, if + necessary, invalidate unused bits. 
*/ + case VgT_SWiden14: + emit_shiftopv_lit_reg(4, SHL, 24, reg); + emit_shiftopv_lit_reg(4, SAR, 24, reg); + break; + case VgT_SWiden24: + emit_shiftopv_lit_reg(4, SHL, 16, reg); + emit_shiftopv_lit_reg(4, SAR, 16, reg); + break; + case VgT_SWiden12: + emit_shiftopv_lit_reg(4, SHL, 24, reg); + emit_shiftopv_lit_reg(4, SAR, 24, reg); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg); + break; + case VgT_ZWiden14: + emit_nonshiftopv_lit_reg(4, AND, 0x000000FF, reg); + break; + case VgT_ZWiden24: + emit_nonshiftopv_lit_reg(4, AND, 0x0000FFFF, reg); + break; + case VgT_ZWiden12: + emit_nonshiftopv_lit_reg(4, AND, 0x000000FF, reg); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg); + break; + + default: + VG_(panic)("synth_TAG1_op"); + } +} + + +static void synth_TAG2_op ( VgTagOp op, Int regs, Int regd ) +{ + switch (op) { + + /* UifU is implemented by OR, since 1 means Undefined. */ + case VgT_UifU4: + case VgT_UifU2: + case VgT_UifU1: + case VgT_UifU0: + emit_nonshiftopv_reg_reg(4, OR, regs, regd); + break; + + /* DifD is implemented by AND, since 0 means Defined. */ + case VgT_DifD4: + case VgT_DifD2: + case VgT_DifD1: + emit_nonshiftopv_reg_reg(4, AND, regs, regd); + break; + + /* ImproveAND(value, tags) = value OR tags. + Defined (0) value 0s give defined (0); all other -> undefined (1). + value is in regs; tags is in regd. + Be paranoid and invalidate unused bits; I don't know whether + or not this is actually necessary. */ + case VgT_ImproveAND4_TQ: + emit_nonshiftopv_reg_reg(4, OR, regs, regd); + break; + case VgT_ImproveAND2_TQ: + emit_nonshiftopv_reg_reg(4, OR, regs, regd); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, regd); + break; + case VgT_ImproveAND1_TQ: + emit_nonshiftopv_reg_reg(4, OR, regs, regd); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, regd); + break; + + /* ImproveOR(value, tags) = (not value) OR tags. + Defined (0) value 1s give defined (0); all other -> undefined (1). + value is in regs; tags is in regd. 
+ To avoid trashing value, this is implemented (re de Morgan) as + not (value AND (not tags)) + Be paranoid and invalidate unused bits; I don't know whether + or not this is actually necessary. */ + case VgT_ImproveOR4_TQ: + emit_unaryopv_reg(4, NOT, regd); + emit_nonshiftopv_reg_reg(4, AND, regs, regd); + emit_unaryopv_reg(4, NOT, regd); + break; + case VgT_ImproveOR2_TQ: + emit_unaryopv_reg(4, NOT, regd); + emit_nonshiftopv_reg_reg(4, AND, regs, regd); + emit_unaryopv_reg(4, NOT, regd); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, regd); + break; + case VgT_ImproveOR1_TQ: + emit_unaryopv_reg(4, NOT, regd); + emit_nonshiftopv_reg_reg(4, AND, regs, regd); + emit_unaryopv_reg(4, NOT, regd); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, regd); + break; + + default: + VG_(panic)("synth_TAG2_op"); + } +} + +/*----------------------------------------------------*/ +/*--- Generate code for a single UInstr. ---*/ +/*----------------------------------------------------*/ + +static void emitUInstr ( Int i, UInstr* u ) +{ + if (dis) + VG_(ppUInstr)(i, u); + +# if 0 + if (0&& VG_(translations_done) >= 600) { + Bool old_dis = dis; + dis = False; + synth_OINK(i); + dis = old_dis; + } +# endif + + switch (u->opcode) { + + case NOP: case CALLM_S: case CALLM_E: break; + + case INCEIP: { + vg_assert(u->tag1 == Lit16); + emit_addlit8_offregmem ( u->val1, R_EBP, 4 * VGOFF_(m_eip) ); + break; + } + + case LEA1: { + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == RealReg); + emit_lea_litreg_reg ( u->lit32, u->val1, u->val2 ); + break; + } + + case LEA2: { + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == RealReg); + vg_assert(u->tag3 == RealReg); + emit_lea_sib_reg ( u->lit32, u->extra4b, + u->val1, u->val2, u->val3 ); + break; + } + + case WIDEN: { + vg_assert(u->tag1 == RealReg); + if (u->signed_widen) { + synth_WIDEN_signed ( u->extra4b, u->size, u->val1 ); + } else { + /* no need to generate any code. 
*/ + } + break; + } + + case SETV: { + vg_assert(VG_(clo_instrument)); + vg_assert(u->tag1 == RealReg); + synth_SETV ( u->size, u->val1 ); + break; + } + + case STOREV: { + vg_assert(VG_(clo_instrument)); + vg_assert(u->tag1 == RealReg || u->tag1 == Literal); + vg_assert(u->tag2 == RealReg); + synth_STOREV ( u->size, u->tag1, + u->tag1==Literal ? u->lit32 : u->val1, + u->val2 ); + break; + } + + case STORE: { + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == RealReg); + synth_mov_reg_memreg ( u->size, u->val1, u->val2 ); + if (u->smc_check) + synth_orig_code_write_check ( u->size, u->val2 ); + break; + } + + case LOADV: { + vg_assert(VG_(clo_instrument)); + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == RealReg); + if (0 && VG_(clo_instrument)) + emit_AMD_prefetch_reg ( u->val1 ); + synth_LOADV ( u->size, u->val1, u->val2 ); + break; + } + + case LOAD: { + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == RealReg); + synth_mov_regmem_reg ( u->size, u->val1, u->val2 ); + break; + } + + case TESTV: { + vg_assert(VG_(clo_instrument)); + vg_assert(u->tag1 == RealReg || u->tag1 == ArchReg); + synth_TESTV(u->size, u->tag1, u->val1); + break; + } + + case GETV: { + vg_assert(VG_(clo_instrument)); + vg_assert(u->tag1 == ArchReg); + vg_assert(u->tag2 == RealReg); + synth_GETV(u->size, u->val1, u->val2); + break; + } + + case GETVF: { + vg_assert(VG_(clo_instrument)); + vg_assert(u->tag1 == RealReg); + vg_assert(u->size == 0); + synth_GETVF(u->val1); + break; + } + + case PUTV: { + vg_assert(VG_(clo_instrument)); + vg_assert(u->tag1 == RealReg || u->tag1 == Literal); + vg_assert(u->tag2 == ArchReg); + synth_PUTV(u->size, u->tag1, + u->tag1==Literal ? 
u->lit32 : u->val1, + u->val2 ); + break; + } + + case PUTVF: { + vg_assert(VG_(clo_instrument)); + vg_assert(u->tag1 == RealReg); + vg_assert(u->size == 0); + synth_PUTVF(u->val1); + break; + } + + case GET: { + vg_assert(u->tag1 == ArchReg || u->tag1 == SpillNo); + vg_assert(u->tag2 == RealReg); + synth_mov_offregmem_reg ( + u->size, + spillOrArchOffset( u->size, u->tag1, u->val1 ), + R_EBP, + u->val2 + ); + break; + } + + case PUT: { + vg_assert(u->tag2 == ArchReg || u->tag2 == SpillNo); + vg_assert(u->tag1 == RealReg); + if (u->tag2 == ArchReg + && u->val2 == R_ESP + && u->size == 4 + && VG_(clo_instrument)) { + synth_handle_esp_assignment ( u->val1 ); + } + synth_mov_reg_offregmem ( + u->size, + u->val1, + spillOrArchOffset( u->size, u->tag2, u->val2 ), + R_EBP + ); + break; + } + + case GETF: { + vg_assert(u->size == 2 || u->size == 4); + vg_assert(u->tag1 == RealReg); + synth_mov_offregmem_reg ( + u->size, + eflagsOffset(), + R_EBP, + u->val1 + ); + break; + } + + case PUTF: { + vg_assert(u->size == 2 || u->size == 4); + vg_assert(u->tag1 == RealReg); + synth_mov_reg_offregmem ( + u->size, + u->val1, + eflagsOffset(), + R_EBP + ); + break; + } + + case MOV: { + vg_assert(u->tag1 == RealReg || u->tag1 == Literal); + vg_assert(u->tag2 == RealReg); + switch (u->tag1) { + case RealReg: vg_assert(u->size == 4); + if (u->val1 != u->val2) + synth_movl_reg_reg ( u->val1, u->val2 ); + break; + case Literal: synth_mov_lit_reg ( u->size, u->lit32, u->val2 ); + break; + default: VG_(panic)("emitUInstr:mov"); + } + break; + } + + case SBB: + case ADC: + case XOR: + case OR: + case AND: + case SUB: + case ADD: { + vg_assert(u->tag2 == RealReg); + switch (u->tag1) { + case Literal: synth_nonshiftop_lit_reg ( + VG_(anyFlagUse)(u), + u->opcode, u->size, u->lit32, u->val2 ); + break; + case RealReg: synth_nonshiftop_reg_reg ( + VG_(anyFlagUse)(u), + u->opcode, u->size, u->val1, u->val2 ); + break; + case ArchReg: synth_nonshiftop_offregmem_reg ( + VG_(anyFlagUse)(u), + 
u->opcode, u->size, + spillOrArchOffset( u->size, u->tag1, u->val1 ), + R_EBP, + u->val2 ); + break; + default: VG_(panic)("emitUInstr:non-shift-op"); + } + break; + } + + case RCR: + case RCL: + case ROR: + case ROL: + case SAR: + case SHR: + case SHL: { + vg_assert(u->tag2 == RealReg); + switch (u->tag1) { + case Literal: synth_shiftop_lit_reg ( + VG_(anyFlagUse)(u), + u->opcode, u->size, u->lit32, u->val2 ); + break; + case RealReg: synth_shiftop_reg_reg ( + VG_(anyFlagUse)(u), + u->opcode, u->size, u->val1, u->val2 ); + break; + default: VG_(panic)("emitUInstr:non-shift-op"); + } + break; + } + + case INC: + case DEC: + case NEG: + case NOT: + vg_assert(u->tag1 == RealReg); + synth_unaryop_reg ( + VG_(anyFlagUse)(u), u->opcode, u->size, u->val1 ); + break; + + case BSWAP: + vg_assert(u->tag1 == RealReg); + vg_assert(u->size == 4); + vg_assert(!VG_(anyFlagUse)(u)); + emit_bswapl_reg ( u->val1 ); + break; + + case CMOV: + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == RealReg); + vg_assert(u->cond != CondAlways); + vg_assert(u->size == 4); + synth_cmovl_reg_reg ( u->cond, u->val1, u->val2 ); + break; + + case JMP: { + vg_assert(u->tag2 == NoValue); + vg_assert(u->tag1 == RealReg || u->tag1 == Literal); + if (u->cond == CondAlways) { + if (u->tag1 == RealReg) { + synth_jmp_reg ( u->val1, u->ret_dispatch, u->call_dispatch ); + } else { + vg_assert(!u->ret_dispatch); + if (u->call_dispatch) + synth_jmp_lit_call_dispatch ( + u->tag1==Literal ? u->lit32 : u->val1 ); + else + synth_jmp_lit ( + u->tag1==Literal ? u->lit32 : u->val1 ); + } + } else { + if (u->tag1 == RealReg) { + VG_(panic)("emitUInstr: conditional jump to reg"); + } else { + vg_assert(!u->ret_dispatch); + vg_assert(!u->call_dispatch); + synth_jcond_lit ( u->cond, + u->tag1==Literal ? 
u->lit32 : u->val1 ); + } + } + break; + } + + case JIFZ: + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == Literal); + vg_assert(u->size == 4); + synth_jmp_ifzero_reg_lit ( u->val1, u->lit32 ); + break; + + case TAG1: + synth_TAG1_op ( u->val3, u->val1 ); + break; + + case TAG2: + if (u->val3 != VgT_DebugFn) { + synth_TAG2_op ( u->val3, u->val1, u->val2 ); + } else { + /* Assume a call to VgT_DebugFn passing both args + and placing the result back in the second. */ + Int j, k; + /* u->val2 is the reg into which the result is written. So + don't save/restore it. And it can be used at a temp for + the call target, too. Since %eax is used for the return + value from the C procedure, it is preserved only by + virtue of not being mentioned as a VG_CALLEE_SAVED reg. */ + for (k = 0; k < VG_MAX_REALREGS; k++) { + j = VG_(rankToRealRegNo) ( k ); + if (VG_CALLEE_SAVED(j)) continue; + if (j == u->val2) continue; + emit_pushv_reg ( 4, j ); + } + emit_pushv_reg(4, u->val2); + emit_pushv_reg(4, u->val1); + emit_movv_lit_reg ( 4, (UInt)(&VG_(DebugFn)), u->val2 ); + emit_call_reg ( u->val2 ); + if (u->val2 != R_EAX) + emit_movv_reg_reg ( 4, R_EAX, u->val2 ); + /* nuke args */ + emit_add_lit_to_esp(8); + for (k = VG_MAX_REALREGS-1; k >= 0; k--) { + j = VG_(rankToRealRegNo) ( k ); + if (VG_CALLEE_SAVED(j)) continue; + if (j == u->val2) continue; + emit_popv_reg ( 4, j ); + } + } + break; + + case PUSH: + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == NoValue); + emit_pushv_reg ( 4, u->val1 ); + break; + + case POP: + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == NoValue); + emit_popv_reg ( 4, u->val1 ); + break; + + case CALLM: + vg_assert(u->tag1 == Lit16); + vg_assert(u->tag2 == NoValue); + vg_assert(u->size == 0); + if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) + emit_get_eflags(); + synth_call_baseBlock_method ( False, u->val1 ); + if (u->flags_w != FlagsEmpty) + emit_put_eflags(); + break; + + case CLEAR: + vg_assert(u->tag1 == Lit16); + 
vg_assert(u->tag2 == NoValue); + emit_add_lit_to_esp ( u->val1 ); + break; + + case CC2VAL: + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == NoValue); + vg_assert(VG_(anyFlagUse)(u)); + synth_setb_reg ( u->val1, u->cond ); + break; + + /* We assume that writes to memory done by FPU_Ws are not going + to be used to create new code, so there's no orig-code-write + checks done by default. */ + case FPU_R: + case FPU_W: + vg_assert(u->tag1 == Lit16); + vg_assert(u->tag2 == RealReg); + if (VG_(clo_instrument)) + synth_fpu_mem_check_actions ( + u->opcode==FPU_W, u->size, u->val2 ); + synth_fpu_regmem ( (u->val1 >> 8) & 0xFF, + u->val1 & 0xFF, + u->val2 ); + if (u->opcode == FPU_W && u->smc_check) + synth_orig_code_write_check ( u->size, u->val2 ); + break; + + case FPU: + vg_assert(u->tag1 == Lit16); + vg_assert(u->tag2 == NoValue); + synth_fpu_no_mem ( (u->val1 >> 8) & 0xFF, + u->val1 & 0xFF ); + break; + + default: + VG_(printf)("emitUInstr: unhandled insn:\n"); + VG_(ppUInstr)(0,u); + VG_(panic)("emitUInstr: unimplemented opcode"); + } + +} + + +/* Emit x86 for the ucode in cb, returning the address of the + generated code and setting *nbytes to its size. */ +UChar* VG_(emit_code) ( UCodeBlock* cb, Int* nbytes ) +{ + Int i; + emitted_code_used = 0; + emitted_code_size = 500; /* reasonable initial size */ + emitted_code = VG_(jitmalloc)(emitted_code_size); + + if (dis) VG_(printf)("Generated code:\n"); + + for (i = 0; i < cb->used; i++) { + if (cb->instrs[i].opcode != NOP) { + UInstr* u = &cb->instrs[i]; +# if 1 + /* Check on the sanity of this insn. */ + Bool sane = VG_(saneUInstr)( False, u ); + if (!sane) { + VG_(printf)("\ninsane instruction\n"); + VG_(ppUInstr)( i, u ); + } + vg_assert(sane); +# endif +# if 0 + /* Pass args to TAG1/TAG2 to vg_DebugFn for sanity checking. + Requires a suitable definition of vg_DebugFn. 
*/ + if (u->opcode == TAG1) { + UInstr t1; + vg_assert(u->tag1 == RealReg); + VG_(emptyUInstr)( &t1 ); + t1.opcode = TAG2; + t1.tag1 = t1.tag2 = RealReg; + t1.val1 = t1.val2 = u->val1; + t1.tag3 = Lit16; + t1.val3 = VgT_DebugFn; + emitUInstr( i, &t1 ); + } + if (u->opcode == TAG2) { + UInstr t1; + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == RealReg); + VG_(emptyUInstr)( &t1 ); + t1.opcode = TAG2; + t1.tag1 = t1.tag2 = RealReg; + t1.val1 = t1.val2 = u->val1; + t1.tag3 = Lit16; + t1.val3 = VgT_DebugFn; + if (u->val3 == VgT_UifU1 || u->val3 == VgT_UifU2 + || u->val3 == VgT_UifU4 || u->val3 == VgT_DifD1 + || u->val3 == VgT_DifD2 || u->val3 == VgT_DifD4) + emitUInstr( i, &t1 ); + t1.val1 = t1.val2 = u->val2; + emitUInstr( i, &t1 ); + } +# endif + emitUInstr( i, u ); + } + } + + /* Returns a pointer to the emitted code. This will have to be + copied by the caller into the translation cache, and then freed + using VG_(jitfree). */ + *nbytes = emitted_code_used; + return emitted_code; +} + +/*--------------------------------------------------------------------*/ +/*--- end vg_from_ucode.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_helpers.S b/coregrind/vg_helpers.S new file mode 100644 index 000000000..781175d49 --- /dev/null +++ b/coregrind/vg_helpers.S @@ -0,0 +1,625 @@ + +##--------------------------------------------------------------------## +##--- Support routines for the JITter output. ---## +##--- vg_helpers.S ---## +##--------------------------------------------------------------------## + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. 
+ + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_constants.h" + + +/* Various helper routines, for instructions which are just too + darn tedious for the JITter to output code in-line: + + * integer division + * integer multiplication + * setting and getting obscure eflags + * double-length shifts + + All routines use a standard calling convention designed for + calling from translations, in which the incoming args are + underneath the return address, the callee saves _all_ registers, + and the incoming parameters can be modified, to return results. 
+*/ + + +/* + On entry: + %ECX value + %EBX value + %EAX value -- also the result + RA <- %esp -- after pushal+pushfl is 36(%esp) +*/ +.global VG_(helper_do_client_request) +VG_(helper_do_client_request): + pushal + pushfl + + movl 48(%esp), %eax + pushl %eax + movl 48(%esp), %eax + pushl %eax + movl 48(%esp), %eax + pushl %eax + + call VG_(handle_client_request) + movl %eax, 52(%esp) + + addl $12, %esp + + popfl + popal + ret + + +.global VG_(helper_do_syscall) +VG_(helper_do_syscall): + pushal + call VG_(wrap_syscall) + popal +# movl $VG_(baseBlock), %ebp + ret + + + +.global VG_(helper_value_check0_fail) +VG_(helper_value_check0_fail): + pushal + call VG_(helperc_value_check0_fail) + popal + ret + +.global VG_(helper_value_check1_fail) +VG_(helper_value_check1_fail): + pushal + call VG_(helperc_value_check1_fail) + popal + ret + +.global VG_(helper_value_check2_fail) +VG_(helper_value_check2_fail): + pushal + call VG_(helperc_value_check2_fail) + popal + ret + +.global VG_(helper_value_check4_fail) +VG_(helper_value_check4_fail): + pushal + call VG_(helperc_value_check4_fail) + popal + ret + + +/* Set things up so the dispatch loop exits normally. Used when it is + detected that the program wants to finish, ie it has called + vg_shutdown. +*/ +.global VG_(helper_request_normal_exit) +VG_(helper_request_normal_exit): + pushl %eax + movl VG_(dispatch_ctr), %eax + movl %eax, VG_(dispatch_ctr_SAVED) + movl $1, VG_(dispatch_ctr) + movl $VG_Y_EXIT, VG_(interrupt_reason) + popl %eax + ret + + +/* Do a original-code-write check for the address in %ebp. 
*/ +.global VG_(helper_smc_check4) +VG_(helper_smc_check4): +#if VG_SMC_FASTCHECK_IN_C + + # save the live regs + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + pushl %esi + pushl %edi + + pushl %ebp + call VG_(smc_check4) + addl $4, %esp + + popl %edi + popl %esi + popl %edx + popl %ecx + popl %ebx + popl %eax + + ret +#else + incl VG_(smc_total_check4s) + pushl %ebp + shrl $VG_SMC_CACHE_SHIFT, %ebp + andl $VG_SMC_CACHE_MASK, %ebp + cmpb $0, VG_(smc_cache)(%ebp) + jnz vg_smc_cache_failure + addl $4, %esp + ret + vg_smc_cache_failure: + popl %ebp + pushal + pushl %ebp + call VG_(smc_check4) + addl $4, %esp + popal + ret +#endif + + +/* Fetch the time-stamp-ctr reg. + On entry: + dummy, replaced by %EAX value + dummy, replaced by %EDX value + RA <- %esp +*/ +.global VG_(helper_RDTSC) +VG_(helper_RDTSC): + pushl %eax + pushl %edx + rdtsc + movl %edx, 12(%esp) + movl %eax, 16(%esp) + popl %edx + popl %eax + ret + + +/* Do the CPUID instruction. + On entry: + dummy, replaced by %EAX value + dummy, replaced by %EBX value + dummy, replaced by %ECX value + dummy, replaced by %EDX value + RA <- %esp + + As emulating a real CPUID is kinda hard, as it + has to return different values depending on EAX, + we just pretend to not support CPUID at all until + it becomes a problem. This will for sure disable + all MMX / 3dnow checks so they don't bother us + with code we don't understand. (Dirk ) + + http://www.sandpile.org/ia32/cpuid.htm + + (Later: we instead pretend to be like Werner's P54C P133, that is + an original pre-MMX Pentium). 
+ cpuid words (0): 0x1 0x756e6547 0x6c65746e 0x49656e69 + cpuid words (1): 0x52b 0x0 0x0 0x1bf +*/ +.global VG_(helper_CPUID) +VG_(helper_CPUID): + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 32(%esp), %eax +/* + cpuid +*/ +/* + xor %eax,%eax + xor %ebx,%ebx + xor %ecx,%ecx + xor %edx,%edx +*/ + cmpl $0, %eax + jz cpuid__0 + movl $0x52b, %eax + movl $0x0, %ebx + movl $0x0, %ecx + movl $0x1bf, %edx + jmp cpuid__99 +cpuid__0: + movl $0x1, %eax + movl $0x756e6547, %ebx + movl $0x6c65746e, %ecx + movl $0x49656e69, %edx +cpuid__99: + + movl %edx, 20(%esp) + movl %ecx, 24(%esp) + movl %ebx, 28(%esp) + movl %eax, 32(%esp) + popl %edx + popl %ecx + popl %ebx + popl %eax + ret + + +/* Fetch the FPU status register. + On entry: + dummy, replaced by result + RA <- %esp +*/ +.global VG_(helper_fstsw_AX) +VG_(helper_fstsw_AX): + pushl %eax + pushl %esi + movl VGOFF_(m_fpustate), %esi + frstor (%ebp, %esi, 4) + fstsw %ax + popl %esi + movw %ax, 8(%esp) + popl %eax + ret + + +/* Copy %ah into %eflags. + On entry: + value of %eax + RA <- %esp +*/ +.global VG_(helper_SAHF) +VG_(helper_SAHF): + pushl %eax + movl 8(%esp), %eax + sahf + popl %eax + ret + + +/* Bit scan forwards/reverse. Sets flags (??). + On entry: + value, replaced by result + RA <- %esp +*/ +.global VG_(helper_bsr) +VG_(helper_bsr): + pushl %eax + bsrl 8(%esp), %eax + movl %eax, 8(%esp) + popl %eax + ret + +.global VG_(helper_bsf) +VG_(helper_bsf): + pushl %eax + bsfl 8(%esp), %eax + movl %eax, 8(%esp) + popl %eax + ret + + +/* Bit test and set/reset/complement. Sets flags. 
+ On entry: + src + dst + RA <- %esp +*/ +.global VG_(helper_bt) +VG_(helper_bt): + pushl %eax + movl 12(%esp), %eax + btl %eax, 8(%esp) + popl %eax + ret +.global VG_(helper_bts) +VG_(helper_bts): + pushl %eax + movl 12(%esp), %eax + btsl %eax, 8(%esp) + popl %eax + ret +.global VG_(helper_btr) +VG_(helper_btr): + pushl %eax + movl 12(%esp), %eax + btrl %eax, 8(%esp) + popl %eax + ret +.global VG_(helper_btc) +VG_(helper_btc): + pushl %eax + movl 12(%esp), %eax + btcl %eax, 8(%esp) + popl %eax + ret + + +/* 32-bit double-length shift left/right. + On entry: + amount + src + dst + RA <- %esp +*/ +.global VG_(helper_shldl) +VG_(helper_shldl): + pushl %eax + pushl %ebx + pushl %ecx + + movb 24(%esp), %cl + movl 20(%esp), %ebx + movl 16(%esp), %eax + shldl %cl, %ebx, %eax + movl %eax, 16(%esp) + + popl %ecx + popl %ebx + popl %eax + ret + +.global VG_(helper_shldw) +VG_(helper_shldw): + pushl %eax + pushl %ebx + pushl %ecx + + movb 24(%esp), %cl + movw 20(%esp), %bx + movw 16(%esp), %ax + shldw %cl, %bx, %ax + movw %ax, 16(%esp) + + popl %ecx + popl %ebx + popl %eax + ret + +.global VG_(helper_shrdl) +VG_(helper_shrdl): + pushl %eax + pushl %ebx + pushl %ecx + + movb 24(%esp), %cl + movl 20(%esp), %ebx + movl 16(%esp), %eax + shrdl %cl, %ebx, %eax + movl %eax, 16(%esp) + + popl %ecx + popl %ebx + popl %eax + ret + +.global VG_(helper_shrdw) +VG_(helper_shrdw): + pushl %eax + pushl %ebx + pushl %ecx + + movb 24(%esp), %cl + movw 20(%esp), %bx + movw 16(%esp), %ax + shrdw %cl, %bx, %ax + movw %ax, 16(%esp) + + popl %ecx + popl %ebx + popl %eax + ret + + +/* Get the direction flag, and return either 1 or -1. */ +.global VG_(helper_get_dirflag) +VG_(helper_get_dirflag): + pushfl + pushl %eax + + pushfl + popl %eax + shrl $10, %eax + andl $1, %eax + jnz L1 + movl $1, %eax + jmp L2 +L1: movl $-1, %eax +L2: movl %eax, 12(%esp) + + popl %eax + popfl + ret + + +/* Clear/set the direction flag. 
*/ +.global VG_(helper_CLD) +VG_(helper_CLD): + cld + ret + +.global VG_(helper_STD) +VG_(helper_STD): + std + ret + + + +/* Signed 32-to-64 multiply. */ +.globl VG_(helper_imul_32_64) +VG_(helper_imul_32_64): + pushl %eax + pushl %edx + movl 16(%esp), %eax + imull 12(%esp) + movl %eax, 16(%esp) + movl %edx, 12(%esp) + popl %edx + popl %eax + ret + +/* Signed 16-to-32 multiply. */ +.globl VG_(helper_imul_16_32) +VG_(helper_imul_16_32): + pushl %eax + pushl %edx + movw 16(%esp), %ax + imulw 12(%esp) + movw %ax, 16(%esp) + movw %dx, 12(%esp) + popl %edx + popl %eax + ret + +/* Signed 8-to-16 multiply. */ +.globl VG_(helper_imul_8_16) +VG_(helper_imul_8_16): + pushl %eax + pushl %edx + movb 16(%esp), %al + imulb 12(%esp) + movw %ax, 16(%esp) + popl %edx + popl %eax + ret + + + + + + +/* Unsigned 32-to-64 multiply. */ +.globl VG_(helper_mul_32_64) +VG_(helper_mul_32_64): + pushl %eax + pushl %edx + movl 16(%esp), %eax + mull 12(%esp) + movl %eax, 16(%esp) + movl %edx, 12(%esp) + popl %edx + popl %eax + ret + +/* Unsigned 16-to-32 multiply. */ +.globl VG_(helper_mul_16_32) +VG_(helper_mul_16_32): + pushl %eax + pushl %edx + movw 16(%esp), %ax + mulw 12(%esp) + movw %ax, 16(%esp) + movw %dx, 12(%esp) + popl %edx + popl %eax + ret + +/* Unsigned 8-to-16 multiply. */ +.globl VG_(helper_mul_8_16) +VG_(helper_mul_8_16): + pushl %eax + pushl %edx + movb 16(%esp), %al + mulb 12(%esp) + movw %ax, 16(%esp) + popl %edx + popl %eax + ret + + + + +/* Unsigned 64-into-32 divide. */ +.globl VG_(helper_div_64_32) +VG_(helper_div_64_32): + pushl %eax + pushl %edx + movl 16(%esp),%eax + movl 12(%esp),%edx + divl 20(%esp) + movl %eax,16(%esp) + movl %edx,12(%esp) + popl %edx + popl %eax + ret + +/* Signed 64-into-32 divide. */ +.globl VG_(helper_idiv_64_32) +VG_(helper_idiv_64_32): + pushl %eax + pushl %edx + movl 16(%esp),%eax + movl 12(%esp),%edx + idivl 20(%esp) + movl %eax,16(%esp) + movl %edx,12(%esp) + popl %edx + popl %eax + ret + +/* Unsigned 32-into-16 divide. 
*/ +.globl VG_(helper_div_32_16) +VG_(helper_div_32_16): + pushl %eax + pushl %edx + movw 16(%esp),%ax + movw 12(%esp),%dx + divw 20(%esp) + movw %ax,16(%esp) + movw %dx,12(%esp) + popl %edx + popl %eax + ret + +/* Signed 32-into-16 divide. */ +.globl VG_(helper_idiv_32_16) +VG_(helper_idiv_32_16): + pushl %eax + pushl %edx + movw 16(%esp),%ax + movw 12(%esp),%dx + idivw 20(%esp) + movw %ax,16(%esp) + movw %dx,12(%esp) + popl %edx + popl %eax + ret + +/* Unsigned 16-into-8 divide. */ +.globl VG_(helper_div_16_8) +VG_(helper_div_16_8): + pushl %eax + movw 12(%esp),%ax + divb 16(%esp) + movb %ah,12(%esp) + movb %al,8(%esp) + popl %eax + ret + +/* Signed 16-into-8 divide. */ +.globl VG_(helper_idiv_16_8) +VG_(helper_idiv_16_8): + pushl %eax + movw 12(%esp),%ax + idivb 16(%esp) + movb %ah,12(%esp) + movb %al,8(%esp) + popl %eax + ret + + +##--------------------------------------------------------------------## +##--- end vg_helpers.S ---## +##--------------------------------------------------------------------## diff --git a/coregrind/vg_include.h b/coregrind/vg_include.h new file mode 100644 index 000000000..83d6eae01 --- /dev/null +++ b/coregrind/vg_include.h @@ -0,0 +1,1452 @@ + +/*--------------------------------------------------------------------*/ +/*--- A header file for all parts of Valgrind. ---*/ +/*--- Include no other! ---*/ +/*--- vg_include.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#ifndef __VG_INCLUDE_H +#define __VG_INCLUDE_H + + +#include /* ANSI varargs stuff */ +#include /* for jmp_buf */ + + +/* --------------------------------------------------------------------- + Build options and table sizes. You should be able to change these + options or sizes, recompile, and still have a working system. + ------------------------------------------------------------------ */ + +#include "vg_constants.h" + + +/* Set to 1 to enable time profiling. Since this uses SIGPROF, we + don't want this permanently enabled -- only for profiling + builds. */ +#if 0 +# define VG_PROFILE +#endif + + +/* Total number of integer registers available for allocation. That's + all of them except %esp, %edi and %ebp. %edi is a general spare + temporary. %ebp permanently points at VG_(baseBlock). Note that + it's important that this tie in with what rankToRealRegNo() says. + DO NOT CHANGE THIS VALUE FROM 5. ! */ +#define VG_MAX_REALREGS 5 + +/* Total number of spill slots available for allocation, if a TempReg + doesn't make it into a RealReg. Just bomb the entire system if + this value is too small; we don't expect it will ever get + particularly high. */ +#define VG_MAX_SPILLSLOTS 24 + + +/* Constants for the slow translation lookup cache. */ +#define VG_TRANSTAB_SLOW_BITS 11 +#define VG_TRANSTAB_SLOW_SIZE (1 << VG_TRANSTAB_SLOW_BITS) +#define VG_TRANSTAB_SLOW_MASK ((VG_TRANSTAB_SLOW_SIZE) - 1) + +/* Size of a buffer used for creating messages. 
*/
#define M_VG_MSGBUF 10000

/* Size of a smallish table used to read /proc/self/map entries. */
#define M_PROCMAP_BUF 20000

/* Max length of pathname to a .so/executable file. */
#define M_VG_LIBNAMESTR 100

/* Max length of a text fragment used to construct error messages. */
#define M_VG_ERRTXT 512

/* Max length of the string copied from env var VG_ARGS at startup. */
#define M_VG_CMDLINE_STRLEN 1000

/* Max number of options for Valgrind which we can handle. */
#define M_VG_CMDLINE_OPTS 100

/* After this many different unsuppressed errors have been observed,
   be more conservative about collecting new ones. */
#define M_VG_COLLECT_ERRORS_SLOWLY_AFTER 50

/* After this many different unsuppressed errors have been observed,
   stop collecting errors at all, and tell the user their program is
   evidently a steaming pile of camel dung. */
#define M_VG_COLLECT_NO_ERRORS_AFTER 500

/* These many bytes below %ESP are considered addressable if we're
   doing the --workaround-gcc296-bugs hack. */
#define VG_GCC296_BUG_STACK_SLOP 256

/* The maximum number of calls we're prepared to save in a
   backtrace. */
#define VG_DEEPEST_BACKTRACE 50

/* Number of lists in which we keep track of malloc'd but not free'd
   blocks.  Should be prime. */
#define VG_N_MALLOCLISTS 997

/* Number of lists in which we keep track of ExeContexts.  Should be
   prime.
*/ +#define VG_N_EC_LISTS /*997*/ 4999 + + +/* --------------------------------------------------------------------- + Basic types + ------------------------------------------------------------------ */ + +typedef unsigned char UChar; +typedef unsigned short UShort; +typedef unsigned int UInt; +typedef unsigned long long int ULong; + +typedef signed char Char; +typedef signed short Short; +typedef signed int Int; +typedef signed long long int Long; + +typedef unsigned int Addr; + +typedef unsigned char Bool; +#define False ((Bool)0) +#define True ((Bool)1) + +#define mycat_wrk(aaa,bbb) aaa##bbb +#define mycat(aaa,bbb) mycat_wrk(aaa,bbb) + +/* Just pray that gcc's constant folding works properly ... */ +#define BITS(bit7,bit6,bit5,bit4,bit3,bit2,bit1,bit0) \ + ( ((bit7) << 7) | ((bit6) << 6) | ((bit5) << 5) | ((bit4) << 4) \ + | ((bit3) << 3) | ((bit2) << 2) | ((bit1) << 1) | (bit0)) + + +/* --------------------------------------------------------------------- + Now the basic types are set up, we can haul in the kernel-interface + definitions. + ------------------------------------------------------------------ */ + +#include "./vg_kerneliface.h" + + +/* --------------------------------------------------------------------- + Command-line-settable options + ------------------------------------------------------------------ */ + +#define VG_CLO_SMC_NONE 0 +#define VG_CLO_SMC_SOME 1 +#define VG_CLO_SMC_ALL 2 + +#define VG_CLO_MAX_SFILES 10 + +/* Enquire about whether to attach to GDB at errors? default: NO */ +extern Bool VG_(clo_GDB_attach); +/* Sanity-check level: 0 = none, 1 (default), > 1 = expensive. */ +extern Int VG_(sanity_level); +/* Verbosity level: 0 = silent, 1 (default), > 1 = more verbose. */ +extern Int VG_(clo_verbosity); +/* Automatically attempt to demangle C++ names? default: YES */ +extern Bool VG_(clo_demangle); +/* Do leak check at exit? default: NO */ +extern Bool VG_(clo_leak_check); +/* In leak check, show reachable-but-not-freed blocks? 
default: NO */ +extern Bool VG_(clo_show_reachable); +/* How closely should we compare ExeContexts in leak records? default: 2 */ +extern Int VG_(clo_leak_resolution); +/* Round malloc sizes upwards to integral number of words? default: + NO */ +extern Bool VG_(clo_sloppy_malloc); +/* Allow loads from partially-valid addresses? default: YES */ +extern Bool VG_(clo_partial_loads_ok); +/* Simulate child processes? default: NO */ +extern Bool VG_(clo_trace_children); +/* The file id on which we send all messages. default: 2 (stderr). */ +extern Int VG_(clo_logfile_fd); +/* Max volume of the freed blocks queue. */ +extern Int VG_(clo_freelist_vol); +/* Assume accesses immediately below %esp are due to gcc-2.96 bugs. + default: NO */ +extern Bool VG_(clo_workaround_gcc296_bugs); + +/* The number of suppression files specified. */ +extern Int VG_(clo_n_suppressions); +/* The names of the suppression files. */ +extern Char* VG_(clo_suppressions)[VG_CLO_MAX_SFILES]; + +/* Single stepping? default: NO */ +extern Bool VG_(clo_single_step); +/* Code improvement? default: YES */ +extern Bool VG_(clo_optimise); +/* Memory-check instrumentation? default: YES */ +extern Bool VG_(clo_instrument); +/* DEBUG: clean up instrumented code? default: YES */ +extern Bool VG_(clo_cleanup); +/* Handle client memory-range-permissions-setting requests? default: NO */ +extern Bool VG_(clo_client_perms); +/* SMC write checks? default: SOME (1,2,4 byte movs to mem) */ +extern Int VG_(clo_smc_check); +/* DEBUG: print system calls? default: NO */ +extern Bool VG_(clo_trace_syscalls); +/* DEBUG: print signal details? default: NO */ +extern Bool VG_(clo_trace_signals); +/* DEBUG: print symtab details? default: NO */ +extern Bool VG_(clo_trace_symtab); +/* DEBUG: print malloc details? default: NO */ +extern Bool VG_(clo_trace_malloc); +/* Stop after this many basic blocks. default: Infinity. */ +extern ULong VG_(clo_stop_after); +/* Display gory details for the k'th most popular error. 
default: + Infinity. */ +extern Int VG_(clo_dump_error); +/* Number of parents of a backtrace. Default: 8. */ +extern Int VG_(clo_backtrace_size); + + +/* --------------------------------------------------------------------- + Debugging and profiling stuff + ------------------------------------------------------------------ */ + +/* No, really. I _am_ that strange. */ +#define OINK(nnn) VG_(message)(Vg_DebugMsg, "OINK %d",nnn) + +/* Tools for building messages from multiple parts. */ +typedef + enum { Vg_UserMsg, Vg_DebugMsg, Vg_DebugExtraMsg } + VgMsgKind; + +extern void VG_(start_msg) ( VgMsgKind kind ); +extern void VG_(add_to_msg) ( Char* format, ... ); +extern void VG_(end_msg) ( void ); + +/* Send a simple, single-part message. */ +extern void VG_(message) ( VgMsgKind kind, Char* format, ... ); + +/* Create a logfile into which messages can be dumped. */ +extern void VG_(startup_logging) ( void ); +extern void VG_(shutdown_logging) ( void ); + + +/* Profiling stuff */ +#ifdef VG_PROFILE + +#define VGP_M_STACK 10 + +#define VGP_M_CCS 20 /* == the # of elems in VGP_LIST */ +#define VGP_LIST \ + VGP_PAIR(VgpRun=0, "running"), \ + VGP_PAIR(VgpMalloc, "low-lev malloc/free"), \ + VGP_PAIR(VgpCliMalloc, "client malloc/free"), \ + VGP_PAIR(VgpTranslate, "translate-main"), \ + VGP_PAIR(VgpToUCode, "to-ucode"), \ + VGP_PAIR(VgpFromUcode, "from-ucode"), \ + VGP_PAIR(VgpImprove, "improve"), \ + VGP_PAIR(VgpInstrument, "instrument"), \ + VGP_PAIR(VgpCleanup, "cleanup"), \ + VGP_PAIR(VgpRegAlloc, "reg-alloc"), \ + VGP_PAIR(VgpDoLRU, "do-lru"), \ + VGP_PAIR(VgpSlowFindT, "slow-search-transtab"), \ + VGP_PAIR(VgpInitAudit, "init-mem-audit"), \ + VGP_PAIR(VgpExeContext, "exe-context"), \ + VGP_PAIR(VgpReadSyms, "read-syms"), \ + VGP_PAIR(VgpAddToT, "add-to-transtab"), \ + VGP_PAIR(VgpSARP, "set-addr-range-perms"), \ + VGP_PAIR(VgpSyscall, "syscall wrapper"), \ + VGP_PAIR(VgpSpare1, "spare 1"), \ + VGP_PAIR(VgpSpare2, "spare 2") + +#define VGP_PAIR(enumname,str) enumname 
+typedef enum { VGP_LIST } VgpCC; +#undef VGP_PAIR + +extern void VGP_(init_profiling) ( void ); +extern void VGP_(done_profiling) ( void ); +extern void VGP_(pushcc) ( VgpCC ); +extern void VGP_(popcc) ( void ); + +#define VGP_PUSHCC(cc) VGP_(pushcc)(cc) +#define VGP_POPCC VGP_(popcc)() + +#else + +#define VGP_PUSHCC(cc) /* */ +#define VGP_POPCC /* */ + +#endif /* VG_PROFILE */ + + +/* --------------------------------------------------------------------- + Exports of vg_malloc2.c + ------------------------------------------------------------------ */ + +/* Allocation arenas. + SYMTAB is for Valgrind's symbol table storage. + CLIENT is for the client's mallocs/frees. + DEMANGLE is for the C++ demangler. + EXECTXT is for storing ExeContexts. + ERRCTXT is for storing ErrContexts. + PRIVATE is for Valgrind general stuff. + TRANSIENT is for very short-term use. It should be empty + in between uses. + When adding a new arena, remember also to add it + to ensure_mm_init(). +*/ +typedef Int ArenaId; + +#define VG_N_ARENAS 7 + +#define VG_AR_PRIVATE 0 /* :: ArenaId */ +#define VG_AR_SYMTAB 1 /* :: ArenaId */ +#define VG_AR_CLIENT 2 /* :: ArenaId */ +#define VG_AR_DEMANGLE 3 /* :: ArenaId */ +#define VG_AR_EXECTXT 4 /* :: ArenaId */ +#define VG_AR_ERRCTXT 5 /* :: ArenaId */ +#define VG_AR_TRANSIENT 6 /* :: ArenaId */ + +extern void* VG_(malloc) ( ArenaId arena, Int nbytes ); +extern void VG_(free) ( ArenaId arena, void* ptr ); +extern void* VG_(calloc) ( ArenaId arena, Int nmemb, Int nbytes ); +extern void* VG_(realloc) ( ArenaId arena, void* ptr, Int size ); +extern void* VG_(malloc_aligned) ( ArenaId aid, Int req_alignB, + Int req_pszB ); + +extern void VG_(mallocSanityCheckArena) ( ArenaId arena ); +extern void VG_(mallocSanityCheckAll) ( void ); + +extern void VG_(show_all_arena_stats) ( void ); +extern Bool VG_(is_empty_arena) ( ArenaId aid ); + + +/* The red-zone size for the client. This can be arbitrary, but + unfortunately must be set at compile time. 
*/ +#define VG_AR_CLIENT_REDZONE_SZW 4 + +#define VG_AR_CLIENT_REDZONE_SZB \ + (VG_AR_CLIENT_REDZONE_SZW * VKI_BYTES_PER_WORD) + + +/* --------------------------------------------------------------------- + Exports of vg_signals.c + ------------------------------------------------------------------ */ + +/* The maximum number of basic blocks that we're prepared to run in a + signal handler which is called when the client is stuck in a + blocking system call. The purpose of this is to check that such a + signal handler doesn't merely do a longjmp() and keep going + forever; it should return instead. NOTE that this doesn't apply to + signals delivered under normal conditions, only when they are + delivered and the client is already blocked in a system call. */ +#define VG_MAX_BBS_IN_IMMEDIATE_SIGNAL 50000 + +extern void VG_(sigstartup_actions) ( void ); + +extern void VG_(deliver_signals) ( void ); +extern void VG_(unblock_host_signal) ( Int sigNo ); + + +/* Fake system calls for signal handling. */ +extern void VG_(do__NR_sigaction) ( void ); +extern void VG_(do__NR_sigprocmask) ( Int how, vki_ksigset_t* set ); + + + + +/* --------------------------------------------------------------------- + Exports of vg_mylibc.c + ------------------------------------------------------------------ */ + + +#define NULL ((void*)0) + +extern void VG_(exit)( Int status ) + __attribute__ ((__noreturn__)); + +extern void VG_(printf) ( const char *format, ... ); +/* too noisy ... __attribute__ ((format (printf, 1, 2))) ; */ + +extern void VG_(sprintf) ( Char* buf, Char *format, ... 
); + +extern void VG_(vprintf) ( void(*send)(Char), + const Char *format, va_list vargs ); + +extern Bool VG_(isspace) ( Char c ); + +extern Int VG_(strlen) ( const Char* str ); + +extern Long VG_(atoll) ( Char* str ); + +extern Char* VG_(strcat) ( Char* dest, const Char* src ); +extern Char* VG_(strncat) ( Char* dest, const Char* src, Int n ); +extern Char* VG_(strpbrk) ( const Char* s, const Char* accept ); + +extern Char* VG_(strcpy) ( Char* dest, const Char* src ); + +extern Int VG_(strcmp) ( const Char* s1, const Char* s2 ); +extern Int VG_(strcmp_ws) ( const Char* s1, const Char* s2 ); + +extern Int VG_(strncmp) ( const Char* s1, const Char* s2, Int nmax ); +extern Int VG_(strncmp_ws) ( const Char* s1, const Char* s2, Int nmax ); + +extern Char* VG_(strstr) ( const Char* haystack, Char* needle ); +extern Char* VG_(strchr) ( const Char* s, Char c ); +extern Char* VG_(strdup) ( ArenaId aid, const Char* s); + +extern Char* VG_(getenv) ( Char* name ); +extern Int VG_(getpid) ( void ); + + +extern Char VG_(toupper) ( Char c ); + +extern void VG_(strncpy_safely) ( Char* dest, const Char* src, Int ndest ); + +extern void VG_(strncpy) ( Char* dest, const Char* src, Int ndest ); + +extern Bool VG_(stringMatch) ( Char* pat, Char* str ); + + +#define __STRING(x) #x + +/* Asserts are permanently enabled. Hurrah! */ +#define vg_assert(expr) \ + ((void) ((expr) ? 0 : \ + (VG_(assert_fail) (__STRING(expr), \ + __FILE__, __LINE__, \ + __PRETTY_FUNCTION__), 0))) + +extern void VG_(assert_fail) ( Char* expr, Char* file, + Int line, Char* fn ) + __attribute__ ((__noreturn__)); + +/* Later ... extern void vg_restore_SIGABRT ( void ); */ + +/* Reading files. */ +extern Int VG_(open_read) ( Char* pathname ); +extern void VG_(close) ( Int fd ); +extern Int VG_(read) ( Int fd, void* buf, Int count); +extern Int VG_(write) ( Int fd, void* buf, Int count); + +/* mmap-ery ... 
*/ +extern void* VG_(mmap)( void* start, UInt length, + UInt prot, UInt flags, UInt fd, UInt offset ); + +extern Int VG_(munmap)( void* start, Int length ); + + +/* Print a (panic) message, and abort. */ +extern void VG_(panic) ( Char* str ) + __attribute__ ((__noreturn__)); + +/* Get memory by anonymous mmap. */ +void* VG_(get_memory_from_mmap) ( Int nBytes ); + +/* Signal stuff. Note that these use the vk_ (kernel) structure + definitions, which are different in places from those that glibc + defines. Since we're operating right at the kernel interface, + glibc's view of the world is entirely irrelevant. */ +extern Int VG_(ksigfillset)( vki_ksigset_t* set ); +extern Int VG_(ksigemptyset)( vki_ksigset_t* set ); +extern Int VG_(ksigaddset)( vki_ksigset_t* set, Int signum ); + +extern Int VG_(ksigprocmask)( Int how, const vki_ksigset_t* set, + vki_ksigset_t* oldset ); +extern Int VG_(ksigaction) ( Int signum, + const vki_ksigaction* act, + vki_ksigaction* oldact ); +extern Int VG_(ksigismember) ( vki_ksigset_t* set, Int signum ); + +extern Int VG_(ksignal)(Int signum, void (*sighandler)(Int)); + +extern Int VG_(ksigaltstack)( const vki_kstack_t* ss, vki_kstack_t* oss ); + + + +/* --------------------------------------------------------------------- + Definitions for the JITter (vg_translate.c, vg_to_ucode.c, + vg_from_ucode.c). + ------------------------------------------------------------------ */ + +/* Tags which describe what operands are. */ +typedef + enum { TempReg=0, ArchReg=1, RealReg=2, + SpillNo=3, Literal=4, Lit16=5, + NoValue=6 } + Tag; + + +/* Microinstruction opcodes. */ +typedef + enum { + NOP, + GET, + PUT, + LOAD, + STORE, + MOV, + CMOV, /* Used for cmpxchg and cmov */ + WIDEN, + JMP, + + /* Read/write the %EFLAGS register into a TempReg. 
*/ + GETF, PUTF, + + ADD, ADC, AND, OR, XOR, SUB, SBB, + SHL, SHR, SAR, ROL, ROR, RCL, RCR, + NOT, NEG, INC, DEC, BSWAP, + CC2VAL, + + /* Not strictly needed, but useful for making better + translations of address calculations. */ + LEA1, /* reg2 := const + reg1 */ + LEA2, /* reg3 := const + reg1 + reg2 * 1,2,4 or 8 */ + + /* not for translating x86 calls -- only to call helpers */ + CALLM_S, CALLM_E, /* Mark start and end of push/pop sequences + for CALLM. */ + PUSH, POP, CLEAR, /* Add/remove/zap args for helpers. */ + CALLM, /* call to a machine-code helper */ + + /* Hack for translating string (REP-) insns. Jump to literal if + TempReg/RealReg is zero. */ + JIFZ, + + /* FPU ops which read/write mem or don't touch mem at all. */ + FPU_R, + FPU_W, + FPU, + + /* Advance the simulated %eip by some small (< 128) number. */ + INCEIP, + + /* uinstrs which are not needed for mere translation of x86 code, + only for instrumentation of it. */ + LOADV, + STOREV, + GETV, + PUTV, + TESTV, + SETV, + /* Get/set the v-bit (and it is only one bit) for the simulated + %eflags register. */ + GETVF, + PUTVF, + + /* Do a unary or binary tag op. Only for post-instrumented + code. For TAG1, first and only arg is a TempReg, and is both + arg and result reg. For TAG2, first arg is src, second is + dst, in the normal way; both are TempRegs. In both cases, + 3rd arg is a RiCHelper with a Lit16 tag. This indicates + which tag op to do. */ + TAG1, + TAG2 + } + Opcode; + + +/* Condition codes, observing the Intel encoding. CondAlways is an + extra. 
*/ +typedef + enum { + CondO = 0, /* overflow */ + CondNO = 1, /* no overflow */ + CondB = 2, /* below */ + CondNB = 3, /* not below */ + CondZ = 4, /* zero */ + CondNZ = 5, /* not zero */ + CondBE = 6, /* below or equal */ + CondNBE = 7, /* not below or equal */ + CondS = 8, /* negative */ + ConsNS = 9, /* not negative */ + CondP = 10, /* parity even */ + CondNP = 11, /* not parity even */ + CondL = 12, /* jump less */ + CondNL = 13, /* not less */ + CondLE = 14, /* less or equal */ + CondNLE = 15, /* not less or equal */ + CondAlways = 16 /* Jump always */ + } + Condcode; + + +/* Flags. User-level code can only read/write O(verflow), S(ign), + Z(ero), A(ux-carry), C(arry), P(arity), and may also write + D(irection). That's a total of 7 flags. A FlagSet is a bitset, + thusly: + 76543210 + DOSZACP + and bit 7 must always be zero since it is unused. +*/ +typedef UChar FlagSet; + +#define FlagD (1<<6) +#define FlagO (1<<5) +#define FlagS (1<<4) +#define FlagZ (1<<3) +#define FlagA (1<<2) +#define FlagC (1<<1) +#define FlagP (1<<0) + +#define FlagsOSZACP (FlagO | FlagS | FlagZ | FlagA | FlagC | FlagP) +#define FlagsOSZAP (FlagO | FlagS | FlagZ | FlagA | FlagP) +#define FlagsOSZCP (FlagO | FlagS | FlagZ | FlagC | FlagP) +#define FlagsOSACP (FlagO | FlagS | FlagA | FlagC | FlagP) +#define FlagsSZACP ( FlagS | FlagZ | FlagA | FlagC | FlagP) +#define FlagsSZAP ( FlagS | FlagZ | FlagA | FlagP) +#define FlagsOC (FlagO | FlagC ) + +#define FlagsALL (FlagsOSZACP | FlagD) +#define FlagsEmpty (FlagSet)0 + +#define VG_IS_FLAG_SUBSET(set1,set2) \ + (( ((FlagSet)set1) & ((FlagSet)set2) ) == ((FlagSet)set1) ) + +#define VG_UNION_FLAG_SETS(set1,set2) \ + ( ((FlagSet)set1) | ((FlagSet)set2) ) + + + +/* A Micro (u)-instruction. 
*/ +typedef + struct { + /* word 1 */ + UInt lit32; /* 32-bit literal */ + + /* word 2 */ + UShort val1; /* first operand */ + UShort val2; /* second operand */ + + /* word 3 */ + UShort val3; /* third operand */ + UChar opcode; /* opcode */ + UChar size; /* data transfer size */ + + /* word 4 */ + FlagSet flags_r; /* :: FlagSet */ + FlagSet flags_w; /* :: FlagSet */ + UChar tag1:4; /* first operand tag */ + UChar tag2:4; /* second operand tag */ + UChar tag3:4; /* third operand tag */ + UChar extra4b:4; /* Spare field, used by WIDEN for src + -size, and by LEA2 for scale + (1,2,4 or 8) */ + + /* word 5 */ + UChar cond; /* condition, for jumps */ + Bool smc_check:1; /* do a smc test, if writes memory. */ + Bool signed_widen:1; /* signed or unsigned WIDEN ? */ + Bool ret_dispatch:1; /* Is this jump as a result of RET ? */ + Bool call_dispatch:1; /* Is this jump as a result of CALL ? */ + } + UInstr; + + +/* Expandable arrays of uinstrs. */ +typedef + struct { + Int used; + Int size; + UInstr* instrs; + Int nextTemp; + } + UCodeBlock; + +/* Refer to `the last instruction stuffed in', including as an + lvalue. 
*/ +#define LAST_UINSTR(cb) (cb)->instrs[(cb)->used-1] + +/* An invalid temporary number :-) */ +#define INVALID_TEMPREG 999999999 + + +/* --------------------------------------------------------------------- + Exports of vg_demangle.c + ------------------------------------------------------------------ */ + +extern void VG_(demangle) ( Char* orig, Char* result, Int result_size ); + + +/* --------------------------------------------------------------------- + Exports of vg_from_ucode.c + ------------------------------------------------------------------ */ + +extern UChar* VG_(emit_code) ( UCodeBlock* cb, Int* nbytes ); + + +/* --------------------------------------------------------------------- + Exports of vg_to_ucode.c + ------------------------------------------------------------------ */ + +extern Int VG_(disBB) ( UCodeBlock* cb, Addr eip0 ); +extern Char* VG_(nameOfIntReg) ( Int size, Int reg ); +extern Char VG_(nameOfIntSize) ( Int size ); +extern UInt VG_(extend_s_8to32) ( UInt x ); +extern Int VG_(getNewTemp) ( UCodeBlock* cb ); +extern Int VG_(getNewShadow) ( UCodeBlock* cb ); + +#define SHADOW(tempreg) ((tempreg)+1) + + +/* --------------------------------------------------------------------- + Exports of vg_translate.c + ------------------------------------------------------------------ */ + +extern void VG_(translate) ( Addr orig_addr, + UInt* orig_size, + Addr* trans_addr, + UInt* trans_size ); + +extern void VG_(emptyUInstr) ( UInstr* u ); +extern void VG_(newUInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz ); +extern void VG_(newUInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz, + Tag tag1, UInt val1 ); +extern void VG_(newUInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz, + Tag tag1, UInt val1, + Tag tag2, UInt val2 ); +extern void VG_(newUInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz, + Tag tag1, UInt val1, + Tag tag2, UInt val2, + Tag tag3, UInt val3 ); +extern void VG_(setFlagRW) ( UInstr* u, + FlagSet fr, FlagSet fw ); + +extern void 
VG_(setLiteralField) ( UCodeBlock* cb, UInt lit32 );
extern Bool VG_(anyFlagUse) ( UInstr* u );



extern void VG_(ppUInstr)     ( Int instrNo, UInstr* u );
extern void VG_(ppUCodeBlock) ( UCodeBlock* cb, Char* title );

extern Char* VG_(nameCondcode)    ( Condcode cond );
extern Bool  VG_(saneUInstr)      ( Bool beforeRA, UInstr* u );
extern Bool  VG_(saneUCodeBlock)  ( UCodeBlock* cb );
extern Char* VG_(nameUOpcode)     ( Bool upper, Opcode opc );
extern Int   VG_(rankToRealRegNo) ( Int rank );

extern void* VG_(jitmalloc) ( Int nbytes );
extern void  VG_(jitfree)   ( void* ptr );


/* ---------------------------------------------------------------------
   Exports of vg_execontext.c.
   ------------------------------------------------------------------ */

/* Records the PC and a bit of the call chain.  The first 4 %eip
   values are used in comparisons to remove duplicate errors, and for
   comparing against suppression specifications.  The rest are purely
   informational (but often important). */

typedef
   struct _ExeContextRec {
      struct _ExeContextRec * next;
      /* The size of this array is VG_(clo_backtrace_size); at least
         2, at most VG_DEEPEST_BACKTRACE.  [0] is the current %eip,
         [1] is its caller, [2] is the caller of [1], etc.
         NOTE(review): zero-length trailing array is a GNU extension
         ("struct hack"); the C99 equivalent would be a flexible
         array member `Addr eips[];`. */
      Addr eips[0];
   }
   ExeContext;


/* Initialise the ExeContext storage mechanism. */
extern void VG_(init_ExeContext_storage) ( void );

/* Print stats (informational only). */
extern void VG_(show_ExeContext_stats) ( void );


/* Take a snapshot of the client's stack.  Search our collection of
   ExeContexts to see if we already have it, and if not, allocate a
   new one.  Either way, return a pointer to the context. */
extern ExeContext* VG_(get_ExeContext) ( Bool skip_top_frame );

/* Print an ExeContext. */
extern void VG_(pp_ExeContext) ( ExeContext* );

/* Compare two ExeContexts, just comparing the top two callers.
*/ +extern Bool VG_(eq_ExeContext_top2) ( ExeContext* e1, ExeContext* e2 ); + +/* Compare two ExeContexts, just comparing the top four callers. */ +extern Bool VG_(eq_ExeContext_top4) ( ExeContext* e1, ExeContext* e2 ); + +/* Compare two ExeContexts, comparing all callers. */ +extern Bool VG_(eq_ExeContext_all) ( ExeContext* e1, ExeContext* e2 ); + + + +/* --------------------------------------------------------------------- + Exports of vg_errcontext.c. + ------------------------------------------------------------------ */ + +extern void VG_(load_suppressions) ( void ); +extern void VG_(show_all_errors) ( void ); +extern void VG_(record_value_error) ( Int size ); +extern void VG_(record_free_error) ( Addr a ); +extern void VG_(record_freemismatch_error) ( Addr a ); +extern void VG_(record_address_error) ( Addr a, Int size, + Bool isWrite ); +extern void VG_(record_jump_error) ( Addr a ); +extern void VG_(record_param_err) ( Addr a, + Bool isWriteLack, + Char* msg ); +extern void VG_(record_user_err) ( Addr a, Bool isWriteLack ); + + +/* The classification of a faulting address. */ +typedef + enum { Stack, Unknown, Freed, Mallocd, UserG, UserS } + AddrKind; + +/* Records info about a faulting address. 
*/
typedef
   struct {
      /* ALL */
      AddrKind akind;
      /* Freed, Mallocd */
      Int blksize;
      /* Freed, Mallocd */
      Int rwoffset;
      /* Freed, Mallocd */
      ExeContext* lastchange;
   }
   AddrInfo;


/* ---------------------------------------------------------------------
   Exports of vg_clientperms.c
   ------------------------------------------------------------------ */

extern Bool VG_(client_perm_maybe_describe)( Addr a, AddrInfo* ai );

extern UInt VG_(handle_client_request) ( UInt code, Addr aa, UInt nn );

extern void VG_(delete_client_stack_blocks_following_ESP_change) ( void );

extern void VG_(show_client_block_stats) ( void );


/* ---------------------------------------------------------------------
   Exports of vg_procselfmaps.c
   ------------------------------------------------------------------ */

extern
void VG_(read_procselfmaps) (
   void (*record_mapping)( Addr, UInt, Char, Char, Char, UInt, UChar* )
);


/* ---------------------------------------------------------------------
   Exports of vg_symtab2.c
   ------------------------------------------------------------------ */

/* We assume the executable is loaded here ... can't really find
   out.  There is a hacky sanity check in vg_init_memory_audit()
   which should trip up most stupidities.
*/
#define VG_ASSUMED_EXE_BASE  (Addr)0x8048000

extern void VG_(read_symbols) ( void );
extern void VG_(mini_stack_dump) ( ExeContext* ec );
extern void VG_(what_obj_and_fun_is_this)
   ( Addr a,
     Char* obj_buf, Int n_obj_buf,
     Char* fun_buf, Int n_fun_buf );

extern void VG_(symtab_notify_munmap) ( Addr start, UInt length );


/* ---------------------------------------------------------------------
   Exports of vg_clientmalloc.c
   ------------------------------------------------------------------ */

/* these numbers are not arbitrary.
if you change them, + adjust vg_dispatch.S as well */ + +typedef + enum { + Vg_AllocMalloc = 0, + Vg_AllocNew = 1, + Vg_AllocNewVec = 2 + } + VgAllocKind; + +/* Description of a malloc'd chunk. */ +typedef + struct _ShadowChunk { + struct _ShadowChunk* next; + ExeContext* where; /* where malloc'd/free'd */ + UInt size : 30; /* size requested. */ + VgAllocKind allockind : 2; /* which wrapper did the allocation */ + Addr data; /* ptr to actual block. */ + } + ShadowChunk; + +extern void VG_(clientmalloc_done) ( void ); +extern void VG_(describe_addr) ( Addr a, AddrInfo* ai ); +extern ShadowChunk** VG_(get_malloc_shadows) ( /*OUT*/ UInt* n_shadows ); + +/* This should never be called; if it is, something's seriously + wrong. */ +extern UInt VG_(trap_here) ( UInt arg1, UInt arg2, UInt what_to_do ); + + +/* --------------------------------------------------------------------- + Exports of vg_main.c + ------------------------------------------------------------------ */ + +/* How big is the saved FPU state? */ +#define VG_SIZE_OF_FPUSTATE 108 +/* ... and in words ... */ +#define VG_SIZE_OF_FPUSTATE_W ((VG_SIZE_OF_FPUSTATE+3)/4) + +/* A structure used as an intermediary when passing the simulated + CPU's state to some assembly fragments, particularly system calls. + Stuff is copied from baseBlock to here, the assembly magic runs, + and then the inverse copy is done. */ + +extern UInt VG_(m_state_static) [8 /* int regs, in Intel order */ + + 1 /* %eflags */ + + 1 /* %eip */ + + VG_SIZE_OF_FPUSTATE_W /* FPU state */ + ]; + +/* Handy fns for doing the copy back and forth. */ +extern void VG_(copy_baseBlock_to_m_state_static) ( void ); +extern void VG_(copy_m_state_static_to_baseBlock) ( void ); + +/* Create, and add to TT/TC, the translation of a client basic + block. */ +extern void VG_(create_translation_for) ( Addr orig_addr ); + +/* Called when some unhandleable client behaviour is detected. + Prints a msg and aborts. 
*/ +extern void VG_(unimplemented) ( Char* msg ); + +/* The stack on which Valgrind runs. We can't use the same stack as the + simulatee -- that's an important design decision. */ +extern UInt VG_(stack)[10000]; + +/* Similarly, we have to ask for signals to be delivered on an + alternative stack, since it is possible, although unlikely, that + we'll have to run client code from inside the Valgrind-installed + signal handler. If this happens it will be done by + vg_deliver_signal_immediately(). */ +extern UInt VG_(sigstack)[10000]; + + +/* vg_oursignalhandler() might longjmp(). Here's the jmp_buf. */ +extern jmp_buf VG_(toploop_jmpbuf); +/* ... and if so, here's the signal which caused it to do so. */ +extern Int VG_(longjmpd_on_signal); + +/* Holds client's %esp at the point we gained control. From this the + client's argc, argv and envp are deduced. */ +extern Addr VG_(esp_at_startup); +extern Int VG_(client_argc); +extern Char** VG_(client_argv); +extern Char** VG_(client_envp); + +/* Remove valgrind.so from a LD_PRELOAD=... string so child processes + don't get traced into. */ +extern void VG_(mash_LD_PRELOAD_string)( Char* ld_preload_str ); + +/* Something of a function looking for a home ... start up GDB. This + is called from VG_(swizzle_esp_then_start_GDB) and so runs on the + *client's* stack. This is necessary to give GDB the illusion that + the client program really was running on the real cpu. */ +extern void VG_(start_GDB_whilst_on_client_stack) ( void ); + +/* Spew out vast amounts of junk during JITting? */ +extern Bool VG_(disassemble); + +/* 64-bit counter for the number of basic blocks done. */ +extern ULong VG_(bbs_done); +/* 64-bit counter for the number of bbs to go before a debug exit. */ +extern ULong VG_(bbs_to_go); + +/* Counts downwards in vg_run_innerloop. */ +extern UInt VG_(dispatch_ctr); + +/* If vg_dispatch_ctr is set to 1 to force a stop, its + previous value is saved here. 
*/ +extern UInt VG_(dispatch_ctr_SAVED); + +/* This is why vg_run_innerloop() exited. */ +extern UInt VG_(interrupt_reason); + +/* Is the client running on the simulated CPU or the real one? */ +extern Bool VG_(running_on_simd_CPU); /* Initially False */ + +/* The current LRU epoch. */ +extern UInt VG_(current_epoch); + + +/* --- Counters, for informational purposes only. --- */ + +/* Number of lookups which miss the fast tt helper. */ +extern UInt VG_(tt_fast_misses); + +/* Counts for LRU informational messages. */ + +/* Number and total o/t size of new translations this epoch. */ +extern UInt VG_(this_epoch_in_count); +extern UInt VG_(this_epoch_in_osize); +extern UInt VG_(this_epoch_in_tsize); +/* Number and total o/t size of discarded translations this epoch. */ +extern UInt VG_(this_epoch_out_count); +extern UInt VG_(this_epoch_out_osize); +extern UInt VG_(this_epoch_out_tsize); +/* Number and total o/t size of translations overall. */ +extern UInt VG_(overall_in_count); +extern UInt VG_(overall_in_osize); +extern UInt VG_(overall_in_tsize); +/* Number and total o/t size of discards overall. */ +extern UInt VG_(overall_out_count); +extern UInt VG_(overall_out_osize); +extern UInt VG_(overall_out_tsize); + +/* The number of LRU-clearings of TT/TC. */ +extern UInt VG_(number_of_lrus); + +/* Counts pertaining to the register allocator. */ + +/* total number of uinstrs input to reg-alloc */ +extern UInt VG_(uinstrs_prealloc); + +/* total number of uinstrs added due to spill code */ +extern UInt VG_(uinstrs_spill); + +/* number of bbs requiring spill code */ +extern UInt VG_(translations_needing_spill); + +/* total of register ranks over all translations */ +extern UInt VG_(total_reg_rank); + +/* Counts pertaining to the self-modifying-code detection machinery. */ + +/* Total number of writes checked. */ +//extern UInt VG_(smc_total_check4s); + +/* Number of writes which the fast smc check couldn't show were + harmless. 
*/ +extern UInt VG_(smc_cache_passed); + +/* Numnber of writes which really did write on original code. */ +extern UInt VG_(smc_fancy_passed); + +/* Number of translations discarded as a result. */ +//extern UInt VG_(smc_discard_count); + +/* Counts pertaining to internal sanity checking. */ +extern UInt VG_(sanity_fast_count); +extern UInt VG_(sanity_slow_count); + + +/* --------------------------------------------------------------------- + Exports of vg_memory.c + ------------------------------------------------------------------ */ + +extern void VGM_(init_memory_audit) ( void ); +extern Addr VGM_(curr_dataseg_end); +extern void VG_(show_reg_tags) ( void ); +extern void VG_(detect_memory_leaks) ( void ); +extern void VG_(done_prof_mem) ( void ); + +/* Set permissions for an address range. Not speed-critical. */ +extern void VGM_(make_noaccess) ( Addr a, UInt len ); +extern void VGM_(make_writable) ( Addr a, UInt len ); +extern void VGM_(make_readable) ( Addr a, UInt len ); +/* Use with care! (read: use for shmat only) */ +extern void VGM_(make_readwritable) ( Addr a, UInt len ); +extern void VGM_(copy_address_range_perms) ( Addr src, Addr dst, + UInt len ); + +/* Check permissions for an address range. Not speed-critical. */ +extern Bool VGM_(check_writable) ( Addr a, UInt len, Addr* bad_addr ); +extern Bool VGM_(check_readable) ( Addr a, UInt len, Addr* bad_addr ); +extern Bool VGM_(check_readable_asciiz) ( Addr a, Addr* bad_addr ); + +/* Sanity checks which may be done at any time. Doing them at + signal-delivery time turns out to be convenient. */ +extern void VG_(do_sanity_checks) ( Bool force_expensive ); +/* Very cheap ... */ +extern Bool VG_(first_and_last_secondaries_look_plausible) ( void ); + +/* These functions are called from generated code. 
*/ +extern void VG_(helperc_STOREV4) ( UInt, Addr ); +extern void VG_(helperc_STOREV2) ( UInt, Addr ); +extern void VG_(helperc_STOREV1) ( UInt, Addr ); + +extern UInt VG_(helperc_LOADV1) ( Addr ); +extern UInt VG_(helperc_LOADV2) ( Addr ); +extern UInt VG_(helperc_LOADV4) ( Addr ); + +extern void VGM_(handle_esp_assignment) ( Addr new_espA ); +extern void VGM_(fpu_write_check) ( Addr addr, Int size ); +extern void VGM_(fpu_read_check) ( Addr addr, Int size ); + +/* Safely (avoiding SIGSEGV / SIGBUS) scan the entire valid address + space and pass the addresses and values of all addressible, + defined, aligned words to notify_word. This is the basis for the + leak detector. Returns the number of calls made to notify_word. */ +UInt VG_(scan_all_valid_memory) ( void (*notify_word)( Addr, UInt ) ); + +/* Is this address within some small distance below %ESP? Used only + for the --workaround-gcc296-bugs kludge. */ +extern Bool VG_(is_just_below_ESP)( Addr aa ); + +/* Nasty kludgery to deal with applications which switch stacks, + like netscape. */ +#define VG_STACK_STARTS_AT 0xC0000000 +#define VG_PLAUSIBLE_STACK_SIZE 8000000 + +extern Bool VG_(is_plausible_stack_addr) ( Addr ); + + +/* --------------------------------------------------------------------- + Exports of vg_syscall_mem.c + ------------------------------------------------------------------ */ + +/* Counts the depth of nested syscalls. Is used in + VG_(deliver_signals) do discover whether or not the client is in a + syscall (presumably _blocked_ in a syscall) when a signal is + delivered. If so, the signal delivery mechanism needs to behave + differently from normal. 
*/ +extern Int VG_(syscall_depth); + +extern void VG_(wrap_syscall) ( void ); + +extern Bool VG_(is_kerror) ( Int res ); + +#define KERNEL_DO_SYSCALL(result_lvalue) \ + VG_(copy_baseBlock_to_m_state_static)(); \ + VG_(do_syscall)(); \ + VG_(copy_m_state_static_to_baseBlock)(); \ + result_lvalue = VG_(baseBlock)[VGOFF_(m_eax)]; + + +/* --------------------------------------------------------------------- + Exports of vg_transtab.c + ------------------------------------------------------------------ */ + +/* An entry in the translation table (TT). */ +typedef + struct { + /* +0 */ Addr orig_addr; + /* +4 */ Addr trans_addr; + /* +8 */ UInt mru_epoch; + /* +12 */ UShort orig_size; + /* +14 */ UShort trans_size; + } + TTEntry; + +/* The number of basic blocks in an epoch (one age-step). */ +#define VG_BBS_PER_EPOCH 20000 + +extern void VG_(get_tt_tc_used) ( UInt* tt_used, UInt* tc_used ); +extern void VG_(maybe_do_lru_pass) ( void ); +extern void VG_(flush_transtab) ( void ); +extern Addr VG_(copy_to_transcache) ( Addr trans_addr, Int trans_size ); +extern void VG_(add_to_trans_tab) ( TTEntry* tte ); + +extern void VG_(smc_mark_original) ( Addr original_addr, + Int original_len ); + +extern void VG_(init_transtab_and_SMC) ( void ); + +extern void VG_(sanity_check_tc_tt) ( void ); +extern Addr VG_(search_transtab) ( Addr original_addr ); + +extern void VG_(invalidate_tt_fast)( void ); + + +/* --------------------------------------------------------------------- + Exports of vg_vtagops.c + ------------------------------------------------------------------ */ + +/* Lists the names of value-tag operations used in instrumented + code. These are the third argument to TAG1 and TAG2 uinsns. */ + +typedef + enum { + /* Unary. 
*/ + VgT_PCast40, VgT_PCast20, VgT_PCast10, + VgT_PCast01, VgT_PCast02, VgT_PCast04, + + VgT_PCast14, VgT_PCast12, VgT_PCast11, + + VgT_Left4, VgT_Left2, VgT_Left1, + + VgT_SWiden14, VgT_SWiden24, VgT_SWiden12, + VgT_ZWiden14, VgT_ZWiden24, VgT_ZWiden12, + + /* Binary; 1st is rd; 2nd is rd+wr */ + VgT_UifU4, VgT_UifU2, VgT_UifU1, VgT_UifU0, + VgT_DifD4, VgT_DifD2, VgT_DifD1, + + VgT_ImproveAND4_TQ, VgT_ImproveAND2_TQ, VgT_ImproveAND1_TQ, + VgT_ImproveOR4_TQ, VgT_ImproveOR2_TQ, VgT_ImproveOR1_TQ, + VgT_DebugFn + } + VgTagOp; + +extern Char* VG_(nameOfTagOp) ( VgTagOp ); +extern UInt VG_(DebugFn) ( UInt a1, UInt a2 ); + + +/* --------------------------------------------------------------------- + Exports of vg_syscall.S + ------------------------------------------------------------------ */ + +extern void VG_(do_syscall) ( void ); + + +/* --------------------------------------------------------------------- + Exports of vg_startup.S + ------------------------------------------------------------------ */ + +extern void VG_(shutdown); +extern void VG_(switch_to_real_CPU) ( void ); + +extern void VG_(swizzle_esp_then_start_GDB) ( void ); + + +/* --------------------------------------------------------------------- + Exports of vg_dispatch.S + ------------------------------------------------------------------ */ + +extern void VG_(dispatch); +extern void VG_(run_innerloop) ( void ); + +/* Returns the next orig_addr to run. */ +extern Addr VG_(run_singleton_translation) ( Addr trans_addr ); + + +/* --------------------------------------------------------------------- + Exports of vg_helpers.S + ------------------------------------------------------------------ */ + +/* For doing exits ... */ +extern void VG_(helper_request_normal_exit); + +/* SMC fast checks. */ +extern void VG_(helper_smc_check4); + +/* Mul, div, etc, -- we don't codegen these directly. 
*/ +extern void VG_(helper_idiv_64_32); +extern void VG_(helper_div_64_32); +extern void VG_(helper_idiv_32_16); +extern void VG_(helper_div_32_16); +extern void VG_(helper_idiv_16_8); +extern void VG_(helper_div_16_8); + +extern void VG_(helper_imul_32_64); +extern void VG_(helper_mul_32_64); +extern void VG_(helper_imul_16_32); +extern void VG_(helper_mul_16_32); +extern void VG_(helper_imul_8_16); +extern void VG_(helper_mul_8_16); + +extern void VG_(helper_CLD); +extern void VG_(helper_STD); +extern void VG_(helper_get_dirflag); + +extern void VG_(helper_shldl); +extern void VG_(helper_shldw); +extern void VG_(helper_shrdl); +extern void VG_(helper_shrdw); + +extern void VG_(helper_RDTSC); +extern void VG_(helper_CPUID); + +extern void VG_(helper_bt); +extern void VG_(helper_bts); +extern void VG_(helper_btr); +extern void VG_(helper_btc); + +extern void VG_(helper_bsf); +extern void VG_(helper_bsr); + +extern void VG_(helper_fstsw_AX); +extern void VG_(helper_SAHF); + +extern void VG_(helper_value_check4_fail); +extern void VG_(helper_value_check2_fail); +extern void VG_(helper_value_check1_fail); +extern void VG_(helper_value_check0_fail); + +extern void VG_(helper_do_syscall); +extern void VG_(helper_do_client_request); + + +/* --------------------------------------------------------------------- + The state of the simulated CPU. + ------------------------------------------------------------------ */ + +/* This is the Intel register encoding. */ +#define R_EAX 0 +#define R_ECX 1 +#define R_EDX 2 +#define R_EBX 3 +#define R_ESP 4 +#define R_EBP 5 +#define R_ESI 6 +#define R_EDI 7 + +#define R_AL (0+R_EAX) +#define R_CL (0+R_ECX) +#define R_DL (0+R_EDX) +#define R_BL (0+R_EBX) +#define R_AH (4+R_EAX) +#define R_CH (4+R_ECX) +#define R_DH (4+R_EDX) +#define R_BH (4+R_EBX) + + +/* --------------------------------------------------------------------- + Offsets into baseBlock for everything which needs to referred to + from generated code. 
The order of these decls does not imply + what the order of the actual offsets is. The latter is important + and is set up in vg_main.c. + ------------------------------------------------------------------ */ + +/* An array of words. In generated code, %ebp always points to the + start of this array. Useful stuff, like the simulated CPU state, + and the addresses of helper functions, can then be found by + indexing off %ebp. The following declares variables which, at + startup time, are given values denoting offsets into baseBlock. + These offsets are in *words* from the start of baseBlock. */ + +#define VG_BASEBLOCK_WORDS 200 + +extern UInt VG_(baseBlock)[VG_BASEBLOCK_WORDS]; + + +/* ----------------------------------------------------- + Read-write parts of baseBlock. + -------------------------------------------------- */ + +/* State of the simulated CPU. */ +extern Int VGOFF_(m_eax); +extern Int VGOFF_(m_ecx); +extern Int VGOFF_(m_edx); +extern Int VGOFF_(m_ebx); +extern Int VGOFF_(m_esp); +extern Int VGOFF_(m_ebp); +extern Int VGOFF_(m_esi); +extern Int VGOFF_(m_edi); +extern Int VGOFF_(m_eflags); +extern Int VGOFF_(m_fpustate); +extern Int VGOFF_(m_eip); + +/* Reg-alloc spill area (VG_MAX_SPILLSLOTS words long). */ +extern Int VGOFF_(spillslots); + +/* Records the valid bits for the 8 integer regs & flags reg. */ +extern Int VGOFF_(sh_eax); +extern Int VGOFF_(sh_ecx); +extern Int VGOFF_(sh_edx); +extern Int VGOFF_(sh_ebx); +extern Int VGOFF_(sh_esp); +extern Int VGOFF_(sh_ebp); +extern Int VGOFF_(sh_esi); +extern Int VGOFF_(sh_edi); +extern Int VGOFF_(sh_eflags); + + +/* ----------------------------------------------------- + Read-only parts of baseBlock. + -------------------------------------------------- */ + +/* Offsets of addresses of helper functions. A "helper" function is + one which is called from generated code. 
*/ + +extern Int VGOFF_(helper_idiv_64_32); +extern Int VGOFF_(helper_div_64_32); +extern Int VGOFF_(helper_idiv_32_16); +extern Int VGOFF_(helper_div_32_16); +extern Int VGOFF_(helper_idiv_16_8); +extern Int VGOFF_(helper_div_16_8); + +extern Int VGOFF_(helper_imul_32_64); +extern Int VGOFF_(helper_mul_32_64); +extern Int VGOFF_(helper_imul_16_32); +extern Int VGOFF_(helper_mul_16_32); +extern Int VGOFF_(helper_imul_8_16); +extern Int VGOFF_(helper_mul_8_16); + +extern Int VGOFF_(helper_CLD); +extern Int VGOFF_(helper_STD); +extern Int VGOFF_(helper_get_dirflag); + +extern Int VGOFF_(helper_shldl); +extern Int VGOFF_(helper_shldw); +extern Int VGOFF_(helper_shrdl); +extern Int VGOFF_(helper_shrdw); + +extern Int VGOFF_(helper_RDTSC); +extern Int VGOFF_(helper_CPUID); + +extern Int VGOFF_(helper_bt); +extern Int VGOFF_(helper_bts); +extern Int VGOFF_(helper_btr); +extern Int VGOFF_(helper_btc); + +extern Int VGOFF_(helper_bsf); +extern Int VGOFF_(helper_bsr); + +extern Int VGOFF_(helper_fstsw_AX); +extern Int VGOFF_(helper_SAHF); + +extern Int VGOFF_(helper_value_check4_fail); +extern Int VGOFF_(helper_value_check2_fail); +extern Int VGOFF_(helper_value_check1_fail); +extern Int VGOFF_(helper_value_check0_fail); + +extern Int VGOFF_(helper_do_syscall); +extern Int VGOFF_(helper_do_client_request); + +extern Int VGOFF_(helperc_STOREV4); /* :: UInt -> Addr -> void */ +extern Int VGOFF_(helperc_STOREV2); /* :: UInt -> Addr -> void */ +extern Int VGOFF_(helperc_STOREV1); /* :: UInt -> Addr -> void */ + +extern Int VGOFF_(helperc_LOADV4); /* :: Addr -> UInt -> void */ +extern Int VGOFF_(helperc_LOADV2); /* :: Addr -> UInt -> void */ +extern Int VGOFF_(helperc_LOADV1); /* :: Addr -> UInt -> void */ + +extern Int VGOFF_(handle_esp_assignment); /* :: Addr -> void */ +extern Int VGOFF_(fpu_write_check); /* :: Addr -> Int -> void */ +extern Int VGOFF_(fpu_read_check); /* :: Addr -> Int -> void */ + +extern Int VGOFF_(helper_request_normal_exit); + + + +#endif /* ndef 
__VG_INCLUDE_H */ + +/*--------------------------------------------------------------------*/ +/*--- end vg_include.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_kerneliface.h b/coregrind/vg_kerneliface.h new file mode 100644 index 000000000..856a1c4ab --- /dev/null +++ b/coregrind/vg_kerneliface.h @@ -0,0 +1,165 @@ + +/*--------------------------------------------------------------------*/ +/*--- A header file defining structures and constants which are ---*/ +/*--- important at the kernel boundary for this platform. ---*/ +/*--- vg_kerneliface.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#ifndef __VG_KERNELIFACE_H +#define __VG_KERNELIFACE_H + +/* This file is ONLY to be included into vg_include.h. Do not include + it directly into valgrind source .c files. This file defines types + and constants for the kernel interface, and to make that clear + everything is prefixed VKI. 
*/ + +/*--- All the following stuff is correct for Linux kernels 2.2.X and + 2.4.X. +---*/ + +/* Should really get this from an include file somewhere. */ +#define VKI_BYTES_PER_PAGE_BITS 12 +#define VKI_BYTES_PER_PAGE (1 << VKI_BYTES_PER_PAGE_BITS) + +#define VKI_BYTES_PER_WORD 4 +#define VKI_WORDS_PER_PAGE (VKI_BYTES_PER_PAGE / VKI_BYTES_PER_WORD) + + +/* For system call numbers __NR_... */ +#include + +/* An implementation of signal sets. These are the same as the sigset + implementations in the relevant Linux kernels. Note carefully that + this has nothing to do with glibc's signal sets. We work entirely + at the kernel boundary, so the libc stuff is invisible and + irrelevant. */ + +/* The following is copied from + /usr/src/linux-2.4.9-13/include/asm-i386/signal.h */ +#define VKI_KNSIG 64 /* true for linux 2.2.X and 2.4.X */ +#define VKI_KNSIG_BPW 32 /* since we're using UInts */ +#define VKI_KNSIG_WORDS (VKI_KNSIG / VKI_KNSIG_BPW) + +typedef + struct { + UInt ws[VKI_KNSIG_WORDS]; + } + vki_ksigset_t; + +typedef + struct { + void* ksa_handler; + unsigned long ksa_flags; + void (*ksa_restorer)(void); + vki_ksigset_t ksa_mask; + } + vki_ksigaction; + +typedef + struct { + void* ss_sp; + Int ss_flags; + UInt ss_size; + } + vki_kstack_t; + + +#define VKI_SIG_BLOCK 0 /* for blocking signals */ +#define VKI_SIG_UNBLOCK 1 /* for unblocking signals */ +#define VKI_SIG_SETMASK 2 /* for setting the signal mask */ + +#define VKI_SIG_DFL ((void*)0) /* default signal handling */ +#define VKI_SIG_IGN ((void*)1) /* ignore signal */ +#define VKI_SIG_ERR ((void*)-1) /* error return from signal */ + +#define VKI_SA_ONSTACK 0x08000000 +#define VKI_SA_RESTART 0x10000000 +#if 0 +#define VKI_SA_NOCLDSTOP 0x00000001 +#define VKI_SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define VKI_SA_SIGINFO 0x00000004 +#define VKI_SA_NODEFER 0x40000000 +#define VKI_SA_RESETHAND 0x80000000 +#define VKI_SA_NOMASK SA_NODEFER +#define VKI_SA_ONESHOT SA_RESETHAND +#define VKI_SA_INTERRUPT 
0x20000000 /* dummy -- ignored */ +#define VKI_SA_RESTORER 0x04000000 +#endif + +#define VKI_SIGABRT 6 +#define VKI_SIGSEGV 11 +#define VKI_SIGBUS 7 +#define VKI_SIGILL 4 +#define VKI_SIGFPE 8 +#define VKI_SIGKILL 9 +#define VKI_SIGABRT 6 +#define VKI_SIGSTOP 19 +#define VKI_SIGTERM 15 + +/* The following are copied from /usr/include/bits/mman.h, which in + turn claims to have got them from the kernel headers. */ + +#define VKI_PROT_READ 0x1 /* Page can be read. */ +#define VKI_PROT_WRITE 0x2 /* Page can be written. */ +#define VKI_PROT_EXEC 0x4 /* Page can be executed. */ +#define VKI_MAP_ANONYMOUS 0x20 /* Don't use a file. */ +#define VKI_MAP_PRIVATE 0x02 /* Changes are private. */ + + +/* Gawd ... hack ... */ + +typedef struct vki__user_cap_header_struct { + UInt version; + int pid; +} vki_cap_user_header_t; + +typedef struct vki__user_cap_data_struct { + UInt effective; + UInt permitted; + UInt inheritable; +} vki_cap_user_data_t; + + +/* "Byrial Jensen" says: + [various] ioctls take a pointer to a "struct + termios" but this is another and shorter "struct + termios" than the one defined in and used + by tcgetattr(3) and tcsetattr(3) and other library + functions. GNU libc translate between its library + termios and the kernel termios. +*/ + +#define VKI_SIZEOF_STRUCT_TERMIOS 36 + + +#endif /* ndef __VG_KERNELIFACE_H */ + +/*--------------------------------------------------------------------*/ +/*--- end vg_kerneliface.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_main.c b/coregrind/vg_main.c new file mode 100644 index 000000000..798d43b0c --- /dev/null +++ b/coregrind/vg_main.c @@ -0,0 +1,1440 @@ + +/*--------------------------------------------------------------------*/ +/*--- C startup stuff, reached from vg_startup.S. 
---*/ +/*--- vg_main.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" +#include "vg_constants.h" +#include "vg_version.h" + + +/* --------------------------------------------------------------------- + Compute offsets into baseBlock. See comments in vg_include.h. + ------------------------------------------------------------------ */ + +/* The variables storing offsets. 
*/ + +#define INVALID_OFFSET (-1) + +Int VGOFF_(m_eax) = INVALID_OFFSET; +Int VGOFF_(m_ecx) = INVALID_OFFSET; +Int VGOFF_(m_edx) = INVALID_OFFSET; +Int VGOFF_(m_ebx) = INVALID_OFFSET; +Int VGOFF_(m_esp) = INVALID_OFFSET; +Int VGOFF_(m_ebp) = INVALID_OFFSET; +Int VGOFF_(m_esi) = INVALID_OFFSET; +Int VGOFF_(m_edi) = INVALID_OFFSET; +Int VGOFF_(m_eflags) = INVALID_OFFSET; +Int VGOFF_(m_fpustate) = INVALID_OFFSET; +Int VGOFF_(m_eip) = INVALID_OFFSET; +Int VGOFF_(spillslots) = INVALID_OFFSET; +Int VGOFF_(sh_eax) = INVALID_OFFSET; +Int VGOFF_(sh_ecx) = INVALID_OFFSET; +Int VGOFF_(sh_edx) = INVALID_OFFSET; +Int VGOFF_(sh_ebx) = INVALID_OFFSET; +Int VGOFF_(sh_esp) = INVALID_OFFSET; +Int VGOFF_(sh_ebp) = INVALID_OFFSET; +Int VGOFF_(sh_esi) = INVALID_OFFSET; +Int VGOFF_(sh_edi) = INVALID_OFFSET; +Int VGOFF_(sh_eflags) = INVALID_OFFSET; +Int VGOFF_(helper_idiv_64_32) = INVALID_OFFSET; +Int VGOFF_(helper_div_64_32) = INVALID_OFFSET; +Int VGOFF_(helper_idiv_32_16) = INVALID_OFFSET; +Int VGOFF_(helper_div_32_16) = INVALID_OFFSET; +Int VGOFF_(helper_idiv_16_8) = INVALID_OFFSET; +Int VGOFF_(helper_div_16_8) = INVALID_OFFSET; +Int VGOFF_(helper_imul_32_64) = INVALID_OFFSET; +Int VGOFF_(helper_mul_32_64) = INVALID_OFFSET; +Int VGOFF_(helper_imul_16_32) = INVALID_OFFSET; +Int VGOFF_(helper_mul_16_32) = INVALID_OFFSET; +Int VGOFF_(helper_imul_8_16) = INVALID_OFFSET; +Int VGOFF_(helper_mul_8_16) = INVALID_OFFSET; +Int VGOFF_(helper_CLD) = INVALID_OFFSET; +Int VGOFF_(helper_STD) = INVALID_OFFSET; +Int VGOFF_(helper_get_dirflag) = INVALID_OFFSET; +Int VGOFF_(helper_shldl) = INVALID_OFFSET; +Int VGOFF_(helper_shldw) = INVALID_OFFSET; +Int VGOFF_(helper_shrdl) = INVALID_OFFSET; +Int VGOFF_(helper_shrdw) = INVALID_OFFSET; +Int VGOFF_(helper_RDTSC) = INVALID_OFFSET; +Int VGOFF_(helper_CPUID) = INVALID_OFFSET; +Int VGOFF_(helper_BSWAP) = INVALID_OFFSET; +Int VGOFF_(helper_bt) = INVALID_OFFSET; +Int VGOFF_(helper_bts) = INVALID_OFFSET; +Int VGOFF_(helper_btr) = INVALID_OFFSET; +Int 
VGOFF_(helper_btc) = INVALID_OFFSET; +Int VGOFF_(helper_bsf) = INVALID_OFFSET; +Int VGOFF_(helper_bsr) = INVALID_OFFSET; +Int VGOFF_(helper_fstsw_AX) = INVALID_OFFSET; +Int VGOFF_(helper_SAHF) = INVALID_OFFSET; +Int VGOFF_(helper_value_check4_fail) = INVALID_OFFSET; +Int VGOFF_(helper_value_check2_fail) = INVALID_OFFSET; +Int VGOFF_(helper_value_check1_fail) = INVALID_OFFSET; +Int VGOFF_(helper_value_check0_fail) = INVALID_OFFSET; +Int VGOFF_(helper_do_syscall) = INVALID_OFFSET; +Int VGOFF_(helper_do_client_request) = INVALID_OFFSET; +Int VGOFF_(helperc_LOADV4) = INVALID_OFFSET; +Int VGOFF_(helperc_LOADV2) = INVALID_OFFSET; +Int VGOFF_(helperc_LOADV1) = INVALID_OFFSET; +Int VGOFF_(helperc_STOREV4) = INVALID_OFFSET; +Int VGOFF_(helperc_STOREV2) = INVALID_OFFSET; +Int VGOFF_(helperc_STOREV1) = INVALID_OFFSET; +Int VGOFF_(handle_esp_assignment) = INVALID_OFFSET; +Int VGOFF_(fpu_write_check) = INVALID_OFFSET; +Int VGOFF_(fpu_read_check) = INVALID_OFFSET; +Int VGOFF_(helper_request_normal_exit) = INVALID_OFFSET; + + +/* This is the actual defn of baseblock. */ +UInt VG_(baseBlock)[VG_BASEBLOCK_WORDS]; + +/* Words. */ +static Int baB_off = 0; + +/* Returns the offset, in words. */ +static Int alloc_BaB ( Int words ) +{ + Int off = baB_off; + baB_off += words; + if (baB_off >= VG_BASEBLOCK_WORDS) + VG_(panic)( "alloc_BaB: baseBlock is too small"); + + return off; +} + +/* Allocate 1 word in baseBlock and set it to the given value. */ +static Int alloc_BaB_1_set ( Addr a ) +{ + Int off = alloc_BaB(1); + VG_(baseBlock)[off] = (UInt)a; + return off; +} + + +/* Here we assign actual offsets. It's important to get the most + popular referents within 128 bytes of the start, so we can take + advantage of short addressing modes relative to %ebp. Popularity + of offsets was measured on 22 Feb 02 running a KDE application, and + the slots rearranged accordingly, with a 1.5% reduction in total + size of translations. 
*/ + +static void vg_init_baseBlock ( void ) +{ + baB_off = 0; + + /* Those with offsets under 128 are carefully chosen. */ + + /* WORD offsets in this column */ + /* 0 */ VGOFF_(m_eax) = alloc_BaB(1); + /* 1 */ VGOFF_(m_ecx) = alloc_BaB(1); + /* 2 */ VGOFF_(m_edx) = alloc_BaB(1); + /* 3 */ VGOFF_(m_ebx) = alloc_BaB(1); + /* 4 */ VGOFF_(m_esp) = alloc_BaB(1); + /* 5 */ VGOFF_(m_ebp) = alloc_BaB(1); + /* 6 */ VGOFF_(m_esi) = alloc_BaB(1); + /* 7 */ VGOFF_(m_edi) = alloc_BaB(1); + /* 8 */ VGOFF_(m_eflags) = alloc_BaB(1); + + /* 9 */ VGOFF_(sh_eax) = alloc_BaB(1); + /* 10 */ VGOFF_(sh_ecx) = alloc_BaB(1); + /* 11 */ VGOFF_(sh_edx) = alloc_BaB(1); + /* 12 */ VGOFF_(sh_ebx) = alloc_BaB(1); + /* 13 */ VGOFF_(sh_esp) = alloc_BaB(1); + /* 14 */ VGOFF_(sh_ebp) = alloc_BaB(1); + /* 15 */ VGOFF_(sh_esi) = alloc_BaB(1); + /* 16 */ VGOFF_(sh_edi) = alloc_BaB(1); + /* 17 */ VGOFF_(sh_eflags) = alloc_BaB(1); + + /* 18 */ + VGOFF_(helper_value_check4_fail) + = alloc_BaB_1_set( (Addr) & VG_(helper_value_check4_fail) ); + /* 19 */ + VGOFF_(helper_value_check0_fail) + = alloc_BaB_1_set( (Addr) & VG_(helper_value_check0_fail) ); + + /* 20 */ + VGOFF_(helperc_STOREV4) + = alloc_BaB_1_set( (Addr) & VG_(helperc_STOREV4) ); + /* 21 */ + VGOFF_(helperc_STOREV1) + = alloc_BaB_1_set( (Addr) & VG_(helperc_STOREV1) ); + + /* 22 */ + VGOFF_(helperc_LOADV4) + = alloc_BaB_1_set( (Addr) & VG_(helperc_LOADV4) ); + /* 23 */ + VGOFF_(helperc_LOADV1) + = alloc_BaB_1_set( (Addr) & VG_(helperc_LOADV1) ); + + /* 24 */ + VGOFF_(handle_esp_assignment) + = alloc_BaB_1_set( (Addr) & VGM_(handle_esp_assignment) ); + + /* 25 */ + VGOFF_(m_eip) = alloc_BaB(1); + + /* There are currently 24 spill slots */ + /* 26 .. 49 This overlaps the magic boundary at >= 32 words, but + most spills are to low numbered spill slots, so the ones above + the boundary don't see much action. */ + VGOFF_(spillslots) = alloc_BaB(VG_MAX_SPILLSLOTS); + + /* These two pushed beyond the boundary because 2-byte transactions + are rare. 
*/ + /* 50 */ + VGOFF_(helperc_STOREV2) + = alloc_BaB_1_set( (Addr) & VG_(helperc_STOREV2) ); + /* 51 */ + VGOFF_(helperc_LOADV2) + = alloc_BaB_1_set( (Addr) & VG_(helperc_LOADV2) ); + + /* 52 */ + VGOFF_(fpu_write_check) + = alloc_BaB_1_set( (Addr) & VGM_(fpu_write_check) ); + /* 53 */ + VGOFF_(fpu_read_check) + = alloc_BaB_1_set( (Addr) & VGM_(fpu_read_check) ); + + /* Actually I don't think these two are ever used. */ + /* 54 */ + VGOFF_(helper_value_check2_fail) + = alloc_BaB_1_set( (Addr) & VG_(helper_value_check2_fail) ); + /* 55 */ + VGOFF_(helper_value_check1_fail) + = alloc_BaB_1_set( (Addr) & VG_(helper_value_check1_fail) ); + + /* I gave up counting at this point. Since they're way above the + short-amode-boundary, there's no point. */ + + VGOFF_(m_fpustate) = alloc_BaB(VG_SIZE_OF_FPUSTATE_W); + + VGOFF_(helper_idiv_64_32) + = alloc_BaB_1_set( (Addr) & VG_(helper_idiv_64_32) ); + VGOFF_(helper_div_64_32) + = alloc_BaB_1_set( (Addr) & VG_(helper_div_64_32) ); + VGOFF_(helper_idiv_32_16) + = alloc_BaB_1_set( (Addr) & VG_(helper_idiv_32_16) ); + VGOFF_(helper_div_32_16) + = alloc_BaB_1_set( (Addr) & VG_(helper_div_32_16) ); + VGOFF_(helper_idiv_16_8) + = alloc_BaB_1_set( (Addr) & VG_(helper_idiv_16_8) ); + VGOFF_(helper_div_16_8) + = alloc_BaB_1_set( (Addr) & VG_(helper_div_16_8) ); + + VGOFF_(helper_imul_32_64) + = alloc_BaB_1_set( (Addr) & VG_(helper_imul_32_64) ); + VGOFF_(helper_mul_32_64) + = alloc_BaB_1_set( (Addr) & VG_(helper_mul_32_64) ); + VGOFF_(helper_imul_16_32) + = alloc_BaB_1_set( (Addr) & VG_(helper_imul_16_32) ); + VGOFF_(helper_mul_16_32) + = alloc_BaB_1_set( (Addr) & VG_(helper_mul_16_32) ); + VGOFF_(helper_imul_8_16) + = alloc_BaB_1_set( (Addr) & VG_(helper_imul_8_16) ); + VGOFF_(helper_mul_8_16) + = alloc_BaB_1_set( (Addr) & VG_(helper_mul_8_16) ); + + VGOFF_(helper_CLD) + = alloc_BaB_1_set( (Addr) & VG_(helper_CLD) ); + VGOFF_(helper_STD) + = alloc_BaB_1_set( (Addr) & VG_(helper_STD) ); + VGOFF_(helper_get_dirflag) + = alloc_BaB_1_set( 
(Addr) & VG_(helper_get_dirflag) ); + + VGOFF_(helper_shldl) + = alloc_BaB_1_set( (Addr) & VG_(helper_shldl) ); + VGOFF_(helper_shldw) + = alloc_BaB_1_set( (Addr) & VG_(helper_shldw) ); + VGOFF_(helper_shrdl) + = alloc_BaB_1_set( (Addr) & VG_(helper_shrdl) ); + VGOFF_(helper_shrdw) + = alloc_BaB_1_set( (Addr) & VG_(helper_shrdw) ); + + VGOFF_(helper_RDTSC) + = alloc_BaB_1_set( (Addr) & VG_(helper_RDTSC) ); + VGOFF_(helper_CPUID) + = alloc_BaB_1_set( (Addr) & VG_(helper_CPUID) ); + + VGOFF_(helper_bt) + = alloc_BaB_1_set( (Addr) & VG_(helper_bt) ); + VGOFF_(helper_bts) + = alloc_BaB_1_set( (Addr) & VG_(helper_bts) ); + VGOFF_(helper_btr) + = alloc_BaB_1_set( (Addr) & VG_(helper_btr) ); + VGOFF_(helper_btc) + = alloc_BaB_1_set( (Addr) & VG_(helper_btc) ); + + VGOFF_(helper_bsf) + = alloc_BaB_1_set( (Addr) & VG_(helper_bsf) ); + VGOFF_(helper_bsr) + = alloc_BaB_1_set( (Addr) & VG_(helper_bsr) ); + + VGOFF_(helper_fstsw_AX) + = alloc_BaB_1_set( (Addr) & VG_(helper_fstsw_AX) ); + VGOFF_(helper_SAHF) + = alloc_BaB_1_set( (Addr) & VG_(helper_SAHF) ); + + VGOFF_(helper_request_normal_exit) + = alloc_BaB_1_set( (Addr) & VG_(helper_request_normal_exit) ); + + VGOFF_(helper_do_syscall) + = alloc_BaB_1_set( (Addr) & VG_(helper_do_syscall) ); + VGOFF_(helper_do_client_request) + = alloc_BaB_1_set( (Addr) & VG_(helper_do_client_request) ); +} + + +/* --------------------------------------------------------------------- + Global entities which are not referenced from generated code. + ------------------------------------------------------------------ */ + +/* The stack on which Valgrind runs. We can't use the same stack as + the simulatee -- that's an important design decision. */ +UInt VG_(stack)[10000]; + +/* Ditto our signal delivery stack. */ +UInt VG_(sigstack)[10000]; + +/* Saving stuff across system calls. 
*/
UInt VG_(real_fpu_state_saved_over_syscall_d1)[VG_SIZE_OF_FPUSTATE_W];
UInt VG_(real_fpu_state_saved_over_syscall_d2)[VG_SIZE_OF_FPUSTATE_W];
Addr VG_(esp_saved_over_syscall_d1);
Addr VG_(esp_saved_over_syscall_d2);

/* Counts downwards in vg_run_innerloop.  When it reaches zero the
   inner loop returns control to VG_(toploop). */
UInt VG_(dispatch_ctr);

/* If vg_dispatch_ctr is set to 1 to force a stop, its
   previous value is saved here.  NOTE: distinct from the identically
   named *local* variable in VG_(toploop); this global is only
   meaningful for the forced-stop exit reasons (VG_Y_EXIT, VG_Y_SMC). */
UInt VG_(dispatch_ctr_SAVED);

/* This is why vg_run_innerloop() exited (one of the VG_Y_* codes). */
UInt VG_(interrupt_reason);

/* vg_oursignalhandler() might longjmp().  Here's the jmp_buf. */
jmp_buf VG_(toploop_jmpbuf);
/* ... and if so, here's the signal which caused it to do so. */
Int VG_(longjmpd_on_signal);

/* 64-bit counter for the number of basic blocks done. */
ULong VG_(bbs_done);
/* 64-bit counter for the number of bbs to go before a debug exit. */
ULong VG_(bbs_to_go);

/* Produce debugging output? */
Bool VG_(disassemble) = False;

/* The current LRU epoch, used to age translations in TT/TC. */
UInt VG_(current_epoch) = 0;


/* ---------------------------------------------------------------------
   Counters, for informational purposes only.
   ------------------------------------------------------------------ */

/* Number of lookups which miss the fast tt helper. */
UInt VG_(tt_fast_misses) = 0;


/* Counts for LRU informational messages. */

/* Number and total o/t size of new translations this epoch. */
UInt VG_(this_epoch_in_count) = 0;
UInt VG_(this_epoch_in_osize) = 0;
UInt VG_(this_epoch_in_tsize) = 0;
/* Number and total o/t size of discarded translations this epoch. */
UInt VG_(this_epoch_out_count) = 0;
UInt VG_(this_epoch_out_osize) = 0;
UInt VG_(this_epoch_out_tsize) = 0;
/* Number and total o/t size of translations overall. */
UInt VG_(overall_in_count) = 0;
UInt VG_(overall_in_osize) = 0;
UInt VG_(overall_in_tsize) = 0;
/* Number and total o/t size of discards overall. */
UInt VG_(overall_out_count) = 0;
UInt VG_(overall_out_osize) = 0;
UInt VG_(overall_out_tsize) = 0;

/* The number of LRU-clearings of TT/TC. */
UInt VG_(number_of_lrus) = 0;


/* Counts pertaining to the register allocator. */

/* total number of uinstrs input to reg-alloc */
UInt VG_(uinstrs_prealloc) = 0;

/* total number of uinstrs added due to spill code */
UInt VG_(uinstrs_spill) = 0;

/* number of bbs requiring spill code */
UInt VG_(translations_needing_spill) = 0;

/* total of register ranks over all translations */
UInt VG_(total_reg_rank) = 0;


/* Counts pertaining to the self-modifying-code detection machinery. */

/* Total number of writes checked. */
UInt VG_(smc_total_check4s) = 0;

/* Number of writes which the fast smc check couldn't show were
   harmless. */
UInt VG_(smc_cache_passed) = 0;

/* Number of writes which really did write on original code. */
UInt VG_(smc_fancy_passed) = 0;

/* Number of translations discarded as a result. */
UInt VG_(smc_discard_count) = 0;


/* Counts pertaining to internal sanity checking. */

UInt VG_(sanity_fast_count) = 0;
UInt VG_(sanity_slow_count) = 0;



/* ---------------------------------------------------------------------
   Values derived from command-line options.  Defaults are assigned in
   process_cmd_line_options() below.
   ------------------------------------------------------------------ */

Bool  VG_(clo_GDB_attach);
Int   VG_(sanity_level);
Int   VG_(clo_verbosity);
Bool  VG_(clo_demangle);
Bool  VG_(clo_leak_check);
Bool  VG_(clo_show_reachable);
Int   VG_(clo_leak_resolution);
Bool  VG_(clo_sloppy_malloc);
Bool  VG_(clo_partial_loads_ok);
Bool  VG_(clo_trace_children);
Int   VG_(clo_logfile_fd);
Int   VG_(clo_freelist_vol);
Bool  VG_(clo_workaround_gcc296_bugs);
Int   VG_(clo_n_suppressions);
Char* VG_(clo_suppressions)[VG_CLO_MAX_SFILES];
Bool  VG_(clo_single_step);
Bool  VG_(clo_optimise);
Bool  VG_(clo_instrument);
Bool  VG_(clo_cleanup);
Bool  VG_(clo_client_perms);
Int   VG_(clo_smc_check);
Bool  VG_(clo_trace_syscalls);
Bool  VG_(clo_trace_signals);
Bool  VG_(clo_trace_symtab);
Bool  VG_(clo_trace_malloc);
ULong VG_(clo_stop_after);
Int   VG_(clo_dump_error);
Int   VG_(clo_backtrace_size);

/* This Bool is needed by wrappers in vg_clientmalloc.c to decide how
   to behave.  Initially we say False. */
Bool VG_(running_on_simd_CPU) = False;

/* Holds client's %esp at the point we gained control. */
Addr VG_(esp_at_startup);

/* As deduced from VG_(esp_at_startup), the client's argc, argv[] and
   envp[] as extracted from the client's stack at startup-time. */
Int    VG_(client_argc);
Char** VG_(client_argv);
Char** VG_(client_envp);

/* A place into which to copy the value of env var VG_ARGS, so we
   don't have to modify the original. */
static Char vg_cmdline_copy[M_VG_CMDLINE_STRLEN];


/* ---------------------------------------------------------------------
   Top level simulation loop.
   ------------------------------------------------------------------ */

/* Create a translation of the client basic block beginning at
   orig_addr, and add it to the translation cache & translation table.
   This probably doesn't really belong here, but, hey ...
*/
void VG_(create_translation_for) ( Addr orig_addr )
{
   Addr trans_addr;
   TTEntry tte;
   Int orig_size, trans_size;
   /* Ensure there is space to hold a translation. */
   VG_(maybe_do_lru_pass)();
   VG_(translate)( orig_addr, &orig_size, &trans_addr, &trans_size );
   /* Copy data at trans_addr into the translation cache.
      Returned pointer is to the code, not to the 4-byte
      header. */
   /* Since the .orig_size and .trans_size fields are
      UShort, be paranoid. */
   vg_assert(orig_size > 0 && orig_size < 65536);
   vg_assert(trans_size > 0 && trans_size < 65536);
   tte.orig_size  = orig_size;
   tte.orig_addr  = orig_addr;
   tte.trans_size = trans_size;
   tte.trans_addr = VG_(copy_to_transcache)
                       ( trans_addr, trans_size );
   tte.mru_epoch  = VG_(current_epoch);
   /* Free the intermediary -- was allocated by VG_(emit_code). */
   VG_(jitfree)( (void*)trans_addr );
   /* Add to trans tab and set back pointer. */
   VG_(add_to_trans_tab) ( &tte );
   /* Update stats. */
   VG_(this_epoch_in_count) ++;
   VG_(this_epoch_in_osize) += orig_size;
   VG_(this_epoch_in_tsize) += trans_size;
   VG_(overall_in_count) ++;
   VG_(overall_in_osize) += orig_size;
   VG_(overall_in_tsize) += trans_size;
   /* Record translated area for SMC detection.
      NOTE(review): this marks the area at the simulated %eip rather
      than at the orig_addr parameter.  The only caller (toploop below)
      passes orig_addr == baseBlock[m_eip], so the two coincide there,
      but a call with any other address would mark the wrong region --
      presumably it should use orig_addr; confirm before relying on it. */
   VG_(smc_mark_original) (
      VG_(baseBlock)[VGOFF_(m_eip)], orig_size );
}


/* Runs the client program from %EIP (baseBlock[off_eip]) until it
   asks to exit, or until vg_bbs_to_go jumps have happened (the latter
   case is for debugging). */

void VG_(toploop) ( void )
{
   /* How many bbs we asked vg_run_innerloop for.  This *local* shadows
      neither the global VG_(dispatch_ctr) nor VG_(dispatch_ctr_SAVED)
      (those names are VG_()-mangled); keep the distinction in mind when
      reading the exit-reason cases below.  volatile: live across the
      __builtin_setjmp below. */
   volatile UInt dispatch_ctr_SAVED;
   volatile Int  done_this_time;

   /* For the LRU structures, records when the epoch began. */
   volatile ULong epoch_started_at = 0;

   while (True) {
      next_outer_loop:

      /* Age the LRU structures if an epoch has been completed. */
      if (VG_(bbs_done) - epoch_started_at >= VG_BBS_PER_EPOCH) {
         VG_(current_epoch)++;
         epoch_started_at = VG_(bbs_done);
         if (VG_(clo_verbosity) > 2) {
            UInt tt_used, tc_used;
            VG_(get_tt_tc_used) ( &tt_used, &tc_used );
            VG_(message)(Vg_UserMsg,
               "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
               VG_(bbs_done),
               VG_(this_epoch_in_count),
               VG_(this_epoch_in_osize),
               VG_(this_epoch_in_tsize),
               VG_(this_epoch_out_count),
               VG_(this_epoch_out_osize),
               VG_(this_epoch_out_tsize),
               tt_used, tc_used
            );
         }
         VG_(this_epoch_in_count) = 0;
         VG_(this_epoch_in_osize) = 0;
         VG_(this_epoch_in_tsize) = 0;
         VG_(this_epoch_out_count) = 0;
         VG_(this_epoch_out_osize) = 0;
         VG_(this_epoch_out_tsize) = 0;
      }

      /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
         that the counter is pre-incremented by 1 so that a counter
         value of 1 means "stop now". */
      if (VG_(bbs_to_go) >= VG_SIGCHECK_INTERVAL)
         VG_(dispatch_ctr) = 1 + VG_SIGCHECK_INTERVAL;
      else
         VG_(dispatch_ctr) = 1 + (UInt)VG_(bbs_to_go);

      /* ... and remember what we asked for. */
      dispatch_ctr_SAVED = VG_(dispatch_ctr);

      /* Now have a go at doing them. */
      VG_(interrupt_reason) = VG_Y_SIGCHECK;
      if (__builtin_setjmp(VG_(toploop_jmpbuf)) == 0) {
         /* try this ... */
         VG_(run_innerloop)();
         /* We get here if the client didn't take a fault. */
         switch (VG_(interrupt_reason)) {
            case VG_Y_SIGCHECK:
               /* The counter fell to zero and no other situation has
                  been detected. */
               vg_assert(VG_(dispatch_ctr) == 0);
               done_this_time = dispatch_ctr_SAVED - 1;
               VG_(bbs_to_go) -= (ULong)done_this_time;
               VG_(bbs_done) += (ULong)done_this_time;
               /* Exit if the debug run has ended. */
               if (VG_(bbs_to_go) == 0) goto debug_stop;
               VG_(deliver_signals)();
               VG_(do_sanity_checks)(False);
               goto next_outer_loop;
            case VG_Y_EXIT:
               /* The target program tried to exit.  The dispatcher
                  forced a stop by setting the counter to 1, saving the
                  old value in the *global* VG_(dispatch_ctr_SAVED);
                  bbs done = what we asked for minus what was left. */
               done_this_time = dispatch_ctr_SAVED - VG_(dispatch_ctr_SAVED);
               done_this_time --;
               VG_(bbs_to_go) -= (ULong)done_this_time;
               VG_(bbs_done) += (ULong)done_this_time;
               return;
            case VG_Y_SMC:
               /* A write to original code was detected.  Same
                  forced-stop accounting as VG_Y_EXIT above. */
               done_this_time = dispatch_ctr_SAVED - VG_(dispatch_ctr_SAVED);
               VG_(bbs_to_go) -= (ULong)done_this_time;
               VG_(bbs_done) += (ULong)done_this_time;
               VG_(flush_transtab)();
               goto next_outer_loop;
            case VG_Y_TRANSLATE: {
               /* Need to provide a translation of code at vg_m_eip. */
               done_this_time = dispatch_ctr_SAVED - VG_(dispatch_ctr);
               vg_assert(done_this_time > 0);
               done_this_time --;
               VG_(bbs_to_go) -= (ULong)done_this_time;
               VG_(bbs_done) += (ULong)done_this_time;
               VG_(create_translation_for)(VG_(baseBlock)[VGOFF_(m_eip)]);
               goto next_outer_loop;
            }
            default:
               VG_(panic)("vg_toploop: invalid interrupt reason");
         }
      } else {
         /* We get here if the client took a fault, which caused our
            signal handler to longjmp. */
         done_this_time = dispatch_ctr_SAVED - VG_(dispatch_ctr);
         VG_(bbs_to_go) -= (ULong)done_this_time;
         VG_(bbs_done) += (ULong)done_this_time;
         if (VG_(interrupt_reason) == VG_Y_EXIT) return;
         VG_(deliver_signals)();
         VG_(do_sanity_checks)(False);
         VG_(unblock_host_signal)(VG_(longjmpd_on_signal));
      }
   }

   /* NOTREACHED */

   debug_stop:
   /* If we exited because of a debug stop, print the translation
      of the last block executed -- by translating it again, and
      throwing away the result. */
   VG_(printf)(
      "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
   VG_(translate)( VG_(baseBlock)[VGOFF_(m_eip)], NULL, NULL, NULL );
   VG_(printf)("\n");
   VG_(printf)(
      "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");
}


/* ---------------------------------------------------------------------
   Processing of command-line options.
+ ------------------------------------------------------------------ */ + +static void bad_option ( Char* opt ) +{ + VG_(shutdown_logging)(); + VG_(clo_logfile_fd) = 2; /* stderr */ + VG_(printf)("valgrind.so: Bad option `%s'; aborting.\n", opt); + VG_(exit)(1); +} + +static void config_error ( Char* msg ) +{ + VG_(shutdown_logging)(); + VG_(clo_logfile_fd) = 2; /* stderr */ + VG_(printf)("valgrind.so: Startup or configuration error:\n\t%s\n", msg); + VG_(printf)("valgrind.so: Unable to start up properly. Giving up.\n"); + VG_(exit)(1); +} + + +static void process_cmd_line_options ( void ) +{ + UChar* argv[M_VG_CMDLINE_OPTS]; + UInt argc; + UChar* p; + UChar* str; + Int i, eventually_logfile_fd; + +# define ISSPACE(cc) ((cc) == ' ' || (cc) == '\t' || (cc) == '\n') +# define STREQ(s1,s2) (0==VG_(strcmp_ws)((s1),(s2))) +# define STREQN(nn,s1,s2) (0==VG_(strncmp_ws)((s1),(s2),(nn))) + + /* Set defaults. */ + VG_(clo_GDB_attach) = False; + VG_(sanity_level) = 1; + VG_(clo_verbosity) = 1; + VG_(clo_demangle) = True; + VG_(clo_leak_check) = False; + VG_(clo_show_reachable) = False; + VG_(clo_leak_resolution) = 2; + VG_(clo_sloppy_malloc) = False; + VG_(clo_partial_loads_ok) = True; + VG_(clo_trace_children) = False; + VG_(clo_logfile_fd) = 2; /* stderr */ + VG_(clo_freelist_vol) = 1000000; + VG_(clo_workaround_gcc296_bugs) = False; + VG_(clo_n_suppressions) = 0; + VG_(clo_single_step) = False; + VG_(clo_optimise) = True; + VG_(clo_instrument) = True; + VG_(clo_cleanup) = True; + VG_(clo_client_perms) = False; + VG_(clo_smc_check) = /* VG_CLO_SMC_SOME */ VG_CLO_SMC_NONE; + VG_(clo_trace_syscalls) = False; + VG_(clo_trace_signals) = False; + VG_(clo_trace_symtab) = False; + VG_(clo_trace_malloc) = False; + VG_(clo_stop_after) = 1000000000000LL; + VG_(clo_dump_error) = 0; + VG_(clo_backtrace_size) = 4; + + eventually_logfile_fd = VG_(clo_logfile_fd); + + /* Once logging is started, we can safely send messages pertaining + to failures in initialisation. 
*/ + VG_(startup_logging)(); + + /* Magically find the client's argc/argv/envp. This kludge is + entirely dependent on the stack layout imposed by libc at + startup. Hence the magic offsets. Then check (heuristically) + that the results are plausible. There must be a better way to + do this ... */ + +# if 0 + /* Use this to search for the correct offsets if the tests below + barf. */ + { Int i; + VG_(printf)("startup %%esp is %p\n", VG_(esp_at_startup) ); + for (i = 0; i < 10; i++) { + Char* p = ((Char**)VG_(esp_at_startup))[i]; + VG_(printf)("%d: %p\n", i, p); + } + } +# endif + + /* These offsets (5,6,7) are right for my RedHat 7.2 (glibc-2.2.4) + box. */ + + VG_(client_argc) = (Int) ( ((void**)VG_(esp_at_startup)) [5] ); + VG_(client_argv) = (Char**)( ((void**)VG_(esp_at_startup)) [6] ); + VG_(client_envp) = (Char**)( ((void**)VG_(esp_at_startup)) [7] ); + + if ( ((UInt)VG_(client_argc)) > 0 && + ((UInt)VG_(client_argc)) < 10000 && + (Addr)VG_(client_argv) >= 0x8000000 && + (Addr)VG_(client_envp) >= 0x8000000) + goto argc_argv_envp_OK; + + /* If that's no good, try some other offsets discovered by KDE + folks on 8 Feb 02: + For glibc > 2.2.4 the offset 9/10/11 did the trick. Coolo found + out those, on I think a Caldera 3.1 with glibc 2.2.4 -- the same + offsets worked for on a debian sid with glibc 2.2.5. */ + + VG_(client_argc) = (Int) ( ((void**)VG_(esp_at_startup)) [9] ); + VG_(client_argv) = (Char**)( ((void**)VG_(esp_at_startup)) [10] ); + VG_(client_envp) = (Char**)( ((void**)VG_(esp_at_startup)) [11] ); + + if ( ((UInt)VG_(client_argc)) > 0 && + ((UInt)VG_(client_argc)) < 10000 && + (Addr)VG_(client_argv) >= 0x8000000 && + (Addr)VG_(client_envp) >= 0x8000000) + goto argc_argv_envp_OK; + + /* Doesn't look promising. Try offsets for RedHat 6.2 + (glibc-2.1.3) instead. In this case, the argv and envp vectors + are actually on the stack (bizarrely). 
*/ + + VG_(client_argc) = (Int) ( ((void**)VG_(esp_at_startup)) [4] ); + VG_(client_argv) = (Char**) & ( ((void**)VG_(esp_at_startup)) [5] ); + VG_(client_envp) + = (Char**) & ( ((void**)VG_(esp_at_startup)) [6 + VG_(client_argc)] ); + + if ( ((UInt)VG_(client_argc)) > 0 && + ((UInt)VG_(client_argc)) < 10000 && + (Addr)VG_(client_argv) >= 0x8000000 && + (Addr)VG_(client_envp) >= 0x8000000) + goto argc_argv_envp_OK; + + /* Here's yet another variant, from (irc.kde.org). */ + + VG_(client_argc) = (Int) ( ((void**)VG_(esp_at_startup)) [9] ); + VG_(client_argv) = (Char**) & ( ((void**)VG_(esp_at_startup)) [10] ); + VG_(client_envp) + = (Char**) & ( ((void**)VG_(esp_at_startup)) [11 + VG_(client_argc)] ); + + if ( ((UInt)VG_(client_argc)) > 0 && + ((UInt)VG_(client_argc)) < 10000 && + (Addr)VG_(client_argv) >= 0x8000000 && + (Addr)VG_(client_envp) >= 0x8000000) + goto argc_argv_envp_OK; + + /* VG_(printf)("%d %p %p\n", VG_(client_argc), VG_(client_argv), + VG_(client_envp)); + */ + /* We're hosed. Give up :-( */ + config_error( + "Can't get plausible values for client's argc/argv/envp.\n\t" + "You may be able to fix this; see process_cmd_line_options()\n\t" + "in vg_main.c" + ); + /* NOTREACHED */ + + argc_argv_envp_OK: + + /* Now that VG_(client_envp) has been set, we can extract the args + for Valgrind itself. Copy into global var so that we don't have to + write zeroes to the getenv'd value itself. 
*/ + str = VG_(getenv)("VG_ARGS"); + argc = 0; + + if (!str) { + config_error("Can't read options from env var VG_ARGS."); + } + + if (VG_(strlen)(str) >= M_VG_CMDLINE_STRLEN-1) { + config_error("Command line length exceeds M_CMDLINE_STRLEN."); + } + VG_(strcpy)(vg_cmdline_copy, str); + str = NULL; + + p = &vg_cmdline_copy[0]; + while (True) { + while (ISSPACE(*p)) { *p = 0; p++; } + if (*p == 0) break; + if (argc < M_VG_CMDLINE_OPTS-1) { + argv[argc] = p; argc++; + } else { + config_error( + "Found more than M_CMDLINE_OPTS command-line opts."); + } + while (*p != 0 && !ISSPACE(*p)) p++; + } + + for (i = 0; i < argc; i++) { + + if (STREQ(argv[i], "-v") || STREQ(argv[i], "--verbose")) + VG_(clo_verbosity)++; + else if (STREQ(argv[i], "-q") || STREQ(argv[i], "--quiet")) + VG_(clo_verbosity)--; + + else if (STREQ(argv[i], "--gdb-attach=yes")) + VG_(clo_GDB_attach) = True; + else if (STREQ(argv[i], "--gdb-attach=no")) + VG_(clo_GDB_attach) = False; + + else if (STREQ(argv[i], "--demangle=yes")) + VG_(clo_demangle) = True; + else if (STREQ(argv[i], "--demangle=no")) + VG_(clo_demangle) = False; + + else if (STREQ(argv[i], "--partial-loads-ok=yes")) + VG_(clo_partial_loads_ok) = True; + else if (STREQ(argv[i], "--partial-loads-ok=no")) + VG_(clo_partial_loads_ok) = False; + + else if (STREQ(argv[i], "--leak-check=yes")) + VG_(clo_leak_check) = True; + else if (STREQ(argv[i], "--leak-check=no")) + VG_(clo_leak_check) = False; + + else if (STREQ(argv[i], "--show-reachable=yes")) + VG_(clo_show_reachable) = True; + else if (STREQ(argv[i], "--show-reachable=no")) + VG_(clo_show_reachable) = False; + + else if (STREQ(argv[i], "--leak-resolution=low")) + VG_(clo_leak_resolution) = 2; + else if (STREQ(argv[i], "--leak-resolution=med")) + VG_(clo_leak_resolution) = 4; + else if (STREQ(argv[i], "--leak-resolution=high")) + VG_(clo_leak_resolution) = VG_DEEPEST_BACKTRACE; + + else if (STREQ(argv[i], "--sloppy-malloc=yes")) + VG_(clo_sloppy_malloc) = True; + else if (STREQ(argv[i], 
"--sloppy-malloc=no")) + VG_(clo_sloppy_malloc) = False; + + else if (STREQ(argv[i], "--trace-children=yes")) + VG_(clo_trace_children) = True; + else if (STREQ(argv[i], "--trace-children=no")) + VG_(clo_trace_children) = False; + + else if (STREQ(argv[i], "--workaround-gcc296-bugs=yes")) + VG_(clo_workaround_gcc296_bugs) = True; + else if (STREQ(argv[i], "--workaround-gcc296-bugs=no")) + VG_(clo_workaround_gcc296_bugs) = False; + + else if (STREQN(15, argv[i], "--sanity-level=")) + VG_(sanity_level) = (Int)VG_(atoll)(&argv[i][15]); + + else if (STREQN(13, argv[i], "--logfile-fd=")) + eventually_logfile_fd = (Int)VG_(atoll)(&argv[i][13]); + + else if (STREQN(15, argv[i], "--freelist-vol=")) { + VG_(clo_freelist_vol) = (Int)VG_(atoll)(&argv[i][15]); + if (VG_(clo_freelist_vol) < 0) VG_(clo_freelist_vol) = 2; + } + + else if (STREQN(15, argv[i], "--suppressions=")) { + if (VG_(clo_n_suppressions) >= VG_CLO_MAX_SFILES) { + VG_(message)(Vg_UserMsg, "Too many logfiles specified."); + VG_(message)(Vg_UserMsg, + "Increase VG_CLO_MAX_SFILES and recompile."); + bad_option(argv[i]); + } + VG_(clo_suppressions)[VG_(clo_n_suppressions)] = &argv[i][15]; + VG_(clo_n_suppressions)++; + } + else if (STREQ(argv[i], "--single-step=yes")) + VG_(clo_single_step) = True; + else if (STREQ(argv[i], "--single-step=no")) + VG_(clo_single_step) = False; + + else if (STREQ(argv[i], "--optimise=yes")) + VG_(clo_optimise) = True; + else if (STREQ(argv[i], "--optimise=no")) + VG_(clo_optimise) = False; + + else if (STREQ(argv[i], "--instrument=yes")) + VG_(clo_instrument) = True; + else if (STREQ(argv[i], "--instrument=no")) + VG_(clo_instrument) = False; + + else if (STREQ(argv[i], "--cleanup=yes")) + VG_(clo_cleanup) = True; + else if (STREQ(argv[i], "--cleanup=no")) + VG_(clo_cleanup) = False; + + else if (STREQ(argv[i], "--client-perms=yes")) + VG_(clo_client_perms) = True; + else if (STREQ(argv[i], "--client-perms=no")) + VG_(clo_client_perms) = False; + + else if (STREQ(argv[i], 
"--smc-check=none")) + VG_(clo_smc_check) = VG_CLO_SMC_NONE; + else if (STREQ(argv[i], "--smc-check=some")) + VG_(clo_smc_check) = VG_CLO_SMC_SOME; + else if (STREQ(argv[i], "--smc-check=all")) + VG_(clo_smc_check) = VG_CLO_SMC_ALL; + + else if (STREQ(argv[i], "--trace-syscalls=yes")) + VG_(clo_trace_syscalls) = True; + else if (STREQ(argv[i], "--trace-syscalls=no")) + VG_(clo_trace_syscalls) = False; + + else if (STREQ(argv[i], "--trace-signals=yes")) + VG_(clo_trace_signals) = True; + else if (STREQ(argv[i], "--trace-signals=no")) + VG_(clo_trace_signals) = False; + + else if (STREQ(argv[i], "--trace-symtab=yes")) + VG_(clo_trace_symtab) = True; + else if (STREQ(argv[i], "--trace-symtab=no")) + VG_(clo_trace_symtab) = False; + + else if (STREQ(argv[i], "--trace-malloc=yes")) + VG_(clo_trace_malloc) = True; + else if (STREQ(argv[i], "--trace-malloc=no")) + VG_(clo_trace_malloc) = False; + + else if (STREQN(13, argv[i], "--stop-after=")) + VG_(clo_stop_after) = VG_(atoll)(&argv[i][13]); + + else if (STREQN(13, argv[i], "--dump-error=")) + VG_(clo_dump_error) = (Int)VG_(atoll)(&argv[i][13]); + + else if (STREQN(14, argv[i], "--num-callers=")) { + /* Make sure it's sane. 
*/ + VG_(clo_backtrace_size) = (Int)VG_(atoll)(&argv[i][14]); + if (VG_(clo_backtrace_size) < 2) + VG_(clo_backtrace_size) = 2; + if (VG_(clo_backtrace_size) >= VG_DEEPEST_BACKTRACE) + VG_(clo_backtrace_size) = VG_DEEPEST_BACKTRACE; + } + + else + bad_option(argv[i]); + } + +# undef ISSPACE +# undef STREQ +# undef STREQN + + if (VG_(clo_verbosity < 0)) + VG_(clo_verbosity) = 0; + + if (VG_(clo_GDB_attach) && VG_(clo_trace_children)) { + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, + "--gdb-attach=yes conflicts with --trace-children=yes"); + VG_(message)(Vg_UserMsg, + "Please choose one or the other, but not both."); + bad_option("--gdb-attach=yes and --trace-children=yes"); + } + + if (VG_(clo_client_perms) && !VG_(clo_instrument)) { + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, + "--client-perms=yes requires --instrument=yes"); + bad_option("--client-perms=yes without --instrument=yes"); + } + + if (VG_(clo_client_perms)) + vg_assert(VG_(clo_instrument)); + + VG_(clo_logfile_fd) = eventually_logfile_fd; + +# define STRINGIFY(xx) __STRING(xx) + if (VG_(clo_verbosity > 0)) + VG_(message)(Vg_UserMsg, + "valgrind-%s, a memory error detector for x86 GNU/Linux.", + STRINGIFY(VG_VERSION)); +# undef STRINGIFY + if (VG_(clo_verbosity > 0)) + VG_(message)(Vg_UserMsg, + "Copyright (C) 2000-2002, and GNU GPL'd, by Julian Seward."); + if (VG_(clo_verbosity) > 1) { + VG_(message)(Vg_UserMsg, "Startup, with flags:"); + for (i = 0; i < argc; i++) { + VG_(message)(Vg_UserMsg, " %s", argv[i]); + } + } + + if (VG_(clo_n_suppressions) == 0) { + config_error("No error-suppression files were specified."); + } +} + + +/* --------------------------------------------------------------------- + Copying to/from m_state_static. 
+ ------------------------------------------------------------------ */ + +UInt VG_(m_state_static) [8 /* int regs, in Intel order */ + + 1 /* %eflags */ + + 1 /* %eip */ + + VG_SIZE_OF_FPUSTATE_W /* FPU state */ + ]; + +void VG_(copy_baseBlock_to_m_state_static) ( void ) +{ + Int i; + VG_(m_state_static)[ 0/4] = VG_(baseBlock)[VGOFF_(m_eax)]; + VG_(m_state_static)[ 4/4] = VG_(baseBlock)[VGOFF_(m_ecx)]; + VG_(m_state_static)[ 8/4] = VG_(baseBlock)[VGOFF_(m_edx)]; + VG_(m_state_static)[12/4] = VG_(baseBlock)[VGOFF_(m_ebx)]; + VG_(m_state_static)[16/4] = VG_(baseBlock)[VGOFF_(m_esp)]; + VG_(m_state_static)[20/4] = VG_(baseBlock)[VGOFF_(m_ebp)]; + VG_(m_state_static)[24/4] = VG_(baseBlock)[VGOFF_(m_esi)]; + VG_(m_state_static)[28/4] = VG_(baseBlock)[VGOFF_(m_edi)]; + + VG_(m_state_static)[32/4] = VG_(baseBlock)[VGOFF_(m_eflags)]; + VG_(m_state_static)[36/4] = VG_(baseBlock)[VGOFF_(m_eip)]; + + for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) + VG_(m_state_static)[40/4 + i] + = VG_(baseBlock)[VGOFF_(m_fpustate) + i]; +} + + +void VG_(copy_m_state_static_to_baseBlock) ( void ) +{ + Int i; + VG_(baseBlock)[VGOFF_(m_eax)] = VG_(m_state_static)[ 0/4]; + VG_(baseBlock)[VGOFF_(m_ecx)] = VG_(m_state_static)[ 4/4]; + VG_(baseBlock)[VGOFF_(m_edx)] = VG_(m_state_static)[ 8/4]; + VG_(baseBlock)[VGOFF_(m_ebx)] = VG_(m_state_static)[12/4]; + VG_(baseBlock)[VGOFF_(m_esp)] = VG_(m_state_static)[16/4]; + VG_(baseBlock)[VGOFF_(m_ebp)] = VG_(m_state_static)[20/4]; + VG_(baseBlock)[VGOFF_(m_esi)] = VG_(m_state_static)[24/4]; + VG_(baseBlock)[VGOFF_(m_edi)] = VG_(m_state_static)[28/4]; + + VG_(baseBlock)[VGOFF_(m_eflags)] = VG_(m_state_static)[32/4]; + VG_(baseBlock)[VGOFF_(m_eip)] = VG_(m_state_static)[36/4]; + + for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) + VG_(baseBlock)[VGOFF_(m_fpustate) + i] + = VG_(m_state_static)[40/4 + i]; +} + + +/* --------------------------------------------------------------------- + Show accumulated counts. 
   ------------------------------------------------------------------ */

/* Dump the informational counters declared at the top of this file,
   at verbosity > 1. */
static void vg_show_counts ( void )
{
   VG_(message)(Vg_DebugMsg,
                " dispatch: %lu basic blocks, %d tt_fast misses.", 
                VG_(bbs_done), VG_(tt_fast_misses));
   VG_(message)(Vg_DebugMsg,
                "translate: new %d (%d -> %d), discard %d (%d -> %d).",
                VG_(overall_in_count),
                VG_(overall_in_osize),
                VG_(overall_in_tsize),
                VG_(overall_out_count),
                VG_(overall_out_osize),
                VG_(overall_out_tsize) );
   VG_(message)(Vg_DebugMsg,
                "      lru: %d epochs, %d clearings.",
                VG_(current_epoch),
                VG_(number_of_lrus) );
   VG_(message)(Vg_DebugMsg,
                "reg-alloc: %d t-req-spill, "
                "%d+%d orig+spill uis, %d total-reg-r.",
                VG_(translations_needing_spill),
                VG_(uinstrs_prealloc),
                VG_(uinstrs_spill),
                VG_(total_reg_rank) );
   VG_(message)(Vg_DebugMsg,
                "smc-check: %d checks, %d fast pass, "
                "%d slow pass, %d discards.",
                VG_(smc_total_check4s),
                VG_(smc_cache_passed),
                VG_(smc_fancy_passed),
                VG_(smc_discard_count) );
   VG_(message)(Vg_DebugMsg,
                "   sanity: %d cheap, %d expensive checks.",
                VG_(sanity_fast_count),
                VG_(sanity_slow_count) );
}


/* ---------------------------------------------------------------------
   Main!
   ------------------------------------------------------------------ */

/* Where we jump to once Valgrind has got control, and the real
   machine's state has been copied to the m_state_static.  The sequence
   of initialisations below is order-sensitive: see the inline comments. */

void VG_(main) ( void )
{
   Int i;

   /* Set up our stack sanity-check words.  Both ends of VG_(stack)
      get address-derived magic values so overruns can be detected. */
   for (i = 0; i < 10; i++) {
      VG_(stack)[i]         = (UInt)(&VG_(stack)[i])         ^ 0xA4B3C2D1;
      VG_(stack)[10000-1-i] = (UInt)(&VG_(stack)[10000-i-1]) ^ 0xABCD4321;
   }

   /* Set up baseBlock offsets and copy the saved machine's state into
      it. */
   vg_init_baseBlock();
   VG_(copy_m_state_static_to_baseBlock)();

   /* Process Valgrind's command-line opts (from env var VG_ARGS). */
   process_cmd_line_options();

   /* Initialise the signal handling subsystem. */
   VG_(sigstartup_actions)();

#  ifdef VG_PROFILE
   VGP_(init_profiling)();
#  endif

   if (VG_(clo_instrument)) {
      VGP_PUSHCC(VgpInitAudit);
      VGM_(init_memory_audit)();
      VGP_POPCC;
      VGP_PUSHCC(VgpReadSyms);
      VG_(read_symbols)();
      VGP_POPCC;
   }

   /* This should come after init_memory_audit; otherwise the latter
      carefully sets up the permissions maps to cover the anonymous
      mmaps for the translation table and translation cache, which
      wastes > 20M of virtual address space. */
   VG_(init_transtab_and_SMC)();

   if (VG_(clo_verbosity) == 1) {
      VG_(message)(Vg_UserMsg, 
                   "For more details, rerun with: -v");
   }

   /* Now it is safe for malloc et al in vg_clientmalloc.c to act
      instrumented-ly. */
   VG_(running_on_simd_CPU) = True;
   if (VG_(clo_instrument)) {
      /* Make the flags the client-side wrappers consult readable. */
      VGM_(make_readable) ( (Addr)&VG_(running_on_simd_CPU), 1 );
      VGM_(make_readable) ( (Addr)&VG_(clo_instrument), 1 );
      VGM_(make_readable) ( (Addr)&VG_(clo_trace_malloc), 1 );
      VGM_(make_readable) ( (Addr)&VG_(clo_sloppy_malloc), 1 );
   }

   if (VG_(clo_verbosity) > 0)
      VG_(message)(Vg_UserMsg, "");

   /* Run the client until exit or until the --stop-after budget is
      exhausted. */
   VG_(bbs_to_go) = VG_(clo_stop_after);
   VG_(toploop)();

   if (VG_(clo_verbosity) > 0)
      VG_(message)(Vg_UserMsg, "");

   if (VG_(clo_instrument)) {
      VG_(show_all_errors)();
      VG_(clientmalloc_done)();
      if (VG_(clo_verbosity) == 1) {
         VG_(message)(Vg_UserMsg, 
                      "For counts of detected errors, rerun with: -v");
      }
      if (VG_(clo_leak_check)) VG_(detect_memory_leaks)();
   }
   VG_(running_on_simd_CPU) = False;

   VG_(do_sanity_checks)(True /*include expensive checks*/ );

   if (VG_(clo_verbosity) > 1)
      vg_show_counts();

   /* Internal-memory statistics dump; disabled by default. */
   if (0) {
      VG_(message)(Vg_DebugMsg, "");
      VG_(message)(Vg_DebugMsg, 
         "------ Valgrind's internal memory use stats follow ------" );
      VG_(mallocSanityCheckAll)();
      VG_(show_all_arena_stats)();
      VG_(message)(Vg_DebugMsg, 
         "------ Valgrind's ExeContext management stats follow ------" );
      VG_(show_ExeContext_stats)();
      VG_(message)(Vg_DebugMsg, 
         "------ Valgrind's client block stats follow ---------------" );
      VG_(show_client_block_stats)();
   }

#  ifdef VG_PROFILE
   VGP_(done_profiling)();
#  endif

   VG_(done_prof_mem)();

   VG_(shutdown_logging)();

   /* In LD_PRELOAD, convert "valgrind.so" into "valgrinq.so", so that
      child processes don't get traced into.  Also done on simulated
      execve system call. */
   if (!VG_(clo_trace_children)) { 
      VG_(mash_LD_PRELOAD_string)(VG_(getenv)("LD_PRELOAD"));
   }

   /* Prepare to restore state to the real CPU. */
   VG_(copy_baseBlock_to_m_state_static)();

   /* This pushes a return address on the simulator's stack, which
      is abandoned.  We call vg_sigshutdown_actions() at the end
      of vg_switch_to_real_CPU(), so as to ensure that the original
      stack and machine state is restored before the real signal
      mechanism is restored.
   */
   VG_(switch_to_real_CPU)();
}


/* Debugging thing .. can be called from assembly with OYNK macro. */
void VG_(oynk) ( Int n )
{
   OINK(n);
}


/* Find "valgrind.so" in a LD_PRELOAD=... string, and convert it to
   "valgrinq.so", which doesn't do anything.  This is used to avoid
   tracing into child processes.  To make this work the build system
   also supplies a dummy file, "valgrinq.so". 
*/
void VG_(mash_LD_PRELOAD_string)( Char* ld_preload_str )
{
   Char* p;
   if (ld_preload_str == NULL)
      return;
   p = VG_(strstr)(ld_preload_str, "valgrind.so");
   if (p == NULL)
      return;
   /* p[7] is the 'd' of "valgrind.so"; overwrite it in place.
      NOTE(review): only the first occurrence is mashed -- presumably
      LD_PRELOAD never lists valgrind.so twice; confirm. */
   p[7] = 'q';
}

/* RUNS ON THE CLIENT'S STACK, but on the real CPU.  Start GDB and get
   it to attach to this process.  Called if the user requests this
   service after an error has been shown, so she can poke around and
   look at parameters, memory, etc.  You can't meaningfully get GDB to
   continue the program, though; to continue, quit GDB.
*/
extern void VG_(start_GDB_whilst_on_client_stack) ( void )
{
   /* Command buffer; sized for "/usr/bin/gdb -nw /proc/<pid>/exe <pid>"
      which comfortably fits two pids. */
   UChar buf[100];
   VG_(sprintf)(buf,
                "/usr/bin/gdb -nw /proc/%d/exe %d", 
                VG_(getpid)(), VG_(getpid)());
   VG_(printf)("starting GDB with cmd: %s\n", buf);
   /* Stop GDB itself being run under Valgrind. */
   VG_(mash_LD_PRELOAD_string)(VG_(getenv)("LD_PRELOAD"));
   { /* HACK ALERT */
     /* Calls straight into libc's system(); blocks until GDB exits. */
     extern int system ( const char * );
     system(buf);
     /* end of HACK ALERT */
   }
   VG_(message)(Vg_UserMsg, "");
   VG_(message)(Vg_UserMsg, 
      "GDB has detached.  Valgrind regains control.  We continue.");
}


/* Print some helpful-ish text about unimplemented things, and give
   up. */
extern void VG_(unimplemented) ( Char* msg )
{
   VG_(message)(Vg_UserMsg, "");
   VG_(message)(Vg_UserMsg, 
      "Valgrind detected that your program requires");
   VG_(message)(Vg_UserMsg, 
      "the following unimplemented functionality:");
   VG_(message)(Vg_UserMsg, "   %s", msg);
   VG_(message)(Vg_UserMsg,
      "This may be because the functionality is hard to implement,");
   VG_(message)(Vg_UserMsg,
      "or because no reasonable program would behave this way,");
   VG_(message)(Vg_UserMsg,
      "or because nobody has yet needed it.  In any case, let me know");
   VG_(message)(Vg_UserMsg,
      "(jseward@acm.org) and/or try to work around the problem, if you can.");
   VG_(message)(Vg_UserMsg,
      "");
   VG_(message)(Vg_UserMsg,
      "Valgrind has to exit now.  Sorry.  Bye!");
   VG_(message)(Vg_UserMsg,
      "");
   VG_(exit)(1);
}


/*-------------------------------------------------------------*/
/*--- Replace some C lib things with equivs which don't get ---*/
/*--- spurious value warnings. THEY RUN ON SIMD CPU!
---*/ +/*-------------------------------------------------------------*/ + +char* strrchr ( const char* s, int c ) +{ + UChar ch = (UChar)((UInt)c); + UChar* p = (UChar*)s; + UChar* last = NULL; + while (True) { + if (*p == ch) last = p; + if (*p == 0) return last; + p++; + } +} + +char* strchr ( const char* s, int c ) +{ + UChar ch = (UChar)((UInt)c); + UChar* p = (UChar*)s; + while (True) { + if (*p == ch) return p; + if (*p == 0) return NULL; + p++; + } +} + +char* strcat ( char* dest, const char* src ) +{ + Char* dest_orig = dest; + while (*dest) dest++; + while (*src) *dest++ = *src++; + *dest = 0; + return dest_orig; +} + +unsigned int strlen ( const char* str ) +{ + UInt i = 0; + while (str[i] != 0) i++; + return i; +} + +char* strcpy ( char* dest, const char* src ) +{ + Char* dest_orig = dest; + while (*src) *dest++ = *src++; + *dest = 0; + return dest_orig; +} + +int strncmp ( const char* s1, const char* s2, unsigned int nmax ) +{ + unsigned int n = 0; + while (True) { + if (n >= nmax) return 0; + if (*s1 == 0 && *s2 == 0) return 0; + if (*s1 == 0) return -1; + if (*s2 == 0) return 1; + + if (*(UChar*)s1 < *(UChar*)s2) return -1; + if (*(UChar*)s1 > *(UChar*)s2) return 1; + + s1++; s2++; n++; + } +} + +int strcmp ( const char* s1, const char* s2 ) +{ + while (True) { + if (*s1 == 0 && *s2 == 0) return 0; + if (*s1 == 0) return -1; + if (*s2 == 0) return 1; + + if (*(char*)s1 < *(char*)s2) return -1; + if (*(char*)s1 > *(char*)s2) return 1; + + s1++; s2++; + } +} + +void* memchr(const void *s, int c, unsigned int n) +{ + unsigned int i; + UChar c0 = (UChar)c; + UChar* p = (UChar*)s; + for (i = 0; i < n; i++) + if (p[i] == c0) return (void*)(&p[i]); + return NULL; +} + +void* memcpy( void *dst, const void *src, unsigned int len ) +{ + register char *d; + register char *s; + if ( dst > src ) { + d = (char *)dst + len - 1; + s = (char *)src + len - 1; + while ( len-- ) + *d-- = *s--; + } else if ( dst < src ) { + d = (char *)dst; + s = (char *)src; + while ( 
len-- ) + *d++ = *s++; + } + return dst; +} + +/*--------------------------------------------------------------------*/ +/*--- end vg_main.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_malloc2.c b/coregrind/vg_malloc2.c new file mode 100644 index 000000000..1ad35be1c --- /dev/null +++ b/coregrind/vg_malloc2.c @@ -0,0 +1,1298 @@ + +/*--------------------------------------------------------------------*/ +/*--- An implementation of malloc/free which doesn't use sbrk. ---*/ +/*--- vg_malloc2.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + + +#include "vg_include.h" + +/* Define to turn on (heavyweight) debugging machinery. 
*/ +/* #define DEBUG_MALLOC */ + + +/*------------------------------------------------------------*/ +/*--- Structs n stuff ---*/ +/*------------------------------------------------------------*/ + +#define VG_REDZONE_LO_MASK 0x31415927 +#define VG_REDZONE_HI_MASK 0x14141356 + +#define VG_N_MALLOC_LISTS 16 /* do not change this */ + + +typedef UInt Word; +typedef Word WordF; +typedef Word WordL; + + +/* A superblock. */ +typedef + struct _Superblock { + struct _Superblock* next; + /* number of payload words in this superblock. */ + Int n_payload_words; + Word payload_words[0]; + } + Superblock; + + +/* An arena. */ +typedef + struct { + Char* name; + Int rz_szW; /* Red zone size in words */ + Bool rz_check; /* Check red-zone on free? */ + Int min_sblockW; /* Minimum superblock size */ + WordF* freelist[VG_N_MALLOC_LISTS]; + Superblock* sblocks; + /* Stats only. */ + UInt bytes_on_loan; + UInt bytes_mmaped; + UInt bytes_on_loan_max; + } + Arena; + + +/* Block layout: + + this block total sizeW (1 word) + freelist previous ptr (1 word) + freelist next ptr (1 word) + red zone words (depends on .rz_szW field of Arena) + (payload words) + red zone words (depends on .rz_szW field of Arena) + this block total sizeW (1 word) + + Total size in words (bszW) and payload size in words (pszW) + are related by + bszW == pszW + 4 + 2 * a->rz_szW + + Furthermore, both size fields in the block are negative if it is + not in use, and positive if it is in use. A block size of zero + is not possible, because a block always has at least four words + of overhead. +*/ +typedef + struct { + Int bszW_lo; + Word* prev; + Word* next; + Word redzone[0]; + } + BlockHeader; + + +/*------------------------------------------------------------*/ +/*--- Forwardses ... and misc ... ---*/ +/*------------------------------------------------------------*/ + +static Bool blockSane ( Arena* a, Word* b ); + +/* Align ptr p upwards to an align-sized boundary. 
*/ +static +void* align_upwards ( void* p, Int align ) +{ + Addr a = (Addr)p; + if ((a % align) == 0) return (void*)a; + return (void*)(a - (a % align) + align); +} + + +/*------------------------------------------------------------*/ +/*--- Arena management stuff ---*/ +/*------------------------------------------------------------*/ + +/* The arena structures themselves. */ +static Arena vg_arena[VG_N_ARENAS]; + +/* Functions external to this module identify arenas using ArenaIds, + not Arena*s. This fn converts the former to the latter. */ +static Arena* arenaId_to_ArenaP ( ArenaId arena ) +{ + vg_assert(arena >= 0 && arena < VG_N_ARENAS); + return & vg_arena[arena]; +} + + +/* Initialise an arena. */ +static +void arena_init ( Arena* a, Char* name, + Int rz_szW, Bool rz_check, Int min_sblockW ) +{ + Int i; + vg_assert((min_sblockW % VKI_WORDS_PER_PAGE) == 0); + a->name = name; + a->rz_szW = rz_szW; + a->rz_check = rz_check; + a->min_sblockW = min_sblockW; + for (i = 0; i < VG_N_MALLOC_LISTS; i++) a->freelist[i] = NULL; + a->sblocks = NULL; + a->bytes_on_loan = 0; + a->bytes_mmaped = 0; + a->bytes_on_loan_max = 0; +} + + +/* Print vital stats for an arena. */ +void VG_(show_all_arena_stats) ( void ) +{ + Int i; + for (i = 0; i < VG_N_ARENAS; i++) { + VG_(message)(Vg_DebugMsg, + "Arena `%s': %7d max useful, %7d mmap'd, %7d current useful", + vg_arena[i].name, + vg_arena[i].bytes_on_loan_max, + vg_arena[i].bytes_mmaped, + vg_arena[i].bytes_on_loan + ); + } +} + + +/* It is important that this library is self-initialising, because it + may get called very early on -- as a result of C++ static + constructor initialisations -- before Valgrind itself is + initialised. Hence vg_malloc() and vg_free() below always call + ensure_mm_init() to ensure things are correctly initialised. 
*/ + +static +void ensure_mm_init ( void ) +{ + static Bool init_done = False; + if (init_done) return; + + /* Use a checked red zone size of 1 word for our internal stuff, + and an unchecked zone of arbitrary size for the client. Of + course the client's red zone is checked really, but using the + addressibility maps, not by the mechanism implemented here, + which merely checks at the time of freeing that the red zone + words are unchanged. */ + + arena_init ( &vg_arena[VG_AR_PRIVATE], "private ", + 1, True, 262144 ); + + arena_init ( &vg_arena[VG_AR_SYMTAB], "symtab ", + 1, True, 262144 ); + + arena_init ( &vg_arena[VG_AR_CLIENT], "client ", + VG_AR_CLIENT_REDZONE_SZW, False, 262144 ); + + arena_init ( &vg_arena[VG_AR_DEMANGLE], "demangle", + 4 /*paranoid*/, True, 16384 ); + + arena_init ( &vg_arena[VG_AR_EXECTXT], "exectxt ", + 1, True, 16384 ); + + arena_init ( &vg_arena[VG_AR_ERRCTXT], "errctxt ", + 1, True, 16384 ); + + arena_init ( &vg_arena[VG_AR_TRANSIENT], "transien", + 2, True, 16384 ); + + init_done = True; +# ifdef DEBUG_MALLOC + VG_(mallocSanityCheckAll)(); +# endif +} + + +/*------------------------------------------------------------*/ +/*--- Arena management stuff ---*/ +/*------------------------------------------------------------*/ + +static +Superblock* newSuperblock ( Arena* a, Int cszW ) +{ + Superblock* sb; + cszW += 2; /* Take into account sb->next and sb->n_words fields */ + if (cszW < a->min_sblockW) cszW = a->min_sblockW; + while ((cszW % VKI_WORDS_PER_PAGE) > 0) cszW++; + sb = VG_(get_memory_from_mmap) ( cszW * sizeof(Word) ); + sb->n_payload_words = cszW - 2; + a->bytes_mmaped += cszW * sizeof(Word); + if (0) + VG_(message)(Vg_DebugMsg, "newSuperblock, %d payload words", + sb->n_payload_words); + return sb; +} + + +/* Find the superblock containing the given chunk. 
*/ +static +Superblock* findSb ( Arena* a, UInt* ch ) +{ + Superblock* sb; + for (sb = a->sblocks; sb; sb = sb->next) + if (&sb->payload_words[0] <= ch + && ch < &sb->payload_words[sb->n_payload_words]) + return sb; + VG_(printf)("findSb: can't find pointer %p in arena `%s'\n", + ch, a->name ); + VG_(panic)("findSb: vg_free() in wrong arena?"); + return NULL; /*NOTREACHED*/ +} + + +/*------------------------------------------------------------*/ +/*--- Low-level functions for working with blocks. ---*/ +/*------------------------------------------------------------*/ + +/* Add the not-in-use attribute to a bszW. */ +static __inline__ +Int mk_free_bszW ( Int bszW ) +{ + vg_assert(bszW != 0); + return (bszW < 0) ? bszW : -bszW; +} + +/* Add the in-use attribute to a bszW. */ +static __inline__ +Int mk_inuse_bszW ( Int bszW ) +{ + vg_assert(bszW != 0); + return (bszW < 0) ? -bszW : bszW; +} + +/* Remove the in-use/not-in-use attribute from a bszW, leaving just + the size. */ +static __inline__ +Int mk_plain_bszW ( Int bszW ) +{ + vg_assert(bszW != 0); + return (bszW < 0) ? -bszW : bszW; +} + +/* Does this bszW have the in-use attribute ? */ +static __inline__ +Bool is_inuse_bszW ( Int bszW ) +{ + vg_assert(bszW != 0); + return (bszW < 0) ? False : True; +} + + +/* Given the addr of the first word of a block, return the addr of the + last word. */ +static __inline__ +WordL* first_to_last ( WordF* fw ) +{ + return fw + mk_plain_bszW(fw[0]) - 1; +} + +/* Given the addr of the last word of a block, return the addr of the + first word. */ +static __inline__ +WordF* last_to_first ( WordL* lw ) +{ + return lw - mk_plain_bszW(lw[0]) + 1; +} + + +/* Given the addr of the first word of a block, return the addr of the + first word of its payload. */ +static __inline__ +Word* first_to_payload ( Arena* a, WordF* fw ) +{ + return & fw[3 + a->rz_szW]; +} + +/* Given the addr of the first word of a the payload of a block, + return the addr of the first word of the block. 
*/ +static __inline__ +Word* payload_to_first ( Arena* a, WordF* payload ) +{ + return & payload[- 3 - a->rz_szW]; +} + +/* Set and get the lower size field of a block. */ +static __inline__ +void set_bszW_lo ( WordF* fw, Int bszW ) { + fw[0] = bszW; +} +static __inline__ +Int get_bszW_lo ( WordF* fw ) +{ + return fw[0]; +} + + +/* Set and get the next and previous link fields of a block. */ +static __inline__ +void set_prev_p ( WordF* fw, Word* prev_p ) { + fw[1] = (Word)prev_p; +} +static __inline__ +void set_next_p ( WordF* fw, Word* next_p ) { + fw[2] = (Word)next_p; +} +static __inline__ +Word* get_prev_p ( WordF* fw ) { + return (Word*)(fw[1]); +} +static __inline__ +Word* get_next_p ( WordF* fw ) { + return (Word*)(fw[2]); +} + + +/* Set and get the upper size field of a block. */ +static __inline__ +void set_bszW_hi ( WordF* fw, Int bszW ) { + WordL* lw = first_to_last(fw); + vg_assert(lw == fw + mk_plain_bszW(bszW) - 1); + lw[0] = bszW; +} +static __inline__ +Int get_bszW_hi ( WordF* fw ) { + WordL* lw = first_to_last(fw); + return lw[0]; +} + +/* Get the upper size field of a block, given a pointer to the last + word of it. */ +static __inline__ +Int get_bszW_hi_from_last_word ( WordL* lw ) { + WordF* fw = last_to_first(lw); + return get_bszW_lo(fw); +} + + +/* Read and write the lower and upper red-zone words of a block. */ +static __inline__ +void set_rz_lo_word ( Arena* a, WordF* fw, Int rz_wordno, Word w ) +{ + fw[3 + rz_wordno] = w; +} +static __inline__ +void set_rz_hi_word ( Arena* a, WordF* fw, Int rz_wordno, Word w ) +{ + WordL* lw = first_to_last(fw); + lw[-1-rz_wordno] = w; +} +static __inline__ +Word get_rz_lo_word ( Arena* a, WordF* fw, Int rz_wordno ) +{ + return fw[3 + rz_wordno]; +} +static __inline__ +Word get_rz_hi_word ( Arena* a, WordF* fw, Int rz_wordno ) +{ + WordL* lw = first_to_last(fw); + return lw[-1-rz_wordno]; +} + + +/* Return the lower, upper and total overhead in words for a block. 
+ These are determined purely by which arena the block lives in. */ +static __inline__ +Int overhead_szW_lo ( Arena* a ) +{ + return 3 + a->rz_szW; +} +static __inline__ +Int overhead_szW_hi ( Arena* a ) +{ + return 1 + a->rz_szW; +} +static __inline__ +Int overhead_szW ( Arena* a ) +{ + return overhead_szW_lo(a) + overhead_szW_hi(a); +} + + +/* Convert pointer size in words to block size in words, and back. */ +static __inline__ +Int pszW_to_bszW ( Arena* a, Int pszW ) +{ + vg_assert(pszW >= 0); + return pszW + overhead_szW(a); +} +static __inline__ +Int bszW_to_pszW ( Arena* a, Int bszW ) +{ + Int pszW = bszW - overhead_szW(a); + vg_assert(pszW >= 0); + return pszW; +} + +/*------------------------------------------------------------*/ +/*--- Functions for working with freelists. ---*/ +/*------------------------------------------------------------*/ + +/* Determination of which freelist a block lives on is based on the + payload size, not block size, in words. */ + +/* Convert a payload size in words to a freelist number. */ + +static +Int pszW_to_listNo ( Int pszW ) +{ + vg_assert(pszW >= 0); + if (pszW <= 3) return 0; + if (pszW <= 4) return 1; + if (pszW <= 5) return 2; + if (pszW <= 6) return 3; + if (pszW <= 7) return 4; + if (pszW <= 8) return 5; + if (pszW <= 9) return 6; + if (pszW <= 10) return 7; + if (pszW <= 11) return 8; + if (pszW <= 12) return 9; + if (pszW <= 16) return 10; + if (pszW <= 32) return 11; + if (pszW <= 64) return 12; + if (pszW <= 128) return 13; + if (pszW <= 256) return 14; + return 15; +} + + +/* What are the minimum and maximum payload sizes for a given list? 
*/ + +static +Int listNo_to_pszW_min ( Int listNo ) +{ + Int pszW = 0; + vg_assert(listNo >= 0 && listNo <= VG_N_MALLOC_LISTS); + while (pszW_to_listNo(pszW) < listNo) pszW++; + return pszW; +} + +static +Int listNo_to_pszW_max ( Int listNo ) +{ + vg_assert(listNo >= 0 && listNo <= VG_N_MALLOC_LISTS); + if (listNo == VG_N_MALLOC_LISTS-1) { + return 999999999; + } else { + return listNo_to_pszW_min(listNo+1) - 1; + } +} + + +/* A nasty hack to try and reduce fragmentation. Try and replace + a->freelist[lno] with another block on the same list but with a + lower address, with the idea of attempting to recycle the same + blocks rather than cruise through the address space. */ + +static +void swizzle ( Arena* a, Int lno ) +{ + UInt* p_best; + UInt* pp; + UInt* pn; + Int i; + + p_best = a->freelist[lno]; + if (p_best == NULL) return; + + pn = pp = p_best; + for (i = 0; i < 20; i++) { + pn = get_next_p(pn); + pp = get_prev_p(pp); + if (pn < p_best) p_best = pn; + if (pp < p_best) p_best = pp; + } + if (p_best < a->freelist[lno]) { +# ifdef DEBUG_MALLOC + VG_(printf)("retreat by %d\n", + ((Char*)(a->freelist[lno])) - ((Char*)p_best)); +# endif + a->freelist[lno] = p_best; + } +} + + +/*------------------------------------------------------------*/ +/*--- Creating and deleting blocks. ---*/ +/*------------------------------------------------------------*/ + +/* Mark the words at b .. b+bszW-1 as not in use, and add them to the + relevant free list. */ + +static +void mkFreeBlock ( Arena* a, Word* b, Int bszW, Int b_lno ) +{ + Int pszW = bszW_to_pszW(a, bszW); + vg_assert(pszW >= 0); + vg_assert(b_lno == pszW_to_listNo(pszW)); + /* Set the size fields and indicate not-in-use. */ + set_bszW_lo(b, mk_free_bszW(bszW)); + set_bszW_hi(b, mk_free_bszW(bszW)); + + /* Add to the relevant list. 
*/ + if (a->freelist[b_lno] == NULL) { + set_prev_p(b, b); + set_next_p(b, b); + a->freelist[b_lno] = b; + } else { + Word* b_prev = get_prev_p(a->freelist[b_lno]); + Word* b_next = a->freelist[b_lno]; + set_next_p(b_prev, b); + set_prev_p(b_next, b); + set_next_p(b, b_next); + set_prev_p(b, b_prev); + } +# ifdef DEBUG_MALLOC + (void)blockSane(a,b); +# endif +} + + +/* Mark the words at b .. b+bszW-1 as in use, and set up the block + appropriately. */ +static +void mkInuseBlock ( Arena* a, UInt* b, UInt bszW ) +{ + Int i; + set_bszW_lo(b, mk_inuse_bszW(bszW)); + set_bszW_hi(b, mk_inuse_bszW(bszW)); + set_prev_p(b, NULL); + set_next_p(b, NULL); + if (a->rz_check) { + for (i = 0; i < a->rz_szW; i++) { + set_rz_lo_word(a, b, i, (UInt)b ^ VG_REDZONE_LO_MASK); + set_rz_hi_word(a, b, i, (UInt)b ^ VG_REDZONE_HI_MASK); + } + } +# ifdef DEBUG_MALLOC + (void)blockSane(a,b); +# endif +} + + +/* Remove a block from a given list. Does no sanity checking. */ +static +void unlinkBlock ( Arena* a, UInt* b, Int listno ) +{ + vg_assert(listno >= 0 && listno < VG_N_MALLOC_LISTS); + if (get_prev_p(b) == b) { + /* Only one element in the list; treat it specially. */ + vg_assert(get_next_p(b) == b); + a->freelist[listno] = NULL; + } else { + UInt* b_prev = get_prev_p(b); + UInt* b_next = get_next_p(b); + a->freelist[listno] = b_prev; + set_next_p(b_prev, b_next); + set_prev_p(b_next, b_prev); + swizzle ( a, listno ); + } + set_prev_p(b, NULL); + set_next_p(b, NULL); +} + + +/* Split an existing free block into two pieces, and put the fragment + (the second one along in memory) onto the relevant free list. + req_bszW is the required size of the block which isn't the + fragment. 
*/ +static +void splitChunk ( Arena* a, UInt* b, Int b_listno, UInt req_bszW ) +{ + Int b_bszW, frag_bszW; + b_bszW = mk_plain_bszW(get_bszW_lo(b)); + vg_assert(req_bszW < b_bszW); + frag_bszW = b_bszW - req_bszW; + vg_assert(frag_bszW >= overhead_szW(a)); + /* + printf( "split %d into %d and %d\n", + b_bszW,req_bszW,frag_bszW ); + */ + vg_assert(bszW_to_pszW(a, frag_bszW) > 0); + unlinkBlock(a, b, b_listno); + mkInuseBlock(a, b, req_bszW); + mkFreeBlock(a, &b[req_bszW], frag_bszW, + pszW_to_listNo(bszW_to_pszW(a, frag_bszW))); +} + + +/*------------------------------------------------------------*/ +/*--- Sanity-check/debugging machinery. ---*/ +/*------------------------------------------------------------*/ + +/* Do some crude sanity checks on a chunk. */ +static +Bool blockSane ( Arena* a, Word* b ) +{ +# define BLEAT(str) VG_(printf)("blockSane: fail -- %s\n",str) + Int i; + if (get_bszW_lo(b) != get_bszW_hi(b)) + {BLEAT("sizes");return False;} + if (a->rz_check && is_inuse_bszW(get_bszW_lo(b))) { + for (i = 0; i < a->rz_szW; i++) { + if (get_rz_lo_word(a, b, i) != ((Word)b ^ VG_REDZONE_LO_MASK)) + {BLEAT("redzone-lo");return False;} + if (get_rz_hi_word(a, b, i) != ((Word)b ^ VG_REDZONE_HI_MASK)) + {BLEAT("redzone-hi");return False;} + } + } + return True; +# undef BLEAT +} + + +/* Print superblocks (only for debugging). */ +static +void ppSuperblocks ( Arena* a ) +{ + Int i, ch_bszW, blockno; + UInt* ch; + Superblock* sb = a->sblocks; + blockno = 1; + + while (sb) { + VG_(printf)( "\n" ); + VG_(printf)( "superblock %d at %p, sb->n_pl_ws = %d, next = %p\n", + blockno++, sb, sb->n_payload_words, sb->next ); + i = 0; + while (True) { + if (i >= sb->n_payload_words) break; + ch = &sb->payload_words[i]; + ch_bszW = get_bszW_lo(ch); + VG_(printf)( " block at %d, bszW %d: ", i, mk_plain_bszW(ch_bszW) ); + VG_(printf)( "%s, ", is_inuse_bszW(ch_bszW) ? "inuse" : "free" ); + VG_(printf)( "%s\n", blockSane(a,ch) ? 
"ok" : "BAD" ); + i += mk_plain_bszW(ch_bszW); + } + if (i > sb->n_payload_words) + VG_(printf)( " last block overshoots end of SB\n"); + sb = sb->next; + } + VG_(printf)( "end of superblocks\n\n" ); +} + + +/* Sanity check both the superblocks and the chains. */ +void VG_(mallocSanityCheckArena) ( ArenaId aid ) +{ + Int i, superblockctr, b_bszW, b_pszW, blockctr_sb, blockctr_li; + Int blockctr_sb_free, listno, list_min_pszW, list_max_pszW; + Superblock* sb; + Bool thisFree, lastWasFree; + Word* b; + Word* b_prev; + UInt arena_bytes_on_loan; + Arena* a; + +# define BOMB VG_(panic)("vg_mallocSanityCheckArena") + + a = arenaId_to_ArenaP(aid); + + /* First, traverse all the superblocks, inspecting the chunks in + each. */ + superblockctr = blockctr_sb = blockctr_sb_free = 0; + arena_bytes_on_loan = 0; + sb = a->sblocks; + while (sb) { + lastWasFree = False; + superblockctr++; + i = 0; + while (True) { + if (i >= sb->n_payload_words) break; + blockctr_sb++; + b = &sb->payload_words[i]; + b_bszW = get_bszW_lo(b); + if (!blockSane(a, b)) { + VG_(printf)( "mallocSanityCheck: sb %p, block %d (bszW %d): " + "BAD\n", + sb, i, b_bszW ); + BOMB; + } + thisFree = !is_inuse_bszW(b_bszW); + if (thisFree && lastWasFree) { + VG_(printf)( "mallocSanityCheck: sb %p, block %d (bszW %d): " + "UNMERGED FREES\n", + sb, i, b_bszW ); + BOMB; + } + lastWasFree = thisFree; + if (thisFree) blockctr_sb_free++; + if (!thisFree) + arena_bytes_on_loan += sizeof(Word) * bszW_to_pszW(a, b_bszW); + i += mk_plain_bszW(b_bszW); + } + if (i > sb->n_payload_words) { + VG_(printf)( "mallocSanityCheck: sb %p: last block " + "overshoots end\n", sb); + BOMB; + } + sb = sb->next; + } + + if (arena_bytes_on_loan != a->bytes_on_loan) { + VG_(printf)( + "mallocSanityCheck: a->bytes_on_loan %d, " + "arena_bytes_on_loan %d: " + "MISMATCH\n", a->bytes_on_loan, arena_bytes_on_loan); + ppSuperblocks(a); + BOMB; + } + + /* Second, traverse each list, checking that the back pointers make + sense, counting blocks 
encountered, and checking that each block + is an appropriate size for this list. */ + blockctr_li = 0; + for (listno = 0; listno < VG_N_MALLOC_LISTS; listno++) { + list_min_pszW = listNo_to_pszW_min(listno); + list_max_pszW = listNo_to_pszW_max(listno); + b = a->freelist[listno]; + if (b == NULL) continue; + while (True) { + b_prev = b; + b = get_next_p(b); + if (get_prev_p(b) != b_prev) { + VG_(printf)( "mallocSanityCheck: list %d at %p: " + "BAD LINKAGE\n", + listno, b ); + BOMB; + } + b_pszW = bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo(b))); + if (b_pszW < list_min_pszW || b_pszW > list_max_pszW) { + VG_(printf)( + "mallocSanityCheck: list %d at %p: " + "WRONG CHAIN SIZE %d (%d, %d)\n", + listno, b, b_pszW, list_min_pszW, list_max_pszW ); + BOMB; + } + blockctr_li++; + if (b == a->freelist[listno]) break; + } + } + + if (blockctr_sb_free != blockctr_li) { + VG_(printf)( + "mallocSanityCheck: BLOCK COUNT MISMATCH " + "(via sbs %d, via lists %d)\n", + blockctr_sb_free, blockctr_li ); + ppSuperblocks(a); + BOMB; + } + + VG_(message)(Vg_DebugMsg, + "mSC [%s]: %2d sbs, %5d tot bs, %4d/%-4d free bs, " + "%2d lists, %7d mmap, %7d loan", + a->name, + superblockctr, + blockctr_sb, blockctr_sb_free, blockctr_li, + VG_N_MALLOC_LISTS, + a->bytes_mmaped, a->bytes_on_loan); +# undef BOMB +} + + +void VG_(mallocSanityCheckAll) ( void ) +{ + Int i; + for (i = 0; i < VG_N_ARENAS; i++) + VG_(mallocSanityCheckArena) ( i ); +} + + +/* Really, this isn't the right place for this. Nevertheless: find + out if an arena is empty -- currently has no bytes on loan. This + is useful for checking for memory leaks (of valgrind, not the + client.) +*/ +Bool VG_(is_empty_arena) ( ArenaId aid ) +{ + Arena* a; + Superblock* sb; + WordF* b; + Int b_bszW; + ensure_mm_init(); + a = arenaId_to_ArenaP(aid); + for (sb = a->sblocks; sb != NULL; sb = sb->next) { + /* If the superblock is empty, it should contain a single free + block, of the right size. 
*/ + b = &(sb->payload_words[0]); + b_bszW = get_bszW_lo(b); + if (is_inuse_bszW(b_bszW)) return False; + if (mk_plain_bszW(b_bszW) != sb->n_payload_words) return False; + /* So this block is not in use and is of the right size. Keep + going. */ + } + return True; +} + + +/*------------------------------------------------------------*/ +/*--- Externally-visible functions. ---*/ +/*------------------------------------------------------------*/ + +void* VG_(malloc) ( ArenaId aid, Int req_pszB ) +{ + Int req_pszW, req_bszW, frag_bszW, b_bszW, lno; + Superblock* new_sb; + Word* b; + Arena* a; + + VGP_PUSHCC(VgpMalloc); + + ensure_mm_init(); + a = arenaId_to_ArenaP(aid); + + vg_assert(req_pszB >= 0); + vg_assert(req_pszB < 0x7FFFFFF0); + + req_pszW = (req_pszB + VKI_BYTES_PER_WORD - 1) / VKI_BYTES_PER_WORD; + + /* Keep gcc -O happy: */ + b = NULL; + + /* Start searching at this list. */ + lno = pszW_to_listNo(req_pszW); + + /* This loop finds a list which has a block big enough, or sets + req_listno to N_LISTS if no such block exists. */ + while (True) { + if (lno == VG_N_MALLOC_LISTS) break; + /* If this list is empty, try the next one. */ + if (a->freelist[lno] == NULL) { + lno++; + continue; + } + /* Scan a->list[lno] to find a big-enough chunk. */ + b = a->freelist[lno]; + b_bszW = mk_plain_bszW(get_bszW_lo(b)); + while (True) { + if (bszW_to_pszW(a, b_bszW) >= req_pszW) break; + b = get_next_p(b); + b_bszW = mk_plain_bszW(get_bszW_lo(b)); + if (b == a->freelist[lno]) break; + } + if (bszW_to_pszW(a, b_bszW) >= req_pszW) break; + /* No luck? Try a larger list. */ + lno++; + } + + /* Either lno < VG_N_MALLOC_LISTS and b points to the selected + block, or lno == VG_N_MALLOC_LISTS, and we have to allocate a + new superblock. 
*/ + + if (lno == VG_N_MALLOC_LISTS) { + req_bszW = pszW_to_bszW(a, req_pszW); + new_sb = newSuperblock(a, req_bszW); + vg_assert(new_sb != NULL); + new_sb->next = a->sblocks; + a->sblocks = new_sb; + b = &(new_sb->payload_words[0]); + lno = pszW_to_listNo(bszW_to_pszW(a, new_sb->n_payload_words)); + mkFreeBlock ( a, b, new_sb->n_payload_words, lno); + } + + /* Ok, we can allocate from b, which lives in list req_listno. */ + vg_assert(b != NULL); + vg_assert(lno >= 0 && lno < VG_N_MALLOC_LISTS); + vg_assert(a->freelist[lno] != NULL); + b_bszW = mk_plain_bszW(get_bszW_lo(b)); + req_bszW = pszW_to_bszW(a, req_pszW); + /* req_bszW is the size of the block we are after. b_bszW is the + size of what we've actually got. */ + vg_assert(b_bszW >= req_bszW); + + /* Could we split this block and still get a useful fragment? + Where "useful" means that the payload size of the frag is at + least one word. */ + frag_bszW = b_bszW - req_bszW; + if (frag_bszW > overhead_szW(a)) { + splitChunk(a, b, lno, req_bszW); + } else { + /* No, mark as in use and use as-is. 
*/ + unlinkBlock(a, b, lno); + /* + set_bszW_lo(b, mk_inuse_bszW(b_bszW)); + set_bszW_hi(b, mk_inuse_bszW(b_bszW)); + */ + mkInuseBlock(a, b, b_bszW); + } + vg_assert(req_bszW <= mk_plain_bszW(get_bszW_lo(b))); + + a->bytes_on_loan + += sizeof(Word) + * bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo(b))); + if (a->bytes_on_loan > a->bytes_on_loan_max) + a->bytes_on_loan_max = a->bytes_on_loan; + +# ifdef DEBUG_MALLOC + VG_(mallocSanityCheckArena)(aid); +# endif + + VGP_POPCC; + return first_to_payload(a, b); +} + + +void VG_(free) ( ArenaId aid, void* ptr ) +{ + Superblock* sb; + UInt* sb_payl_firstw; + UInt* sb_payl_lastw; + UInt* other; + UInt* ch; + Int ch_bszW, ch_pszW, other_bszW, ch_listno; + Arena* a; + + VGP_PUSHCC(VgpMalloc); + + ensure_mm_init(); + a = arenaId_to_ArenaP(aid); + + if (ptr == NULL) return; + + ch = payload_to_first(a, ptr); + +# ifdef DEBUG_MALLOC + vg_assert(blockSane(a,ch)); +# endif + + a->bytes_on_loan + -= sizeof(Word) + * bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo(ch))); + + sb = findSb( a, ch ); + sb_payl_firstw = &(sb->payload_words[0]); + sb_payl_lastw = &(sb->payload_words[sb->n_payload_words-1]); + + /* Put this chunk back on a list somewhere. */ + ch_bszW = get_bszW_lo(ch); + ch_pszW = bszW_to_pszW(a, ch_bszW); + ch_listno = pszW_to_listNo(ch_pszW); + mkFreeBlock( a, ch, ch_bszW, ch_listno ); + + /* See if this block can be merged with the following one. */ + other = ch + ch_bszW; + /* overhead_szW(a) is the smallest possible bszW for this arena. + So the nearest possible end to the block beginning at other is + other+overhead_szW(a)-1. Hence the test below. 
*/ + if (other+overhead_szW(a)-1 <= sb_payl_lastw) { + other_bszW = get_bszW_lo(other); + if (!is_inuse_bszW(other_bszW)) { + /* VG_(printf)( "merge-successor\n"); */ + other_bszW = mk_plain_bszW(other_bszW); +# ifdef DEBUG_MALLOC + vg_assert(blockSane(a, other)); +# endif + unlinkBlock( a, ch, ch_listno ); + unlinkBlock( a, other, pszW_to_listNo(bszW_to_pszW(a,other_bszW)) ); + ch_bszW += other_bszW; + ch_listno = pszW_to_listNo(bszW_to_pszW(a, ch_bszW)); + mkFreeBlock( a, ch, ch_bszW, ch_listno ); + } + } + + /* See if this block can be merged with its predecessor. */ + if (ch-overhead_szW(a) >= sb_payl_firstw) { + other_bszW = get_bszW_hi_from_last_word( ch-1 ); + if (!is_inuse_bszW(other_bszW)) { + /* VG_(printf)( "merge-predecessor\n"); */ + other = last_to_first( ch-1 ); + other_bszW = mk_plain_bszW(other_bszW); + unlinkBlock( a, ch, ch_listno ); + unlinkBlock( a, other, pszW_to_listNo(bszW_to_pszW(a, other_bszW)) ); + ch = other; + ch_bszW += other_bszW; + ch_listno = pszW_to_listNo(bszW_to_pszW(a, ch_bszW)); + mkFreeBlock( a, ch, ch_bszW, ch_listno ); + } + } + +# ifdef DEBUG_MALLOC + VG_(mallocSanityCheckArena)(aid); +# endif + + VGP_POPCC; +} + + +/* + The idea for malloc_aligned() is to allocate a big block, base, and + then split it into two parts: frag, which is returned to the the + free pool, and align, which is the bit we're really after. Here's + a picture. L and H denote the block lower and upper overheads, in + words. The details are gruesome. Note it is slightly complicated + because the initial request to generate base may return a bigger + block than we asked for, so it is important to distinguish the base + request size and the base actual size. + + frag_b align_b + | | + | frag_p | align_p + | | | | + v v v v + + +---+ +---+---+ +---+ + | L |----------------| H | L |---------------| H | + +---+ +---+---+ +---+ + + ^ ^ ^ + | | : + | base_p this addr must be aligned + | + base_b + + . . . . . . . + <------ frag_bszW -------> . . . + . 
<------------- base_pszW_act -----------> . + . . . . . . . + +*/ +void* VG_(malloc_aligned) ( ArenaId aid, Int req_alignB, Int req_pszB ) +{ + Int req_alignW, req_pszW, base_pszW_req, base_pszW_act, frag_bszW; + Word *base_b, *base_p, *align_p; + UInt saved_bytes_on_loan; + Arena* a; + + ensure_mm_init(); + a = arenaId_to_ArenaP(aid); + + vg_assert(req_pszB >= 0); + vg_assert(req_pszB < 0x7FFFFFF0); + + /* Check that the requested alignment seems reasonable; that is, is + a power of 2. There must be a better way to do this. What is + it? */ + switch (req_alignB) { + case 8: case 16: case 32: case 64: case 128: case 256: + case 512: case 1024: case 2048: case 4096: case 8192: + case 16384: case 32768: case 65536: case 131072: + case 1048576: + /* can't be bothered to calculate larger ones */ + break; + default: + VG_(printf)("vg_malloc_aligned(%p, %d, %d)\nbad alignment request", + a, req_pszB, req_alignB ); + VG_(panic)("vg_malloc_aligned"); + /*NOTREACHED*/ + } + + /* Required alignment, in words. Since it's constrained to be a + power of 2 >= word size, no need to align the alignment. Still, + we check. */ + req_alignW = req_alignB / VKI_BYTES_PER_WORD; + vg_assert(req_alignB == req_alignW * VKI_BYTES_PER_WORD); + + /* Required payload size for the aligned chunk. */ + req_pszW = (req_pszB + VKI_BYTES_PER_WORD - 1) / VKI_BYTES_PER_WORD; + + /* Payload size to request for the big block that we will split + up. */ + base_pszW_req = req_pszW + overhead_szW(a) + req_alignW; + + /* Payload ptr for the block we are going to split. Note this + changes a->bytes_on_loan; we save and restore it ourselves. */ + saved_bytes_on_loan = a->bytes_on_loan; + base_p = VG_(malloc) ( aid, base_pszW_req * VKI_BYTES_PER_WORD ); + a->bytes_on_loan = saved_bytes_on_loan; + + /* Block ptr for the block we are going to split. */ + base_b = payload_to_first ( a, base_p ); + + /* Pointer to the payload of the aligned block we are going to + return. This has to be suitably aligned. 
*/ + align_p = align_upwards ( base_b + 2 * overhead_szW_lo(a) + + overhead_szW_hi(a), + req_alignB ); + + /* The block size of the fragment we will create. This must be big + enough to actually create a fragment. */ + frag_bszW = align_p - overhead_szW_lo(a) - base_b; + vg_assert(frag_bszW >= overhead_szW(a)); + + /* The actual payload size of the block we are going to split. */ + base_pszW_act = bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo(base_b))); + + /* Create the fragment block, and put it back on the relevant free + list. */ + mkFreeBlock ( a, base_b, frag_bszW, + pszW_to_listNo(bszW_to_pszW(a, frag_bszW)) ); + + /* Create the aligned block. */ + mkInuseBlock ( a, + align_p - overhead_szW_lo(a), + base_p + base_pszW_act + + overhead_szW_hi(a) + - (align_p - overhead_szW_lo(a)) ); + + /* Final sanity checks. */ + vg_assert(( (UInt)align_p % req_alignB) == 0); + + vg_assert(is_inuse_bszW(get_bszW_lo(payload_to_first(a, align_p)))); + + vg_assert(req_pszW + <= + bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo( + payload_to_first(a, align_p)))) + ); + + a->bytes_on_loan + += sizeof(Word) + * bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo( + payload_to_first(a, align_p)))); + if (a->bytes_on_loan > a->bytes_on_loan_max) + a->bytes_on_loan_max = a->bytes_on_loan; + +# ifdef DEBUG_MALLOC + VG_(mallocSanityCheckArena)(aid); +# endif + + return align_p; +} + + +/*------------------------------------------------------------*/ +/*--- Services layered on top of malloc/free. 
---*/ +/*------------------------------------------------------------*/ + +void* VG_(calloc) ( ArenaId aid, Int nmemb, Int nbytes ) +{ + Int i, size; + UChar* p; + size = nmemb * nbytes; + vg_assert(size > 0); + p = VG_(malloc) ( aid, size ); + for (i = 0; i < size; i++) p[i] = 0; + return p; +} + + +void* VG_(realloc) ( ArenaId aid, void* ptr, Int req_pszB ) +{ + Arena* a; + Int old_bszW, old_pszW, old_pszB, i; + UChar *p_old, *p_new; + UInt* ch; + + ensure_mm_init(); + a = arenaId_to_ArenaP(aid); + + vg_assert(req_pszB >= 0); + vg_assert(req_pszB < 0x7FFFFFF0); + + ch = payload_to_first(a, ptr); + vg_assert(blockSane(a, ch)); + + old_bszW = get_bszW_lo(ch); + vg_assert(is_inuse_bszW(old_bszW)); + old_bszW = mk_plain_bszW(old_bszW); + old_pszW = bszW_to_pszW(a, old_bszW); + old_pszB = old_pszW * VKI_BYTES_PER_WORD; + + if (req_pszB <= old_pszB) return ptr; + + p_new = VG_(malloc) ( aid, req_pszB ); + p_old = (UChar*)ptr; + for (i = 0; i < old_pszB; i++) + p_new[i] = p_old[i]; + + VG_(free)(aid, p_old); + return p_new; +} + + +/*------------------------------------------------------------*/ +/*--- The original test driver machinery. 
---*/ +/*------------------------------------------------------------*/ + +#if 0 + +#if 1 +#define N_TEST_TRANSACTIONS 100000000 +#define N_TEST_ARR 200000 +#define M_TEST_MALLOC 1000 +#else +#define N_TEST_TRANSACTIONS 500000 +#define N_TEST_ARR 30000 +#define M_TEST_MALLOC 500 +#endif + + +void* test_arr[N_TEST_ARR]; + +int main ( int argc, char** argv ) +{ + Int i, j, k, nbytes, qq; + unsigned char* chp; + Arena* a = &arena[VG_AR_PRIVATE]; + srandom(1); + for (i = 0; i < N_TEST_ARR; i++) + test_arr[i] = NULL; + + for (i = 0; i < N_TEST_TRANSACTIONS; i++) { + if (i % 50000 == 0) mallocSanityCheck(a); + j = random() % N_TEST_ARR; + if (test_arr[j]) { + vg_free(a, test_arr[j]); + test_arr[j] = NULL; + } else { + nbytes = 1 + random() % M_TEST_MALLOC; + qq = random()%64; + if (qq == 32) + nbytes *= 17; + else if (qq == 33) + nbytes = 0; + test_arr[j] + = (i % 17) == 0 + ? vg_memalign(a, nbytes, 1<< (3+(random()%10))) + : vg_malloc( a, nbytes ); + chp = test_arr[j]; + for (k = 0; k < nbytes; k++) + chp[k] = (unsigned char)(k + 99); + } + } + + + for (i = 0; i < N_TEST_ARR; i++) { + if (test_arr[i]) { + vg_free(a, test_arr[i]); + test_arr[i] = NULL; + } + } + mallocSanityCheck(a); + + fprintf(stderr, "ALL DONE\n"); + + show_arena_stats(a); + fprintf(stderr, "%d max useful, %d bytes mmap'd (%4.1f%%), %d useful\n", + a->bytes_on_loan_max, + a->bytes_mmaped, + 100.0 * (double)a->bytes_on_loan_max / (double)a->bytes_mmaped, + a->bytes_on_loan ); + + return 0; +} +#endif /* 0 */ + + +/*--------------------------------------------------------------------*/ +/*--- end vg_malloc2.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_memory.c b/coregrind/vg_memory.c new file mode 100644 index 000000000..13ae15795 --- /dev/null +++ b/coregrind/vg_memory.c @@ -0,0 +1,2300 @@ + +/*--------------------------------------------------------------------*/ +/*--- Maintain bitmaps of memory, tracking the accessibility (A) ---*/ 
+/*--- and validity (V) status of each byte. ---*/ +/*--- vg_memory.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" + +/* Define to debug the mem audit system. */ +/* #define VG_DEBUG_MEMORY */ + +/* Define to debug the memory-leak-detector. */ +/* #define VG_DEBUG_LEAKCHECK */ + +/* Define to collect detailed performance info. */ +/* #define VG_PROFILE_MEMORY */ + + +/*------------------------------------------------------------*/ +/*--- Low-level support for memory checking. ---*/ +/*------------------------------------------------------------*/ + +/* + All reads and writes are checked against a memory map, which + records the state of all memory in the process. The memory map is + organised like this: + + The top 16 bits of an address are used to index into a top-level + map table, containing 65536 entries. 
Each entry is a pointer to a + second-level map, which records the accessibility and validity + permissions for the 65536 bytes indexed by the lower 16 bits of the + address. Each byte is represented by nine bits, one indicating + accessibility, the other eight validity. So each second-level map + contains 73728 bytes. This two-level arrangement conveniently + divides the 4G address space into 64k lumps, each size 64k bytes. + + All entries in the primary (top-level) map must point to a valid + secondary (second-level) map. Since most of the 4G of address + space will not be in use -- ie, not mapped at all -- there is a + distinguished secondary map, which indicates `not addressible and + not valid' writeable for all bytes. Entries in the primary map for + which the entire 64k is not in use at all point at this + distinguished map. + + [...] lots of stuff deleted due to out of date-ness + + As a final optimisation, the alignment and address checks for + 4-byte loads and stores are combined in a neat way. The primary + map is extended to have 262144 entries (2^18), rather than 2^16. + The top 3/4 of these entries are permanently set to the + distinguished secondary map. For a 4-byte load/store, the + top-level map is indexed not with (addr >> 16) but instead f(addr), + where + + f( XXXX XXXX XXXX XXXX ____ ____ ____ __YZ ) + = ____ ____ ____ __YZ XXXX XXXX XXXX XXXX or + = ____ ____ ____ __ZY XXXX XXXX XXXX XXXX + + ie the lowest two bits are placed above the 16 high address bits. + If either of these two bits is nonzero, the address is misaligned; + this will select a secondary map from the upper 3/4 of the primary + map. Because this is always the distinguished secondary map, a + (bogus) address check failure will result. The failure handling + code can then figure out whether this is a genuine addr check + failure or whether it is a possibly-legitimate access at a + misaligned address. 
+*/ + + +/*------------------------------------------------------------*/ +/*--- Crude profiling machinery. ---*/ +/*------------------------------------------------------------*/ + +#ifdef VG_PROFILE_MEMORY + +#define N_PROF_EVENTS 120 + +static UInt event_ctr[N_PROF_EVENTS]; + +static void init_prof_mem ( void ) +{ + Int i; + for (i = 0; i < N_PROF_EVENTS; i++) + event_ctr[i] = 0; +} + +void VG_(done_prof_mem) ( void ) +{ + Int i; + for (i = 0; i < N_PROF_EVENTS; i++) { + if ((i % 10) == 0) + VG_(printf)("\n"); + if (event_ctr[i] > 0) + VG_(printf)( "prof mem event %2d: %d\n", i, event_ctr[i] ); + } + VG_(printf)("\n"); +} + +#define PROF_EVENT(ev) \ + do { vg_assert((ev) >= 0 && (ev) < N_PROF_EVENTS); \ + event_ctr[ev]++; \ + } while (False); + +#else + +static void init_prof_mem ( void ) { } + void VG_(done_prof_mem) ( void ) { } + +#define PROF_EVENT(ev) /* */ + +#endif + +/* Event index. If just the name of the fn is given, this means the + number of calls to the fn. Otherwise it is the specified event. 
+ + 10 alloc_secondary_map + + 20 get_abit + 21 get_vbyte + 22 set_abit + 23 set_vbyte + 24 get_abits4_ALIGNED + 25 get_vbytes4_ALIGNED + + 30 set_address_range_perms + 31 set_address_range_perms(lower byte loop) + 32 set_address_range_perms(quadword loop) + 33 set_address_range_perms(upper byte loop) + + 35 make_noaccess + 36 make_writable + 37 make_readable + 38 make_readwritable + + 40 copy_address_range_perms + 41 copy_address_range_perms(byte loop) + 42 check_writable + 43 check_writable(byte loop) + 44 check_readable + 45 check_readable(byte loop) + 46 check_readable_asciiz + 47 check_readable_asciiz(byte loop) + + 50 make_aligned_word_NOACCESS + 51 make_aligned_word_WRITABLE + + 60 helperc_LOADV4 + 61 helperc_STOREV4 + 62 helperc_LOADV2 + 63 helperc_STOREV2 + 64 helperc_LOADV1 + 65 helperc_STOREV1 + + 70 vgm_rd_V4_SLOWLY + 71 vgm_wr_V4_SLOWLY + 72 vgm_rd_V2_SLOWLY + 73 vgm_wr_V2_SLOWLY + 74 vgm_rd_V1_SLOWLY + 75 vgm_wr_V1_SLOWLY + + 80 fpu_read + 81 fpu_read aligned 4 + 82 fpu_read aligned 8 + 83 fpu_read 2 + 84 fpu_read 10 + + 85 fpu_write + 86 fpu_write aligned 4 + 87 fpu_write aligned 8 + 88 fpu_write 2 + 89 fpu_write 10 + + 90 fpu_read_check_SLOWLY + 91 fpu_read_check_SLOWLY(byte loop) + 92 fpu_write_check_SLOWLY + 93 fpu_write_check_SLOWLY(byte loop) + + 100 is_plausible_stack_addr + 101 handle_esp_assignment + 102 handle_esp_assignment(-4) + 103 handle_esp_assignment(+4) + 104 handle_esp_assignment(+16) + 105 handle_esp_assignment(-12) + 106 handle_esp_assignment(+8) + 107 handle_esp_assignment(-8) + + 110 vg_handle_esp_assignment_SLOWLY + 111 vg_handle_esp_assignment_SLOWLY(normal; move down) + 112 vg_handle_esp_assignment_SLOWLY(normal; move up) + 113 vg_handle_esp_assignment_SLOWLY(normal) + 114 vg_handle_esp_assignment_SLOWLY(>= HUGE_DELTA) +*/ + +/*------------------------------------------------------------*/ +/*--- Function declarations. ---*/ +/*------------------------------------------------------------*/ + +/* Set permissions for an address range. 
Not speed-critical. */ +void VGM_(make_noaccess) ( Addr a, UInt len ); +void VGM_(make_writable) ( Addr a, UInt len ); +void VGM_(make_readable) ( Addr a, UInt len ); + +/* Check permissions for an address range. Not speed-critical. */ +Bool VGM_(check_writable) ( Addr a, UInt len, Addr* bad_addr ); +Bool VGM_(check_readable) ( Addr a, UInt len, Addr* bad_addr ); +Bool VGM_(check_readable_asciiz) ( Addr a, Addr* bad_addr ); + +static UInt vgm_rd_V4_SLOWLY ( Addr a ); +static UInt vgm_rd_V2_SLOWLY ( Addr a ); +static UInt vgm_rd_V1_SLOWLY ( Addr a ); +static void vgm_wr_V4_SLOWLY ( Addr a, UInt vbytes ); +static void vgm_wr_V2_SLOWLY ( Addr a, UInt vbytes ); +static void vgm_wr_V1_SLOWLY ( Addr a, UInt vbytes ); +static void fpu_read_check_SLOWLY ( Addr addr, Int size ); +static void fpu_write_check_SLOWLY ( Addr addr, Int size ); + + +/*------------------------------------------------------------*/ +/*--- Data defns. ---*/ +/*------------------------------------------------------------*/ + +typedef + struct { + UChar abits[8192]; + UChar vbyte[65536]; + } + SecMap; + +/* These two are statically allocated. Should they be non-public? 
*/ +SecMap* VG_(primary_map)[ /*65536*/ 262144 ]; +static SecMap vg_distinguished_secondary_map; + +#define IS_DISTINGUISHED_SM(smap) \ + ((smap) == &vg_distinguished_secondary_map) + +#define ENSURE_MAPPABLE(addr,caller) \ + do { \ + if (IS_DISTINGUISHED_SM(VG_(primary_map)[(addr) >> 16])) { \ + VG_(primary_map)[(addr) >> 16] = alloc_secondary_map(caller); \ + /* VG_(printf)("new 2map because of %p\n", addr); */ \ + } \ + } while(0) + +#define BITARR_SET(aaa_p,iii_p) \ + do { \ + UInt iii = (UInt)iii_p; \ + UChar* aaa = (UChar*)aaa_p; \ + aaa[iii >> 3] |= (1 << (iii & 7)); \ + } while (0) + +#define BITARR_CLEAR(aaa_p,iii_p) \ + do { \ + UInt iii = (UInt)iii_p; \ + UChar* aaa = (UChar*)aaa_p; \ + aaa[iii >> 3] &= ~(1 << (iii & 7)); \ + } while (0) + +#define BITARR_TEST(aaa_p,iii_p) \ + (0 != (((UChar*)aaa_p)[ ((UInt)iii_p) >> 3 ] \ + & (1 << (((UInt)iii_p) & 7)))) \ + + +#define VGM_BIT_VALID 0 +#define VGM_BIT_INVALID 1 + +#define VGM_NIBBLE_VALID 0 +#define VGM_NIBBLE_INVALID 0xF + +#define VGM_BYTE_VALID 0 +#define VGM_BYTE_INVALID 0xFF + +#define VGM_WORD_VALID 0 +#define VGM_WORD_INVALID 0xFFFFFFFF + +#define VGM_EFLAGS_VALID 0xFFFFFFFE +#define VGM_EFLAGS_INVALID 0xFFFFFFFF + + +#define IS_ALIGNED4_ADDR(aaa_p) (0 == (((UInt)(aaa_p)) & 3)) + + +/*------------------------------------------------------------*/ +/*--- Basic bitmap management, reading and writing. ---*/ +/*------------------------------------------------------------*/ + +/* Allocate and initialise a secondary map. */ + +static SecMap* alloc_secondary_map ( __attribute__ ((unused)) + Char* caller ) +{ + SecMap* map; + UInt i; + PROF_EVENT(10); + + /* Mark all bytes as invalid access and invalid value. */ + + /* It just happens that a SecMap occupies exactly 18 pages -- + although this isn't important, so the following assert is + spurious. 
*/ + vg_assert(0 == (sizeof(SecMap) % VKI_BYTES_PER_PAGE)); + map = VG_(get_memory_from_mmap)( sizeof(SecMap) ); + + for (i = 0; i < 8192; i++) + map->abits[i] = VGM_BYTE_INVALID; /* Invalid address */ + for (i = 0; i < 65536; i++) + map->vbyte[i] = VGM_BYTE_INVALID; /* Invalid Value */ + + /* VG_(printf)("ALLOC_2MAP(%s)\n", caller ); */ + return map; +} + + +/* Basic reading/writing of the bitmaps, for byte-sized accesses. */ + +static __inline__ UChar get_abit ( Addr a ) +{ + SecMap* sm = VG_(primary_map)[a >> 16]; + UInt sm_off = a & 0xFFFF; + PROF_EVENT(20); + return BITARR_TEST(sm->abits, sm_off) + ? VGM_BIT_INVALID : VGM_BIT_VALID; +} + +static __inline__ UChar get_vbyte ( Addr a ) +{ + SecMap* sm = VG_(primary_map)[a >> 16]; + UInt sm_off = a & 0xFFFF; + PROF_EVENT(21); + return sm->vbyte[sm_off]; +} + +static __inline__ void set_abit ( Addr a, UChar abit ) +{ + SecMap* sm; + UInt sm_off; + PROF_EVENT(22); + ENSURE_MAPPABLE(a, "set_abit"); + sm = VG_(primary_map)[a >> 16]; + sm_off = a & 0xFFFF; + if (abit) + BITARR_SET(sm->abits, sm_off); + else + BITARR_CLEAR(sm->abits, sm_off); +} + +static __inline__ void set_vbyte ( Addr a, UChar vbyte ) +{ + SecMap* sm; + UInt sm_off; + PROF_EVENT(23); + ENSURE_MAPPABLE(a, "set_vbyte"); + sm = VG_(primary_map)[a >> 16]; + sm_off = a & 0xFFFF; + sm->vbyte[sm_off] = vbyte; +} + + +/* Reading/writing of the bitmaps, for aligned word-sized accesses. 
*/ + +static __inline__ UChar get_abits4_ALIGNED ( Addr a ) +{ + SecMap* sm; + UInt sm_off; + UChar abits8; + PROF_EVENT(24); +# ifdef VG_DEBUG_MEMORY + vg_assert(IS_ALIGNED4_ADDR(a)); +# endif + sm = VG_(primary_map)[a >> 16]; + sm_off = a & 0xFFFF; + abits8 = sm->abits[sm_off >> 3]; + abits8 >>= (a & 4 /* 100b */); /* a & 4 is either 0 or 4 */ + abits8 &= 0x0F; + return abits8; +} + +static UInt __inline__ get_vbytes4_ALIGNED ( Addr a ) +{ + SecMap* sm = VG_(primary_map)[a >> 16]; + UInt sm_off = a & 0xFFFF; + PROF_EVENT(25); +# ifdef VG_DEBUG_MEMORY + vg_assert(IS_ALIGNED4_ADDR(a)); +# endif + return ((UInt*)(sm->vbyte))[sm_off >> 2]; +} + + +/*------------------------------------------------------------*/ +/*--- Setting permissions over address ranges. ---*/ +/*------------------------------------------------------------*/ + +static void set_address_range_perms ( Addr a, UInt len, + UInt example_a_bit, + UInt example_v_bit ) +{ + UChar vbyte, abyte8; + UInt vword4, sm_off; + SecMap* sm; + + PROF_EVENT(30); + + if (len == 0) + return; + + if (len > 100 * 1000 * 1000) + VG_(message)(Vg_UserMsg, + "Warning: set address range perms: " + "large range %d, a %d, v %d", + len, example_a_bit, example_v_bit ); + + VGP_PUSHCC(VgpSARP); + + /* Requests to change permissions of huge address ranges may + indicate bugs in our machinery. 30,000,000 is arbitrary, but so + far all legitimate requests have fallen beneath that size. */ + /* 4 Mar 02: this is just stupid; get rid of it. */ + /* vg_assert(len < 30000000); */ + + /* Check the permissions make sense. */ + vg_assert(example_a_bit == VGM_BIT_VALID + || example_a_bit == VGM_BIT_INVALID); + vg_assert(example_v_bit == VGM_BIT_VALID + || example_v_bit == VGM_BIT_INVALID); + if (example_a_bit == VGM_BIT_INVALID) + vg_assert(example_v_bit == VGM_BIT_INVALID); + + /* The validity bits to write. */ + vbyte = example_v_bit==VGM_BIT_VALID + ? 
VGM_BYTE_VALID : VGM_BYTE_INVALID; + + /* In order that we can charge through the address space at 8 + bytes/main-loop iteration, make up some perms. */ + abyte8 = (example_a_bit << 7) + | (example_a_bit << 6) + | (example_a_bit << 5) + | (example_a_bit << 4) + | (example_a_bit << 3) + | (example_a_bit << 2) + | (example_a_bit << 1) + | (example_a_bit << 0); + vword4 = (vbyte << 24) | (vbyte << 16) | (vbyte << 8) | vbyte; + +# ifdef VG_DEBUG_MEMORY + /* Do it ... */ + while (True) { + PROF_EVENT(31); + if (len == 0) break; + set_abit ( a, example_a_bit ); + set_vbyte ( a, vbyte ); + a++; + len--; + } + +# else + /* Slowly do parts preceding 8-byte alignment. */ + while (True) { + PROF_EVENT(31); + if (len == 0) break; + if ((a % 8) == 0) break; + set_abit ( a, example_a_bit ); + set_vbyte ( a, vbyte ); + a++; + len--; + } + + if (len == 0) { + VGP_POPCC; + return; + } + vg_assert((a % 8) == 0 && len > 0); + + /* Once aligned, go fast. */ + while (True) { + PROF_EVENT(32); + if (len < 8) break; + ENSURE_MAPPABLE(a, "set_address_range_perms(fast)"); + sm = VG_(primary_map)[a >> 16]; + sm_off = a & 0xFFFF; + sm->abits[sm_off >> 3] = abyte8; + ((UInt*)(sm->vbyte))[(sm_off >> 2) + 0] = vword4; + ((UInt*)(sm->vbyte))[(sm_off >> 2) + 1] = vword4; + a += 8; + len -= 8; + } + + if (len == 0) { + VGP_POPCC; + return; + } + vg_assert((a % 8) == 0 && len > 0 && len < 8); + + /* Finish the upper fragment. */ + while (True) { + PROF_EVENT(33); + if (len == 0) break; + set_abit ( a, example_a_bit ); + set_vbyte ( a, vbyte ); + a++; + len--; + } +# endif + + /* Check that zero page and highest page have not been written to + -- this could happen with buggy syscall wrappers. Today + (2001-04-26) had precisely such a problem with + __NR_setitimer. */ + vg_assert(VG_(first_and_last_secondaries_look_plausible)); + VGP_POPCC; +} + + +/* Set permissions for address ranges ... 
*/ + +void VGM_(make_noaccess) ( Addr a, UInt len ) +{ + PROF_EVENT(35); + set_address_range_perms ( a, len, VGM_BIT_INVALID, VGM_BIT_INVALID ); +} + +void VGM_(make_writable) ( Addr a, UInt len ) +{ + PROF_EVENT(36); + set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_INVALID ); +} + +void VGM_(make_readable) ( Addr a, UInt len ) +{ + PROF_EVENT(37); + set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_VALID ); +} + +void VGM_(make_readwritable) ( Addr a, UInt len ) +{ + PROF_EVENT(38); + set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_VALID ); +} + +/* Block-copy permissions (needed for implementing realloc()). */ + +void VGM_(copy_address_range_perms) ( Addr src, Addr dst, UInt len ) +{ + UInt i; + PROF_EVENT(40); + for (i = 0; i < len; i++) { + UChar abit = get_abit ( src+i ); + UChar vbyte = get_vbyte ( src+i ); + PROF_EVENT(41); + set_abit ( dst+i, abit ); + set_vbyte ( dst+i, vbyte ); + } +} + + +/* Check permissions for address range. If inadequate permissions + exist, *bad_addr is set to the offending address, so the caller can + know what it is. */ + +Bool VGM_(check_writable) ( Addr a, UInt len, Addr* bad_addr ) +{ + UInt i; + UChar abit; + PROF_EVENT(42); + for (i = 0; i < len; i++) { + PROF_EVENT(43); + abit = get_abit(a); + if (abit == VGM_BIT_INVALID) { + if (bad_addr != NULL) *bad_addr = a; + return False; + } + a++; + } + return True; +} + +Bool VGM_(check_readable) ( Addr a, UInt len, Addr* bad_addr ) +{ + UInt i; + UChar abit; + UChar vbyte; + PROF_EVENT(44); + for (i = 0; i < len; i++) { + abit = get_abit(a); + vbyte = get_vbyte(a); + PROF_EVENT(45); + if (abit != VGM_BIT_VALID || vbyte != VGM_BYTE_VALID) { + if (bad_addr != NULL) *bad_addr = a; + return False; + } + a++; + } + return True; +} + + +/* Check a zero-terminated ascii string. Tricky -- don't want to + examine the actual bytes, to find the end, until we're sure it is + safe to do so. 
*/ + +Bool VGM_(check_readable_asciiz) ( Addr a, Addr* bad_addr ) +{ + UChar abit; + UChar vbyte; + PROF_EVENT(46); + while (True) { + PROF_EVENT(47); + abit = get_abit(a); + vbyte = get_vbyte(a); + if (abit != VGM_BIT_VALID || vbyte != VGM_BYTE_VALID) { + if (bad_addr != NULL) *bad_addr = a; + return False; + } + /* Ok, a is safe to read. */ + if (* ((UChar*)a) == 0) return True; + a++; + } +} + + +/* Setting permissions for aligned words. This supports fast stack + operations. */ + +static __inline__ void make_aligned_word_NOACCESS ( Addr a ) +{ + SecMap* sm; + UInt sm_off; + UChar mask; + PROF_EVENT(50); +# ifdef VG_DEBUG_MEMORY + vg_assert(IS_ALIGNED4_ADDR(a)); +# endif + ENSURE_MAPPABLE(a, "make_aligned_word_NOACCESS"); + sm = VG_(primary_map)[a >> 16]; + sm_off = a & 0xFFFF; + ((UInt*)(sm->vbyte))[sm_off >> 2] = VGM_WORD_INVALID; + mask = 0x0F; + mask <<= (a & 4 /* 100b */); /* a & 4 is either 0 or 4 */ + /* mask now contains 1s where we wish to make address bits + invalid (1s). */ + sm->abits[sm_off >> 3] |= mask; +} + +static __inline__ void make_aligned_word_WRITABLE ( Addr a ) +{ + SecMap* sm; + UInt sm_off; + UChar mask; + PROF_EVENT(51); +# ifdef VG_DEBUG_MEMORY + vg_assert(IS_ALIGNED4_ADDR(a)); +# endif + ENSURE_MAPPABLE(a, "make_aligned_word_WRITABLE"); + sm = VG_(primary_map)[a >> 16]; + sm_off = a & 0xFFFF; + ((UInt*)(sm->vbyte))[sm_off >> 2] = VGM_WORD_INVALID; + mask = 0x0F; + mask <<= (a & 4 /* 100b */); /* a & 4 is either 0 or 4 */ + /* mask now contains 1s where we wish to make address bits + invalid (0s). */ + sm->abits[sm_off >> 3] &= ~mask; +} + + +/*------------------------------------------------------------*/ +/*--- Functions called directly from generated code. ---*/ +/*------------------------------------------------------------*/ + +static __inline__ UInt rotateRight16 ( UInt x ) +{ + /* Amazingly, gcc turns this into a single rotate insn. 
*/ + return (x >> 16) | (x << 16); +} + + +static __inline__ UInt shiftRight16 ( UInt x ) +{ + return x >> 16; +} + + +/* Read/write 1/2/4 sized V bytes, and emit an address error if + needed. */ + +/* VG_(helperc_{LD,ST}V{1,2,4}) handle the common case fast. + Under all other circumstances, it defers to the relevant _SLOWLY + function, which can handle all situations. +*/ + +UInt VG_(helperc_LOADV4) ( Addr a ) +{ +# ifdef VG_DEBUG_MEMORY + return vgm_rd_V4_SLOWLY(a); +# else + UInt sec_no = rotateRight16(a) & 0x3FFFF; + SecMap* sm = VG_(primary_map)[sec_no]; + UInt a_off = (a & 0xFFFF) >> 3; + UChar abits = sm->abits[a_off]; + abits >>= (a & 4); + abits &= 15; + PROF_EVENT(60); + if (abits == VGM_NIBBLE_VALID) { + /* Handle common case quickly: a is suitably aligned, is mapped, + and is addressible. */ + UInt v_off = a & 0xFFFF; + return ((UInt*)(sm->vbyte))[ v_off >> 2 ]; + } else { + /* Slow but general case. */ + return vgm_rd_V4_SLOWLY(a); + } +# endif +} + +void VG_(helperc_STOREV4) ( Addr a, UInt vbytes ) +{ +# ifdef VG_DEBUG_MEMORY + vgm_wr_V4_SLOWLY(a, vbytes); +# else + UInt sec_no = rotateRight16(a) & 0x3FFFF; + SecMap* sm = VG_(primary_map)[sec_no]; + UInt a_off = (a & 0xFFFF) >> 3; + UChar abits = sm->abits[a_off]; + abits >>= (a & 4); + abits &= 15; + PROF_EVENT(61); + if (abits == VGM_NIBBLE_VALID) { + /* Handle common case quickly: a is suitably aligned, is mapped, + and is addressible. */ + UInt v_off = a & 0xFFFF; + ((UInt*)(sm->vbyte))[ v_off >> 2 ] = vbytes; + } else { + /* Slow but general case. */ + vgm_wr_V4_SLOWLY(a, vbytes); + } +# endif +} + +UInt VG_(helperc_LOADV2) ( Addr a ) +{ +# ifdef VG_DEBUG_MEMORY + return vgm_rd_V2_SLOWLY(a); +# else + UInt sec_no = rotateRight16(a) & 0x1FFFF; + SecMap* sm = VG_(primary_map)[sec_no]; + UInt a_off = (a & 0xFFFF) >> 3; + PROF_EVENT(62); + if (sm->abits[a_off] == VGM_BYTE_VALID) { + /* Handle common case quickly. 
*/ + UInt v_off = a & 0xFFFF; + return 0xFFFF0000 + | + (UInt)( ((UShort*)(sm->vbyte))[ v_off >> 1 ] ); + } else { + /* Slow but general case. */ + return vgm_rd_V2_SLOWLY(a); + } +# endif +} + +void VG_(helperc_STOREV2) ( Addr a, UInt vbytes ) +{ +# ifdef VG_DEBUG_MEMORY + vgm_wr_V2_SLOWLY(a, vbytes); +# else + UInt sec_no = rotateRight16(a) & 0x1FFFF; + SecMap* sm = VG_(primary_map)[sec_no]; + UInt a_off = (a & 0xFFFF) >> 3; + PROF_EVENT(63); + if (sm->abits[a_off] == VGM_BYTE_VALID) { + /* Handle common case quickly. */ + UInt v_off = a & 0xFFFF; + ((UShort*)(sm->vbyte))[ v_off >> 1 ] = vbytes & 0x0000FFFF; + } else { + /* Slow but general case. */ + vgm_wr_V2_SLOWLY(a, vbytes); + } +# endif +} + +UInt VG_(helperc_LOADV1) ( Addr a ) +{ +# ifdef VG_DEBUG_MEMORY + return vgm_rd_V1_SLOWLY(a); +# else + UInt sec_no = shiftRight16(a); + SecMap* sm = VG_(primary_map)[sec_no]; + UInt a_off = (a & 0xFFFF) >> 3; + PROF_EVENT(64); + if (sm->abits[a_off] == VGM_BYTE_VALID) { + /* Handle common case quickly. */ + UInt v_off = a & 0xFFFF; + return 0xFFFFFF00 + | + (UInt)( ((UChar*)(sm->vbyte))[ v_off ] ); + } else { + /* Slow but general case. */ + return vgm_rd_V1_SLOWLY(a); + } +# endif +} + +void VG_(helperc_STOREV1) ( Addr a, UInt vbytes ) +{ +# ifdef VG_DEBUG_MEMORY + vgm_wr_V1_SLOWLY(a, vbytes); +# else + UInt sec_no = shiftRight16(a); + SecMap* sm = VG_(primary_map)[sec_no]; + UInt a_off = (a & 0xFFFF) >> 3; + PROF_EVENT(65); + if (sm->abits[a_off] == VGM_BYTE_VALID) { + /* Handle common case quickly. */ + UInt v_off = a & 0xFFFF; + ((UChar*)(sm->vbyte))[ v_off ] = vbytes & 0x000000FF; + } else { + /* Slow but general case. */ + vgm_wr_V1_SLOWLY(a, vbytes); + } +# endif +} + + +/*------------------------------------------------------------*/ +/*--- Fallback functions to handle cases that the above ---*/ +/*--- VG_(helperc_{LD,ST}V{1,2,4}) can't manage. 
---*/ +/*------------------------------------------------------------*/ + +static UInt vgm_rd_V4_SLOWLY ( Addr a ) +{ + Bool a0ok, a1ok, a2ok, a3ok; + UInt vb0, vb1, vb2, vb3; + + PROF_EVENT(70); + + /* First establish independently the addressibility of the 4 bytes + involved. */ + a0ok = get_abit(a+0) == VGM_BIT_VALID; + a1ok = get_abit(a+1) == VGM_BIT_VALID; + a2ok = get_abit(a+2) == VGM_BIT_VALID; + a3ok = get_abit(a+3) == VGM_BIT_VALID; + + /* Also get the validity bytes for the address. */ + vb0 = (UInt)get_vbyte(a+0); + vb1 = (UInt)get_vbyte(a+1); + vb2 = (UInt)get_vbyte(a+2); + vb3 = (UInt)get_vbyte(a+3); + + /* Now distinguish 3 cases */ + + /* Case 1: the address is completely valid, so: + - no addressing error + - return V bytes as read from memory + */ + if (a0ok && a1ok && a2ok && a3ok) { + UInt vw = VGM_WORD_INVALID; + vw <<= 8; vw |= vb3; + vw <<= 8; vw |= vb2; + vw <<= 8; vw |= vb1; + vw <<= 8; vw |= vb0; + return vw; + } + + /* Case 2: the address is completely invalid. + - emit addressing error + - return V word indicating validity. + This sounds strange, but if we make loads from invalid addresses + give invalid data, we also risk producing a number of confusing + undefined-value errors later, which confuses the fact that the + error arose in the first place from an invalid address. + */ + /* VG_(printf)("%p (%d %d %d %d)\n", a, a0ok, a1ok, a2ok, a3ok); */ + if (!VG_(clo_partial_loads_ok) + || ((a & 3) != 0) + || (!a0ok && !a1ok && !a2ok && !a3ok)) { + VG_(record_address_error)( a, 4, False ); + return (VGM_BYTE_VALID << 24) | (VGM_BYTE_VALID << 16) + | (VGM_BYTE_VALID << 8) | VGM_BYTE_VALID; + } + + /* Case 3: the address is partially valid. + - no addressing error + - returned V word is invalid where the address is invalid, + and contains V bytes from memory otherwise. + Case 3 is only allowed if VG_(clo_partial_loads_ok) is True + (which is the default), and the address is 4-aligned. + If not, Case 2 will have applied. 
+ */ + vg_assert(VG_(clo_partial_loads_ok)); + { + UInt vw = VGM_WORD_INVALID; + vw <<= 8; vw |= (a3ok ? vb3 : VGM_BYTE_INVALID); + vw <<= 8; vw |= (a2ok ? vb2 : VGM_BYTE_INVALID); + vw <<= 8; vw |= (a1ok ? vb1 : VGM_BYTE_INVALID); + vw <<= 8; vw |= (a0ok ? vb0 : VGM_BYTE_INVALID); + return vw; + } +} + +static void vgm_wr_V4_SLOWLY ( Addr a, UInt vbytes ) +{ + /* Check the address for validity. */ + Bool aerr = False; + PROF_EVENT(71); + + if (get_abit(a+0) != VGM_BIT_VALID) aerr = True; + if (get_abit(a+1) != VGM_BIT_VALID) aerr = True; + if (get_abit(a+2) != VGM_BIT_VALID) aerr = True; + if (get_abit(a+3) != VGM_BIT_VALID) aerr = True; + + /* Store the V bytes, remembering to do it little-endian-ly. */ + set_vbyte( a+0, vbytes & 0x000000FF ); vbytes >>= 8; + set_vbyte( a+1, vbytes & 0x000000FF ); vbytes >>= 8; + set_vbyte( a+2, vbytes & 0x000000FF ); vbytes >>= 8; + set_vbyte( a+3, vbytes & 0x000000FF ); + + /* If an address error has happened, report it. */ + if (aerr) + VG_(record_address_error)( a, 4, True ); +} + +static UInt vgm_rd_V2_SLOWLY ( Addr a ) +{ + /* Check the address for validity. */ + UInt vw = VGM_WORD_INVALID; + Bool aerr = False; + PROF_EVENT(72); + + if (get_abit(a+0) != VGM_BIT_VALID) aerr = True; + if (get_abit(a+1) != VGM_BIT_VALID) aerr = True; + + /* Fetch the V bytes, remembering to do it little-endian-ly. */ + vw <<= 8; vw |= (UInt)get_vbyte(a+1); + vw <<= 8; vw |= (UInt)get_vbyte(a+0); + + /* If an address error has happened, report it. */ + if (aerr) { + VG_(record_address_error)( a, 2, False ); + vw = (VGM_BYTE_INVALID << 24) | (VGM_BYTE_INVALID << 16) + | (VGM_BYTE_VALID << 8) | (VGM_BYTE_VALID); + } + return vw; +} + +static void vgm_wr_V2_SLOWLY ( Addr a, UInt vbytes ) +{ + /* Check the address for validity. */ + Bool aerr = False; + PROF_EVENT(73); + + if (get_abit(a+0) != VGM_BIT_VALID) aerr = True; + if (get_abit(a+1) != VGM_BIT_VALID) aerr = True; + + /* Store the V bytes, remembering to do it little-endian-ly. 
*/ + set_vbyte( a+0, vbytes & 0x000000FF ); vbytes >>= 8; + set_vbyte( a+1, vbytes & 0x000000FF ); + + /* If an address error has happened, report it. */ + if (aerr) + VG_(record_address_error)( a, 2, True ); +} + +static UInt vgm_rd_V1_SLOWLY ( Addr a ) +{ + /* Check the address for validity. */ + UInt vw = VGM_WORD_INVALID; + Bool aerr = False; + PROF_EVENT(74); + + if (get_abit(a+0) != VGM_BIT_VALID) aerr = True; + + /* Fetch the V byte. */ + vw <<= 8; vw |= (UInt)get_vbyte(a+0); + + /* If an address error has happened, report it. */ + if (aerr) { + VG_(record_address_error)( a, 1, False ); + vw = (VGM_BYTE_INVALID << 24) | (VGM_BYTE_INVALID << 16) + | (VGM_BYTE_INVALID << 8) | (VGM_BYTE_VALID); + } + return vw; +} + +static void vgm_wr_V1_SLOWLY ( Addr a, UInt vbytes ) +{ + /* Check the address for validity. */ + Bool aerr = False; + PROF_EVENT(75); + if (get_abit(a+0) != VGM_BIT_VALID) aerr = True; + + /* Store the V bytes, remembering to do it little-endian-ly. */ + set_vbyte( a+0, vbytes & 0x000000FF ); + + /* If an address error has happened, report it. */ + if (aerr) + VG_(record_address_error)( a, 1, True ); +} + + +/* --------------------------------------------------------------------- + Called from generated code, or from the assembly helpers. + Handlers for value check failures. + ------------------------------------------------------------------ */ + +void VG_(helperc_value_check0_fail) ( void ) +{ + VG_(record_value_error) ( 0 ); +} + +void VG_(helperc_value_check1_fail) ( void ) +{ + VG_(record_value_error) ( 1 ); +} + +void VG_(helperc_value_check2_fail) ( void ) +{ + VG_(record_value_error) ( 2 ); +} + +void VG_(helperc_value_check4_fail) ( void ) +{ + VG_(record_value_error) ( 4 ); +} + + +/* --------------------------------------------------------------------- + FPU load and store checks, called from generated code. 
+ ------------------------------------------------------------------ */ + +void VGM_(fpu_read_check) ( Addr addr, Int size ) +{ + /* Ensure the read area is both addressible and valid (ie, + readable). If there's an address error, don't report a value + error too; but if there isn't an address error, check for a + value error. + + Try to be reasonably fast on the common case; wimp out and defer + to fpu_read_check_SLOWLY for everything else. */ + + SecMap* sm; + UInt sm_off, v_off, a_off; + Addr addr4; + + PROF_EVENT(80); + +# ifdef VG_DEBUG_MEMORY + fpu_read_check_SLOWLY ( addr, size ); +# else + + if (size == 4) { + if (!IS_ALIGNED4_ADDR(addr)) goto slow4; + PROF_EVENT(81); + /* Properly aligned. */ + sm = VG_(primary_map)[addr >> 16]; + sm_off = addr & 0xFFFF; + a_off = sm_off >> 3; + if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow4; + /* Properly aligned and addressible. */ + v_off = addr & 0xFFFF; + if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) + goto slow4; + /* Properly aligned, addressible and with valid data. */ + return; + slow4: + fpu_read_check_SLOWLY ( addr, 4 ); + return; + } + + if (size == 8) { + if (!IS_ALIGNED4_ADDR(addr)) goto slow8; + PROF_EVENT(82); + /* Properly aligned. Do it in two halves. */ + addr4 = addr + 4; + /* First half. */ + sm = VG_(primary_map)[addr >> 16]; + sm_off = addr & 0xFFFF; + a_off = sm_off >> 3; + if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8; + /* First half properly aligned and addressible. */ + v_off = addr & 0xFFFF; + if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) + goto slow8; + /* Second half. */ + sm = VG_(primary_map)[addr4 >> 16]; + sm_off = addr4 & 0xFFFF; + a_off = sm_off >> 3; + if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8; + /* Second half properly aligned and addressible. */ + v_off = addr4 & 0xFFFF; + if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) + goto slow8; + /* Both halves properly aligned, addressible and with valid + data. 
*/ + return; + slow8: + fpu_read_check_SLOWLY ( addr, 8 ); + return; + } + + /* Can't be bothered to huff'n'puff to make these (allegedly) rare + cases go quickly. */ + if (size == 2) { + PROF_EVENT(83); + fpu_read_check_SLOWLY ( addr, 2 ); + return; + } + + if (size == 10) { + PROF_EVENT(84); + fpu_read_check_SLOWLY ( addr, 10 ); + return; + } + + VG_(printf)("size is %d\n", size); + VG_(panic)("vgm_fpu_read_check: unhandled size"); +# endif +} + + +void VGM_(fpu_write_check) ( Addr addr, Int size ) +{ + /* Ensure the written area is addressible, and moan if otherwise. + If it is addressible, make it valid, otherwise invalid. + */ + + SecMap* sm; + UInt sm_off, v_off, a_off; + Addr addr4; + + PROF_EVENT(85); + +# ifdef VG_DEBUG_MEMORY + fpu_write_check_SLOWLY ( addr, size ); +# else + + if (size == 4) { + if (!IS_ALIGNED4_ADDR(addr)) goto slow4; + PROF_EVENT(86); + /* Properly aligned. */ + sm = VG_(primary_map)[addr >> 16]; + sm_off = addr & 0xFFFF; + a_off = sm_off >> 3; + if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow4; + /* Properly aligned and addressible. Make valid. */ + v_off = addr & 0xFFFF; + ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID; + return; + slow4: + fpu_write_check_SLOWLY ( addr, 4 ); + return; + } + + if (size == 8) { + if (!IS_ALIGNED4_ADDR(addr)) goto slow8; + PROF_EVENT(87); + /* Properly aligned. Do it in two halves. */ + addr4 = addr + 4; + /* First half. */ + sm = VG_(primary_map)[addr >> 16]; + sm_off = addr & 0xFFFF; + a_off = sm_off >> 3; + if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8; + /* First half properly aligned and addressible. Make valid. */ + v_off = addr & 0xFFFF; + ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID; + /* Second half. */ + sm = VG_(primary_map)[addr4 >> 16]; + sm_off = addr4 & 0xFFFF; + a_off = sm_off >> 3; + if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8; + /* Second half properly aligned and addressible. 
*/ + v_off = addr4 & 0xFFFF; + ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID; + /* Properly aligned, addressible and with valid data. */ + return; + slow8: + fpu_write_check_SLOWLY ( addr, 8 ); + return; + } + + /* Can't be bothered to huff'n'puff to make these (allegedly) rare + cases go quickly. */ + if (size == 2) { + PROF_EVENT(88); + fpu_write_check_SLOWLY ( addr, 2 ); + return; + } + + if (size == 10) { + PROF_EVENT(89); + fpu_write_check_SLOWLY ( addr, 10 ); + return; + } + + VG_(printf)("size is %d\n", size); + VG_(panic)("vgm_fpu_write_check: unhandled size"); +# endif +} + + +/* --------------------------------------------------------------------- + Slow, general cases for FPU load and store checks. + ------------------------------------------------------------------ */ + +/* Generic version. Test for both addr and value errors, but if + there's an addr error, don't report a value error even if it + exists. */ + +void fpu_read_check_SLOWLY ( Addr addr, Int size ) +{ + Int i; + Bool aerr = False; + Bool verr = False; + PROF_EVENT(90); + for (i = 0; i < size; i++) { + PROF_EVENT(91); + if (get_abit(addr+i) != VGM_BIT_VALID) + aerr = True; + if (get_vbyte(addr+i) != VGM_BYTE_VALID) + verr = True; + } + + if (aerr) { + VG_(record_address_error)( addr, size, False ); + } else { + if (verr) + VG_(record_value_error)( size ); + } +} + + +/* Generic version. Test for addr errors. Valid addresses are + given valid values, and invalid addresses invalid values. 
*/ + +void fpu_write_check_SLOWLY ( Addr addr, Int size ) +{ + Int i; + Addr a_here; + Bool a_ok; + Bool aerr = False; + PROF_EVENT(92); + for (i = 0; i < size; i++) { + PROF_EVENT(93); + a_here = addr+i; + a_ok = get_abit(a_here) == VGM_BIT_VALID; + if (a_ok) { + set_vbyte(a_here, VGM_BYTE_VALID); + } else { + set_vbyte(a_here, VGM_BYTE_INVALID); + aerr = True; + } + } + if (aerr) { + VG_(record_address_error)( addr, size, True ); + } +} + + +/*------------------------------------------------------------*/ +/*--- Tracking permissions around %esp changes. ---*/ +/*------------------------------------------------------------*/ + +/* + The stack + ~~~~~~~~~ + The stack's segment seems to be dynamically extended downwards + by the kernel as the stack pointer moves down. Initially, a + 1-page (4k) stack is allocated. When %esp moves below that for + the first time, presumably a page fault occurs. The kernel + detects that the faulting address is in the range from %esp upwards + to the current valid stack. It then extends the stack segment + downwards for enough to cover the faulting address, and resumes + the process (invisibly). The process is unaware of any of this. + + That means that Valgrind can't spot when the stack segment is + being extended. Fortunately, we want to precisely and continuously + update stack permissions around %esp, so we need to spot all + writes to %esp anyway. + + The deal is: when %esp is assigned a lower value, the stack is + being extended. Create a secondary maps to fill in any holes + between the old stack ptr and this one, if necessary. Then + mark all bytes in the area just "uncovered" by this %esp change + as write-only. + + When %esp goes back up, mark the area receded over as unreadable + and unwritable. + + Just to record the %esp boundary conditions somewhere convenient: + %esp always points to the lowest live byte in the stack. All + addresses below %esp are not live; those at and above it are. 
+*/ + +/* Does this address look like something in the program's main + stack ? */ +Bool VG_(is_plausible_stack_addr) ( Addr aa ) +{ + UInt a = (UInt)aa; + PROF_EVENT(100); + if (a < VG_STACK_STARTS_AT && + a > VG_STACK_STARTS_AT - VG_PLAUSIBLE_STACK_SIZE) + return True; + else + return False; +} + + +/* Is this address within some small distance below %ESP? Used only + for the --workaround-gcc296-bugs kludge. */ +Bool VG_(is_just_below_ESP)( Addr aa ) +{ + UInt esp = VG_(baseBlock)[VGOFF_(m_esp)]; + if (esp > (UInt)aa + && (esp - (UInt)aa) <= VG_GCC296_BUG_STACK_SLOP) + return True; + else + return False; +} + + +/* Kludgey ... how much does %esp have to change before we reckon that + the application is switching stacks ? */ +#define VG_HUGE_DELTA (VG_PLAUSIBLE_STACK_SIZE / 4) + +static Addr get_page_base ( Addr a ) +{ + return a & ~(VKI_BYTES_PER_PAGE-1); +} + + +static void vg_handle_esp_assignment_SLOWLY ( Addr ); + +void VGM_(handle_esp_assignment) ( Addr new_espA ) +{ + UInt old_esp = VG_(baseBlock)[VGOFF_(m_esp)]; + UInt new_esp = (UInt)new_espA; + Int delta = ((Int)new_esp) - ((Int)old_esp); + + PROF_EVENT(101); + +# ifndef VG_DEBUG_MEMORY + + if (IS_ALIGNED4_ADDR(old_esp)) { + + /* Deal with the most common cases fast. These are ordered in + the sequence most common first. */ + + if (delta == -4) { + /* Moving down by 4 and properly aligned.. */ + PROF_EVENT(102); + make_aligned_word_WRITABLE(new_esp); + return; + } + + if (delta == 4) { + /* Moving up by 4 and properly aligned. */ + PROF_EVENT(103); + make_aligned_word_NOACCESS(old_esp); + return; + } + + if (delta == 16) { + /* Also surprisingly common. 
*/ + PROF_EVENT(104); + make_aligned_word_NOACCESS(old_esp); + make_aligned_word_NOACCESS(old_esp+4); + make_aligned_word_NOACCESS(old_esp+8); + make_aligned_word_NOACCESS(old_esp+12); + return; + } + + if (delta == -12) { + PROF_EVENT(105); + make_aligned_word_WRITABLE(new_esp); + make_aligned_word_WRITABLE(new_esp+4); + make_aligned_word_WRITABLE(new_esp+8); + return; + } + + if (delta == 8) { + PROF_EVENT(106); + make_aligned_word_NOACCESS(old_esp); + make_aligned_word_NOACCESS(old_esp+4); + return; + } + + if (delta == -8) { + PROF_EVENT(107); + make_aligned_word_WRITABLE(new_esp); + make_aligned_word_WRITABLE(new_esp+4); + return; + } + } + +# endif + + /* The above special cases handle 90% to 95% of all the stack + adjustments. The rest we give to the slow-but-general + mechanism. */ + vg_handle_esp_assignment_SLOWLY ( new_espA ); +} + + +static void vg_handle_esp_assignment_SLOWLY ( Addr new_espA ) +{ + UInt old_esp = VG_(baseBlock)[VGOFF_(m_esp)]; + UInt new_esp = (UInt)new_espA; + Int delta = ((Int)new_esp) - ((Int)old_esp); + + PROF_EVENT(110); + if (-(VG_HUGE_DELTA) < delta && delta < VG_HUGE_DELTA) { + /* "Ordinary" stack change. */ + if (new_esp < old_esp) { + /* Moving down; the stack is growing. */ + PROF_EVENT(111); + VGM_(make_writable) ( new_esp, old_esp - new_esp ); + return; + } + if (new_esp > old_esp) { + /* Moving up; the stack is shrinking. */ + PROF_EVENT(112); + VGM_(make_noaccess) ( old_esp, new_esp - old_esp ); + return; + } + PROF_EVENT(113); + return; /* when old_esp == new_esp */ + } + + /* %esp has changed by more than HUGE_DELTA. We take this to mean + that the application is switching to a new stack, for whatever + reason, and we attempt to initialise the permissions around the + new stack in some plausible way. All pretty kludgey; needed to + make netscape-4.07 run without generating thousands of error + contexts. 
+ + If we appear to be switching back to the main stack, don't mess + with the permissions in the area at and above the stack ptr. + Otherwise, we're switching to an alternative stack; make the + area above %esp readable -- this doesn't seem right -- the right + thing to do would be to make it writable -- but is needed to + avoid huge numbers of errs in netscape. To be investigated. */ + + { Addr invalid_down_to = get_page_base(new_esp) + - 0 * VKI_BYTES_PER_PAGE; + Addr valid_up_to = get_page_base(new_esp) + VKI_BYTES_PER_PAGE + + 0 * VKI_BYTES_PER_PAGE; + PROF_EVENT(114); + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg, "Warning: client switching stacks? " + "%%esp: %p --> %p", + old_esp, new_esp); + /* VG_(printf)("na %p, %%esp %p, wr %p\n", + invalid_down_to, new_esp, valid_up_to ); */ + VGM_(make_noaccess) ( invalid_down_to, new_esp - invalid_down_to ); + if (!VG_(is_plausible_stack_addr)(new_esp)) { + VGM_(make_readable) ( new_esp, valid_up_to - new_esp ); + } + } +} + + +/*--------------------------------------------------------------*/ +/*--- Initialise the memory audit system on program startup. ---*/ +/*--------------------------------------------------------------*/ + +/* Handle one entry derived from /proc/self/maps. */ + +static +void init_memory_audit_callback ( + Addr start, UInt size, + Char rr, Char ww, Char xx, + UInt foffset, UChar* filename ) +{ + UChar example_a_bit; + UChar example_v_bit; + UInt r_esp; + Bool is_stack_segment; + + /* Sanity check ... if this is the executable's text segment, + ensure it is loaded where we think it ought to be. Any file + name which doesn't contain ".so" is assumed to be the + executable. */ + if (filename != NULL + && xx == 'x' + && VG_(strstr(filename, ".so")) == NULL + ) { + /* We assume this is the executable. 
*/ + if (start != VG_ASSUMED_EXE_BASE) { + VG_(message)(Vg_UserMsg, + "FATAL: executable base addr not as assumed."); + VG_(message)(Vg_UserMsg, "name %s, actual %p, assumed %p.", + filename, start, VG_ASSUMED_EXE_BASE); + VG_(panic)("VG_ASSUMED_EXE_BASE doesn't match reality"); + } + } + + if (0) + VG_(message)(Vg_DebugMsg, + "initial map %8x-%8x %c%c%c? %8x (%d) (%s)", + start,start+size,rr,ww,xx,foffset, + size, filename?filename:(UChar*)"NULL"); + + r_esp = VG_(baseBlock)[VGOFF_(m_esp)]; + is_stack_segment = start <= r_esp && r_esp < start+size; + + /* Figure out the segment's permissions. + + All segments are addressible -- since a process can read its + own text segment. + + A read-but-not-write segment presumably contains initialised + data, so is all valid. Read-write segments presumably contains + uninitialised data, so is all invalid. */ + + /* ToDo: make this less bogus. */ + if (rr != 'r' && xx != 'x' && ww != 'w') { + /* Very bogus; this path never gets taken. */ + /* A no, V no */ + example_a_bit = VGM_BIT_INVALID; + example_v_bit = VGM_BIT_INVALID; + } else { + /* A yes, V yes */ + example_a_bit = VGM_BIT_VALID; + example_v_bit = VGM_BIT_VALID; + /* Causes a lot of errs for unknown reasons. + if (filename is valgrind.so + [careful about end conditions on filename]) { + example_a_bit = VGM_BIT_INVALID; + example_v_bit = VGM_BIT_INVALID; + } + */ + } + + set_address_range_perms ( start, size, + example_a_bit, example_v_bit ); + + if (is_stack_segment) { + /* This is the stack segment. Mark all below %esp as + noaccess. */ + if (0) + VG_(message)(Vg_DebugMsg, + "invalidating stack area: %x .. %x", + start,r_esp); + VGM_(make_noaccess)( start, r_esp-start ); + } +} + + + +/* ONLY HERE for sbrk() */ +#include + +/* Initialise the memory audit system. 
*/ +void VGM_(init_memory_audit) ( void ) +{ + Int i; + + init_prof_mem(); + + for (i = 0; i < 8192; i++) + vg_distinguished_secondary_map.abits[i] + = VGM_BYTE_INVALID; /* Invalid address */ + for (i = 0; i < 65536; i++) + vg_distinguished_secondary_map.vbyte[i] + = VGM_BYTE_INVALID; /* Invalid Value */ + + /* These entries gradually get overwritten as the used address + space expands. */ + for (i = 0; i < 65536; i++) + VG_(primary_map)[i] = &vg_distinguished_secondary_map; + /* These ones should never change; it's a bug in Valgrind if they + do. */ + for (i = 65536; i < 262144; i++) + VG_(primary_map)[i] = &vg_distinguished_secondary_map; + + /* Read the initial memory mapping from the /proc filesystem, and + set up our own maps accordingly. */ + VG_(read_procselfmaps) ( init_memory_audit_callback ); + + /* Last but not least, set up the shadow regs with reasonable (sic) + values. All regs are claimed to have valid values. + */ + VG_(baseBlock)[VGOFF_(sh_esp)] = VGM_WORD_VALID; + VG_(baseBlock)[VGOFF_(sh_ebp)] = VGM_WORD_VALID; + VG_(baseBlock)[VGOFF_(sh_eax)] = VGM_WORD_VALID; + VG_(baseBlock)[VGOFF_(sh_ecx)] = VGM_WORD_VALID; + VG_(baseBlock)[VGOFF_(sh_edx)] = VGM_WORD_VALID; + VG_(baseBlock)[VGOFF_(sh_ebx)] = VGM_WORD_VALID; + VG_(baseBlock)[VGOFF_(sh_esi)] = VGM_WORD_VALID; + VG_(baseBlock)[VGOFF_(sh_edi)] = VGM_WORD_VALID; + VG_(baseBlock)[VGOFF_(sh_eflags)] = VGM_EFLAGS_VALID; + + /* Record the end of the data segment, so that vg_syscall_mem.c + can make sense of calls to brk(). + */ + VGM_(curr_dataseg_end) = (Addr)sbrk(0); + if (VGM_(curr_dataseg_end) == (Addr)(-1)) + VG_(panic)("vgm_init_memory_audit: can't determine data-seg end"); + + if (0) + VG_(printf)("DS END is %p\n", VGM_(curr_dataseg_end)); + + /* Read the list of errors to suppress. This should be found in + the file specified by vg_clo_suppressions. 
*/ + VG_(load_suppressions)(); +} + + +/*------------------------------------------------------------*/ +/*--- Low-level address-space scanning, for the leak ---*/ +/*--- detector. ---*/ +/*------------------------------------------------------------*/ + +static +jmp_buf memscan_jmpbuf; + +static +void vg_scan_all_valid_memory_sighandler ( Int sigNo ) +{ + __builtin_longjmp(memscan_jmpbuf, 1); +} + +UInt VG_(scan_all_valid_memory) ( void (*notify_word)( Addr, UInt ) ) +{ + /* All volatile, because some gccs seem paranoid about longjmp(). */ + volatile UInt res, numPages, page, vbytes, primaryMapNo, nWordsNotified; + volatile Addr pageBase, addr; + volatile SecMap* sm; + volatile UChar abits; + volatile UInt page_first_word; + + vki_ksigaction sigbus_saved; + vki_ksigaction sigbus_new; + vki_ksigaction sigsegv_saved; + vki_ksigaction sigsegv_new; + vki_ksigset_t blockmask_saved; + vki_ksigset_t unblockmask_new; + + /* Temporarily install a new sigsegv and sigbus handler, and make + sure SIGBUS, SIGSEGV and SIGTERM are unblocked. (Perhaps the + first two can never be blocked anyway?) 
*/ + + sigbus_new.ksa_handler = vg_scan_all_valid_memory_sighandler; + sigbus_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART; + sigbus_new.ksa_restorer = NULL; + res = VG_(ksigemptyset)( &sigbus_new.ksa_mask ); + vg_assert(res == 0); + + sigsegv_new.ksa_handler = vg_scan_all_valid_memory_sighandler; + sigsegv_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART; + sigsegv_new.ksa_restorer = NULL; + res = VG_(ksigemptyset)( &sigsegv_new.ksa_mask ); + vg_assert(res == 0+0); + + res = VG_(ksigemptyset)( &unblockmask_new ); + res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGBUS ); + res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGSEGV ); + res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGTERM ); + vg_assert(res == 0+0+0); + + res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_new, &sigbus_saved ); + vg_assert(res == 0+0+0+0); + + res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_new, &sigsegv_saved ); + vg_assert(res == 0+0+0+0+0); + + res = VG_(ksigprocmask)( VKI_SIG_UNBLOCK, &unblockmask_new, &blockmask_saved ); + vg_assert(res == 0+0+0+0+0+0); + + /* The signal handlers are installed. Actually do the memory scan. */ + numPages = 1 << (32-VKI_BYTES_PER_PAGE_BITS); + vg_assert(numPages == 1048576); + vg_assert(4096 == (1 << VKI_BYTES_PER_PAGE_BITS)); + + nWordsNotified = 0; + + for (page = 0; page < numPages; page++) { + pageBase = page << VKI_BYTES_PER_PAGE_BITS; + primaryMapNo = pageBase >> 16; + sm = VG_(primary_map)[primaryMapNo]; + if (IS_DISTINGUISHED_SM(sm)) continue; + if (__builtin_setjmp(memscan_jmpbuf) == 0) { + /* try this ... 
*/ + page_first_word = * (volatile UInt*)pageBase; + /* we get here if we didn't get a fault */ + /* Scan the page */ + for (addr = pageBase; addr < pageBase+VKI_BYTES_PER_PAGE; addr += 4) { + abits = get_abits4_ALIGNED(addr); + vbytes = get_vbytes4_ALIGNED(addr); + if (abits == VGM_NIBBLE_VALID + && vbytes == VGM_WORD_VALID) { + nWordsNotified++; + notify_word ( addr, *(UInt*)addr ); + } + } + } else { + /* We get here if reading the first word of the page caused a + fault, which in turn caused the signal handler to longjmp. + Ignore this page. */ + if (0) + VG_(printf)( + "vg_scan_all_valid_memory_sighandler: ignoring page at %p\n", + pageBase + ); + } + } + + /* Restore signal state to whatever it was before. */ + res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_saved, NULL ); + vg_assert(res == 0 +0); + + res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_saved, NULL ); + vg_assert(res == 0 +0 +0); + + res = VG_(ksigprocmask)( VKI_SIG_SETMASK, &blockmask_saved, NULL ); + vg_assert(res == 0 +0 +0 +0); + + return nWordsNotified; +} + + +/*------------------------------------------------------------*/ +/*--- Detecting leaked (unreachable) malloc'd blocks. ---*/ +/*------------------------------------------------------------*/ + +/* A block is either + -- Proper-ly reached; a pointer to its start has been found + -- Interior-ly reached; only an interior pointer to it has been found + -- Unreached; so far, no pointers to any part of it have been found. +*/ +typedef + enum { Unreached, Interior, Proper } + Reachedness; + +/* A block record, used for generating err msgs. */ +typedef + struct _LossRecord { + struct _LossRecord* next; + /* Where these lost blocks were allocated. */ + ExeContext* allocated_at; + /* Their reachability. */ + Reachedness loss_mode; + /* Number of blocks and total # bytes involved. */ + UInt total_bytes; + UInt num_blocks; + } + LossRecord; + + +/* Find the i such that ptr points at or inside the block described by + shadows[i]. Return -1 if none found. 
This assumes that shadows[] + has been sorted on the ->data field. */ + +#ifdef VG_DEBUG_LEAKCHECK +/* Used to sanity-check the fast binary-search mechanism. */ +static Int find_shadow_for_OLD ( Addr ptr, + ShadowChunk** shadows, + Int n_shadows ) + +{ + Int i; + Addr a_lo, a_hi; + PROF_EVENT(70); + for (i = 0; i < n_shadows; i++) { + PROF_EVENT(71); + a_lo = shadows[i]->data; + a_hi = ((Addr)shadows[i]->data) + shadows[i]->size - 1; + if (a_lo <= ptr && ptr <= a_hi) + return i; + } + return -1; +} +#endif + + +static Int find_shadow_for ( Addr ptr, + ShadowChunk** shadows, + Int n_shadows ) +{ + Addr a_mid_lo, a_mid_hi; + Int lo, mid, hi, retVal; + PROF_EVENT(70); + /* VG_(printf)("find shadow for %p = ", ptr); */ + retVal = -1; + lo = 0; + hi = n_shadows-1; + while (True) { + PROF_EVENT(71); + + /* invariant: current unsearched space is from lo to hi, + inclusive. */ + if (lo > hi) break; /* not found */ + + mid = (lo + hi) / 2; + a_mid_lo = shadows[mid]->data; + a_mid_hi = ((Addr)shadows[mid]->data) + shadows[mid]->size - 1; + + if (ptr < a_mid_lo) { + hi = mid-1; + continue; + } + if (ptr > a_mid_hi) { + lo = mid+1; + continue; + } + vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi); + retVal = mid; + break; + } + +# ifdef VG_DEBUG_LEAKCHECK + vg_assert(retVal == find_shadow_for_OLD ( ptr, shadows, n_shadows )); +# endif + /* VG_(printf)("%d\n", retVal); */ + return retVal; +} + + + +static void sort_malloc_shadows ( ShadowChunk** shadows, UInt n_shadows ) +{ + Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, + 797161, 2391484 }; + Int lo = 0; + Int hi = n_shadows-1; + Int i, j, h, bigN, hp; + ShadowChunk* v; + + PROF_EVENT(72); + bigN = hi - lo + 1; if (bigN < 2) return; + hp = 0; while (incs[hp] < bigN) hp++; hp--; + + for (; hp >= 0; hp--) { + PROF_EVENT(73); + h = incs[hp]; + i = lo + h; + while (1) { + PROF_EVENT(74); + if (i > hi) break; + v = shadows[i]; + j = i; + while (shadows[j-h]->data > v->data) { + PROF_EVENT(75); + 
shadows[j] = shadows[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + shadows[j] = v; + i++; + } + } +} + +/* Globals, for the callback used by VG_(detect_memory_leaks). */ + +static ShadowChunk** vglc_shadows; +static Int vglc_n_shadows; +static Reachedness* vglc_reachedness; +static Addr vglc_min_mallocd_addr; +static Addr vglc_max_mallocd_addr; + +static +void vg_detect_memory_leaks_notify_addr ( Addr a, UInt word_at_a ) +{ + Int sh_no; + Addr ptr = (Addr)word_at_a; + if (ptr >= vglc_min_mallocd_addr && ptr <= vglc_max_mallocd_addr) { + /* Might be legitimate; we'll have to investigate further. */ + sh_no = find_shadow_for ( ptr, vglc_shadows, vglc_n_shadows ); + if (sh_no != -1) { + /* Found a block at/into which ptr points. */ + vg_assert(sh_no >= 0 && sh_no < vglc_n_shadows); + vg_assert(ptr < vglc_shadows[sh_no]->data + + vglc_shadows[sh_no]->size); + /* Decide whether Proper-ly or Interior-ly reached. */ + if (ptr == vglc_shadows[sh_no]->data) { + vglc_reachedness[sh_no] = Proper; + } else { + if (vglc_reachedness[sh_no] == Unreached) + vglc_reachedness[sh_no] = Interior; + } + } + } +} + + +void VG_(detect_memory_leaks) ( void ) +{ + Int i; + Int blocks_leaked, bytes_leaked; + Int blocks_dubious, bytes_dubious; + Int blocks_reachable, bytes_reachable; + Int n_lossrecords; + UInt bytes_notified; + + LossRecord* errlist; + LossRecord* p; + + Bool (*ec_comparer_fn) ( ExeContext*, ExeContext* ); + PROF_EVENT(76); + vg_assert(VG_(clo_instrument)); + + /* Decide how closely we want to match ExeContexts in leak + records. 
*/ + switch (VG_(clo_leak_resolution)) { + case 2: + ec_comparer_fn = VG_(eq_ExeContext_top2); + break; + case 4: + ec_comparer_fn = VG_(eq_ExeContext_top4); + break; + case VG_DEEPEST_BACKTRACE: + ec_comparer_fn = VG_(eq_ExeContext_all); + break; + default: + VG_(panic)("VG_(detect_memory_leaks): " + "bad VG_(clo_leak_resolution)"); + break; + } + + /* vg_get_malloc_shadows allocates storage for shadows */ + vglc_shadows = VG_(get_malloc_shadows)( &vglc_n_shadows ); + if (vglc_n_shadows == 0) { + vg_assert(vglc_shadows == NULL); + VG_(message)(Vg_UserMsg, + "No malloc'd blocks -- no leaks are possible.\n"); + return; + } + + VG_(message)(Vg_UserMsg, + "searching for pointers to %d not-freed blocks.", + vglc_n_shadows ); + sort_malloc_shadows ( vglc_shadows, vglc_n_shadows ); + + /* Sanity check; assert that the blocks are now in order and that + they don't overlap. */ + for (i = 0; i < vglc_n_shadows-1; i++) { + vg_assert( ((Addr)vglc_shadows[i]->data) + < ((Addr)vglc_shadows[i+1]->data) ); + vg_assert( ((Addr)vglc_shadows[i]->data) + vglc_shadows[i]->size + < ((Addr)vglc_shadows[i+1]->data) ); + } + + vglc_min_mallocd_addr = ((Addr)vglc_shadows[0]->data); + vglc_max_mallocd_addr = ((Addr)vglc_shadows[vglc_n_shadows-1]->data) + + vglc_shadows[vglc_n_shadows-1]->size - 1; + + vglc_reachedness + = VG_(malloc)( VG_AR_PRIVATE, vglc_n_shadows * sizeof(Reachedness) ); + for (i = 0; i < vglc_n_shadows; i++) + vglc_reachedness[i] = Unreached; + + /* Do the scan of memory. 
*/ + bytes_notified + = VG_(scan_all_valid_memory)( &vg_detect_memory_leaks_notify_addr ) + * VKI_BYTES_PER_WORD; + + VG_(message)(Vg_UserMsg, "checked %d bytes.", bytes_notified); + + blocks_leaked = bytes_leaked = 0; + blocks_dubious = bytes_dubious = 0; + blocks_reachable = bytes_reachable = 0; + + for (i = 0; i < vglc_n_shadows; i++) { + if (vglc_reachedness[i] == Unreached) { + blocks_leaked++; + bytes_leaked += vglc_shadows[i]->size; + } + else if (vglc_reachedness[i] == Interior) { + blocks_dubious++; + bytes_dubious += vglc_shadows[i]->size; + } + else if (vglc_reachedness[i] == Proper) { + blocks_reachable++; + bytes_reachable += vglc_shadows[i]->size; + } + } + + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, "definitely lost: %d bytes in %d blocks.", + bytes_leaked, blocks_leaked ); + VG_(message)(Vg_UserMsg, "possibly lost: %d bytes in %d blocks.", + bytes_dubious, blocks_dubious ); + VG_(message)(Vg_UserMsg, "still reachable: %d bytes in %d blocks.", + bytes_reachable, blocks_reachable ); + + + /* Common up the lost blocks so we can print sensible error + messages. 
*/ + + n_lossrecords = 0; + errlist = NULL; + for (i = 0; i < vglc_n_shadows; i++) { + for (p = errlist; p != NULL; p = p->next) { + if (p->loss_mode == vglc_reachedness[i] + && ec_comparer_fn ( + p->allocated_at, + vglc_shadows[i]->where) ) { + break; + } + } + if (p != NULL) { + p->num_blocks ++; + p->total_bytes += vglc_shadows[i]->size; + } else { + n_lossrecords ++; + p = VG_(malloc)(VG_AR_PRIVATE, sizeof(LossRecord)); + p->loss_mode = vglc_reachedness[i]; + p->allocated_at = vglc_shadows[i]->where; + p->total_bytes = vglc_shadows[i]->size; + p->num_blocks = 1; + p->next = errlist; + errlist = p; + } + } + + for (i = 0; i < n_lossrecords; i++) { + LossRecord* p_min = NULL; + UInt n_min = 0xFFFFFFFF; + for (p = errlist; p != NULL; p = p->next) { + if (p->num_blocks > 0 && p->total_bytes < n_min) { + n_min = p->total_bytes; + p_min = p; + } + } + vg_assert(p_min != NULL); + + if ( (!VG_(clo_show_reachable)) && p_min->loss_mode == Proper) { + p_min->num_blocks = 0; + continue; + } + + VG_(message)(Vg_UserMsg, ""); + VG_(message)( + Vg_UserMsg, + "%d bytes in %d blocks are %s in loss record %d of %d", + p_min->total_bytes, p_min->num_blocks, + p_min->loss_mode==Unreached ? "definitely lost" : + (p_min->loss_mode==Interior ? 
"possibly lost" + : "still reachable"), + i+1, n_lossrecords + ); + VG_(pp_ExeContext)(p_min->allocated_at); + p_min->num_blocks = 0; + } + + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, "LEAK SUMMARY:"); + VG_(message)(Vg_UserMsg, " possibly lost: %d bytes in %d blocks.", + bytes_dubious, blocks_dubious ); + VG_(message)(Vg_UserMsg, " definitely lost: %d bytes in %d blocks.", + bytes_leaked, blocks_leaked ); + VG_(message)(Vg_UserMsg, " still reachable: %d bytes in %d blocks.", + bytes_reachable, blocks_reachable ); + if (!VG_(clo_show_reachable)) { + VG_(message)(Vg_UserMsg, + "Reachable blocks (those to which a pointer was found) are not shown."); + VG_(message)(Vg_UserMsg, + "To see them, rerun with: --show-reachable=yes"); + } + VG_(message)(Vg_UserMsg, ""); + + VG_(free) ( VG_AR_PRIVATE, vglc_shadows ); + VG_(free) ( VG_AR_PRIVATE, vglc_reachedness ); +} + + +/* --------------------------------------------------------------------- + Sanity check machinery (permanently engaged). + ------------------------------------------------------------------ */ + +/* Check that nobody has spuriously claimed that the first or last 16 + pages (64 KB) of address space have become accessible. Failure of + the following do not per se indicate an internal consistency + problem, but they are so likely to that we really want to know + about it if so. */ + +Bool VG_(first_and_last_secondaries_look_plausible) ( void ) +{ + if (IS_DISTINGUISHED_SM(VG_(primary_map)[0]) + && IS_DISTINGUISHED_SM(VG_(primary_map)[65535])) { + return True; + } else { + return False; + } +} + + +/* A fast sanity check -- suitable for calling circa once per + millisecond. */ + +void VG_(do_sanity_checks) ( Bool force_expensive ) +{ + Int i; + Bool do_expensive_checks; + + if (VG_(sanity_level) < 1) return; + + /* --- First do all the tests that we can do quickly. ---*/ + + VG_(sanity_fast_count)++; + + /* Check that we haven't overrun our private stack. 
*/ + for (i = 0; i < 10; i++) { + vg_assert(VG_(stack)[i] + == ((UInt)(&VG_(stack)[i]) ^ 0xA4B3C2D1)); + vg_assert(VG_(stack)[10000-1-i] + == ((UInt)(&VG_(stack)[10000-i-1]) ^ 0xABCD4321)); + } + + /* Check stuff pertaining to the memory check system. */ + + if (VG_(clo_instrument)) { + + /* Check that the eflags tag is as expected. */ + UInt vv = VG_(baseBlock)[VGOFF_(sh_eflags)]; + vg_assert(vv == VGM_EFLAGS_VALID || VGM_EFLAGS_INVALID); + + /* Check that nobody has spuriously claimed that the first or + last 16 pages of memory have become accessible [...] */ + vg_assert(VG_(first_and_last_secondaries_look_plausible)); + } + +# if 0 + if ( (VG_(baseBlock)[VGOFF_(sh_eflags)] & 1) == 1) + VG_(printf)("UNDEF\n") ; else + VG_(printf)("def\n") ; +# endif + + /* --- Now some more expensive checks. ---*/ + + /* Once every 25 times, check some more expensive stuff. */ + + do_expensive_checks = False; + if (force_expensive) + do_expensive_checks = True; + if (VG_(sanity_level) > 1) + do_expensive_checks = True; + if (VG_(sanity_level) == 1 + && (VG_(sanity_fast_count) % 25) == 0) + do_expensive_checks = True; + + if (do_expensive_checks) { + VG_(sanity_slow_count)++; + +# if 0 + { void zzzmemscan(void); zzzmemscan(); } +# endif + + if ((VG_(sanity_fast_count) % 250) == 0) + VG_(sanity_check_tc_tt)(); + + if (VG_(clo_instrument)) { + /* Make sure nobody changed the distinguished secondary. */ + for (i = 0; i < 8192; i++) + vg_assert(vg_distinguished_secondary_map.abits[i] + == VGM_BYTE_INVALID); + for (i = 0; i < 65536; i++) + vg_assert(vg_distinguished_secondary_map.vbyte[i] + == VGM_BYTE_INVALID); + + /* Make sure that the upper 3/4 of the primary map hasn't + been messed with. */ + for (i = 65536; i < 262144; i++) + vg_assert(VG_(primary_map)[i] + == & vg_distinguished_secondary_map); + } + /* + if ((VG_(sanity_fast_count) % 500) == 0) VG_(mallocSanityCheckAll)(); + */ + } + + if (VG_(sanity_level) > 1) { + /* Check sanity of the low-level memory manager. 
Note that bugs + in the client's code can cause this to fail, so we don't do + this check unless specially asked for. And because it's + potentially very expensive. */ + VG_(mallocSanityCheckAll)(); + } +} + + +/* --------------------------------------------------------------------- + Debugging machinery (turn on to debug). Something of a mess. + ------------------------------------------------------------------ */ + +/* Print the value tags on the 8 integer registers & flag reg. */ + +static void uint_to_bits ( UInt x, Char* str ) +{ + Int i; + Int w = 0; + /* str must point to a space of at least 36 bytes. */ + for (i = 31; i >= 0; i--) { + str[w++] = (x & ( ((UInt)1) << i)) ? '1' : '0'; + if (i == 24 || i == 16 || i == 8) + str[w++] = ' '; + } + str[w++] = 0; + vg_assert(w == 36); +} + +void VG_(show_reg_tags) ( void ) +{ + Char buf1[36]; + Char buf2[36]; + UInt z_eax, z_ebx, z_ecx, z_edx, + z_esi, z_edi, z_ebp, z_esp, z_eflags; + + z_eax = VG_(baseBlock)[VGOFF_(sh_eax)]; + z_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)]; + z_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)]; + z_edx = VG_(baseBlock)[VGOFF_(sh_edx)]; + z_esi = VG_(baseBlock)[VGOFF_(sh_esi)]; + z_edi = VG_(baseBlock)[VGOFF_(sh_edi)]; + z_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)]; + z_esp = VG_(baseBlock)[VGOFF_(sh_esp)]; + z_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)]; + + uint_to_bits(z_eflags, buf1); + VG_(message)(Vg_DebugMsg, "efl %\n", buf1); + + uint_to_bits(z_eax, buf1); + uint_to_bits(z_ebx, buf2); + VG_(message)(Vg_DebugMsg, "eax %s ebx %s\n", buf1, buf2); + + uint_to_bits(z_ecx, buf1); + uint_to_bits(z_edx, buf2); + VG_(message)(Vg_DebugMsg, "ecx %s edx %s\n", buf1, buf2); + + uint_to_bits(z_esi, buf1); + uint_to_bits(z_edi, buf2); + VG_(message)(Vg_DebugMsg, "esi %s edi %s\n", buf1, buf2); + + uint_to_bits(z_ebp, buf1); + uint_to_bits(z_esp, buf2); + VG_(message)(Vg_DebugMsg, "ebp %s esp %s\n", buf1, buf2); +} + + +#if 0 +/* For debugging only. Scan the address space and touch all allegedly + addressible words. 
Useful for establishing where Valgrind's idea of + addressibility has diverged from what the kernel believes. */ + +static +void zzzmemscan_notify_word ( Addr a, UInt w ) +{ +} + +void zzzmemscan ( void ) +{ + Int n_notifies + = VG_(scan_all_valid_memory)( zzzmemscan_notify_word ); + VG_(printf)("zzzmemscan: n_bytes = %d\n", 4 * n_notifies ); +} +#endif + + + + +#if 0 +static Int zzz = 0; + +void show_bb ( Addr eip_next ) +{ + VG_(printf)("[%4d] ", zzz); + VG_(show_reg_tags)( &VG_(m_shadow ); + VG_(translate) ( eip_next, NULL, NULL, NULL ); +} +#endif /* 0 */ + +/*--------------------------------------------------------------------*/ +/*--- end vg_memory.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_messages.c b/coregrind/vg_messages.c new file mode 100644 index 000000000..343a85962 --- /dev/null +++ b/coregrind/vg_messages.c @@ -0,0 +1,105 @@ + +/*--------------------------------------------------------------------*/ +/*--- For sending error/informative messages. ---*/ +/*--- vg_message.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + + +#include "vg_include.h" + + +static char vg_mbuf[M_VG_MSGBUF]; +static int vg_n_mbuf; + +static void add_to_buf ( Char c ) +{ + if (vg_n_mbuf >= (M_VG_MSGBUF-1)) return; + vg_mbuf[vg_n_mbuf++] = c; + vg_mbuf[vg_n_mbuf] = 0; +} + + +/* Publically visible from here onwards. */ + +void +VG_(add_to_msg) ( Char *format, ... ) +{ + va_list vargs; + va_start(vargs,format); + VG_(vprintf) ( add_to_buf, format, vargs ); + va_end(vargs); +} + +/* Send a simple single-part message. */ +void VG_(message) ( VgMsgKind kind, Char* format, ... ) +{ + va_list vargs; + va_start(vargs,format); + VG_(start_msg) ( kind ); + VG_(vprintf) ( add_to_buf, format, vargs ); + va_end(vargs); + VG_(end_msg)(); +} + +void VG_(start_msg) ( VgMsgKind kind ) +{ + Char c; + vg_n_mbuf = 0; + vg_mbuf[vg_n_mbuf] = 0; + switch (kind) { + case Vg_UserMsg: c = '='; break; + case Vg_DebugMsg: c = '-'; break; + case Vg_DebugExtraMsg: c = '+'; break; + default: c = '?'; break; + } + VG_(add_to_msg)( "%c%c%d%c%c ", + c,c, VG_(getpid)(), c,c ); +} + + +void VG_(end_msg) ( void ) +{ + if (VG_(clo_logfile_fd) >= 0) { + add_to_buf('\n'); + VG_(write)(VG_(clo_logfile_fd), vg_mbuf, VG_(strlen)(vg_mbuf)); + } +} + + +void VG_(startup_logging) ( void ) +{ +} + +void VG_(shutdown_logging) ( void ) +{ +} + +/*--------------------------------------------------------------------*/ +/*--- end vg_message.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_mylibc.c b/coregrind/vg_mylibc.c new file mode 100644 index 000000000..2ba0753d3 --- /dev/null +++ b/coregrind/vg_mylibc.c @@ -0,0 +1,929 @@ + +/*--------------------------------------------------------------------*/ +/*--- 
Reimplementation of some C library stuff, to avoid depending ---*/ +/*--- on libc.so. ---*/ +/*--- vg_mylibc.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" + + + +/* --------------------------------------------------------------------- + Really Actually DO system calls. + ------------------------------------------------------------------ */ + +/* Ripped off from /usr/include/asm/unistd.h. 
*/ + +static +UInt vg_do_syscall0 ( UInt syscallno ) +{ + UInt __res; + __asm__ volatile ("int $0x80" + : "=a" (__res) + : "0" (syscallno) ); + return __res; +} + + +static +UInt vg_do_syscall1 ( UInt syscallno, UInt arg1 ) +{ + UInt __res; + __asm__ volatile ("int $0x80" + : "=a" (__res) + : "0" (syscallno), + "b" (arg1) ); + return __res; +} + + +static +UInt vg_do_syscall2 ( UInt syscallno, + UInt arg1, UInt arg2 ) +{ + UInt __res; + __asm__ volatile ("int $0x80" + : "=a" (__res) + : "0" (syscallno), + "b" (arg1), + "c" (arg2) ); + return __res; +} + + +static +UInt vg_do_syscall3 ( UInt syscallno, + UInt arg1, UInt arg2, UInt arg3 ) +{ + UInt __res; + __asm__ volatile ("int $0x80" + : "=a" (__res) + : "0" (syscallno), + "b" (arg1), + "c" (arg2), + "d" (arg3) ); + return __res; +} + + +static +UInt vg_do_syscall4 ( UInt syscallno, + UInt arg1, UInt arg2, UInt arg3, UInt arg4 ) +{ + UInt __res; + __asm__ volatile ("int $0x80" + : "=a" (__res) + : "0" (syscallno), + "b" (arg1), + "c" (arg2), + "d" (arg3), + "S" (arg4) ); + return __res; +} + + +#if 0 +static +UInt vg_do_syscall5 ( UInt syscallno, + UInt arg1, UInt arg2, UInt arg3, UInt arg4, + UInt arg5 ) +{ + UInt __res; + __asm__ volatile ("int $0x80" + : "=a" (__res) + : "0" (syscallno), + "b" (arg1), + "c" (arg2), + "d" (arg3), + "S" (arg4), + "D" (arg5) ); + return __res; +} +#endif + +/* --------------------------------------------------------------------- + Wrappers around system calls, and other stuff, to do with signals. + ------------------------------------------------------------------ */ + +/* sigemptyset, sigfullset, sigaddset and sigdelset return 0 on + success and -1 on error. 
+*/ +Int VG_(ksigfillset)( vki_ksigset_t* set ) +{ + Int i; + if (set == NULL) + return -1; + for (i = 0; i < VKI_KNSIG_WORDS; i++) + set->ws[i] = 0xFFFFFFFF; + return 0; +} + +Int VG_(ksigemptyset)( vki_ksigset_t* set ) +{ + Int i; + if (set == NULL) + return -1; + for (i = 0; i < VKI_KNSIG_WORDS; i++) + set->ws[i] = 0x0; + return 0; +} + +Int VG_(ksigaddset)( vki_ksigset_t* set, Int signum ) +{ + if (set == NULL) + return -1; + if (signum < 1 || signum > VKI_KNSIG) /* fix: was &&, which is always false and never rejects bad signums */ + return -1; + signum--; + set->ws[signum / VKI_KNSIG_BPW] |= (1 << (signum % VKI_KNSIG_BPW)); + return 0; +} + +Int VG_(ksigismember) ( vki_ksigset_t* set, Int signum ) +{ + if (set == NULL) + return -1; + if (signum < 1 || signum > VKI_KNSIG) /* fix: was &&, which is always false and never rejects bad signums */ + return -1; + signum--; + if (1 & ((set->ws[signum / VKI_KNSIG_BPW]) >> (signum % VKI_KNSIG_BPW))) + return 1; + else + return 0; +} + + +/* The functions sigaction, sigprocmask, sigpending and sigsuspend + return 0 on success and -1 on error. +*/ +Int VG_(ksigprocmask)( Int how, + const vki_ksigset_t* set, + vki_ksigset_t* oldset) +{ + Int res + = vg_do_syscall4(__NR_rt_sigprocmask, + how, (UInt)set, (UInt)oldset, + VKI_KNSIG_WORDS * VKI_BYTES_PER_WORD); + return VG_(is_kerror)(res) ? -1 : 0; +} + + +Int VG_(ksigaction) ( Int signum, + const vki_ksigaction* act, + vki_ksigaction* oldact) +{ + Int res + = vg_do_syscall4(__NR_rt_sigaction, + signum, (UInt)act, (UInt)oldact, + VKI_KNSIG_WORDS * VKI_BYTES_PER_WORD); + return VG_(is_kerror)(res) ? -1 : 0; +} + + +Int VG_(ksigaltstack)( const vki_kstack_t* ss, vki_kstack_t* oss ) +{ + Int res + = vg_do_syscall2(__NR_sigaltstack, (UInt)ss, (UInt)oss); + return VG_(is_kerror)(res) ?
-1 : 0; +} + + +Int VG_(ksignal)(Int signum, void (*sighandler)(Int)) +{ + Int res; + vki_ksigaction sa; + sa.ksa_handler = sighandler; + sa.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART; + sa.ksa_restorer = NULL; + res = VG_(ksigemptyset)( &sa.ksa_mask ); + vg_assert(res == 0); + res = vg_do_syscall4(__NR_rt_sigaction, + signum, (UInt)(&sa), (UInt)NULL, + VKI_KNSIG_WORDS * VKI_BYTES_PER_WORD); + return VG_(is_kerror)(res) ? -1 : 0; +} + + +/* --------------------------------------------------------------------- + mmap/munmap, exit + ------------------------------------------------------------------ */ + +/* Returns -1 on failure. */ +void* VG_(mmap)( void* start, UInt length, + UInt prot, UInt flags, UInt fd, UInt offset) +{ + Int res; + UInt args[6]; + args[0] = (UInt)start; + args[1] = length; + args[2] = prot; + args[3] = flags; + args[4] = fd; + args[5] = offset; + res = vg_do_syscall1(__NR_mmap, (UInt)(&(args[0])) ); + return VG_(is_kerror)(res) ? ((void*)(-1)) : (void*)res; +} + +/* Returns -1 on failure. */ +Int VG_(munmap)( void* start, Int length ) +{ + Int res = vg_do_syscall2(__NR_munmap, (UInt)start, (UInt)length ); + return VG_(is_kerror)(res) ? -1 : 0; +} + +void VG_(exit)( Int status ) +{ + (void)vg_do_syscall1(__NR_exit, (UInt)status ); + /* Why are we still alive here? */ + /*NOTREACHED*/ + vg_assert(2+2 == 5); +} + +/* --------------------------------------------------------------------- + printf implementation. The key function, vg_vprintf(), emits chars + into a caller-supplied function. Distantly derived from: + + vprintf replacement for Checker. + Copyright 1993, 1994, 1995 Tristan Gingold + Written September 1993 Tristan Gingold + Tristan Gingold, 8 rue Parmentier, F-91120 PALAISEAU, FRANCE + + (Checker itself was GPL'd.) + ------------------------------------------------------------------ */ + + +/* Some flags. */ +#define VG_MSG_SIGNED 1 /* The value is signed. */ +#define VG_MSG_ZJUSTIFY 2 /* Must justify with '0'. 
*/ +#define VG_MSG_LJUSTIFY 4 /* Must justify on the left. */ + + +/* Copy a string into the buffer. */ +static void +myvprintf_str ( void(*send)(Char), Int flags, Int width, Char* str, + Bool capitalise ) +{ +# define MAYBE_TOUPPER(ch) (capitalise ? VG_(toupper)(ch) : (ch)) + + Int i, extra; + Int len = VG_(strlen)(str); + + if (width == 0) { + for (i = 0; i < len; i++) + send(MAYBE_TOUPPER(str[i])); + return; + } + + if (len > width) { + for (i = 0; i < width; i++) + send(MAYBE_TOUPPER(str[i])); + return; + } + + extra = width - len; + if (!(flags & VG_MSG_LJUSTIFY)) { /* fix: sense was inverted; right-justify pads on the left */ + for (i = 0; i < extra; i++) + send(' '); + } + for (i = 0; i < len; i++) + send(MAYBE_TOUPPER(str[i])); + if (flags & VG_MSG_LJUSTIFY) { /* fix: left-justify pads on the right, matching myvprintf_int64 */ + for (i = 0; i < extra; i++) + send(' '); + } + +# undef MAYBE_TOUPPER +} + +/* Write P into the buffer according to these args: + * If SIGN is true, p is a signed. + * BASE is the base. + * If WITH_ZERO is true, '0' must be added. + * WIDTH is the width of the field. + */ +static void +myvprintf_int64 ( void(*send)(Char), Int flags, Int base, Int width, ULong p) +{ + Char buf[40]; + Int ind = 0; + Int i; + Bool neg = False; + Char *digits = "0123456789ABCDEF"; + + if (base < 2 || base > 16) + return; + + if ((flags & VG_MSG_SIGNED) && (Long)p < 0) { + p = - (Long)p; + neg = True; + } + + if (p == 0) + buf[ind++] = '0'; + else { + while (p > 0) { + buf[ind++] = digits[p % base]; + p /= base; + } + } + + if (neg) + buf[ind++] = '-'; + + if (width > 0 && !(flags & VG_MSG_LJUSTIFY)) { + for(; ind < width; ind++) { + vg_assert(ind < 39); + buf[ind] = (flags & VG_MSG_ZJUSTIFY) ? '0': ' '; + } + } + + /* Reverse copy to buffer. */ + for (i = ind -1; i >= 0; i--) + send(buf[i]); + + if (width > 0 && (flags & VG_MSG_LJUSTIFY)) { + for(; ind < width; ind++) + send((flags & VG_MSG_ZJUSTIFY) ? '0': ' '); + } +} + + +/* A simple vprintf().
*/ +void +VG_(vprintf) ( void(*send)(Char), const Char *format, va_list vargs ) +{ + int i; + int flags; + int width; + Bool is_long; + + /* We assume that vargs has already been initialised by the + caller, using va_start, and that the caller will similarly + clean up with va_end. + */ + + for (i = 0; format[i] != 0; i++) { + if (format[i] != '%') { + send(format[i]); + continue; + } + i++; + /* A '%' has been found. Ignore a trailing %. */ + if (format[i] == 0) + break; + if (format[i] == '%') { + /* `%%' is replaced by `%'. */ + send('%'); + continue; + } + flags = 0; + is_long = False; + width = 0; /* length of the field. */ + /* If '-' follows '%', justify on the left. */ + if (format[i] == '-') { + flags |= VG_MSG_LJUSTIFY; + i++; + } + /* If '0' follows '%', pads will be inserted. */ + if (format[i] == '0') { + flags |= VG_MSG_ZJUSTIFY; + i++; + } + /* Compute the field length. */ + while (format[i] >= '0' && format[i] <= '9') { + width *= 10; + width += format[i++] - '0'; + } + while (format[i] == 'l') { + i++; + is_long = True; + } + + switch (format[i]) { + case 'd': /* %d */ + flags |= VG_MSG_SIGNED; + if (is_long) + myvprintf_int64(send, flags, 10, width, + (ULong)(va_arg (vargs, Long))); + else + myvprintf_int64(send, flags, 10, width, + (ULong)(va_arg (vargs, Int))); + break; + case 'u': /* %u */ + if (is_long) + myvprintf_int64(send, flags, 10, width, + (ULong)(va_arg (vargs, ULong))); + else + myvprintf_int64(send, flags, 10, width, + (ULong)(va_arg (vargs, UInt))); + break; + case 'p': /* %p */ + send('0'); + send('x'); + myvprintf_int64(send, flags, 16, width, + (ULong)((UInt)va_arg (vargs, void *))); + break; + case 'x': /* %x */ + if (is_long) + myvprintf_int64(send, flags, 16, width, + (ULong)(va_arg (vargs, ULong))); + else + myvprintf_int64(send, flags, 16, width, + (ULong)(va_arg (vargs, UInt))); + break; + case 'c': /* %c */ + send(va_arg (vargs, int)); + break; + case 's': case 'S': { /* %s */ + char *str = va_arg (vargs, char *); + if 
(str == (char*) 0) str = "(null)"; + myvprintf_str(send, flags, width, str, format[i]=='S'); + break; + } + default: + break; + } + } +} + + +/* A general replacement for printf(). Note that only low-level + debugging info should be sent via here. The official route is to + to use vg_message(). This interface is deprecated. +*/ +static char myprintf_buf[100]; +static int n_myprintf_buf; + +static void add_to_myprintf_buf ( Char c ) +{ + if (n_myprintf_buf >= 100-10 /*paranoia*/ ) { + if (VG_(clo_logfile_fd) >= 0) + VG_(write) + (VG_(clo_logfile_fd), myprintf_buf, VG_(strlen)(myprintf_buf)); + n_myprintf_buf = 0; + myprintf_buf[n_myprintf_buf] = 0; + } + myprintf_buf[n_myprintf_buf++] = c; + myprintf_buf[n_myprintf_buf] = 0; +} + +void VG_(printf) ( const char *format, ... ) +{ + va_list vargs; + va_start(vargs,format); + + n_myprintf_buf = 0; + myprintf_buf[n_myprintf_buf] = 0; + VG_(vprintf) ( add_to_myprintf_buf, format, vargs ); + + if (n_myprintf_buf > 0 && VG_(clo_logfile_fd) >= 0) + VG_(write) + ( VG_(clo_logfile_fd), myprintf_buf, VG_(strlen)(myprintf_buf)); + + va_end(vargs); +} + + +/* A general replacement for sprintf(). */ +static Char* vg_sprintf_ptr; + +static void add_to_vg_sprintf_buf ( Char c ) +{ + *vg_sprintf_ptr++ = c; +} + +void VG_(sprintf) ( Char* buf, Char *format, ... ) +{ + va_list vargs; + va_start(vargs,format); + + vg_sprintf_ptr = buf; + VG_(vprintf) ( add_to_vg_sprintf_buf, format, vargs ); + add_to_vg_sprintf_buf(0); + + va_end(vargs); +} + + +/* --------------------------------------------------------------------- + Misc str* functions. 
+ ------------------------------------------------------------------ */ + +Bool VG_(isspace) ( Char c ) +{ + return (c == ' ' || c == '\n' || c == '\t' || c == 0); +} + + +Int VG_(strlen) ( const Char* str ) +{ + Int i = 0; + while (str[i] != 0) i++; + return i; +} + + +Long VG_(atoll) ( Char* str ) +{ + Bool neg = False; + Long n = 0; + if (*str == '-') { str++; neg = True; }; + while (*str >= '0' && *str <= '9') { + n = 10*n + (Long)(*str - '0'); + str++; + } + if (neg) n = -n; + return n; +} + + +Char* VG_(strcat) ( Char* dest, const Char* src ) +{ + Char* dest_orig = dest; + while (*dest) dest++; + while (*src) *dest++ = *src++; + *dest = 0; + return dest_orig; +} + + +Char* VG_(strncat) ( Char* dest, const Char* src, Int n ) +{ + Char* dest_orig = dest; + while (*dest) dest++; + while (*src && n > 0) { *dest++ = *src++; n--; } + *dest = 0; + return dest_orig; +} + + +Char* VG_(strpbrk) ( const Char* s, const Char* accept ) +{ + const Char* a; + while (*s) { + a = accept; + while (*a) + if (*a++ == *s) + return (Char *) s; + s++; + } + return NULL; +} + + +Char* VG_(strcpy) ( Char* dest, const Char* src ) +{ + Char* dest_orig = dest; + while (*src) *dest++ = *src++; + *dest = 0; + return dest_orig; +} + + +/* Copy bytes, not overrunning the end of dest and always ensuring + zero termination. 
*/ +void VG_(strncpy_safely) ( Char* dest, const Char* src, Int ndest ) +{ + Int i; + vg_assert(ndest > 0); + i = 0; + dest[i] = 0; + while (True) { + if (src[i] == 0) return; + if (i >= ndest-1) return; + dest[i] = src[i]; + i++; + dest[i] = 0; + } +} + + +void VG_(strncpy) ( Char* dest, const Char* src, Int ndest ) +{ + VG_(strncpy_safely)( dest, src, ndest+1 ); +} + + +Int VG_(strcmp) ( const Char* s1, const Char* s2 ) +{ + while (True) { + if (*s1 == 0 && *s2 == 0) return 0; + if (*s1 == 0) return -1; + if (*s2 == 0) return 1; + + if (*(UChar*)s1 < *(UChar*)s2) return -1; + if (*(UChar*)s1 > *(UChar*)s2) return 1; + + s1++; s2++; + } +} + + +Int VG_(strcmp_ws) ( const Char* s1, const Char* s2 ) +{ + while (True) { + if (VG_(isspace)(*s1) && VG_(isspace)(*s2)) return 0; + if (VG_(isspace)(*s1)) return -1; + if (VG_(isspace)(*s2)) return 1; + + if (*(UChar*)s1 < *(UChar*)s2) return -1; + if (*(UChar*)s1 > *(UChar*)s2) return 1; + + s1++; s2++; + } +} + + +Int VG_(strncmp) ( const Char* s1, const Char* s2, Int nmax ) +{ + Int n = 0; + while (True) { + if (n >= nmax) return 0; + if (*s1 == 0 && *s2 == 0) return 0; + if (*s1 == 0) return -1; + if (*s2 == 0) return 1; + + if (*(UChar*)s1 < *(UChar*)s2) return -1; + if (*(UChar*)s1 > *(UChar*)s2) return 1; + + s1++; s2++; n++; + } +} + + +Int VG_(strncmp_ws) ( const Char* s1, const Char* s2, Int nmax ) +{ + Int n = 0; + while (True) { + if (n >= nmax) return 0; + if (VG_(isspace)(*s1) && VG_(isspace)(*s2)) return 0; + if (VG_(isspace)(*s1)) return -1; + if (VG_(isspace)(*s2)) return 1; + + if (*(UChar*)s1 < *(UChar*)s2) return -1; + if (*(UChar*)s1 > *(UChar*)s2) return 1; + + s1++; s2++; n++; + } +} + + +Char* VG_(strstr) ( const Char* haystack, Char* needle ) +{ + Int n = VG_(strlen)(needle); + while (True) { + if (haystack[0] == 0) + return NULL; + if (VG_(strncmp)(haystack, needle, n) == 0) + return (Char*)haystack; + haystack++; + } +} + + +Char* VG_(strchr) ( const Char* s, Char c ) +{ + while (True) { + if (*s 
== c) return (Char*)s; + if (*s == 0) return NULL; + s++; + } +} + + +Char VG_(toupper) ( Char c ) +{ + if (c >= 'a' && c <= 'z') + return c + ('A' - 'a'); + else + return c; +} + + +Char* VG_(strdup) ( ArenaId aid, const Char* s ) +{ + Int i; + Int len = VG_(strlen)(s) + 1; + Char* res = VG_(malloc) (aid, len); + for (i = 0; i < len; i++) + res[i] = s[i]; + return res; +} + + +/* --------------------------------------------------------------------- + A simple string matching routine, purloined from Hugs98. + `*' matches any sequence of zero or more characters + `?' matches any single character exactly + `\c' matches the character c only (ignoring special chars) + c matches the character c only + ------------------------------------------------------------------ */ + +/* Keep track of recursion depth. */ +static Int recDepth; + +static Bool stringMatch_wrk ( Char* pat, Char* str ) +{ + vg_assert(recDepth >= 0 && recDepth < 500); + recDepth++; + for (;;) { + switch (*pat) { + case '\0' : return (*str=='\0'); + case '*' : do { + if (stringMatch_wrk(pat+1,str)) { + recDepth--; + return True; + } + } while (*str++); + recDepth--; + return False; + case '?' : if (*str++=='\0') { + recDepth--; + return False; + } + pat++; + break; + case '\\' : if (*++pat == '\0') { + recDepth--; + return False; /* spurious trailing \ in pattern */ + } + /* falls through to ... */ + default : if (*pat++ != *str++) { + recDepth--; + return False; + } + break; + } + } +} + +Bool VG_(stringMatch) ( Char* pat, Char* str ) +{ + Bool b; + recDepth = 0; + b = stringMatch_wrk ( pat, str ); + /* + VG_(printf)("%s %s %s\n", + b?"TRUE ":"FALSE", pat, str); + */ + return b; +} + + +/* --------------------------------------------------------------------- + Assertery. 
+ ------------------------------------------------------------------ */ + +#define EMAIL_ADDR "jseward@acm.org" + +void VG_(assert_fail) ( Char* expr, Char* file, Int line, Char* fn ) +{ + VG_(printf)("\n%s: %s:%d (%s): Assertion `%s' failed.\n", + "valgrind", file, line, fn, expr ); + VG_(printf)("Please report this bug to me at: %s\n\n", EMAIL_ADDR); + VG_(shutdown_logging)(); + /* vg_restore_SIGABRT(); */ + VG_(exit)(1); +} + +void VG_(panic) ( Char* str ) +{ + VG_(printf)("\nvalgrind: the `impossible' happened:\n %s\n", str); + VG_(printf)("Basic block ctr is approximately %llu\n", VG_(bbs_done) ); + VG_(printf)("Please report this bug to me at: %s\n\n", EMAIL_ADDR); + VG_(shutdown_logging)(); + /* vg_restore_SIGABRT(); */ + VG_(exit)(1); +} + +#undef EMAIL_ADDR + + +/* --------------------------------------------------------------------- + Primitive support for reading files. + ------------------------------------------------------------------ */ + +/* Returns -1 on failure. */ +Int VG_(open_read) ( Char* pathname ) +{ + Int fd; + /* VG_(printf)("vg_open_read %s\n", pathname ); */ + + /* This gets a segmentation fault if pathname isn't a valid file. + I don't know why. It seems like the call to open is getting + intercepted and messed with by glibc ... */ + /* fd = open( pathname, O_RDONLY ); */ + /* ... 
so we go direct to the horse's mouth, which seems to work + ok: */ + const int O_RDONLY = 0; /* See /usr/include/bits/fcntl.h */ + fd = vg_do_syscall3(__NR_open, (UInt)pathname, O_RDONLY, 0); + /* VG_(printf)("result = %d\n", fd); */ + if (VG_(is_kerror)(fd)) fd = -1; + return fd; +} + + +void VG_(close) ( Int fd ) +{ + vg_do_syscall1(__NR_close, fd); +} + + +Int VG_(read) ( Int fd, void* buf, Int count) +{ + Int res; + /* res = read( fd, buf, count ); */ + res = vg_do_syscall3(__NR_read, fd, (UInt)buf, count); + if (VG_(is_kerror)(res)) res = -1; + return res; +} + +Int VG_(write) ( Int fd, void* buf, Int count) +{ + Int res; + /* res = write( fd, buf, count ); */ + res = vg_do_syscall3(__NR_write, fd, (UInt)buf, count); + if (VG_(is_kerror)(res)) res = -1; + return res; +} + +/* Misc functions looking for a proper home. */ + +/* We do getenv without libc's help by snooping around in + VG_(client_env) as determined at startup time. */ +Char* VG_(getenv) ( Char* varname ) +{ + Int i, n; + n = VG_(strlen)(varname); + for (i = 0; VG_(client_envp)[i] != NULL; i++) { + Char* s = VG_(client_envp)[i]; + if (VG_(strncmp)(varname, s, n) == 0 && s[n] == '=') { + return & s[n+1]; + } + } + return NULL; +} + +/* You'd be amazed how many places need to know the current pid. */ +Int VG_(getpid) ( void ) +{ + Int res; + /* res = getpid(); */ + res = vg_do_syscall0(__NR_getpid); + return res; +} + + +/* --------------------------------------------------------------------- + Primitive support for bagging memory via mmap. 
+ ------------------------------------------------------------------ */ + +void* VG_(get_memory_from_mmap) ( Int nBytes ) +{ + static UInt tot_alloc = 0; + void* p = VG_(mmap)( 0, nBytes, + VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC, + VKI_MAP_PRIVATE|VKI_MAP_ANONYMOUS, -1, 0 ); + if (p != ((void*)(-1))) { + tot_alloc += (UInt)nBytes; + if (0) + VG_(printf)("get_memory_from_mmap: %d tot, %d req\n", + tot_alloc, nBytes); + return p; + } + VG_(printf)("vg_get_memory_from_mmap failed on request of %d\n", + nBytes); + VG_(panic)("vg_get_memory_from_mmap: out of memory! Fatal! Bye!\n"); +} + + +/*--------------------------------------------------------------------*/ +/*--- end vg_mylibc.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_procselfmaps.c b/coregrind/vg_procselfmaps.c new file mode 100644 index 000000000..b0733539f --- /dev/null +++ b/coregrind/vg_procselfmaps.c @@ -0,0 +1,201 @@ + +/*--------------------------------------------------------------------*/ +/*--- A simple parser for /proc/self/maps on Linux 2.4.X ---*/ +/*--- vg_procselfmaps.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + + +#include "vg_include.h" + + +/* static ... to keep it out of the stack frame. */ + +static Char procmap_buf[M_PROCMAP_BUF]; + + +/* Helper fns. */ + +static Int hexdigit ( Char c ) +{ + if (c >= '0' && c <= '9') return (Int)(c - '0'); + if (c >= 'a' && c <= 'f') return 10 + (Int)(c - 'a'); + if (c >= 'A' && c <= 'F') return 10 + (Int)(c - 'A'); + return -1; +} + +static Int readchar ( Char* buf, Char* ch ) +{ + if (*buf == 0) return 0; + *ch = *buf; + return 1; +} + +static Int readhex ( Char* buf, UInt* val ) +{ + Int n = 0; + *val = 0; + while (hexdigit(*buf) >= 0) { + *val = (*val << 4) + hexdigit(*buf); + n++; buf++; + } + return n; +} + + + +/* Read /proc/self/maps. For each map entry, call + record_mapping, passing it, in this order: + + start address in memory + length + r permissions char; either - or r + w permissions char; either - or w + x permissions char; either - or x + offset in file, or zero if no file + filename, zero terminated, or NULL if no file + + So the sig of the called fn might be + + void (*record_mapping)( Addr start, UInt size, + Char r, Char w, Char x, + UInt foffset, UChar* filename ) + + Note that the supplied filename is transiently stored; record_mapping + should make a copy if it wants to keep it. + + If there's a syntax error or other failure, just abort. +*/ + +void VG_(read_procselfmaps) ( + void (*record_mapping)( Addr, UInt, Char, Char, Char, UInt, UChar* ) +) +{ + Int i, j, n_tot, n_chunk, fd, i_eol; + Addr start, endPlusOne; + UChar* filename; + UInt foffset; + UChar rr, ww, xx, pp, ch; + + /* Read the initial memory mapping from the /proc filesystem. 
*/ + fd = VG_(open_read) ( "/proc/self/maps" ); + if (fd == -1) { + VG_(message)(Vg_UserMsg, "FATAL: can't open /proc/self/maps"); + VG_(exit)(1); + } + n_tot = 0; + do { + n_chunk = VG_(read) ( fd, &procmap_buf[n_tot], M_PROCMAP_BUF - n_tot ); + n_tot += n_chunk; + } while ( n_chunk > 0 && n_tot < M_PROCMAP_BUF ); + VG_(close)(fd); + if (n_tot >= M_PROCMAP_BUF-5) { + VG_(message)(Vg_UserMsg, "FATAL: M_PROCMAP_BUF is too small; " + "increase it and recompile"); + VG_(exit)(1); + } + if (n_tot == 0) { + VG_(message)(Vg_UserMsg, "FATAL: I/O error on /proc/self/maps" ); + VG_(exit)(1); + } + procmap_buf[n_tot] = 0; + if (0) + VG_(message)(Vg_DebugMsg, "raw:\n%s", procmap_buf ); + + /* Ok, it's safely aboard. Parse the entries. */ + + i = 0; + while (True) { + if (i >= n_tot) break; + + /* Read (without fscanf :) the pattern %8x-%8x %c%c%c%c %8x */ + j = readhex(&procmap_buf[i], &start); + if (j > 0) i += j; else goto syntaxerror; + j = readchar(&procmap_buf[i], &ch); + if (j == 1 && ch == '-') i += j; else goto syntaxerror; + j = readhex(&procmap_buf[i], &endPlusOne); + if (j > 0) i += j; else goto syntaxerror; + + j = readchar(&procmap_buf[i], &ch); + if (j == 1 && ch == ' ') i += j; else goto syntaxerror; + + j = readchar(&procmap_buf[i], &rr); + if (j == 1 && (rr == 'r' || rr == '-')) i += j; else goto syntaxerror; + j = readchar(&procmap_buf[i], &ww); + if (j == 1 && (ww == 'w' || ww == '-')) i += j; else goto syntaxerror; + j = readchar(&procmap_buf[i], &xx); + if (j == 1 && (xx == 'x' || xx == '-')) i += j; else goto syntaxerror; + /* I haven't a clue what this last field means. 
*/ + j = readchar(&procmap_buf[i], &pp); + if (j == 1 && (pp == 'p' || pp == '-' || pp == 's')) + i += j; else goto syntaxerror; + + j = readchar(&procmap_buf[i], &ch); + if (j == 1 && ch == ' ') i += j; else goto syntaxerror; + + j = readhex(&procmap_buf[i], &foffset); + if (j > 0) i += j; else goto syntaxerror; + + goto read_line_ok; + + syntaxerror: + VG_(message)(Vg_UserMsg, "FATAL: syntax error reading /proc/self/maps"); + { Int k; + VG_(printf)("last 50 chars: `"); + for (k = (i > 50 ? i-50 : 0); k <= i; k++) VG_(printf)("%c", procmap_buf[k]); /* fix: clamp start so we never read before the buffer */ + VG_(printf)("'\n"); + } + VG_(exit)(1); + + read_line_ok: + /* Try and find the name of the file mapped to this segment, if + it exists. */ + while (procmap_buf[i] != '\n' && i < M_PROCMAP_BUF-1) i++; + i_eol = i; + i--; + while (i >= 0 && !VG_(isspace)(procmap_buf[i])) i--; /* fix: bounds-check i before dereferencing (was reading procmap_buf[-1]) */ + i++; + if (i < i_eol-1 && procmap_buf[i] == '/') { + filename = &procmap_buf[i]; + filename[i_eol - i] = '\0'; + } else { + filename = NULL; + foffset = 0; + } + + (*record_mapping) ( start, endPlusOne-start, + rr, ww, xx, + foffset, filename ); + + i = i_eol + 1; + } +} + +/*--------------------------------------------------------------------*/ +/*--- end vg_procselfmaps.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_signals.c b/coregrind/vg_signals.c new file mode 100644 index 000000000..ed7ef67cb --- /dev/null +++ b/coregrind/vg_signals.c @@ -0,0 +1,823 @@ + +/*--------------------------------------------------------------------*/ +/*--- Implementation of POSIX signals. ---*/ +/*--- vg_signals.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes.
+ + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + + +#include "vg_include.h" +#include "vg_constants.h" +#include "vg_unsafe.h" + + +/* --------------------------------------------------------------------- + An implementation of signal sets and other grunge, identical to + that in the target kernels (Linux 2.2.X and 2.4.X). + ------------------------------------------------------------------ */ + + + +/* --------------------------------------------------------------------- + Signal state for this process. + ------------------------------------------------------------------ */ + +/* For each signal, the current action. Is NULL if the client hasn't + asked to handle the signal. Consequently, we expect never to + receive a signal for which the corresponding handler is NULL. */ +void* VG_(sighandler)[VKI_KNSIG]; + +/* For each signal, either: + -- VG_SIGIDLE if not pending and not running + -- Handler address if pending + -- VG_SIGRUNNING if the handler is running and hasn't (returned or + unblocked the signal using sigprocmask following a longjmp out + of the handler). 
+ */ +#define VG_SIGIDLE ((void*)0) +#define VG_SIGRUNNING ((void*)1) + +void* VG_(sigpending)[VKI_KNSIG]; + +/* See decl in vg_include.h for explanation. */ +Int VG_(syscall_depth) = 0; + + +/* --------------------------------------------------------------------- + The signal simulation proper. A simplified version of what the + Linux kernel does. + ------------------------------------------------------------------ */ + +/* A structure in which to save the application's registers + during the execution of signal handlers. */ + +typedef + struct { + UInt retaddr; /* Sig handler's (bogus) return address */ + Int sigNo; /* The arg to the sig handler. */ + UInt magicPI; + UInt fpustate[VG_SIZE_OF_FPUSTATE_W]; + UInt eax; + UInt ecx; + UInt edx; + UInt ebx; + UInt ebp; + UInt esp; + UInt esi; + UInt edi; + Addr eip; + UInt eflags; + UInt magicE; + } + VgSigContext; + + + +/* This is the bogus return address which the implementation + of RET in vg_cpu.c checks for. If it spots a return to + here, it calls vg_signal_returns(). We should never actually + enter this procedure, neither on the real nor simulated CPU. +*/ +void VG_(signalreturn_bogusRA) ( void ) +{ + VG_(panic) ( "vg_signalreturn_bogusRA -- something is badly wrong" ); +} + + +/* Set up a stack frame (VgSigContext) for the client's signal + handler. This includes the signal number and a bogus return + address. 
*/ +static +void vg_push_signal_frame ( int sigNo ) +{ + Int i; + UInt esp; + VgSigContext sigctx; + for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) + sigctx.fpustate[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i]; + + sigctx.magicPI = 0x31415927; + sigctx.magicE = 0x27182818; + sigctx.eax = VG_(baseBlock)[VGOFF_(m_eax)]; + sigctx.ecx = VG_(baseBlock)[VGOFF_(m_ecx)]; + sigctx.edx = VG_(baseBlock)[VGOFF_(m_edx)]; + sigctx.ebx = VG_(baseBlock)[VGOFF_(m_ebx)]; + sigctx.ebp = VG_(baseBlock)[VGOFF_(m_ebp)]; + sigctx.esp = VG_(baseBlock)[VGOFF_(m_esp)]; + sigctx.esi = VG_(baseBlock)[VGOFF_(m_esi)]; + sigctx.edi = VG_(baseBlock)[VGOFF_(m_edi)]; + sigctx.eflags = VG_(baseBlock)[VGOFF_(m_eflags)]; + sigctx.eip = VG_(baseBlock)[VGOFF_(m_eip)]; + sigctx.retaddr = (UInt)(&VG_(signalreturn_bogusRA)); + sigctx.sigNo = sigNo; + + esp = VG_(baseBlock)[VGOFF_(m_esp)]; + vg_assert((sizeof(VgSigContext) & 0x3) == 0); + + esp -= sizeof(VgSigContext); + for (i = 0; i < sizeof(VgSigContext)/4; i++) + ((UInt*)esp)[i] = ((UInt*)(&sigctx))[i]; + + /* Make sigNo and retaddr fields readable -- at 0(%ESP) and 4(%ESP) */ + if (VG_(clo_instrument)) { + VGM_(make_readable) ( ((Addr)esp)+0 ,4 ); + VGM_(make_readable) ( ((Addr)esp)+4 ,4 ); + } + + VG_(baseBlock)[VGOFF_(m_esp)] = esp; + VG_(baseBlock)[VGOFF_(m_eip)] = (Addr)VG_(sigpending)[sigNo]; + /* + VG_(printf)("pushed signal frame; %%ESP now = %p, next %%EBP = %p\n", + esp, VG_(baseBlock)[VGOFF_(m_eip)]); + */ +} + + +/* Clear the signal frame created by vg_push_signal_frame, restore the + simulated machine state, and return the signal number that the + frame was for. */ +static +Int vg_pop_signal_frame ( void ) +{ + UInt esp; + Int sigNo, i; + VgSigContext* sigctx; + /* esp is now pointing at the magicPI word on the stack, viz, + eight bytes above the bottom of the vg_sigcontext. 
+ */ + esp = VG_(baseBlock)[VGOFF_(m_esp)]; + sigctx = (VgSigContext*)(esp-4); + + vg_assert(sigctx->magicPI == 0x31415927); + vg_assert(sigctx->magicE == 0x27182818); + if (VG_(clo_trace_signals)) + VG_(message)(Vg_DebugMsg, "vg_pop_signal_frame: valid magic"); + + /* restore machine state */ + for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) + VG_(baseBlock)[VGOFF_(m_fpustate) + i] = sigctx->fpustate[i]; + + /* Mark the sigctx structure as nonaccessible. Has to happen + _before_ vg_m_state.m_esp is given a new value.*/ + if (VG_(clo_instrument)) + VGM_(handle_esp_assignment) ( sigctx->esp ); + + /* Restore machine state from the saved context. */ + VG_(baseBlock)[VGOFF_(m_eax)] = sigctx->eax; + VG_(baseBlock)[VGOFF_(m_ecx)] = sigctx->ecx; + VG_(baseBlock)[VGOFF_(m_edx)] = sigctx->edx; + VG_(baseBlock)[VGOFF_(m_ebx)] = sigctx->ebx; + VG_(baseBlock)[VGOFF_(m_ebp)] = sigctx->ebp; + VG_(baseBlock)[VGOFF_(m_esp)] = sigctx->esp; + VG_(baseBlock)[VGOFF_(m_esi)] = sigctx->esi; + VG_(baseBlock)[VGOFF_(m_edi)] = sigctx->edi; + VG_(baseBlock)[VGOFF_(m_eflags)] = sigctx->eflags; + VG_(baseBlock)[VGOFF_(m_eip)] = sigctx->eip; + sigNo = sigctx->sigNo; + return sigNo; +} + + +/* A handler is returning. Restore the machine state from the stacked + VgSigContext and continue with whatever was going on before the + handler ran. */ + +void VG_(signal_returns) ( void ) +{ + Int sigNo, ret; + vki_ksigset_t block_procmask; + vki_ksigset_t saved_procmask; + + /* Block host signals ... */ + VG_(ksigfillset)(&block_procmask); + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &block_procmask, &saved_procmask); + vg_assert(ret == 0); + + sigNo = vg_pop_signal_frame(); + + /* You would have thought that the following assertion made sense + here: + + vg_assert(vg_sigpending[sigNo] == VG_SIGRUNNING); + + Alas, you would be wrong. 
If a sigprocmask has been intercepted + and it unblocks this signal, then vg_sigpending[sigNo] will + either be VG_SIGIDLE, or (worse) another instance of it will + already have arrived, so that the stored value is that of the + handler. + + Note that these anomalies can only occur when a signal handler + unblocks its own signal inside itself AND THEN RETURNS anyway + (which seems a bizarre thing to do). + + Ho Hum. This seems like a race condition which surely isn't + handled correctly. */ + + vg_assert(sigNo >= 1 && sigNo < VKI_KNSIG); + VG_(sigpending)[sigNo] = VG_SIGIDLE; + + /* Unlock and return. */ + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &saved_procmask, NULL); + vg_assert(ret == 0); + + /* The main dispatch loop now continues at vg_m_eip. */ +} + + +/* Restore the default host behaviour of SIGABRT, and unblock it, + so we can exit the simulator cleanly by doing exit/abort/assert fail. +*/ +void VG_(restore_SIGABRT) ( void ) +{ + vki_ksigset_t set; + vki_ksigaction act; + act.ksa_flags = VKI_SA_RESTART; + act.ksa_handler = VKI_SIG_DFL; + VG_(ksigemptyset)(&act.ksa_mask); + + VG_(ksigemptyset)(&set); + VG_(ksigaddset)(&set,VKI_SIGABRT); + + /* If this doesn't work, tough. Don't check return code. */ + VG_(ksigaction)(VKI_SIGABRT, &act, NULL); + VG_(ksigprocmask)(VKI_SIG_UNBLOCK, &set, NULL); +} + + +/* Deliver all pending signals, by building stack frames for their + handlers. */ +void VG_(deliver_signals) ( void ) +{ + vki_ksigset_t block_procmask; + vki_ksigset_t saved_procmask; + Int ret, sigNo; + Bool found; + + /* A cheap check. We don't need to have exclusive access + to the queue, because in the worst case, vg_oursignalhandler + will add signals, causing us to return, thinking there + are no signals to deliver, when in fact there are some. + A subsequent call here will handle the signal(s) we missed. 
+ */ + found = False; + for (sigNo = 1; sigNo < VKI_KNSIG; sigNo++) + if (VG_(sigpending)[sigNo] != VG_SIGIDLE && + VG_(sigpending)[sigNo] != VG_SIGRUNNING) found = True; + + if (!found) return; + + /* Now we have to do it properly. Get exclusive access by + blocking all the host's signals. That means vg_oursignalhandler + can't run whilst we are messing with stuff. + */ + VG_(ksigfillset)(&block_procmask); + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &block_procmask, &saved_procmask); + vg_assert(ret == 0); + + for (sigNo = 1; sigNo < VKI_KNSIG; sigNo++) { + if (VG_(sigpending)[sigNo] == VG_SIGIDLE || + VG_(sigpending)[sigNo] == VG_SIGRUNNING) continue; + + if (VG_(clo_trace_signals)) + VG_(message)(Vg_DebugMsg,"delivering signal %d", sigNo ); + + /* Create a signal delivery frame, and set the client's %ESP and + %EIP so that when execution continues, we will enter the + signal handler with the frame on top of the client's stack, + as it expects. */ + vg_push_signal_frame ( sigNo ); + + /* Signify that the signal has been delivered. */ + VG_(sigpending)[sigNo] = VG_SIGRUNNING; + } + + /* Unlock and return. */ + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &saved_procmask, NULL); + vg_assert(ret == 0); + return; +} + + +/* ----------- HACK ALERT ----------- */ +/* Note carefully that this runs with all host signals disabled! 
*/ +static +void vg_deliver_signal_immediately ( Int sigNo ) +{ + Int n_bbs_done; + Int sigNo2; + Addr next_orig_addr; + Addr next_trans_addr; + + if (VG_(clo_verbosity) > 0 + && (True || VG_(clo_trace_signals))) + VG_(message)(Vg_DebugExtraMsg, + "deliver signal %d immediately: BEGIN", sigNo ); + /* VG_(printf)("resumption addr is %p\n", + VG_(baseBlock)[VGOFF_(m_eip)]); */ + + vg_push_signal_frame ( sigNo ); + n_bbs_done = 0; + + /* Single-step the client (ie, run the handler) until it jumps to + VG_(signalreturn_bogusRA) */ + + while (True) { + + if (n_bbs_done >= VG_MAX_BBS_IN_IMMEDIATE_SIGNAL) + VG_(unimplemented)( + "handling signal whilst client blocked in syscall: " + "handler runs too long" + ); + + next_orig_addr = VG_(baseBlock)[VGOFF_(m_eip)]; + + if (next_orig_addr == (Addr)(&VG_(trap_here))) + VG_(unimplemented)( + "handling signal whilst client blocked in syscall: " + "handler calls malloc (et al)" + ); + + /* VG_(printf)("next orig addr = %p\n", next_orig_addr); */ + if (next_orig_addr == (Addr)(&VG_(signalreturn_bogusRA))) + break; + + next_trans_addr = VG_(search_transtab) ( next_orig_addr ); + if (next_trans_addr == (Addr)NULL) { + VG_(create_translation_for) ( next_orig_addr ); + next_trans_addr = VG_(search_transtab) ( next_orig_addr ); + } + + vg_assert(next_trans_addr != (Addr)NULL); + next_orig_addr = VG_(run_singleton_translation)(next_trans_addr); + VG_(baseBlock)[VGOFF_(m_eip)] = next_orig_addr; + n_bbs_done++; + } + + sigNo2 = vg_pop_signal_frame(); + vg_assert(sigNo2 == sigNo); + + if (VG_(clo_verbosity) > 0 + && (True || VG_(clo_trace_signals))) + VG_(message)(Vg_DebugExtraMsg, + "deliver signal %d immediately: END, %d bbs done", + sigNo, n_bbs_done ); + + /* Invalidate the tt_fast cache. We've been (potentially) adding + translations and even possibly doing LRUs without keeping it up + to date, so we'd better nuke it before going any further, to + avoid inconsistencies with the main TT/TC structure. 
*/ + VG_(invalidate_tt_fast)(); +} + + +/* ----------- end of HACK ALERT ----------- */ + + +/* Receive a signal from the host, and either discard it or park it in + the queue of pending signals. All other signals will be blocked + when this handler runs. Runs with all host signals blocked, so as + to have mutual exclusion when adding stuff to the queue. */ + +static void VG_(oursignalhandler) ( Int sigNo ) +{ + Int ret; + vki_ksigset_t block_procmask; + vki_ksigset_t saved_procmask; + + if (VG_(clo_trace_signals)) { + VG_(start_msg)(Vg_DebugMsg); + VG_(add_to_msg)("signal %d arrived ... ", sigNo ); + } + vg_assert(sigNo >= 1 && sigNo < VKI_KNSIG); + + /* Sanity check. Ensure we're really running on the signal stack + we asked for. */ + if ( !( + ((Char*)(&(VG_(sigstack)[0])) <= (Char*)(&ret)) + && + ((Char*)(&ret) < (Char*)(&(VG_(sigstack)[10000]))) + ) + ) { + VG_(message)(Vg_DebugMsg, "FATAL: signal delivered on the wrong stack?!"); + VG_(message)(Vg_DebugMsg, "A possible workaround follows. Please tell me"); + VG_(message)(Vg_DebugMsg, "(jseward@acm.org) if the suggested workaround doesn't help."); + VG_(unimplemented) + ("support for progs compiled with -p/-pg; rebuild your prog without -p/-pg"); + } + + vg_assert((Char*)(&(VG_(sigstack)[0])) <= (Char*)(&ret)); + vg_assert((Char*)(&ret) < (Char*)(&(VG_(sigstack)[10000]))); + + if (sigNo == VKI_SIGABRT && VG_(sighandler)[sigNo] == NULL) { + /* We get here if SIGABRT is delivered and the client hasn't + asked to catch it. The aim is to exit in a controlled + manner. */ + if (VG_(clo_trace_signals)) { + VG_(add_to_msg)("catching SIGABRT"); + VG_(end_msg)(); + } + VG_(ksignal)(VKI_SIGABRT, VKI_SIG_DFL); + VG_(interrupt_reason) = VG_Y_EXIT; + VG_(longjmpd_on_signal) = VKI_SIGABRT; + __builtin_longjmp(VG_(toploop_jmpbuf),1); + } + + /* Block all host signals. 
*/ + VG_(ksigfillset)(&block_procmask); + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &block_procmask, &saved_procmask); + vg_assert(ret == 0); + + if (VG_(sighandler)[sigNo] == NULL) { + if (VG_(clo_trace_signals)) { + VG_(add_to_msg)("unexpected!"); + VG_(end_msg)(); + } + VG_(panic)("vg_oursignalhandler: unexpected signal"); + } + + /* Decide what to do with it. */ + if (VG_(sigpending)[sigNo] == VG_SIGRUNNING) { + /* Already running; ignore it. */ + if (VG_(clo_trace_signals)) { + VG_(add_to_msg)("already running; discarded" ); + VG_(end_msg)(); + } + } + else + if (VG_(sigpending)[sigNo] != VG_SIGRUNNING && + VG_(sigpending)[sigNo] != VG_SIGIDLE) { + /* Not running and not idle == pending; ignore it. */ + if (VG_(clo_trace_signals)) { + VG_(add_to_msg)("already pending; discarded" ); + VG_(end_msg)(); + } + } + else { + /* Ok, we'd better deliver it to the client, one way or another. */ + vg_assert(VG_(sigpending)[sigNo] == VG_SIGIDLE); + + if (VG_(syscall_depth) == 0) { + /* The usual case; delivering a signal to the client, and the + client is not currently in a syscall. Queue it up for + delivery at some point in the future. */ + VG_(sigpending)[sigNo] = VG_(sighandler)[sigNo]; + if (VG_(clo_trace_signals)) { + VG_(add_to_msg)("queued" ); + VG_(end_msg)(); + } + } else { + /* The nasty case, which was causing kmail to freeze up: the + client is (presumably blocked) in a syscall. We have to + deliver the signal right now, because it may be that + running the sighandler is the only way that the syscall + will be able to return. In which case, if we don't do + that, the client will deadlock. */ + if (VG_(clo_trace_signals)) { + VG_(add_to_msg)("delivering immediately" ); + VG_(end_msg)(); + } + /* Note that this runs with all host signals blocked. 
*/ + VG_(sigpending)[sigNo] = VG_(sighandler)[sigNo]; + vg_deliver_signal_immediately(sigNo); + VG_(sigpending)[sigNo] = VG_SIGIDLE; + /* VG_(printf)("resuming at %p\n", VG_(baseBlock)[VGOFF_(m_eip)]); */ + } + } + + /* We've finished messing with the queue, so re-enable host signals. */ + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &saved_procmask, NULL); + + vg_assert(ret == 0); + if (sigNo == VKI_SIGSEGV || sigNo == VKI_SIGBUS + || sigNo == VKI_SIGFPE || sigNo == VKI_SIGILL) { + /* Can't continue; must longjmp and thus enter the sighandler + immediately. */ + VG_(longjmpd_on_signal) = sigNo; + __builtin_longjmp(VG_(toploop_jmpbuf),1); + } +} + + +/* The outer insn loop calls here to reenable a host signal if + vg_oursighandler longjmp'd. +*/ +void VG_(unblock_host_signal) ( Int sigNo ) +{ + Int ret; + vki_ksigset_t set; + VG_(ksigemptyset)(&set); + ret = VG_(ksigaddset)(&set,sigNo); + vg_assert(ret == 0); + ret = VG_(ksigprocmask)(VKI_SIG_UNBLOCK,&set,NULL); + vg_assert(ret == 0); +} + + +static __attribute((unused)) +void pp_vg_ksigaction ( vki_ksigaction* sa ) +{ + Int i; + VG_(printf)("vg_ksigaction: handler %p, flags 0x%x, restorer %p\n", + sa->ksa_handler, sa->ksa_flags, sa->ksa_restorer); + VG_(printf)("vg_ksigaction: { "); + for (i = 1; i < VKI_KNSIG; i++) + if (VG_(ksigismember(&(sa->ksa_mask),i))) + VG_(printf)("%d ", i); + VG_(printf)("}\n"); +} + + +/* Copy the process' real signal state to the sim state. Whilst + doing this, block all real signals. +*/ +void VG_(sigstartup_actions) ( void ) +{ + Int i, ret; + + vki_ksigset_t block_procmask; + vki_ksigset_t saved_procmask; + vki_kstack_t altstack_info; + vki_ksigaction sa; + + /* VG_(printf)("SIGSTARTUP\n"); */ + /* Block all signals. + saved_procmask remembers the previous mask. */ + VG_(ksigfillset)(&block_procmask); + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &block_procmask, &saved_procmask); + vg_assert(ret == 0); + + /* Register an alternative stack for our own signal handler to run + on. 
*/ + altstack_info.ss_sp = &(VG_(sigstack)[0]); + altstack_info.ss_size = 10000 * sizeof(UInt); + altstack_info.ss_flags = 0; + ret = VG_(ksigaltstack)(&altstack_info, NULL); + if (ret != 0) { + VG_(panic)( + "vg_sigstartup_actions: couldn't install alternative sigstack"); + } + if (VG_(clo_trace_signals)) { + VG_(message)(Vg_DebugExtraMsg, + "vg_sigstartup_actions: sigstack installed ok"); + } + + /* Set initial state for the signal simulation. */ + for (i = 1; i < VKI_KNSIG; i++) + VG_(sighandler[i]) = VG_(sigpending[i]) = NULL; + + for (i = 1; i < VKI_KNSIG; i++) { + + /* Get the old host action */ + ret = VG_(ksigaction)(i, NULL, &sa); + vg_assert(ret == 0); + + /* If there's already a handler set, record it, then route the + signal through to our handler. */ + if (sa.ksa_handler != VKI_SIG_IGN && sa.ksa_handler != VKI_SIG_DFL) { + if (VG_(clo_trace_signals)) + VG_(printf)("snaffling handler 0x%x for signal %d\n", + (Addr)(sa.ksa_handler), i ); + if ((sa.ksa_flags & VKI_SA_ONSTACK) != 0) + VG_(unimplemented) + ("signals on an alternative stack (SA_ONSTACK)"); + VG_(sighandler[i]) = sa.ksa_handler; + sa.ksa_handler = &VG_(oursignalhandler); + ret = VG_(ksigaction)(i, &sa, NULL); + vg_assert(ret == 0); + } + } + + VG_(ksignal)(VKI_SIGABRT, &VG_(oursignalhandler)); + + /* Finally, restore the blocking mask. */ + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &saved_procmask, NULL); + vg_assert(ret == 0); +} + + +/* Copy the process' sim signal state to the real state, + for when we transfer from the simulated to real CPU. + PROBLEM: what if we're running a signal handler when we + get here? Hmm. + I guess we wind up in vg_signalreturn_bogusRA, *or* the + handler has done/will do a longjmp, in which case we're ok. + + It is important (see vg_startup.S) that this proc does not + change the state of the real FPU, since it is called when + running the program on the real CPU. 
+*/ +void VG_(sigshutdown_actions) ( void ) +{ + Int i, ret; + + vki_ksigset_t block_procmask; + vki_ksigset_t saved_procmask; + vki_ksigaction sa; + + /* Block all signals. */ + VG_(ksigfillset)(&block_procmask); + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &block_procmask, &saved_procmask); + vg_assert(ret == 0); + + /* copy the sim signal actions to the real ones. */ + for (i = 1; i < VKI_KNSIG; i++) { + if (i == VKI_SIGKILL || i == VKI_SIGSTOP) continue; + if (VG_(sighandler)[i] == NULL) continue; + ret = VG_(ksigaction)(i, NULL, &sa); + vg_assert(ret == 0); + sa.ksa_handler = VG_(sighandler)[i]; + ret = VG_(ksigaction)(i, &sa, NULL); + } + + /* Finally, copy the simulated process mask to the real one. */ + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &saved_procmask, NULL); + vg_assert(ret == 0); +} + + +/* --------------------------------------------------------------------- + Handle signal-related syscalls from the simulatee. + ------------------------------------------------------------------ */ + +/* Do more error checking? */ +void VG_(do__NR_sigaction) ( void ) +{ + UInt res; + void* our_old_handler; + vki_ksigaction* new_action; + vki_ksigaction* old_action; + UInt param1 + = VG_(baseBlock)[VGOFF_(m_ebx)]; /* int sigNo */ + UInt param2 + = VG_(baseBlock)[VGOFF_(m_ecx)]; /* k_sigaction* new_action */ + UInt param3 + = VG_(baseBlock)[VGOFF_(m_edx)]; /* k_sigaction* old_action */ + new_action = (vki_ksigaction*)param2; + old_action = (vki_ksigaction*)param3; + + if (VG_(clo_trace_signals)) + VG_(message)(Vg_DebugExtraMsg, + "__NR_sigaction: sigNo %d, " + "new 0x%x, old 0x%x, new flags 0x%x", + param1,(UInt)new_action,(UInt)old_action, + (UInt)(new_action ? new_action->ksa_flags : 0) ); + /* VG_(ppSigProcMask)(); */ + + if (param1 < 1 || param1 >= VKI_KNSIG) goto bad; + + our_old_handler = VG_(sighandler)[param1]; + /* VG_(printf)("old handler = 0x%x\n", our_old_handler); */ + /* If a new handler has been specified, mess with its handler. 
*/ + if (new_action) { + if (new_action->ksa_handler == VKI_SIG_IGN || + new_action->ksa_handler == VKI_SIG_DFL) { + VG_(sighandler)[param1] = NULL; + VG_(sigpending)[param1] = NULL; + /* Dangerous! Could lose signals like this. */ + } else { + /* VG_(printf)("new handler = 0x%x\n", new_action->ksa_handler); */ + /* The client isn't allowed to use an alternative signal + stack. We, however, must. */ + if ((new_action->ksa_flags & VKI_SA_ONSTACK) != 0) + VG_(unimplemented) + ("signals on an alternative stack (SA_ONSTACK)"); + new_action->ksa_flags |= VKI_SA_ONSTACK; + VG_(sighandler)[param1] = new_action->ksa_handler; + new_action->ksa_handler = &VG_(oursignalhandler); + } + } + + KERNEL_DO_SYSCALL(res); + /* VG_(printf)("RES = %d\n", res); */ + /* If the client asks for the old handler, maintain our fiction + by stuffing in the handler it thought it asked for ... */ + if (old_action) { + if (old_action->ksa_handler == VKI_SIG_IGN || + old_action->ksa_handler == VKI_SIG_DFL) { + /* No old action; we should have a NULL handler. */ + vg_assert(our_old_handler == NULL); + } else { + /* There's a handler. */ + if (param1 != VKI_SIGKILL && param1 != VKI_SIGABRT) { + vg_assert(old_action->ksa_handler == &VG_(oursignalhandler)); + vg_assert((old_action->ksa_flags & VKI_SA_ONSTACK) != 0); + } + old_action->ksa_handler = our_old_handler; + /* Since the client is not allowed to ask for an alternative + sig stack, unset the bit for anything we pass back to + it. */ + old_action->ksa_flags &= ~VKI_SA_ONSTACK; + } + } + + VG_(ksignal)(VKI_SIGABRT, &VG_(oursignalhandler)); + goto good; + + good: + VG_(baseBlock)[VGOFF_(m_eax)] = (UInt)0; + return; + + bad: + VG_(message)(Vg_UserMsg, + "Warning: bad signal number %d in __NR_sigaction.", + param1); + VG_(baseBlock)[VGOFF_(m_eax)] = (UInt)(-1); + return; +} + + +/* The kernel handles sigprocmask in the usual way, but we also need + to inspect it, so as to spot requests to unblock signals. 
We then + inspect vg_sigpending, which records the current state of signal + delivery to the client. The problematic case is when a signal is + delivered to the client, in which case the relevant vg_sigpending + slot is set to VG_SIGRUNNING. This inhibits further signal + deliveries. This mechanism implements the POSIX requirement that a + signal is blocked in its own handler. + + If the handler returns normally, the slot is changed back to + VG_SIGIDLE, so that further instances of the signal can be + delivered. The problem occurs when the handler never returns, but + longjmps. POSIX mandates that you then have to do an explicit + setprocmask to re-enable the signal. That is what we try and spot + here. Although the call is passed to the kernel, we also need to + spot unblocked signals whose state is VG_SIGRUNNING, and change it + back to VG_SIGIDLE. +*/ +void VG_(do__NR_sigprocmask) ( Int how, vki_ksigset_t* set ) +{ + Int i; + if (VG_(clo_trace_signals)) + VG_(message)(Vg_DebugMsg, + "vg_do__NR_sigprocmask: how = %d (%s), set = %p", + how, + how==VKI_SIG_BLOCK ? "SIG_BLOCK" : ( + how==VKI_SIG_UNBLOCK ? "SIG_UNBLOCK" : ( + how==VKI_SIG_SETMASK ? "SIG_SETMASK" : "???")), + set + ); + + /* Sometimes this happens. I don't know what it signifies. */ + if (set == NULL) + return; + + /* Not interested in blocking of signals. */ + if (how == VKI_SIG_BLOCK) + return; + + /* Detect and ignore unknown action. 
*/ + if (how != VKI_SIG_UNBLOCK && how != VKI_SIG_SETMASK) { + VG_(message)(Vg_DebugMsg, + "sigprocmask: unknown `how' field %d", how); + return; + } + + for (i = 1; i < VKI_KNSIG; i++) { + Bool unblock_me = False; + if (how == VKI_SIG_SETMASK) { + if (!VG_(ksigismember)(set,i)) + unblock_me = True; + } else { /* how == SIG_UNBLOCK */ + if (VG_(ksigismember)(set,i)) + unblock_me = True; + } + if (unblock_me && VG_(sigpending)[i] == VG_SIGRUNNING) { + VG_(sigpending)[i] = VG_SIGIDLE; + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg, + "Warning: unblocking signal %d " + "due to sigprocmask", i ); + } + } +} + + + +/*--------------------------------------------------------------------*/ +/*--- end vg_signals.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_startup.S b/coregrind/vg_startup.S new file mode 100644 index 000000000..3fa965cc8 --- /dev/null +++ b/coregrind/vg_startup.S @@ -0,0 +1,221 @@ + +##--------------------------------------------------------------------## +##--- Startup and shutdown code for Valgrind. ---## +##--- vg_startup.S ---## +##--------------------------------------------------------------------## + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_constants.h" + + +#--------------------------------------------------------------------- +# +# Startup and shutdown code for Valgrind. Particularly hairy. +# +# The dynamic linker, ld.so, will run the contents of the .init +# section, once it has located, mmap-d and and linked the shared +# libraries needed by the program. Valgrind is itself a shared +# library. ld.so then runs code in the .init sections of each +# library in turn, in order to give them a chance to initialise +# themselves. We hijack this mechanism. Our startup routine +# does return -- and execution continues -- except on the +# synthetic CPU, not the real one. But ld.so, and the program +# it is starting, cant tell the difference. +# +# The management apologise for the lack of apostrophes in these +# comments. GNU as seems to object to them, for some reason. + + +.section .init + call VG_(startup) +.section .fini + call VG_(shutdown) +.section .text + + + +VG_(startup): + # Record %esp as it was when we got here. This is because argv/c + # and envp[] are passed as args to this function, and we need to see + # envp so we can get at the env var VG_ARGS without help from libc. + # The stack layout at this point depends on the version of glibc in + # use. See process_cmd_line_options() in vg_main.c for details. + movl %esp, VG_(esp_at_startup) + + # We have control! Save the state of the machine in + # the simulators state, and switch stacks. + # Except ... we cant copy the machines registers into their + # final places in vg_baseBlock, because the offsets to them + # have not yet been set up. Instead, they are copied to a + # temporary place (m_state_static). 
In vg_main.c, once the + # baseBlock offsets are set up, values are copied into baseBlock. + movl %eax, VG_(m_state_static)+0 + movl %ecx, VG_(m_state_static)+4 + movl %edx, VG_(m_state_static)+8 + movl %ebx, VG_(m_state_static)+12 + movl %esp, VG_(m_state_static)+16 + movl %ebp, VG_(m_state_static)+20 + movl %esi, VG_(m_state_static)+24 + movl %edi, VG_(m_state_static)+28 + pushfl + popl %eax + movl %eax, VG_(m_state_static)+32 + fwait + fnsave VG_(m_state_static)+40 + frstor VG_(m_state_static)+40 + + # keep the first and last 10 words free to check for overruns + movl $VG_(stack)+39996 -40, %esp + + # Now some real magic. We need this procedure to return, + # since thats what ld.so expects, but running on the + # simulator. So vg_main starts the simulator running at + # the insn labelled first_insn_to_simulate. + + movl $first_insn_to_simulate, VG_(m_state_static)+36 + jmp VG_(main) +first_insn_to_simulate: + # Nothing else to do -- just return in the "normal" way. + ret + + + +.global VG_(shutdown) +VG_(shutdown): + # ld.so will call here after execution of the program proper + # is complete, to allow libraries to close down cleanly. + # Note that we will enter here on the synthetic CPU, not + # the real one! So the interpreter must notice when this + # procedure is called, and use that as its cue to switch + # back to the real CPU. That means the code placed here is + # utterly irrelevant, since it will never get run, but I + # place a RET here anyway, since it is the traditional way + # to return from a subroutine :-) + ret + + + +.global VG_(switch_to_real_CPU) +VG_(switch_to_real_CPU): + # Once Valgrind has decided it needs to exit, either + # because it has detected a call to vg_shutdown, or + # because the specified number of insns have been completed + # during a debugging run, it jumps here, which copies the + # simulators state into the real machine state. 
Execution + # of the rest of the program continues on the real CPU, + # and there is no way for the simulator to regain control + # after this point. + frstor VG_(m_state_static)+40 + movl VG_(m_state_static)+32, %eax + pushl %eax + popfl + movl VG_(m_state_static)+0, %eax + movl VG_(m_state_static)+4, %ecx + movl VG_(m_state_static)+8, %edx + movl VG_(m_state_static)+12, %ebx + movl VG_(m_state_static)+16, %esp + movl VG_(m_state_static)+20, %ebp + movl VG_(m_state_static)+24, %esi + movl VG_(m_state_static)+28, %edi + + pushal + pushfl + # We hope that vg_sigshutdown_actions does not alter + # the FPU state. + call VG_(sigshutdown_actions) + popfl + popal + # re-restore the FPU state anyway ... + frstor VG_(m_state_static)+40 + jmp *VG_(m_state_static)+36 + + + +/*------------------------------------------------------------*/ +/*--- A function to temporarily copy %ESP/%EBP into ---*/ +/*--- %esp/%ebp and then start up GDB. ---*/ +/*------------------------------------------------------------*/ + +/*--- This is clearly not re-entrant! ---*/ +.data +vg_ebp_saved_over_GDB_start: + .word 0 +vg_esp_saved_over_GDB_start: + .word 0 +.text + +.global VG_(swizzle_esp_then_start_GDB) +VG_(swizzle_esp_then_start_GDB): + pushal + + # remember the simulators current stack/frame pointers + movl %ebp, vg_ebp_saved_over_GDB_start + movl %esp, vg_esp_saved_over_GDB_start + + movl $VG_(baseBlock), %ebx + + # fetch %ESP into %esp + movl VGOFF_(m_esp), %esi + movl (%ebx, %esi, 4), %esp + + ### %esp now refers to clients stack + ### mess with the clients stack to make it look as if it + ### called this procedure, since otherwise it will look to gdb + ### as if the top (currently executing) stack frame of the + ### client is missing. + + # push %EIP, via %eax. This is a faked-up return address. + movl VGOFF_(m_eip), %esi + movl (%ebx, %esi, 4), %eax + pushl %eax + + # push %EBP, via %eax. This is a faked %ebp-chain pointer. 
+ movl VGOFF_(m_ebp), %esi + movl (%ebx, %esi, 4), %eax + pushl %eax + + movl %esp, %ebp + + call VG_(start_GDB_whilst_on_client_stack) + + # restore the simulators stack/frame pointer + movl vg_ebp_saved_over_GDB_start, %ebp + movl vg_esp_saved_over_GDB_start, %esp + + popal + ret + +# gcc puts this construction at the end of every function. I think it +# allows the linker to figure out the size of the function. So we do +# the same, in the vague hope that it might help GDBs navigation. +.Lend_of_swizzle: + .size VG_(swizzle_esp_then_start_GDB), .Lend_of_swizzle-VG_(swizzle_esp_then_start_GDB) + +##--------------------------------------------------------------------## +##--- end vg_startup.S ---## +##--------------------------------------------------------------------## diff --git a/coregrind/vg_symtab2.c b/coregrind/vg_symtab2.c new file mode 100644 index 000000000..cfb6a58bc --- /dev/null +++ b/coregrind/vg_symtab2.c @@ -0,0 +1,1435 @@ + +/*--------------------------------------------------------------------*/ +/*--- Management of symbols and debugging information. ---*/ +/*--- vg_symtab2.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" +#include "vg_unsafe.h" + +#include /* ELF defns */ +#include /* stabs defns */ + +/* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from + dlopen()ed libraries, which is something that KDE3 does a lot. + Still kludgey, though less than before: + + * we don't check whether we should throw away some symbol tables + when munmap() happens + + * symbol table reading code for ELF binaries is a shambles. + Use GHC's fptools/ghc/rts/Linker.c as the basis for something better. +*/ + +/*------------------------------------------------------------*/ +/*--- Structs n stuff ---*/ +/*------------------------------------------------------------*/ + +/* A structure to hold an ELF symbol (very crudely). */ +typedef + struct { + Addr addr; /* lowest address of entity */ + UInt size; /* size in bytes */ + Int nmoff; /* offset of name in this SegInfo's str tab */ + } + RiSym; + + +/* A structure to hold addr-to-source info for a single line. */ +typedef + struct { + Addr addr; /* lowest address for this line */ + Int fnmoff; /* source filename; offset in this SegInfo's str tab */ + UShort lineno; /* source line number, or zero */ + UShort size; /* size in bytes; we go to a bit of trouble to + catch overflows of this */ + } + RiLoc; + + +/* A structure which contains information pertaining to one mapped + text segment. */ +typedef + struct _SegInfo { + struct _SegInfo* next; + /* Description of the mapped segment. */ + Addr start; + UInt size; + UChar* filename; /* in mallocville */ + UInt foffset; + /* An expandable array of symbols. */ + RiSym* symtab; + UInt symtab_used; + UInt symtab_size; + /* An expandable array of locations. 
*/ + RiLoc* loctab; + UInt loctab_used; + UInt loctab_size; + /* An expandable array of characters -- the string table. */ + Char* strtab; + UInt strtab_used; + UInt strtab_size; + /* offset is what we need to add to symbol table entries + to get the real location of that symbol in memory. + For executables, offset is zero. + For .so's, offset == base_addr. + This seems like a giant kludge to me. + */ + UInt offset; + } + SegInfo; + + +/* -- debug helper -- */ +static void ppSegInfo ( SegInfo* si ) +{ + VG_(printf)("name: %s\n" + "start %p, size %d, foffset %d\n", + si->filename?si->filename : (UChar*)"NULL", + si->start, si->size, si->foffset ); +} + +static void freeSegInfo ( SegInfo* si ) +{ + vg_assert(si != NULL); + if (si->filename) VG_(free)(VG_AR_SYMTAB, si->filename); + if (si->symtab) VG_(free)(VG_AR_SYMTAB, si->symtab); + if (si->loctab) VG_(free)(VG_AR_SYMTAB, si->loctab); + if (si->strtab) VG_(free)(VG_AR_SYMTAB, si->strtab); + VG_(free)(VG_AR_SYMTAB, si); +} + + +/*------------------------------------------------------------*/ +/*--- Adding stuff ---*/ +/*------------------------------------------------------------*/ + +/* Add a str to the string table, including terminating zero, and + return offset of the string in vg_strtab. 
*/ + +static __inline__ +Int addStr ( SegInfo* si, Char* str ) +{ + Char* new_tab; + Int new_sz, i, space_needed; + + space_needed = 1 + VG_(strlen)(str); + if (si->strtab_used + space_needed > si->strtab_size) { + new_sz = 2 * si->strtab_size; + if (new_sz == 0) new_sz = 5000; + new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz); + if (si->strtab != NULL) { + for (i = 0; i < si->strtab_used; i++) + new_tab[i] = si->strtab[i]; + VG_(free)(VG_AR_SYMTAB, si->strtab); + } + si->strtab = new_tab; + si->strtab_size = new_sz; + } + + for (i = 0; i < space_needed; i++) + si->strtab[si->strtab_used+i] = str[i]; + + si->strtab_used += space_needed; + vg_assert(si->strtab_used <= si->strtab_size); + return si->strtab_used - space_needed; +} + +/* Add a symbol to the symbol table. */ + +static __inline__ +void addSym ( SegInfo* si, RiSym* sym ) +{ + Int new_sz, i; + RiSym* new_tab; + + /* Ignore zero-sized syms. */ + if (sym->size == 0) return; + + if (si->symtab_used == si->symtab_size) { + new_sz = 2 * si->symtab_size; + if (new_sz == 0) new_sz = 500; + new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiSym) ); + if (si->symtab != NULL) { + for (i = 0; i < si->symtab_used; i++) + new_tab[i] = si->symtab[i]; + VG_(free)(VG_AR_SYMTAB, si->symtab); + } + si->symtab = new_tab; + si->symtab_size = new_sz; + } + + si->symtab[si->symtab_used] = *sym; + si->symtab_used++; + vg_assert(si->symtab_used <= si->symtab_size); +} + +/* Add a location to the location table. */ + +static __inline__ +void addLoc ( SegInfo* si, RiLoc* loc ) +{ + Int new_sz, i; + RiLoc* new_tab; + + /* Ignore zero-sized locs. 
*/ + if (loc->size == 0) return; + + if (si->loctab_used == si->loctab_size) { + new_sz = 2 * si->loctab_size; + if (new_sz == 0) new_sz = 500; + new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiLoc) ); + if (si->loctab != NULL) { + for (i = 0; i < si->loctab_used; i++) + new_tab[i] = si->loctab[i]; + VG_(free)(VG_AR_SYMTAB, si->loctab); + } + si->loctab = new_tab; + si->loctab_size = new_sz; + } + + si->loctab[si->loctab_used] = *loc; + si->loctab_used++; + vg_assert(si->loctab_used <= si->loctab_size); +} + + + +/*------------------------------------------------------------*/ +/*--- Helpers ---*/ +/*------------------------------------------------------------*/ + +/* Non-fatal -- use vg_panic if terminal. */ +static +void vg_symerr ( Char* msg ) +{ + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg,"%s", msg ); +} + + +/* Print a symbol. */ +static +void printSym ( SegInfo* si, Int i ) +{ + VG_(printf)( "%5d: %8p .. %8p (%d) %s\n", + i, + si->symtab[i].addr, + si->symtab[i].addr + si->symtab[i].size - 1, si->symtab[i].size, + &si->strtab[si->symtab[i].nmoff] ); +} + + +#if 0 +/* Print the entire sym tab. */ +static __attribute__ ((unused)) +void printSymtab ( void ) +{ + Int i; + VG_(printf)("\n------ BEGIN vg_symtab ------\n"); + for (i = 0; i < vg_symtab_used; i++) + printSym(i); + VG_(printf)("------ BEGIN vg_symtab ------\n"); +} +#endif + +#if 0 +/* Paranoid strcat. */ +static +void safeCopy ( UChar* dst, UInt maxlen, UChar* src ) +{ + UInt i = 0, j = 0; + while (True) { + if (i >= maxlen) return; + if (dst[i] == 0) break; + i++; + } + while (True) { + if (i >= maxlen) return; + dst[i] = src[j]; + if (src[j] == 0) return; + i++; j++; + } +} +#endif + +/*------------------------------------------------------------*/ +/*--- Canonicalisers ---*/ +/*------------------------------------------------------------*/ + +/* Sort the symtab by starting address, and emit warnings if any + symbols have overlapping address ranges. 
We use that old chestnut, + shellsort. Mash the table around so as to establish the property + that addresses are in order and the ranges to not overlap. This + facilitates using binary search to map addresses to symbols when we + come to query the table. +*/ +static +void canonicaliseSymtab ( SegInfo* si ) +{ + /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */ + Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968, + 4592, 13776, 33936, 86961, 198768, + 463792, 1391376 }; + Int lo = 0; + Int hi = si->symtab_used-1; + Int i, j, h, bigN, hp, n_merged, n_truncated; + RiSym v; + Addr s1, s2, e1, e2; + +# define SWAP(ty,aa,bb) \ + do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0) + + bigN = hi - lo + 1; if (bigN < 2) return; + hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--; + vg_assert(0 <= hp && hp < 16); + + for (; hp >= 0; hp--) { + h = incs[hp]; + i = lo + h; + while (1) { + if (i > hi) break; + v = si->symtab[i]; + j = i; + while (si->symtab[j-h].addr > v.addr) { + si->symtab[j] = si->symtab[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + si->symtab[j] = v; + i++; + } + } + + cleanup_more: + + /* If two symbols have identical address ranges, favour the + one with the longer name. + */ + do { + n_merged = 0; + j = si->symtab_used; + si->symtab_used = 0; + for (i = 0; i < j; i++) { + if (i < j-1 + && si->symtab[i].addr == si->symtab[i+1].addr + && si->symtab[i].size == si->symtab[i+1].size) { + n_merged++; + /* merge the two into one */ + if (VG_(strlen)(&si->strtab[si->symtab[i].nmoff]) + > VG_(strlen)(&si->strtab[si->symtab[i+1].nmoff])) { + si->symtab[si->symtab_used++] = si->symtab[i]; + } else { + si->symtab[si->symtab_used++] = si->symtab[i+1]; + } + i++; + } else { + si->symtab[si->symtab_used++] = si->symtab[i]; + } + } + if (VG_(clo_trace_symtab)) + VG_(printf)( "%d merged\n", n_merged); + } + while (n_merged > 0); + + /* Detect and "fix" overlapping address ranges. 
*/ + n_truncated = 0; + + for (i = 0; i < si->symtab_used-1; i++) { + + vg_assert(si->symtab[i].addr <= si->symtab[i+1].addr); + + /* Check for common (no overlap) case. */ + if (si->symtab[i].addr + si->symtab[i].size + <= si->symtab[i+1].addr) + continue; + + /* There's an overlap. Truncate one or the other. */ + if (VG_(clo_trace_symtab)) { + VG_(printf)("overlapping address ranges in symbol table\n\t"); + printSym(si,i); + VG_(printf)("\t"); + printSym(si,i+1); + VG_(printf)("\n"); + } + + /* Truncate one or the other. */ + s1 = si->symtab[i].addr; + s2 = si->symtab[i+1].addr; + e1 = s1 + si->symtab[i].size - 1; + e2 = s2 + si->symtab[i+1].size - 1; + if (s1 < s2) { + e1 = s2-1; + } else { + vg_assert(s1 == s2); + if (e1 > e2) { + s1 = e2+1; SWAP(Addr,s1,s2); SWAP(Addr,e1,e2); + } else + if (e1 < e2) { + s2 = e1+1; + } else { + /* e1 == e2. Identical addr ranges. We'll eventually wind + up back at cleanup_more, which will take care of it. */ + } + } + si->symtab[i].addr = s1; + si->symtab[i+1].addr = s2; + si->symtab[i].size = e1 - s1 + 1; + si->symtab[i+1].size = e2 - s2 + 1; + vg_assert(s1 <= s2); + vg_assert(si->symtab[i].size > 0); + vg_assert(si->symtab[i+1].size > 0); + /* It may be that the i+1 entry now needs to be moved further + along to maintain the address order requirement. */ + j = i+1; + while (j < si->symtab_used-1 + && si->symtab[j].addr > si->symtab[j+1].addr) { + SWAP(RiSym,si->symtab[j],si->symtab[j+1]); + j++; + } + n_truncated++; + } + + if (n_truncated > 0) goto cleanup_more; + + /* Ensure relevant postconditions hold. */ + for (i = 0; i < si->symtab_used-1; i++) { + /* No zero-sized symbols. */ + vg_assert(si->symtab[i].size > 0); + /* In order. */ + vg_assert(si->symtab[i].addr < si->symtab[i+1].addr); + /* No overlaps. */ + vg_assert(si->symtab[i].addr + si->symtab[i].size - 1 + < si->symtab[i+1].addr); + } +# undef SWAP +} + + + +/* Sort the location table by starting address. 
Mash the table around + so as to establish the property that addresses are in order and the + ranges do not overlap. This facilitates using binary search to map + addresses to locations when we come to query the table. */ +static +void canonicaliseLoctab ( SegInfo* si ) +{ + /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */ + Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968, + 4592, 13776, 33936, 86961, 198768, + 463792, 1391376 }; + Int lo = 0; + Int hi = si->loctab_used-1; + Int i, j, h, bigN, hp; + RiLoc v; + +# define SWAP(ty,aa,bb) \ + do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0); + + /* Sort by start address. */ + + bigN = hi - lo + 1; if (bigN < 2) return; + hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--; + vg_assert(0 <= hp && hp < 16); + + for (; hp >= 0; hp--) { + h = incs[hp]; + i = lo + h; + while (1) { + if (i > hi) break; + v = si->loctab[i]; + j = i; + while (si->loctab[j-h].addr > v.addr) { + si->loctab[j] = si->loctab[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + si->loctab[j] = v; + i++; + } + } + + /* If two adjacent entries overlap, truncate the first. */ + for (i = 0; i < si->loctab_used-1; i++) { + vg_assert(si->loctab[i].size < 10000); + if (si->loctab[i].addr + si->loctab[i].size > si->loctab[i+1].addr) { + /* Do this in signed int32 because the actual .size fields + are unsigned 16s. */ + Int new_size = si->loctab[i+1].addr - si->loctab[i].addr; + if (new_size < 0) { + si->loctab[i].size = 0; + } else + if (new_size >= 65536) { + si->loctab[i].size = 65535; + } else { + si->loctab[i].size = (UShort)new_size; + } + } + } + + /* Zap any zero-sized entries resulting from the truncation + process. */ + j = 0; + for (i = 0; i < si->loctab_used; i++) { + if (si->loctab[i].size > 0) { + si->loctab[j] = si->loctab[i]; + j++; + } + } + si->loctab_used = j; + + /* Ensure relevant postconditions hold. 
*/ + for (i = 0; i < si->loctab_used-1; i++) { + /* + VG_(printf)("%d (%d) %d 0x%x\n", + i, si->loctab[i+1].confident, + si->loctab[i+1].size, si->loctab[i+1].addr ); + */ + /* No zero-sized symbols. */ + vg_assert(si->loctab[i].size > 0); + /* In order. */ + vg_assert(si->loctab[i].addr < si->loctab[i+1].addr); + /* No overlaps. */ + vg_assert(si->loctab[i].addr + si->loctab[i].size - 1 + < si->loctab[i+1].addr); + } +# undef SWAP +} + + +/*------------------------------------------------------------*/ +/*--- Read info from a .so/exe file. ---*/ +/*------------------------------------------------------------*/ + +static __inline__ +void addLineInfo ( SegInfo* si, + Int fnmoff, + Addr start, + Addr end, + UInt lineno ) +{ + RiLoc loc; + UInt size = end - start + 1; +# if 0 + if (size > 10000) + VG_(printf)( "line %4d: %p .. %p, in %s\n", + lineno, start, end, + &si->strtab[fnmoff] ); +# endif + /* Sanity ... */ + if (size > 10000) return; + + if (start >= si->start+si->size + || end < si->start) return; + + loc.addr = start; + loc.size = (UShort)size; + loc.lineno = lineno; + loc.fnmoff = fnmoff; + addLoc ( si, &loc ); +} + + +/* Read the symbols from the object/exe specified by the SegInfo into + the tables within the supplied SegInfo. 
*/ +static +void vg_read_lib_symbols ( SegInfo* si ) +{ + Elf32_Ehdr* ehdr; /* The ELF header */ + Elf32_Shdr* shdr; /* The section table */ + UChar* sh_strtab; /* The section table's string table */ + struct nlist* stab; /* The .stab table */ + UChar* stabstr; /* The .stab string table */ + Int stab_sz; /* Size in bytes of the .stab table */ + Int stabstr_sz; /* Size in bytes of the .stab string table */ + Int fd; + Int i; + Bool ok; + Addr oimage; + Int n_oimage; + struct stat stat_buf; + + /* for the .stabs reader */ + Int curr_filenmoff; + Addr curr_fnbaseaddr; + Addr range_startAddr; + Int range_lineno; + + oimage = (Addr)NULL; + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg, "Reading syms from %s", + si->filename ); + + /* mmap the object image aboard, so that we can read symbols and + line number info out of it. It will be munmapped immediately + thereafter; it is only aboard transiently. */ + + i = stat(si->filename, &stat_buf); + if (i != 0) { + vg_symerr("Can't stat .so/.exe (to determine its size)?!"); + return; + } + n_oimage = stat_buf.st_size; + + fd = VG_(open_read)(si->filename); + if (fd == -1) { + vg_symerr("Can't open .so/.exe to read symbols?!"); + return; + } + + oimage = (Addr)VG_(mmap)( NULL, n_oimage, PROT_READ, MAP_PRIVATE, fd, 0 ); + if (oimage == ((Addr)(-1))) { + VG_(message)(Vg_UserMsg, + "mmap failed on %s", si->filename ); + VG_(close)(fd); + return; + } + + VG_(close)(fd); + + /* Ok, the object image is safely in oimage[0 .. n_oimage-1]. + Now verify that it is a valid ELF .so or executable image. 
+ */ + ok = (n_oimage >= sizeof(Elf32_Ehdr)); + ehdr = (Elf32_Ehdr*)oimage; + + if (ok) { + ok &= (ehdr->e_ident[EI_MAG0] == 0x7F + && ehdr->e_ident[EI_MAG1] == 'E' + && ehdr->e_ident[EI_MAG2] == 'L' + && ehdr->e_ident[EI_MAG3] == 'F'); + ok &= (ehdr->e_ident[EI_CLASS] == ELFCLASS32 + && ehdr->e_ident[EI_DATA] == ELFDATA2LSB + && ehdr->e_ident[EI_VERSION] == EV_CURRENT); + ok &= (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN); + ok &= (ehdr->e_machine == EM_386); + ok &= (ehdr->e_version == EV_CURRENT); + ok &= (ehdr->e_shstrndx != SHN_UNDEF); + ok &= (ehdr->e_shoff != 0 && ehdr->e_shnum != 0); + } + + if (!ok) { + vg_symerr("Invalid ELF header, or missing stringtab/sectiontab."); + VG_(munmap) ( (void*)oimage, n_oimage ); + return; + } + + if (VG_(clo_trace_symtab)) + VG_(printf)( + "shoff = %d, shnum = %d, size = %d, n_vg_oimage = %d\n", + ehdr->e_shoff, ehdr->e_shnum, sizeof(Elf32_Shdr), n_oimage ); + + if (ehdr->e_shoff + ehdr->e_shnum*sizeof(Elf32_Shdr) > n_oimage) { + vg_symerr("ELF section header is beyond image end?!"); + VG_(munmap) ( (void*)oimage, n_oimage ); + return; + } + + shdr = (Elf32_Shdr*)(oimage + ehdr->e_shoff); + sh_strtab = (UChar*)(oimage + shdr[ehdr->e_shstrndx].sh_offset); + + /* try and read the object's symbol table */ + { + UChar* o_strtab = NULL; + Elf32_Sym* o_symtab = NULL; + UInt o_strtab_sz = 0; + UInt o_symtab_sz = 0; + + UChar* o_got = NULL; + UChar* o_plt = NULL; + UInt o_got_sz = 0; + UInt o_plt_sz = 0; + + Bool snaffle_it; + Addr sym_addr; + + /* find the .stabstr and .stab sections */ + for (i = 0; i < ehdr->e_shnum; i++) { + if (0 == VG_(strcmp)(".symtab",sh_strtab + shdr[i].sh_name)) { + o_symtab = (Elf32_Sym*)(oimage + shdr[i].sh_offset); + o_symtab_sz = shdr[i].sh_size; + vg_assert((o_symtab_sz % sizeof(Elf32_Sym)) == 0); + /* check image overrun here */ + } + if (0 == VG_(strcmp)(".strtab",sh_strtab + shdr[i].sh_name)) { + o_strtab = (UChar*)(oimage + shdr[i].sh_offset); + o_strtab_sz = shdr[i].sh_size; + /* check 
image overrun here */ + } + + /* find out where the .got and .plt sections will be in the + executable image, not in the object image transiently loaded. + */ + if (0 == VG_(strcmp)(".got",sh_strtab + shdr[i].sh_name)) { + o_got = (UChar*)(si->offset + + shdr[i].sh_offset); + o_got_sz = shdr[i].sh_size; + /* check image overrun here */ + } + if (0 == VG_(strcmp)(".plt",sh_strtab + shdr[i].sh_name)) { + o_plt = (UChar*)(si->offset + + shdr[i].sh_offset); + o_plt_sz = shdr[i].sh_size; + /* check image overrun here */ + } + + } + + if (VG_(clo_trace_symtab)) { + if (o_plt) VG_(printf)( "PLT: %p .. %p\n", + o_plt, o_plt + o_plt_sz - 1 ); + if (o_got) VG_(printf)( "GOT: %p .. %p\n", + o_got, o_got + o_got_sz - 1 ); + } + + if (o_strtab == NULL || o_symtab == NULL) { + vg_symerr(" object doesn't have a symbol table"); + } else { + /* Perhaps should start at i = 1; ELF docs suggest that entry + 0 always denotes `unknown symbol'. */ + for (i = 1; i < o_symtab_sz/sizeof(Elf32_Sym); i++){ +# if 0 + VG_(printf)("raw symbol: "); + switch (ELF32_ST_BIND(o_symtab[i].st_info)) { + case STB_LOCAL: VG_(printf)("LOC "); break; + case STB_GLOBAL: VG_(printf)("GLO "); break; + case STB_WEAK: VG_(printf)("WEA "); break; + case STB_LOPROC: VG_(printf)("lop "); break; + case STB_HIPROC: VG_(printf)("hip "); break; + default: VG_(printf)("??? "); break; + } + switch (ELF32_ST_TYPE(o_symtab[i].st_info)) { + case STT_NOTYPE: VG_(printf)("NOT "); break; + case STT_OBJECT: VG_(printf)("OBJ "); break; + case STT_FUNC: VG_(printf)("FUN "); break; + case STT_SECTION: VG_(printf)("SEC "); break; + case STT_FILE: VG_(printf)("FIL "); break; + case STT_LOPROC: VG_(printf)("lop "); break; + case STT_HIPROC: VG_(printf)("hip "); break; + default: VG_(printf)("??? "); break; + } + VG_(printf)( + ": value %p, size %d, name %s\n", + si->offset+(UChar*)o_symtab[i].st_value, + o_symtab[i].st_size, + o_symtab[i].st_name + ? 
((Char*)o_strtab+o_symtab[i].st_name) + : (Char*)"NONAME"); +# endif + + /* Figure out if we're interested in the symbol. + Firstly, is it of the right flavour? + */ + snaffle_it + = ( (ELF32_ST_BIND(o_symtab[i].st_info) == STB_GLOBAL || + ELF32_ST_BIND(o_symtab[i].st_info) == STB_LOCAL /* || + ELF32_ST_BIND(o_symtab[i].st_info) == STB_WEAK */) + && + (ELF32_ST_TYPE(o_symtab[i].st_info) == STT_FUNC /*|| + ELF32_ST_TYPE(o_symtab[i].st_info) == STT_OBJECT*/) + ); + + /* Secondly, if it's apparently in a GOT or PLT, it's really + a reference to a symbol defined elsewhere, so ignore it. + */ + sym_addr = si->offset + + (UInt)o_symtab[i].st_value; + if (o_got != NULL + && sym_addr >= (Addr)o_got + && sym_addr < (Addr)(o_got+o_got_sz)) { + snaffle_it = False; + if (VG_(clo_trace_symtab)) { + VG_(printf)( "in GOT: %s\n", + o_strtab+o_symtab[i].st_name); + } + } + if (o_plt != NULL + && sym_addr >= (Addr)o_plt + && sym_addr < (Addr)(o_plt+o_plt_sz)) { + snaffle_it = False; + if (VG_(clo_trace_symtab)) { + VG_(printf)( "in PLT: %s\n", + o_strtab+o_symtab[i].st_name); + } + } + + /* Don't bother if nameless, or zero-sized. */ + if (snaffle_it + && (o_symtab[i].st_name == (Elf32_Word)NULL + || /* VG_(strlen)(o_strtab+o_symtab[i].st_name) == 0 */ + /* equivalent but cheaper ... */ + * ((UChar*)(o_strtab+o_symtab[i].st_name)) == 0 + || o_symtab[i].st_size == 0)) { + snaffle_it = False; + if (VG_(clo_trace_symtab)) { + VG_(printf)( "size=0: %s\n", + o_strtab+o_symtab[i].st_name); + } + } + +# if 0 + /* Avoid _dl_ junk. (Why?) */ + /* 01-02-24: disabled until I find out if it really helps. 
*/ + if (snaffle_it + && (VG_(strncmp)("_dl_", o_strtab+o_symtab[i].st_name, 4) == 0 + || VG_(strncmp)("_r_debug", + o_strtab+o_symtab[i].st_name, 8) == 0)) { + snaffle_it = False; + if (VG_(clo_trace_symtab)) { + VG_(printf)( "_dl_ junk: %s\n", + o_strtab+o_symtab[i].st_name); + } + } +# endif + + /* This seems to significantly reduce the number of junk + symbols, and particularly reduces the number of + overlapping address ranges. Don't ask me why ... */ + if (snaffle_it && (Int)o_symtab[i].st_value == 0) { + snaffle_it = False; + if (VG_(clo_trace_symtab)) { + VG_(printf)( "valu=0: %s\n", + o_strtab+o_symtab[i].st_name); + } + } + + /* If no part of the symbol falls within the mapped range, + ignore it. */ + if (sym_addr+o_symtab[i].st_size <= si->start + || sym_addr >= si->start+si->size) { + snaffle_it = False; + } + + if (snaffle_it) { + /* it's an interesting symbol; record ("snaffle") it. */ + RiSym sym; + Char* t0 = o_symtab[i].st_name + ? (Char*)(o_strtab+o_symtab[i].st_name) + : (Char*)"NONAME"; + Int nmoff = addStr ( si, t0 ); + vg_assert(nmoff >= 0 + /* && 0==VG_(strcmp)(t0,&vg_strtab[nmoff]) */ ); + vg_assert( (Int)o_symtab[i].st_value >= 0); + /* VG_(printf)("%p + %d: %s\n", si->addr, + (Int)o_symtab[i].st_value, t0 ); */ + sym.addr = sym_addr; + sym.size = o_symtab[i].st_size; + sym.nmoff = nmoff; + addSym ( si, &sym ); + } + } + } + } + + /* Reading of the "stabs" debug format information, if any. 
*/ + stabstr = NULL; + stab = NULL; + stabstr_sz = 0; + stab_sz = 0; + /* find the .stabstr and .stab sections */ + for (i = 0; i < ehdr->e_shnum; i++) { + if (0 == VG_(strcmp)(".stab",sh_strtab + shdr[i].sh_name)) { + stab = (struct nlist *)(oimage + shdr[i].sh_offset); + stab_sz = shdr[i].sh_size; + } + if (0 == VG_(strcmp)(".stabstr",sh_strtab + shdr[i].sh_name)) { + stabstr = (UChar*)(oimage + shdr[i].sh_offset); + stabstr_sz = shdr[i].sh_size; + } + } + + if (stab == NULL || stabstr == NULL) { + vg_symerr(" object doesn't have any debug info"); + VG_(munmap) ( (void*)oimage, n_oimage ); + return; + } + + if ( stab_sz + (UChar*)stab > n_oimage + (UChar*)oimage + || stabstr_sz + (UChar*)stabstr + > n_oimage + (UChar*)oimage ) { + vg_symerr(" ELF debug data is beyond image end?!"); + VG_(munmap) ( (void*)oimage, n_oimage ); + return; + } + + /* Ok. It all looks plausible. Go on and read debug data. + stab kinds: 100 N_SO a source file name + 68 N_SLINE a source line number + 36 N_FUN ? start of a function + + In this loop, we maintain a current file name, updated + as N_SOs appear, and a current function base address, + updated as N_FUNs appear. Based on that, address ranges + for N_SLINEs are calculated, and stuffed into the + line info table. + + N_SLINE indicates the start of a source line. Functions are + delimited by N_FUNS, at the start with a non-empty string and at + the end with an empty string. The latter facilitates detecting + where to close the last N_SLINE for a function. 
+ */ + curr_filenmoff = addStr(si,"???"); + curr_fnbaseaddr = (Addr)NULL; + range_startAddr = 0; + range_lineno = 0; + + for (i = 0; i < stab_sz/(int)sizeof(struct nlist); i++) { +# if 0 + VG_(printf) ( " %2d ", i ); + VG_(printf) ( "type=0x%x othr=%d desc=%d value=0x%x strx=%d %s", + stab[i].n_type, stab[i].n_other, stab[i].n_desc, + (int)stab[i].n_value, + (int)stab[i].n_un.n_strx, + stabstr + stab[i].n_un.n_strx ); + VG_(printf)("\n"); +# endif + + switch (stab[i].n_type) { + + case 68: { /* N_SLINE */ + /* flush the current line, if any, and start a new one */ + Addr range_endAddr + = curr_fnbaseaddr + + (UInt)stab[i].n_value - 1; + if (range_startAddr != 0) { + addLineInfo ( si, + curr_filenmoff, + range_startAddr, + range_endAddr, + range_lineno ); + } + range_startAddr = range_endAddr + 1; + range_lineno = stab[i].n_desc; + break; + } + + case 36: { /* N_FUN */ + if ('\0' == * (stabstr + stab[i].n_un.n_strx) ) { + /* N_FUN with no name -- indicates the end of a fn. + Flush the current line, if any, but don't start a + new one. */ + Addr range_endAddr + = curr_fnbaseaddr + + (UInt)stab[i].n_value - 1; + if (range_startAddr != 0) { + addLineInfo ( si, + curr_filenmoff, + range_startAddr, + range_endAddr, + range_lineno ); + } + range_startAddr = 0; + } else { + /* N_FUN with a name -- indicates the start of a fn. 
*/ + curr_fnbaseaddr = si->offset + + (Addr)stab[i].n_value; + range_startAddr = curr_fnbaseaddr; + } + break; + } + + case 100: /* N_SO */ + case 132: /* N_SOL */ + /* seems to give lots of locations in header files */ + /* case 130: */ /* BINCL */ + { + UChar* nm = stabstr + stab[i].n_un.n_strx; + UInt len = VG_(strlen)(nm); + if (len > 0 && nm[len-1] != '/') + curr_filenmoff = addStr ( si, nm ); + else + if (len == 0) + curr_filenmoff = addStr ( si, "?1\0" ); + break; + } + +# if 0 + case 162: /* EINCL */ + curr_filenmoff = addStr ( si, "?2\0" ); + break; +# endif + + default: + break; + } + } /* for (i = 0; i < stab_sz/(int)sizeof(struct nlist); i++) */ + + /* Last, but not least, heave the oimage back overboard. */ + VG_(munmap) ( (void*)oimage, n_oimage ); +} + + +/*------------------------------------------------------------*/ +/*--- Main entry point for symbols table reading. ---*/ +/*------------------------------------------------------------*/ + +/* The root structure for the entire symbol table system. It is a + linked list of SegInfos. Note that this entire mechanism assumes + that what we read from /proc/self/maps doesn't contain overlapping + address ranges, and as a result the SegInfos in this list describe + disjoint address ranges. +*/ +static SegInfo* segInfo = NULL; + + +static +void read_symtab_callback ( + Addr start, UInt size, + Char rr, Char ww, Char xx, + UInt foffset, UChar* filename ) +{ + SegInfo* si; + + /* Stay sane ... */ + if (size == 0) + return; + + /* We're only interested in collecting symbols in executable + segments which are associated with a real file. Hence: */ + if (filename == NULL || xx != 'x') + return; + if (0 == VG_(strcmp)(filename, "/dev/zero")) + return; + + /* Perhaps we already have this one? If so, skip. 
*/ + for (si = segInfo; si != NULL; si = si->next) { + /* + if (0==VG_(strcmp)(si->filename, filename)) + VG_(printf)("same fnames: %c%c%c (%p, %d) (%p, %d) %s\n", + rr,ww,xx,si->start,si->size,start,size,filename); + */ + /* For some reason the observed size of a mapping can change, so + we don't use that to determine uniqueness. */ + if (si->start == start + /* && si->size == size */ + && 0==VG_(strcmp)(si->filename, filename)) { + return; + } + } + + /* Get the record initialised right. */ + si = VG_(malloc)(VG_AR_SYMTAB, sizeof(SegInfo)); + si->next = segInfo; + segInfo = si; + + si->start = start; + si->size = size; + si->foffset = foffset; + si->filename = VG_(malloc)(VG_AR_SYMTAB, 1 + VG_(strlen)(filename)); + VG_(strcpy)(si->filename, filename); + + si->symtab = NULL; + si->symtab_size = si->symtab_used = 0; + si->loctab = NULL; + si->loctab_size = si->loctab_used = 0; + si->strtab = NULL; + si->strtab_size = si->strtab_used = 0; + + /* Kludge ... */ + si->offset + = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start; + + /* And actually fill it up. */ + vg_read_lib_symbols ( si ); + canonicaliseSymtab ( si ); + canonicaliseLoctab ( si ); +} + + +/* This one really is the Head Honcho. Update the symbol tables to + reflect the current state of /proc/self/maps. Rather than re-read + everything, just read the entries which are not already in segInfo. + So we can call here repeatedly, after every mmap of a non-anonymous + segment with execute permissions, for example, to pick up new + libraries as they are dlopen'd. Conversely, when the client does + munmap(), vg_symtab_notify_munmap() throws away any symbol tables + which happen to correspond to the munmap()d area. */ +void VG_(read_symbols) ( void ) +{ + if (! VG_(clo_instrument)) + return; + + VG_(read_procselfmaps) ( read_symtab_callback ); + + /* Do a sanity check on the symbol tables: ensure that the address + space pieces they cover do not overlap (otherwise we are severely + hosed). 
This is a quadratic algorithm, but there shouldn't be + many of them. + */ + { SegInfo *si, *si2; + for (si = segInfo; si != NULL; si = si->next) { + /* Check no overlap between *si and those in the rest of the + list. */ + for (si2 = si->next; si2 != NULL; si2 = si2->next) { + Addr lo = si->start; + Addr hi = si->start + si->size - 1; + Addr lo2 = si2->start; + Addr hi2 = si2->start + si2->size - 1; + Bool overlap; + vg_assert(lo < hi); + vg_assert(lo2 < hi2); + /* the main assertion */ + overlap = (lo <= lo2 && lo2 <= hi) + || (lo <= hi2 && hi2 <= hi); + //vg_assert(!overlap); + if (overlap) { + VG_(printf)("\n\nOVERLAPPING SEGMENTS\n" ); + ppSegInfo ( si ); + ppSegInfo ( si2 ); + VG_(printf)("\n\n"); + vg_assert(! overlap); + } + } + } + } +} + + +/* When an munmap() call happens, check to see whether it corresponds + to a segment for a .so, and if so discard the relevant SegInfo. + This might not be a very clever idea from the point of view of + accuracy of error messages, but we need to do it in order to + maintain the no-overlapping invariant. +*/ +void VG_(symtab_notify_munmap) ( Addr start, UInt length ) +{ + SegInfo *prev, *curr; + + if (! VG_(clo_instrument)) + return; + + prev = NULL; + curr = segInfo; + while (True) { + if (curr == NULL) break; + if (start == curr->start) break; + prev = curr; + curr = curr->next; + } + if (curr == NULL) return; + + VG_(message)(Vg_UserMsg, + "discard syms in %s due to munmap()", + curr->filename ? curr->filename : (UChar*)"???"); + + vg_assert(prev == NULL || prev->next == curr); + + if (prev == NULL) { + segInfo = curr->next; + } else { + prev->next = curr->next; + } + + freeSegInfo(curr); +} + + +/*------------------------------------------------------------*/ +/*--- Use of symbol table & location info to create ---*/ +/*--- plausible-looking stack dumps. ---*/ +/*------------------------------------------------------------*/ + +/* Find a symbol-table index containing the specified pointer, or -1 + if not found. 
Binary search. */ + +static Int search_one_symtab ( SegInfo* si, Addr ptr ) +{ + Addr a_mid_lo, a_mid_hi; + Int mid, + lo = 0, + hi = si->symtab_used-1; + while (True) { + /* current unsearched space is from lo to hi, inclusive. */ + if (lo > hi) return -1; /* not found */ + mid = (lo + hi) / 2; + a_mid_lo = si->symtab[mid].addr; + a_mid_hi = ((Addr)si->symtab[mid].addr) + si->symtab[mid].size - 1; + + if (ptr < a_mid_lo) { hi = mid-1; continue; } + if (ptr > a_mid_hi) { lo = mid+1; continue; } + vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi); + return mid; + } +} + + +/* Search all symtabs that we know about to locate ptr. If found, set + *psi to the relevant SegInfo, and *symno to the symtab entry number + within that. If not found, *psi is set to NULL. */ + +static void search_all_symtabs ( Addr ptr, SegInfo** psi, Int* symno ) +{ + Int sno; + SegInfo* si; + for (si = segInfo; si != NULL; si = si->next) { + if (si->start <= ptr && ptr < si->start+si->size) { + sno = search_one_symtab ( si, ptr ); + if (sno == -1) goto not_found; + *symno = sno; + *psi = si; + return; + } + } + not_found: + *psi = NULL; +} + + +/* Find a location-table index containing the specified pointer, or -1 + if not found. Binary search. */ + +static Int search_one_loctab ( SegInfo* si, Addr ptr ) +{ + Addr a_mid_lo, a_mid_hi; + Int mid, + lo = 0, + hi = si->loctab_used-1; + while (True) { + /* current unsearched space is from lo to hi, inclusive. */ + if (lo > hi) return -1; /* not found */ + mid = (lo + hi) / 2; + a_mid_lo = si->loctab[mid].addr; + a_mid_hi = ((Addr)si->loctab[mid].addr) + si->loctab[mid].size - 1; + + if (ptr < a_mid_lo) { hi = mid-1; continue; } + if (ptr > a_mid_hi) { lo = mid+1; continue; } + vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi); + return mid; + } +} + + +/* Search all loctabs that we know about to locate ptr. If found, set + *psi to the relevant SegInfo, and *locno to the loctab entry number + within that. If not found, *psi is set to NULL. 
+*/
+static void search_all_loctabs ( Addr ptr, SegInfo** psi, Int* locno )
+{
+   Int      lno;
+   SegInfo* si;
+   for (si = segInfo; si != NULL; si = si->next) {
+      if (si->start <= ptr && ptr < si->start+si->size) {
+         lno = search_one_loctab ( si, ptr );
+         if (lno == -1) goto not_found;
+         *locno = lno;
+         *psi = si;
+         return;
+      }
+   }
+  not_found:
+   *psi = NULL;
+}
+
+
+/* The whole point of this whole big deal: map a code address to a
+   plausible symbol name.  Returns False if no idea; otherwise True.
+   Caller supplies buf and nbuf.  If no_demangle is True, don't do
+   demangling, regardless of vg_clo_demangle -- probably because the
+   call has come from vg_what_fn_or_object_is_this. */
+static
+Bool vg_what_fn_is_this ( Bool no_demangle, Addr a,
+                          Char* buf, Int nbuf )
+{
+   SegInfo* si;
+   Int      sno;
+   search_all_symtabs ( a, &si, &sno );
+   if (si == NULL)
+      return False;
+   if (no_demangle) {
+      VG_(strncpy_safely)
+         ( buf, & si->strtab[si->symtab[sno].nmoff], nbuf );
+   } else {
+      VG_(demangle) ( & si->strtab[si->symtab[sno].nmoff], buf, nbuf );
+   }
+   return True;
+}
+
+
+/* Map a code address to the name of a shared object file.  Returns
+   True if successful; otherwise False.  Caller supplies buf and
+   nbuf. */
+static
+Bool vg_what_object_is_this ( Addr a, Char* buf, Int nbuf )
+{
+   SegInfo* si;
+   for (si = segInfo; si != NULL; si = si->next) {
+      if (si->start <= a && a < si->start+si->size) {
+         VG_(strncpy_safely)(buf, si->filename, nbuf);
+         return True;
+      }
+   }
+   return False;
+}
+
+/* Return the name of an erring fn in a way which is useful
+   for comparing against the contents of a suppressions file.
+   Always writes something to buf.  Also, doesn't demangle the
+   name, because we want to refer to mangled names in the
+   suppressions file.
+*/ +void VG_(what_obj_and_fun_is_this) ( Addr a, + Char* obj_buf, Int n_obj_buf, + Char* fun_buf, Int n_fun_buf ) +{ + (void)vg_what_object_is_this ( a, obj_buf, n_obj_buf ); + (void)vg_what_fn_is_this ( True, a, fun_buf, n_fun_buf ); +} + + +/* Map a code address to a (filename, line number) pair. + Returns True if successful. +*/ +static +Bool vg_what_line_is_this ( Addr a, + UChar* filename, Int n_filename, + UInt* lineno ) +{ + SegInfo* si; + Int locno; + search_all_loctabs ( a, &si, &locno ); + if (si == NULL) + return False; + VG_(strncpy_safely)(filename, & si->strtab[si->loctab[locno].fnmoff], + n_filename); + *lineno = si->loctab[locno].lineno; + return True; +} + + +/* Print a mini stack dump, showing the current location. */ +void VG_(mini_stack_dump) ( ExeContext* ec ) +{ + +#define APPEND(str) \ + { UChar* sss; \ + for (sss = str; n < M_VG_ERRTXT-1 && *sss != 0; n++,sss++) \ + buf[n] = *sss; \ + buf[n] = 0; \ + } + + Bool know_fnname; + Bool know_objname; + Bool know_srcloc; + UInt lineno; + UChar ibuf[20]; + UInt i, n, clueless; + + UChar buf[M_VG_ERRTXT]; + UChar buf_fn[M_VG_ERRTXT]; + UChar buf_obj[M_VG_ERRTXT]; + UChar buf_srcloc[M_VG_ERRTXT]; + + Int stop_at = VG_(clo_backtrace_size); + + n = 0; + + know_fnname = vg_what_fn_is_this(False,ec->eips[0], buf_fn, M_VG_ERRTXT); + know_objname = vg_what_object_is_this(ec->eips[0], buf_obj, M_VG_ERRTXT); + know_srcloc = vg_what_line_is_this(ec->eips[0], + buf_srcloc, M_VG_ERRTXT, + &lineno); + + APPEND(" at "); + VG_(sprintf)(ibuf,"0x%x: ", ec->eips[0]); + APPEND(ibuf); + if (know_fnname) { + APPEND(buf_fn); + if (!know_srcloc && know_objname) { + APPEND(" (in "); + APPEND(buf_obj); + APPEND(")"); + } + } else if (know_objname && !know_srcloc) { + APPEND("(within "); + APPEND(buf_obj); + APPEND(")"); + } else { + APPEND("???"); + } + if (know_srcloc) { + APPEND(" ("); + APPEND(buf_srcloc); + APPEND(":"); + VG_(sprintf)(ibuf,"%d",lineno); + APPEND(ibuf); + APPEND(")"); + } + VG_(message)(Vg_UserMsg, "%s", 
buf); + + clueless = 0; + for (i = 1; i < stop_at; i++) { + know_fnname = vg_what_fn_is_this(False,ec->eips[i], buf_fn, M_VG_ERRTXT); + know_objname = vg_what_object_is_this(ec->eips[i],buf_obj, M_VG_ERRTXT); + know_srcloc = vg_what_line_is_this(ec->eips[i], + buf_srcloc, M_VG_ERRTXT, + &lineno); + n = 0; + APPEND(" by "); + if (ec->eips[i] == 0) { + APPEND(" "); + } else { + VG_(sprintf)(ibuf,"0x%x: ",ec->eips[i]); + APPEND(ibuf); + } + if (know_fnname) { + APPEND(buf_fn) + if (!know_srcloc && know_objname) { + APPEND(" (in "); + APPEND(buf_obj); + APPEND(")"); + } + } else { + if (know_objname && !know_srcloc) { + APPEND("(within "); + APPEND(buf_obj); + APPEND(")"); + } else { + APPEND("???"); + } + if (!know_srcloc) clueless++; + if (clueless == 2) + i = stop_at; /* force exit after this iteration */ + }; + if (know_srcloc) { + APPEND(" ("); + APPEND(buf_srcloc); + APPEND(":"); + VG_(sprintf)(ibuf,"%d",lineno); + APPEND(ibuf); + APPEND(")"); + } + VG_(message)(Vg_UserMsg, "%s", buf); + } +} + +#undef APPEND + +/*--------------------------------------------------------------------*/ +/*--- end vg_symtab2.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_syscall.S b/coregrind/vg_syscall.S new file mode 100644 index 000000000..210328a69 --- /dev/null +++ b/coregrind/vg_syscall.S @@ -0,0 +1,179 @@ + +##--------------------------------------------------------------------## +##--- Support for doing system calls. ---## +##--- vg_syscall.S ---## +##--------------------------------------------------------------------## + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. 
+ + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_constants.h" + + +.globl VG_(do_syscall) + +# NOTE that this routine expects the simulated machines state +# to be in m_state_static. Therefore it needs to be wrapped by +# code which copies from baseBlock before the call, into +# m_state_static, and back afterwards. + +VG_(do_syscall): + cmpl $2, VG_(syscall_depth) + jz do_syscall_DEPTH_2 + + # depth 1 copy follows ... + # Save all the int registers of the real machines state on the + # simulators stack. 
+ pushal + + # and save the real FPU state too + fwait + fnsave VG_(real_fpu_state_saved_over_syscall_d1) + frstor VG_(real_fpu_state_saved_over_syscall_d1) + + # remember what the simulators stack pointer is + movl %esp, VG_(esp_saved_over_syscall_d1) + + # Now copy the simulated machines state into the real one + # esp still refers to the simulators stack + frstor VG_(m_state_static)+40 + movl VG_(m_state_static)+32, %eax + pushl %eax + popfl + movl VG_(m_state_static)+0, %eax + movl VG_(m_state_static)+4, %ecx + movl VG_(m_state_static)+8, %edx + movl VG_(m_state_static)+12, %ebx + movl VG_(m_state_static)+16, %esp + movl VG_(m_state_static)+20, %ebp + movl VG_(m_state_static)+24, %esi + movl VG_(m_state_static)+28, %edi + + # esp now refers to the simulatees stack + # Do the actual system call + int $0x80 + + # restore stack as soon as possible + # esp refers to simulatees stack + movl %esp, VG_(m_state_static)+16 + movl VG_(esp_saved_over_syscall_d1), %esp + # esp refers to simulators stack + + # ... and undo everything else. + # Copy real state back to simulated state. + movl %eax, VG_(m_state_static)+0 + movl %ecx, VG_(m_state_static)+4 + movl %edx, VG_(m_state_static)+8 + movl %ebx, VG_(m_state_static)+12 + movl %ebp, VG_(m_state_static)+20 + movl %esi, VG_(m_state_static)+24 + movl %edi, VG_(m_state_static)+28 + pushfl + popl %eax + movl %eax, VG_(m_state_static)+32 + fwait + fnsave VG_(m_state_static)+40 + frstor VG_(m_state_static)+40 + + # Restore the state of the simulator + frstor VG_(real_fpu_state_saved_over_syscall_d1) + popal + + ret + + + + + + + + +do_syscall_DEPTH_2: + + # depth 2 copy follows ... + # Save all the int registers of the real machines state on the + # simulators stack. 
+ pushal + + # and save the real FPU state too + fwait + fnsave VG_(real_fpu_state_saved_over_syscall_d2) + frstor VG_(real_fpu_state_saved_over_syscall_d2) + + # remember what the simulators stack pointer is + movl %esp, VG_(esp_saved_over_syscall_d2) + + # Now copy the simulated machines state into the real one + # esp still refers to the simulators stack + frstor VG_(m_state_static)+40 + movl VG_(m_state_static)+32, %eax + pushl %eax + popfl + movl VG_(m_state_static)+0, %eax + movl VG_(m_state_static)+4, %ecx + movl VG_(m_state_static)+8, %edx + movl VG_(m_state_static)+12, %ebx + movl VG_(m_state_static)+16, %esp + movl VG_(m_state_static)+20, %ebp + movl VG_(m_state_static)+24, %esi + movl VG_(m_state_static)+28, %edi + + # esp now refers to the simulatees stack + # Do the actual system call + int $0x80 + + # restore stack as soon as possible + # esp refers to simulatees stack + movl %esp, VG_(m_state_static)+16 + movl VG_(esp_saved_over_syscall_d2), %esp + # esp refers to simulators stack + + # ... and undo everything else. + # Copy real state back to simulated state. 
+ movl %eax, VG_(m_state_static)+0 + movl %ecx, VG_(m_state_static)+4 + movl %edx, VG_(m_state_static)+8 + movl %ebx, VG_(m_state_static)+12 + movl %ebp, VG_(m_state_static)+20 + movl %esi, VG_(m_state_static)+24 + movl %edi, VG_(m_state_static)+28 + pushfl + popl %eax + movl %eax, VG_(m_state_static)+32 + fwait + fnsave VG_(m_state_static)+40 + frstor VG_(m_state_static)+40 + + # Restore the state of the simulator + frstor VG_(real_fpu_state_saved_over_syscall_d2) + popal + + ret + + +##--------------------------------------------------------------------## +##--- end vg_syscall.S ---## +##--------------------------------------------------------------------## diff --git a/coregrind/vg_to_ucode.c b/coregrind/vg_to_ucode.c new file mode 100644 index 000000000..f31214889 --- /dev/null +++ b/coregrind/vg_to_ucode.c @@ -0,0 +1,4309 @@ + +/*--------------------------------------------------------------------*/ +/*--- The JITter: translate x86 code to ucode. ---*/ +/*--- vg_to_ucode.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" + + +/*------------------------------------------------------------*/ +/*--- Renamings of frequently-used global functions. ---*/ +/*------------------------------------------------------------*/ + +#define uInstr0 VG_(newUInstr0) +#define uInstr1 VG_(newUInstr1) +#define uInstr2 VG_(newUInstr2) +#define uInstr3 VG_(newUInstr3) +#define dis VG_(disassemble) +#define nameIReg VG_(nameOfIntReg) +#define nameISize VG_(nameOfIntSize) +#define newTemp VG_(getNewTemp) +#define uLiteral VG_(setLiteralField) + + +/*------------------------------------------------------------*/ +/*--- Here so it can be inlined everywhere. ---*/ +/*------------------------------------------------------------*/ + +/* Allocate a new temp reg number. */ +__inline__ Int VG_(getNewTemp) ( UCodeBlock* cb ) +{ + Int t = cb->nextTemp; + cb->nextTemp += 2; + return t; +} + +Int VG_(getNewShadow) ( UCodeBlock* cb ) +{ + Int t = cb->nextTemp; + cb->nextTemp += 2; + return SHADOW(t); +} + +/* Handy predicates. */ +#define SMC_IF_SOME(cb) \ + do { \ + if (VG_(clo_smc_check) >= VG_CLO_SMC_SOME) { \ + LAST_UINSTR((cb)).smc_check = True; \ + } \ + } while (0) + +#define SMC_IF_ALL(cb) \ + do { \ + if (VG_(clo_smc_check) == VG_CLO_SMC_ALL) { \ + LAST_UINSTR((cb)).smc_check = True; \ + } \ + } while (0) + + +/*------------------------------------------------------------*/ +/*--- Helper bits and pieces for deconstructing the ---*/ +/*--- x86 insn stream. 
---*/ +/*------------------------------------------------------------*/ + +static Char* nameGrp1 ( Int opc_aux ) +{ + static Char* grp1_names[8] + = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" }; + if (opc_aux < 0 || opc_aux > 7) VG_(panic)("nameGrp1"); + return grp1_names[opc_aux]; +} + +static Char* nameGrp2 ( Int opc_aux ) +{ + static Char* grp2_names[8] + = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" }; + if (opc_aux < 0 || opc_aux > 7) VG_(panic)("nameGrp2"); + return grp2_names[opc_aux]; +} + +static Char* nameGrp4 ( Int opc_aux ) +{ + static Char* grp4_names[8] + = { "inc", "dec", "???", "???", "???", "???", "???", "???" }; + if (opc_aux < 0 || opc_aux > 1) VG_(panic)("nameGrp4"); + return grp4_names[opc_aux]; +} + +static Char* nameGrp5 ( Int opc_aux ) +{ + static Char* grp5_names[8] + = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" }; + if (opc_aux < 0 || opc_aux > 6) VG_(panic)("nameGrp5"); + return grp5_names[opc_aux]; +} + +static Char* nameGrp8 ( Int opc_aux ) +{ + static Char* grp8_names[8] + = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" }; + if (opc_aux < 4 || opc_aux > 7) VG_(panic)("nameGrp8"); + return grp8_names[opc_aux]; +} + +Char* VG_(nameOfIntReg) ( Int size, Int reg ) +{ + static Char* ireg32_names[8] + = { "%eax", "%ecx", "%edx", "%ebx", + "%esp", "%ebp", "%esi", "%edi" }; + static Char* ireg16_names[8] + = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" }; + static Char* ireg8_names[8] + = { "%al", "%cl", "%dl", "%bl", "%ah{sp}", "%ch{bp}", "%dh{si}", "%bh{di}" }; + if (reg < 0 || reg > 7) goto bad; + switch (size) { + case 4: return ireg32_names[reg]; + case 2: return ireg16_names[reg]; + case 1: return ireg8_names[reg]; + } + bad: + VG_(panic)("nameOfIntReg"); + return NULL; /*notreached*/ +} + +Char VG_(nameOfIntSize) ( Int size ) +{ + switch (size) { + case 4: return 'l'; + case 2: return 'w'; + case 1: return 'b'; + default: VG_(panic)("nameOfIntSize"); + } +} + 
+__inline__ UInt VG_(extend_s_8to32) ( UInt x ) +{ + return (UInt)((((Int)x) << 24) >> 24); +} + +__inline__ static UInt extend_s_16to32 ( UInt x ) +{ + return (UInt)((((Int)x) << 16) >> 16); +} + + +/* Get a byte value out of the insn stream and sign-extend to 32 + bits. */ +__inline__ static UInt getSDisp8 ( Addr eip0 ) +{ + UChar* eip = (UChar*)eip0; + return VG_(extend_s_8to32)( (UInt) (eip[0]) ); +} + +__inline__ static UInt getSDisp16 ( Addr eip0 ) +{ + UChar* eip = (UChar*)eip0; + UInt d = *eip++; + d |= ((*eip++) << 8); + return extend_s_16to32(d); +} + +/* Get a 32-bit value out of the insn stream. */ +__inline__ static UInt getUDisp32 ( Addr eip0 ) +{ + UChar* eip = (UChar*)eip0; + UInt v = eip[3]; v <<= 8; + v |= eip[2]; v <<= 8; + v |= eip[1]; v <<= 8; + v |= eip[0]; + return v; +} + +__inline__ static UInt getUDisp16 ( Addr eip0 ) +{ + UChar* eip = (UChar*)eip0; + UInt v = eip[1]; v <<= 8; + v |= eip[0]; + return v; +} + +__inline__ static UChar getUChar ( Addr eip0 ) +{ + UChar* eip = (UChar*)eip0; + return eip[0]; +} + +__inline__ static UInt LOW24 ( UInt x ) +{ + return x & 0x00FFFFFF; +} + +__inline__ static UInt HI8 ( UInt x ) +{ + return x >> 24; +} + +__inline__ static UInt getUDisp ( Int size, Addr eip ) +{ + switch (size) { + case 4: return getUDisp32(eip); + case 2: return getUDisp16(eip); + case 1: return getUChar(eip); + default: VG_(panic)("getUDisp"); + } + return 0; /*notreached*/ +} + +__inline__ static UInt getSDisp ( Int size, Addr eip ) +{ + switch (size) { + case 4: return getUDisp32(eip); + case 2: return getSDisp16(eip); + case 1: return getSDisp8(eip); + default: VG_(panic)("getUDisp"); + } + return 0; /*notreached*/ +} + + +/*------------------------------------------------------------*/ +/*--- Flag-related helpers. ---*/ +/*------------------------------------------------------------*/ + +/* For the last uinsn inserted into cb, set the read, written and + undefined flags. 
Undefined flags are counted as written, but it + seems worthwhile to distinguish them. +*/ +static __inline__ void uFlagsRWU ( UCodeBlock* cb, + FlagSet rr, FlagSet ww, FlagSet uu ) +{ + VG_(setFlagRW)( + &LAST_UINSTR(cb), rr, VG_UNION_FLAG_SETS(ww,uu) + ); +} + + +static void setFlagsFromUOpcode ( UCodeBlock* cb, Int uopc ) +{ + switch (uopc) { + case XOR: case OR: case AND: + uFlagsRWU(cb, FlagsEmpty, FlagsOSZCP, FlagA); break; + case ADC: case SBB: + uFlagsRWU(cb, FlagC, FlagsOSZACP, FlagsEmpty); break; + case ADD: case SUB: case NEG: + uFlagsRWU(cb, FlagsEmpty, FlagsOSZACP, FlagsEmpty); break; + case INC: case DEC: + uFlagsRWU(cb, FlagsEmpty, FlagsOSZAP, FlagsEmpty); break; + case SHR: case SAR: case SHL: + uFlagsRWU(cb, FlagsEmpty, FlagsOSZCP, FlagA); break; + case ROL: case ROR: + uFlagsRWU(cb, FlagsEmpty, FlagsOC, FlagsEmpty); break; + case RCR: case RCL: + uFlagsRWU(cb, FlagC, FlagsOC, FlagsEmpty); break; + case NOT: + uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); break; + default: + VG_(printf)("unhandled case is %s\n", + VG_(nameUOpcode)(True, uopc)); + VG_(panic)("setFlagsFromUOpcode: unhandled case"); + } +} + +static __inline__ void uCond ( UCodeBlock* cb, Condcode cond ) +{ + LAST_UINSTR(cb).cond = cond; +} + + +/*------------------------------------------------------------*/ +/*--- Disassembling addressing modes ---*/ +/*------------------------------------------------------------*/ + +/* Generate ucode to calculate an address indicated by a ModRM and + following SIB bytes, getting the value in a new temporary. The + temporary, and the number of bytes in the address mode, are + returned, as a pair (length << 8) | temp. Note that this fn should + not be called if the R/M part of the address denotes a register + instead of memory. If buf is non-NULL, text of the addressing mode + is placed therein. 
*/ + +static UInt disAMode ( UCodeBlock* cb, Addr eip0, UChar* buf ) +{ + UChar* eip = (UChar*)eip0; + UChar mod_reg_rm = *eip++; + Int tmp = newTemp(cb); + + /* squeeze out the reg field from mod_reg_rm, since a 256-entry + jump table seems a bit excessive. + */ + mod_reg_rm &= 0xC7; /* is now XX000YYY */ + mod_reg_rm |= (mod_reg_rm >> 3); /* is now XX0XXYYY */ + mod_reg_rm &= 0x1F; /* is now 000XXYYY */ + switch (mod_reg_rm) { + + /* (%eax) .. (%edi), not including (%esp) or (%ebp). + --> GET %reg, t + */ + case 0x00: case 0x01: case 0x02: case 0x03: + /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: + { UChar rm = mod_reg_rm; + uInstr2(cb, GET, 4, ArchReg, rm, TempReg, tmp); + if (buf) VG_(sprintf)(buf,"(%s)", nameIReg(4,rm)); + return (1<<24 | tmp); + } + + /* d8(%eax) ... d8(%edi), not including d8(%esp) + --> GET %reg, t ; ADDL d8, t + */ + case 0x08: case 0x09: case 0x0A: case 0x0B: + /* ! 0C */ case 0x0D: case 0x0E: case 0x0F: + { UChar rm = mod_reg_rm & 7; + Int tmq = newTemp(cb); + UInt d = getSDisp8((Addr)eip); eip++; + uInstr2(cb, GET, 4, ArchReg, rm, TempReg, tmq); + uInstr2(cb, LEA1, 4, TempReg, tmq, TempReg, tmp); + LAST_UINSTR(cb).lit32 = d; + if (buf) VG_(sprintf)(buf,"%d(%s)", d, nameIReg(4,rm)); + return (2<<24 | tmp); + } + + /* d32(%eax) ... d32(%edi), not including d32(%esp) + --> GET %reg, t ; ADDL d8, t + */ + case 0x10: case 0x11: case 0x12: case 0x13: + /* ! 14 */ case 0x15: case 0x16: case 0x17: + { UChar rm = mod_reg_rm & 7; + Int tmq = newTemp(cb); + UInt d = getUDisp32((Addr)eip); eip += 4; + uInstr2(cb, GET, 4, ArchReg, rm, TempReg, tmq); + uInstr2(cb, LEA1, 4, TempReg, tmq, TempReg, tmp); + LAST_UINSTR(cb).lit32 = d; + if (buf) VG_(sprintf)(buf,"0x%x(%s)", d, nameIReg(4,rm)); + return (5<<24 | tmp); + } + + /* a register, %eax .. %edi. This shouldn't happen. 
*/ + case 0x18: case 0x19: case 0x1A: case 0x1B: + case 0x1C: case 0x1D: case 0x1E: case 0x1F: + VG_(panic)("disAMode: not an addr!"); + + /* a 32-bit literal address + --> MOV d32, tmp + */ + case 0x05: + { UInt d = getUDisp32((Addr)eip); eip += 4; + uInstr2(cb, MOV, 4, Literal, 0, TempReg, tmp); + uLiteral(cb, d); + if (buf) VG_(sprintf)(buf,"(0x%x)", d); + return (5<<24 | tmp); + } + + case 0x04: { + /* SIB, with no displacement. Special cases: + -- %esp cannot act as an index value. + If index_r indicates %esp, zero is used for the index. + -- when mod is zero and base indicates EBP, base is instead + a 32-bit literal. + It's all madness, I tell you. Extract %index, %base and + scale from the SIB byte. The value denoted is then: + | %index == %ESP && %base == %EBP + = d32 following SIB byte + | %index == %ESP && %base != %EBP + = %base + | %index != %ESP && %base == %EBP + = d32 following SIB byte + (%index << scale) + | %index != %ESP && %base != %ESP + = %base + (%index << scale) + + What happens to the souls of CPU architects who dream up such + horrendous schemes, do you suppose? 
+ */ + UChar sib = *eip++; + UChar scale = (sib >> 6) & 3; + UChar index_r = (sib >> 3) & 7; + UChar base_r = sib & 7; + + if (index_r != R_ESP && base_r != R_EBP) { + Int index_tmp = newTemp(cb); + Int base_tmp = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, index_r, TempReg, index_tmp); + uInstr2(cb, GET, 4, ArchReg, base_r, TempReg, base_tmp); + uInstr3(cb, LEA2, 4, TempReg, base_tmp, TempReg, index_tmp, + TempReg, tmp); + LAST_UINSTR(cb).lit32 = 0; + LAST_UINSTR(cb).extra4b = 1 << scale; + if (buf) VG_(sprintf)(buf,"(%s,%s,%d)", nameIReg(4,base_r), + nameIReg(4,index_r),1<> 6) & 3; + UChar index_r = (sib >> 3) & 7; + UChar base_r = sib & 7; + UInt d = getSDisp8((Addr)eip); eip++; + + if (index_r == R_ESP) { + Int tmq = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, base_r, TempReg, tmq); + uInstr2(cb, LEA1, 4, TempReg, tmq, TempReg, tmp); + LAST_UINSTR(cb).lit32 = d; + if (buf) VG_(sprintf)(buf,"%d(%s,,)", d, nameIReg(4,base_r)); + return (3<<24 | tmp); + } else { + Int index_tmp = newTemp(cb); + Int base_tmp = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, index_r, TempReg, index_tmp); + uInstr2(cb, GET, 4, ArchReg, base_r, TempReg, base_tmp); + uInstr3(cb, LEA2, 4, TempReg, base_tmp, TempReg, index_tmp, + TempReg, tmp); + LAST_UINSTR(cb).lit32 = d; + LAST_UINSTR(cb).extra4b = 1 << scale; + if (buf) VG_(sprintf)(buf,"%d(%s,%s,%d)", d, nameIReg(4,base_r), + nameIReg(4,index_r), 1<> 6) & 3; + UChar index_r = (sib >> 3) & 7; + UChar base_r = sib & 7; + UInt d = getUDisp32((Addr)eip); eip += 4; + + if (index_r == R_ESP) { + Int tmq = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, base_r, TempReg, tmq); + uInstr2(cb, LEA1, 4, TempReg, tmq, TempReg, tmp); + LAST_UINSTR(cb).lit32 = d; + if (buf) VG_(sprintf)(buf,"%d(%s,,)", d, nameIReg(4,base_r)); + return (6<<24 | tmp); + } else { + Int index_tmp = newTemp(cb); + Int base_tmp = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, index_r, TempReg, index_tmp); + uInstr2(cb, GET, 4, ArchReg, base_r, TempReg, base_tmp); + uInstr3(cb, 
LEA2, 4, TempReg, base_tmp, TempReg, index_tmp, + TempReg, tmp); + LAST_UINSTR(cb).lit32 = d; + LAST_UINSTR(cb).extra4b = 1 << scale; + if (buf) VG_(sprintf)(buf,"%d(%s,%s,%d)", d, nameIReg(4,base_r), + nameIReg(4,index_r), 1<> 3); /* is now XX0XXYYY */ + mod_reg_rm &= 0x1F; /* is now 000XXYYY */ + switch (mod_reg_rm) { + + /* (%eax) .. (%edi), not including (%esp) or (%ebp). */ + case 0x00: case 0x01: case 0x02: case 0x03: + /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: + return 1; + + /* d8(%eax) ... d8(%edi), not including d8(%esp). */ + case 0x08: case 0x09: case 0x0A: case 0x0B: + /* ! 0C */ case 0x0D: case 0x0E: case 0x0F: + return 2; + + /* d32(%eax) ... d32(%edi), not including d32(%esp). */ + case 0x10: case 0x11: case 0x12: case 0x13: + /* ! 14 */ case 0x15: case 0x16: case 0x17: + return 5; + + /* a register, %eax .. %edi. (Not an addr, but still handled.) */ + case 0x18: case 0x19: case 0x1A: case 0x1B: + case 0x1C: case 0x1D: case 0x1E: case 0x1F: + return 1; + + /* a 32-bit literal address. */ + case 0x05: return 5; + + /* SIB, no displacement. */ + case 0x04: { + UChar sib = *eip++; + UChar base_r = sib & 7; + if (base_r == R_EBP) return 6; else return 2; + } + /* SIB, with 8-bit displacement. */ + case 0x0C: return 3; + + /* SIB, with 32-bit displacement. */ + case 0x14: return 6; + + default: + VG_(panic)("amode_from_RM"); + return 0; /*notreached*/ + } +} + + +/* Extract the reg field from a modRM byte. */ +static __inline__ Int gregOfRM ( UChar mod_reg_rm ) +{ + return (Int)( (mod_reg_rm >> 3) & 7 ); +} + +/* Figure out whether the mod and rm parts of a modRM byte refer to a + register or memory. If so, the byte will have the form 11XXXYYY, + where YYY is the register number. */ +static __inline__ Bool epartIsReg ( UChar mod_reg_rm ) +{ + return (0xC0 == (mod_reg_rm & 0xC0)); +} + +/* ... and extract the register number ... 
*/ +static __inline__ Int eregOfRM ( UChar mod_reg_rm ) +{ + return (Int)(mod_reg_rm & 0x7); +} + + +/*------------------------------------------------------------*/ +/*--- Disassembling common idioms ---*/ +/*------------------------------------------------------------*/ + +static +void codegen_XOR_reg_with_itself ( UCodeBlock* cb, Int size, + Int ge_reg, Int tmp ) +{ + if (dis) + VG_(printf)("xor%c %s, %s\n", nameISize(size), + nameIReg(size,ge_reg), nameIReg(size,ge_reg) ); + uInstr2(cb, MOV, size, Literal, 0, TempReg, tmp); + uLiteral(cb, 0); + uInstr2(cb, XOR, size, TempReg, tmp, TempReg, tmp); + setFlagsFromUOpcode(cb, XOR); + uInstr2(cb, PUT, size, TempReg, tmp, ArchReg, ge_reg); +} + + +/* Handle binary integer instructions of the form + op E, G meaning + op reg-or-mem, reg + Is passed the a ptr to the modRM byte, the actual operation, and the + data size. Returns the address advanced completely over this + instruction. + + E(src) is reg-or-mem + G(dst) is reg. + + If E is reg, --> GET %G, tmp + OP %E, tmp + PUT tmp, %G + + If E is mem and OP is not reversible, + --> (getAddr E) -> tmpa + LD (tmpa), tmpa + GET %G, tmp2 + OP tmpa, tmp2 + PUT tmp2, %G + + If E is mem and OP is reversible + --> (getAddr E) -> tmpa + LD (tmpa), tmpa + OP %G, tmpa + PUT tmpa, %G +*/ +static +Addr dis_op2_E_G ( UCodeBlock* cb, + Opcode opc, + Bool keep, + Int size, + Addr eip0, + Char* t_x86opc ) +{ + Bool reversible; + UChar rm = getUChar(eip0); + UChar dis_buf[50]; + + if (epartIsReg(rm)) { + Int tmp = newTemp(cb); + + /* Specially handle XOR reg,reg, because that doesn't really + depend on reg, and doing the obvious thing potentially + generates a spurious value check failure due to the bogus + dependency. 
*/ + if (opc == XOR && gregOfRM(rm) == eregOfRM(rm)) { + codegen_XOR_reg_with_itself ( cb, size, gregOfRM(rm), tmp ); + return 1+eip0; + } + + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tmp); + if (opc == AND || opc == OR) { + Int tao = newTemp(cb); + uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, tao); + uInstr2(cb, opc, size, TempReg, tao, TempReg, tmp); + setFlagsFromUOpcode(cb, opc); + } else { + uInstr2(cb, opc, size, ArchReg, eregOfRM(rm), TempReg, tmp); + setFlagsFromUOpcode(cb, opc); + } + if (keep) + uInstr2(cb, PUT, size, TempReg, tmp, ArchReg, gregOfRM(rm)); + if (dis) VG_(printf)("%s%c %s,%s\n", t_x86opc, nameISize(size), + nameIReg(size,eregOfRM(rm)), + nameIReg(size,gregOfRM(rm))); + return 1+eip0; + } + + /* E refers to memory */ + reversible + = (opc == ADD || opc == OR || opc == AND || opc == XOR || opc == ADC) + ? True : False; + if (reversible) { + UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); + Int tmpa = LOW24(pair); + uInstr2(cb, LOAD, size, TempReg, tmpa, TempReg, tmpa); + + if (opc == AND || opc == OR) { + Int tao = newTemp(cb); + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tao); + uInstr2(cb, opc, size, TempReg, tao, TempReg, tmpa); + setFlagsFromUOpcode(cb, opc); + } else { + uInstr2(cb, opc, size, ArchReg, gregOfRM(rm), TempReg, tmpa); + setFlagsFromUOpcode(cb, opc); + } + if (keep) + uInstr2(cb, PUT, size, TempReg, tmpa, ArchReg, gregOfRM(rm)); + if (dis) VG_(printf)("%s%c %s,%s\n", t_x86opc, nameISize(size), + dis_buf,nameIReg(size,gregOfRM(rm))); + return HI8(pair)+eip0; + } else { + UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); + Int tmpa = LOW24(pair); + Int tmp2 = newTemp(cb); + uInstr2(cb, LOAD, size, TempReg, tmpa, TempReg, tmpa); + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tmp2); + uInstr2(cb, opc, size, TempReg, tmpa, TempReg, tmp2); + setFlagsFromUOpcode(cb, opc); + if (keep) + uInstr2(cb, PUT, size, TempReg, tmp2, ArchReg, gregOfRM(rm)); + if (dis) VG_(printf)("%s%c 
%s,%s\n", t_x86opc, nameISize(size), + dis_buf,nameIReg(size,gregOfRM(rm))); + return HI8(pair)+eip0; + } +} + + + +/* Handle binary integer instructions of the form + op G, E meaning + op reg, reg-or-mem + Is passed the a ptr to the modRM byte, the actual operation, and the + data size. Returns the address advanced completely over this + instruction. + + G(src) is reg. + E(dst) is reg-or-mem + + If E is reg, --> GET %E, tmp + OP %G, tmp + PUT tmp, %E + + If E is mem, --> (getAddr E) -> tmpa + LD (tmpa), tmpv + OP %G, tmpv + ST tmpv, (tmpa) +*/ +static +Addr dis_op2_G_E ( UCodeBlock* cb, + Opcode opc, + Bool keep, + Int size, + Addr eip0, + Char* t_x86opc ) +{ + UChar rm = getUChar(eip0); + UChar dis_buf[50]; + + if (epartIsReg(rm)) { + Int tmp = newTemp(cb); + + /* Specially handle XOR reg,reg, because that doesn't really + depend on reg, and doing the obvious thing potentially + generates a spurious value check failure due to the bogus + dependency. */ + if (opc == XOR && gregOfRM(rm) == eregOfRM(rm)) { + codegen_XOR_reg_with_itself ( cb, size, gregOfRM(rm), tmp ); + return 1+eip0; + } + + uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, tmp); + + if (opc == AND || opc == OR) { + Int tao = newTemp(cb); + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tao); + uInstr2(cb, opc, size, TempReg, tao, TempReg, tmp); + setFlagsFromUOpcode(cb, opc); + } else { + uInstr2(cb, opc, size, ArchReg, gregOfRM(rm), TempReg, tmp); + setFlagsFromUOpcode(cb, opc); + } + if (keep) + uInstr2(cb, PUT, size, TempReg, tmp, ArchReg, eregOfRM(rm)); + if (dis) VG_(printf)("%s%c %s,%s\n", t_x86opc, nameISize(size), + nameIReg(size,gregOfRM(rm)), + nameIReg(size,eregOfRM(rm))); + return 1+eip0; + } + + /* E refers to memory */ + { + UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); + Int tmpa = LOW24(pair); + Int tmpv = newTemp(cb); + uInstr2(cb, LOAD, size, TempReg, tmpa, TempReg, tmpv); + + if (opc == AND || opc == OR) { + Int tao = newTemp(cb); + uInstr2(cb, GET, size, 
ArchReg, gregOfRM(rm), TempReg, tao); + uInstr2(cb, opc, size, TempReg, tao, TempReg, tmpv); + setFlagsFromUOpcode(cb, opc); + } else { + uInstr2(cb, opc, size, ArchReg, gregOfRM(rm), TempReg, tmpv); + setFlagsFromUOpcode(cb, opc); + } + if (keep) { + uInstr2(cb, STORE, size, TempReg, tmpv, TempReg, tmpa); + SMC_IF_ALL(cb); + } + if (dis) VG_(printf)("%s%c %s,%s\n", t_x86opc, nameISize(size), + nameIReg(size,gregOfRM(rm)), dis_buf); + return HI8(pair)+eip0; + } +} + + +/* Handle move instructions of the form + mov E, G meaning + mov reg-or-mem, reg + Is passed the a ptr to the modRM byte, and the data size. Returns + the address advanced completely over this instruction. + + E(src) is reg-or-mem + G(dst) is reg. + + If E is reg, --> GET %G, tmpv + PUT tmpv, %G + + If E is mem --> (getAddr E) -> tmpa + LD (tmpa), tmpb + PUT tmpb, %G +*/ +static +Addr dis_mov_E_G ( UCodeBlock* cb, + Int size, + Addr eip0 ) +{ + UChar rm = getUChar(eip0); + UChar dis_buf[50]; + + if (epartIsReg(rm)) { + Int tmpv = newTemp(cb); + uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, tmpv); + uInstr2(cb, PUT, size, TempReg, tmpv, ArchReg, gregOfRM(rm)); + if (dis) VG_(printf)("mov%c %s,%s\n", nameISize(size), + nameIReg(size,eregOfRM(rm)), + nameIReg(size,gregOfRM(rm))); + return 1+eip0; + } + + /* E refers to memory */ + { + UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); + Int tmpa = LOW24(pair); + Int tmpb = newTemp(cb); + uInstr2(cb, LOAD, size, TempReg, tmpa, TempReg, tmpb); + uInstr2(cb, PUT, size, TempReg, tmpb, ArchReg, gregOfRM(rm)); + if (dis) VG_(printf)("mov%c %s,%s\n", nameISize(size), + dis_buf,nameIReg(size,gregOfRM(rm))); + return HI8(pair)+eip0; + } +} + + +/* Handle move instructions of the form + mov G, E meaning + mov reg, reg-or-mem + Is passed the a ptr to the modRM byte, and the data size. Returns + the address advanced completely over this instruction. + + G(src) is reg. 
+ E(dst) is reg-or-mem + + If E is reg, --> GET %G, tmp + PUT tmp, %E + + If E is mem, --> (getAddr E) -> tmpa + GET %G, tmpv + ST tmpv, (tmpa) +*/ +static +Addr dis_mov_G_E ( UCodeBlock* cb, + Int size, + Addr eip0 ) +{ + UChar rm = getUChar(eip0); + UChar dis_buf[50]; + + if (epartIsReg(rm)) { + Int tmpv = newTemp(cb); + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tmpv); + uInstr2(cb, PUT, size, TempReg, tmpv, ArchReg, eregOfRM(rm)); + if (dis) VG_(printf)("mov%c %s,%s\n", nameISize(size), + nameIReg(size,gregOfRM(rm)), + nameIReg(size,eregOfRM(rm))); + return 1+eip0; + } + + /* E refers to memory */ + { + UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); + Int tmpa = LOW24(pair); + Int tmpv = newTemp(cb); + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tmpv); + uInstr2(cb, STORE, size, TempReg, tmpv, TempReg, tmpa); + SMC_IF_SOME(cb); + if (dis) VG_(printf)("mov%c %s,%s\n", nameISize(size), + nameIReg(size,gregOfRM(rm)), dis_buf); + return HI8(pair)+eip0; + } +} + + +/* op $immediate, AL/AX/EAX. */ +static +Addr dis_op_imm_A ( UCodeBlock* cb, + Int size, + Opcode opc, + Bool keep, + Addr eip, + Char* t_x86opc ) +{ + Int tmp = newTemp(cb); + UInt lit = getUDisp(size,eip); + uInstr2(cb, GET, size, ArchReg, R_EAX, TempReg, tmp); + if (opc == AND || opc == OR) { + Int tao = newTemp(cb); + uInstr2(cb, MOV, size, Literal, 0, TempReg, tao); + uLiteral(cb, lit); + uInstr2(cb, opc, size, TempReg, tao, TempReg, tmp); + setFlagsFromUOpcode(cb, opc); + } else { + uInstr2(cb, opc, size, Literal, 0, TempReg, tmp); + uLiteral(cb, lit); + setFlagsFromUOpcode(cb, opc); + } + if (keep) + uInstr2(cb, PUT, size, TempReg, tmp, ArchReg, R_EAX); + if (dis) VG_(printf)("%s%c $0x%x, %s\n", t_x86opc, nameISize(size), + lit, nameIReg(size,R_EAX)); + return eip+size; +} + + +/* Sign- and Zero-extending moves. 
*/
static
Addr dis_movx_E_G ( UCodeBlock* cb,
                    Addr eip, Int szs, Int szd, Bool sign_extend )
{
   /* szs = source size, szd = destination size (szd > szs).
      Emits a WIDEN uinstr whose extra4b/signed_widen fields record the
      source size and the extension kind. */
   UChar dis_buf[50];
   UChar rm = getUChar(eip);
   if (epartIsReg(rm)) {
      Int tmpv = newTemp(cb);
      uInstr2(cb, GET, szs, ArchReg, eregOfRM(rm), TempReg, tmpv);
      uInstr1(cb, WIDEN, szd, TempReg, tmpv);
      LAST_UINSTR(cb).extra4b = szs;
      LAST_UINSTR(cb).signed_widen = sign_extend;
      uInstr2(cb, PUT, szd, TempReg, tmpv, ArchReg, gregOfRM(rm));
      if (dis) VG_(printf)("mov%c%c%c %s,%s\n",
                           sign_extend ? 's' : 'z',
                           nameISize(szs), nameISize(szd),
                           nameIReg(szs,eregOfRM(rm)),
                           nameIReg(szd,gregOfRM(rm)));
      return 1+eip;
   }

   /* E refers to memory */
   {
      UInt pair = disAMode ( cb, eip, dis?dis_buf:NULL);
      Int  tmpa = LOW24(pair);
      /* Note: tmpa is deliberately reused -- the LOAD overwrites the
         address with the loaded value, since the address is not needed
         again. */
      uInstr2(cb, LOAD, szs, TempReg, tmpa, TempReg, tmpa);
      uInstr1(cb, WIDEN, szd, TempReg, tmpa);
      LAST_UINSTR(cb).extra4b = szs;
      LAST_UINSTR(cb).signed_widen = sign_extend;
      uInstr2(cb, PUT, szd, TempReg, tmpa, ArchReg, gregOfRM(rm));
      if (dis) VG_(printf)("mov%c%c%c %s,%s\n",
                           sign_extend ? 's' : 'z',
                           nameISize(szs), nameISize(szd),
                           dis_buf,
                           nameIReg(szd,gregOfRM(rm)));
      return HI8(pair)+eip;
   }
}


/* Generate code to divide ArchRegs EDX:EAX / DX:AX / AX by the 32 /
   16 / 8 bit quantity in the given TempReg.  The division is done by
   a helper function called via CALLM; divisor, low and high halves of
   the dividend are pushed, and quotient/remainder are popped back. */
static
void codegen_div ( UCodeBlock* cb, Int sz, Int t, Bool signed_divide )
{
   Int  helper;
   Int  ta = newTemp(cb);
   Int  td = newTemp(cb);

   switch (sz) {
      case 4: helper = (signed_divide ? VGOFF_(helper_idiv_64_32)
                                      : VGOFF_(helper_div_64_32));
              break;
      case 2: helper = (signed_divide ? VGOFF_(helper_idiv_32_16)
                                      : VGOFF_(helper_div_32_16));
              break;
      case 1: helper = (signed_divide ?
VGOFF_(helper_idiv_16_8)
                                      : VGOFF_(helper_div_16_8));
              break;
      default: VG_(panic)("codegen_div");
   }
   uInstr0(cb, CALLM_S, 0);
   if (sz == 4 || sz == 2) {
      /* Push divisor, then EAX (dividend low), then EDX (dividend high);
         the helper leaves remainder and quotient on the simulated stack. */
      uInstr1(cb, PUSH, sz, TempReg, t);
      uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, ta);
      uInstr1(cb, PUSH, sz, TempReg, ta);
      uInstr2(cb, GET, sz, ArchReg, R_EDX, TempReg, td);
      uInstr1(cb, PUSH, sz, TempReg, td);
      uInstr1(cb, CALLM, 0, Lit16, helper);
      uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsOSZACP);
      uInstr1(cb, POP, sz, TempReg, t);
      uInstr2(cb, PUT, sz, TempReg, t, ArchReg, R_EDX);
      uInstr1(cb, POP, sz, TempReg, t);
      uInstr2(cb, PUT, sz, TempReg, t, ArchReg, R_EAX);
      uInstr1(cb, CLEAR, 0, Lit16, 4);
   } else {
      /* 8-bit case: dividend is AX; results go to AL (quotient) and
         AH (remainder). */
      uInstr1(cb, PUSH, 1, TempReg, t);
      uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, ta);
      uInstr1(cb, PUSH, 2, TempReg, ta);
      uInstr2(cb, MOV, 1, Literal, 0, TempReg, td);
      uLiteral(cb, 0);
      uInstr1(cb, PUSH, 1, TempReg, td);
      uInstr1(cb, CALLM, 0, Lit16, helper);
      uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsOSZACP);
      uInstr1(cb, POP, 1, TempReg, t);
      uInstr2(cb, PUT, 1, TempReg, t, ArchReg, R_AL);
      uInstr1(cb, POP, 1, TempReg, t);
      uInstr2(cb, PUT, 1, TempReg, t, ArchReg, R_AH);
      uInstr1(cb, CLEAR, 0, Lit16, 4);
   }
   uInstr0(cb, CALLM_E, 0);
}


/* Group 1 extended opcodes: op $imm, E where the actual op is chosen
   by the reg field of the modRM byte. */
static
Addr dis_Grp1 ( UCodeBlock* cb, Addr eip, UChar modrm,
                Int am_sz, Int d_sz, Int sz, UInt d32 )
{
   Int   t1, t2, uopc;
   UInt  pair;
   UChar dis_buf[50];
   if (epartIsReg(modrm)) {
      vg_assert(am_sz == 1);
      t1 = newTemp(cb);
      uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1);
      /* Extension 7 is CMP, implemented as SUB with the writeback
         suppressed below. */
      switch (gregOfRM(modrm)) {
         case 0: uopc = ADD; break;  case 1: uopc = OR;  break;
         case 2: uopc = ADC; break;  case 3: uopc = SBB; break;
         case 4: uopc = AND; break;  case 5: uopc = SUB; break;
         case 6: uopc = XOR; break;  case 7: uopc = SUB; break;
         default: VG_(panic)("dis_Grp1(Reg): unhandled case");
      }
      if (uopc == AND || uopc == OR) {
         Int tao = newTemp(cb);
         uInstr2(cb, MOV, sz, Literal, 0, TempReg, tao);
         uLiteral(cb,
d32);
         uInstr2(cb, uopc, sz, TempReg, tao, TempReg, t1);
         setFlagsFromUOpcode(cb, uopc);
      } else {
         uInstr2(cb, uopc, sz, Literal, 0, TempReg, t1);
         uLiteral(cb, d32);
         setFlagsFromUOpcode(cb, uopc);
      }
      /* CMP (ext 7): flags only, no result writeback. */
      if (gregOfRM(modrm) < 7)
         uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm));
      eip += (am_sz + d_sz);
      if (dis)
         VG_(printf)("%s%c $0x%x, %s\n",
                     nameGrp1(gregOfRM(modrm)), nameISize(sz), d32,
                     nameIReg(sz,eregOfRM(modrm)));
   } else {
      pair = disAMode ( cb, eip, dis?dis_buf:NULL);
      t1   = LOW24(pair);
      t2   = newTemp(cb);
      eip  += HI8(pair);
      eip  += d_sz;
      uInstr2(cb, LOAD, sz, TempReg, t1, TempReg, t2);
      switch (gregOfRM(modrm)) {
         case 0: uopc = ADD; break;  case 1: uopc = OR;  break;
         case 2: uopc = ADC; break;  case 3: uopc = SBB; break;
         case 4: uopc = AND; break;  case 5: uopc = SUB; break;
         case 6: uopc = XOR; break;  case 7: uopc = SUB; break;
         default: VG_(panic)("dis_Grp1(Mem): unhandled case");
      }
      if (uopc == AND || uopc == OR) {
         Int tao = newTemp(cb);
         uInstr2(cb, MOV, sz, Literal, 0, TempReg, tao);
         uLiteral(cb, d32);
         uInstr2(cb, uopc, sz, TempReg, tao, TempReg, t2);
         setFlagsFromUOpcode(cb, uopc);
      } else {
         uInstr2(cb, uopc, sz, Literal, 0, TempReg, t2);
         uLiteral(cb, d32);
         setFlagsFromUOpcode(cb, uopc);
      }
      /* CMP (ext 7): flags only, no store back to memory. */
      if (gregOfRM(modrm) < 7) {
         uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t1);
         SMC_IF_ALL(cb);
      }
      if (dis)
         VG_(printf)("%s%c $0x%x, %s\n",
                     nameGrp1(gregOfRM(modrm)), nameISize(sz), d32,
                     dis_buf);
   }
   return eip;
}


/* Group 2 extended opcodes (shifts/rotates). */
static
Addr dis_Grp2 ( UCodeBlock* cb, Addr eip, UChar modrm,
                Int am_sz, Int d_sz, Int sz,
                Tag orig_src_tag, UInt orig_src_val )
{
   /* orig_src_tag and orig_src_val denote either ArchReg(%CL) or a
      Literal.  And eip on entry points at the modrm byte. */
   Int   t1, t2, uopc;
   UInt  pair;
   UChar dis_buf[50];
   UInt  src_val;
   Tag   src_tag;

   /* Get the amount to be shifted by into src_tag/src_val.
   */
   if (orig_src_tag == ArchReg) {
      /* Shift amount comes from %CL: read it into a temp. */
      src_val = newTemp(cb);
      src_tag = TempReg;
      uInstr2(cb, GET, 1, orig_src_tag, orig_src_val, TempReg, src_val);
   } else {
      src_val = orig_src_val;
      src_tag = Literal;
   }

   if (epartIsReg(modrm)) {
      vg_assert(am_sz == 1);
      t1 = newTemp(cb);
      uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1);
      /* Note: extension 6 is deliberately unhandled and panics. */
      switch (gregOfRM(modrm)) {
         case 0: uopc = ROL; break;  case 1: uopc = ROR; break;
         case 2: uopc = RCL; break;  case 3: uopc = RCR; break;
         case 4: uopc = SHL; break;  case 5: uopc = SHR; break;
         case 7: uopc = SAR; break;
         default: VG_(panic)("dis_Grp2(Reg): unhandled case");
      }
      if (src_tag == Literal) {
         uInstr2(cb, uopc, sz, Literal, 0, TempReg, t1);
         uLiteral(cb, src_val);
      } else {
         uInstr2(cb, uopc, sz, src_tag, src_val, TempReg, t1);
      }
      setFlagsFromUOpcode(cb, uopc);
      uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm));
      eip += (am_sz + d_sz);
      if (dis) {
         if (orig_src_tag == Literal)
            VG_(printf)("%s%c $0x%x, %s\n",
                        nameGrp2(gregOfRM(modrm)), nameISize(sz),
                        orig_src_val, nameIReg(sz,eregOfRM(modrm)));
         else
            VG_(printf)("%s%c %s, %s\n",
                        nameGrp2(gregOfRM(modrm)), nameISize(sz),
                        nameIReg(1,orig_src_val),
                        nameIReg(sz,eregOfRM(modrm)));
      }
   } else {
      pair = disAMode ( cb, eip, dis?dis_buf:NULL);
      t1   = LOW24(pair);
      t2   = newTemp(cb);
      eip  += HI8(pair);
      eip  += d_sz;
      uInstr2(cb, LOAD, sz, TempReg, t1, TempReg, t2);
      switch (gregOfRM(modrm)) {
         case 0: uopc = ROL; break;  case 1: uopc = ROR; break;
         case 2: uopc = RCL; break;  case 3: uopc = RCR; break;
         case 4: uopc = SHL; break;  case 5: uopc = SHR; break;
         case 7: uopc = SAR; break;
         default: VG_(panic)("dis_Grp2(Reg): unhandled case");
      }
      if (src_tag == Literal) {
         uInstr2(cb, uopc, sz, Literal, 0, TempReg, t2);
         uLiteral(cb, src_val);
      } else {
         uInstr2(cb, uopc, sz, src_tag, src_val, TempReg, t2);
      }
      setFlagsFromUOpcode(cb, uopc);
      uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t1);
      SMC_IF_ALL(cb);
      if
(dis) {
         if (orig_src_tag == Literal)
            VG_(printf)("%s%c $0x%x, %s\n",
                        nameGrp2(gregOfRM(modrm)), nameISize(sz),
                        orig_src_val, dis_buf);
         else
            VG_(printf)("%s%c %s, %s\n",
                        nameGrp2(gregOfRM(modrm)), nameISize(sz),
                        nameIReg(1,orig_src_val),
                        dis_buf);
      }
   }
   return eip;
}



/* Group 8 extended opcodes (bit test/set/reset/complement, done via
   CALLM helpers). */
static
Addr dis_Grp8 ( UCodeBlock* cb, Addr eip, UChar modrm,
                Int am_sz, Int sz, UInt src_val )
{
   /* src_val denotes a d8.
      And eip on entry points at the modrm byte. */
   Int   t1, t2, helper;
   UInt  pair;
   UChar dis_buf[50];

   switch (gregOfRM(modrm)) {
      case 4: helper = VGOFF_(helper_bt);  break;
      case 5: helper = VGOFF_(helper_bts); break;
      case 6: helper = VGOFF_(helper_btr); break;
      case 7: helper = VGOFF_(helper_btc); break;
      /* If this needs to be extended, be careful to do the flag
         setting in the parts below correctly. */
      default: VG_(panic)("dis_Grp8");
   }

   /* Push the bit-index literal, then the operand value. */
   t1 = newTemp(cb);
   uInstr2(cb, MOV, 4, Literal, 0, TempReg, t1);
   uLiteral(cb, src_val);
   uInstr0(cb, CALLM_S, 0);
   uInstr1(cb, PUSH, 4, TempReg, t1);

   if (epartIsReg(modrm)) {
      vg_assert(am_sz == 1);
      t2 = newTemp(cb);
      uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t2);
      uInstr1(cb, PUSH, sz, TempReg, t2);
      uInstr1(cb, CALLM, 0, Lit16, helper);
      uFlagsRWU(cb, FlagsEmpty, FlagC, FlagsOSZAP);
      uInstr1(cb, POP, sz, TempReg, t2);
      uInstr2(cb, PUT, sz, TempReg, t2, ArchReg, eregOfRM(modrm));
      uInstr1(cb, CLEAR, 0, Lit16, 4);
      eip += (am_sz + 1);
      if (dis)
         VG_(printf)("%s%c $0x%x, %s\n",
                     nameGrp8(gregOfRM(modrm)), nameISize(sz),
                     src_val,
                     nameIReg(sz,eregOfRM(modrm)));
   } else {
      pair = disAMode ( cb, eip, dis?dis_buf:NULL);
      /* t1 is reassigned here: its literal was already pushed above. */
      t1   = LOW24(pair);
      t2   = newTemp(cb);
      eip  += HI8(pair);
      eip  += 1;
      uInstr2(cb, LOAD, sz, TempReg, t1, TempReg, t2);
      uInstr1(cb, PUSH, sz, TempReg, t2);
      uInstr1(cb, CALLM, 0, Lit16, helper);
      uFlagsRWU(cb, FlagsEmpty, FlagC, FlagsOSZAP);
      uInstr1(cb, POP, sz, TempReg, t2);
      uInstr2(cb, STORE,
sz, TempReg, t2, TempReg, t1);
      SMC_IF_ALL(cb);
      uInstr1(cb, CLEAR, 0, Lit16, 4);
      if (dis)
         VG_(printf)("%s%c $0x%x, %s\n",
                     nameGrp8(gregOfRM(modrm)), nameISize(sz), src_val,
                     dis_buf);
   }
   uInstr0(cb, CALLM_E, 0);
   return eip;
}




/* Generate ucode to multiply the value in EAX/AX/AL by the register
   specified by the ereg of modrm, and park the result in
   EDX:EAX/DX:AX/AX. */
static void codegen_mul_A_D_Reg ( UCodeBlock* cb, Int sz,
                                  UChar modrm, Bool signed_multiply )
{
   Int helper = signed_multiply
                ?
                   (sz==1 ? VGOFF_(helper_imul_8_16)
                          : (sz==2 ? VGOFF_(helper_imul_16_32)
                                   : VGOFF_(helper_imul_32_64)))
                :
                   (sz==1 ? VGOFF_(helper_mul_8_16)
                          : (sz==2 ? VGOFF_(helper_mul_16_32)
                                   : VGOFF_(helper_mul_32_64)));
   Int t1 = newTemp(cb);
   Int ta = newTemp(cb);
   uInstr0(cb, CALLM_S, 0);
   uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1);
   uInstr1(cb, PUSH, sz, TempReg, t1);
   uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, ta);
   uInstr1(cb, PUSH, sz, TempReg, ta);
   uInstr1(cb, CALLM, 0, Lit16, helper);
   uFlagsRWU(cb, FlagsEmpty, FlagsOC, FlagsSZAP);
   if (sz > 1) {
      /* Pop the double-length result: high half to EDX, low to EAX. */
      uInstr1(cb, POP, sz, TempReg, t1);
      uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, R_EDX);
      uInstr1(cb, POP, sz, TempReg, t1);
      uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, R_EAX);
   } else {
      /* 8-bit multiply: 16-bit result goes entirely to AX. */
      uInstr1(cb, CLEAR, 0, Lit16, 4);
      uInstr1(cb, POP, 2, TempReg, t1);
      uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX);
   }
   uInstr0(cb, CALLM_E, 0);
   if (dis) VG_(printf)("%s%c %s\n", signed_multiply ? "imul" : "mul",
                        nameISize(sz), nameIReg(sz, eregOfRM(modrm)));

}


/* Generate ucode to multiply the value in EAX/AX/AL by the value in
   TempReg temp, and park the result in EDX:EAX/DX:AX/AX. */
static void codegen_mul_A_D_Temp ( UCodeBlock* cb, Int sz,
                                   Int temp, Bool signed_multiply,
                                   UChar* dis_buf )
{
   Int helper = signed_multiply
                ?
                   (sz==1 ? VGOFF_(helper_imul_8_16)
                          : (sz==2 ? VGOFF_(helper_imul_16_32)
                                   : VGOFF_(helper_imul_32_64)))
                :
                   (sz==1 ?
VGOFF_(helper_mul_8_16)
                          : (sz==2 ? VGOFF_(helper_mul_16_32)
                                   : VGOFF_(helper_mul_32_64)));
   Int t1 = newTemp(cb);
   uInstr0(cb, CALLM_S, 0);
   uInstr1(cb, PUSH, sz, TempReg, temp);
   uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, t1);
   uInstr1(cb, PUSH, sz, TempReg, t1);
   uInstr1(cb, CALLM, 0, Lit16, helper);
   uFlagsRWU(cb, FlagsEmpty, FlagsOC, FlagsSZAP);
   if (sz > 1) {
      uInstr1(cb, POP, sz, TempReg, t1);
      uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, R_EDX);
      uInstr1(cb, POP, sz, TempReg, t1);
      uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, R_EAX);
   } else {
      uInstr1(cb, CLEAR, 0, Lit16, 4);
      uInstr1(cb, POP, 2, TempReg, t1);
      uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX);
   }
   uInstr0(cb, CALLM_E, 0);
   if (dis) VG_(printf)("%s%c %s\n", signed_multiply ? "imul" : "mul",
                        nameISize(sz), dis_buf);
}


/* Group 3 extended opcodes (TEST/NOT/NEG/MUL/IMUL/DIV/IDIV). */
static
Addr dis_Grp3 ( UCodeBlock* cb, Int sz, Addr eip )
{
   Int   t1, t2;
   UInt  pair, d32;
   UChar modrm;
   UChar dis_buf[50];
   t1 = t2 = INVALID_TEMPREG;
   modrm = getUChar(eip);
   if (epartIsReg(modrm)) {
      t1 = newTemp(cb);
      switch (gregOfRM(modrm)) {
         case 0: { /* TEST */
            /* TEST is AND into a temp, flags set, result discarded. */
            Int tao = newTemp(cb);
            eip++; d32 = getUDisp(sz, eip); eip += sz;
            uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1);
            uInstr2(cb, MOV, sz, Literal, 0, TempReg, tao);
            uLiteral(cb, d32);
            uInstr2(cb, AND, sz, TempReg, tao, TempReg, t1);
            setFlagsFromUOpcode(cb, AND);
            if (dis)
               VG_(printf)("test%c $0x%x, %s\n",
                           nameISize(sz), d32, nameIReg(sz, eregOfRM(modrm)));
            break;
         }
         case 2: /* NOT */
            eip++;
            uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1);
            uInstr1(cb, NOT, sz, TempReg, t1);
            setFlagsFromUOpcode(cb, NOT);
            uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm));
            if (dis)
               VG_(printf)("not%c %s\n",
                           nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
            break;
         case 3: /* NEG */
            eip++;
            uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1);
            uInstr1(cb, NEG, sz, TempReg, t1);
setFlagsFromUOpcode(cb, NEG);
            uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm));
            if (dis)
               VG_(printf)("neg%c %s\n",
                           nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
            break;
         case 4: /* MUL */
            eip++;
            codegen_mul_A_D_Reg ( cb, sz, modrm, False );
            break;
         case 5: /* IMUL */
            eip++;
            codegen_mul_A_D_Reg ( cb, sz, modrm, True );
            break;
         case 6: /* DIV */
            eip++;
            uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1);
            codegen_div ( cb, sz, t1, False );
            if (dis)
               VG_(printf)("div%c %s\n", nameISize(sz),
                           nameIReg(sz, eregOfRM(modrm)));
            break;
         case 7: /* IDIV */
            eip++;
            uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1);
            codegen_div ( cb, sz, t1, True );
            if (dis)
               VG_(printf)("idiv%c %s\n", nameISize(sz),
                           nameIReg(sz, eregOfRM(modrm)));
            break;
         default:
            /* Note: extension 1 (undocumented TEST alias) not handled. */
            VG_(printf)(
               "unhandled Grp3(R) case %d\n", (UInt)gregOfRM(modrm));
            VG_(panic)("Grp3");
      }
   } else {
      pair = disAMode ( cb, eip, dis?dis_buf:NULL );
      t2   = LOW24(pair);
      t1   = newTemp(cb);
      eip  += HI8(pair);
      uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1);
      switch (gregOfRM(modrm)) {
         case 0: { /* TEST */
            Int tao = newTemp(cb);
            d32 = getUDisp(sz, eip); eip += sz;
            uInstr2(cb, MOV, sz, Literal, 0, TempReg, tao);
            uLiteral(cb, d32);
            uInstr2(cb, AND, sz, TempReg, tao, TempReg, t1);
            setFlagsFromUOpcode(cb, AND);
            if (dis)
               VG_(printf)("test%c $0x%x, %s\n",
                           nameISize(sz), d32, dis_buf);
            break;
         }
         case 2: /* NOT */
            uInstr1(cb, NOT, sz, TempReg, t1);
            setFlagsFromUOpcode(cb, NOT);
            uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2);
            SMC_IF_ALL(cb);
            if (dis)
               VG_(printf)("not%c %s\n", nameISize(sz), dis_buf);
            break;
         case 3: /* NEG */
            uInstr1(cb, NEG, sz, TempReg, t1);
            setFlagsFromUOpcode(cb, NEG);
            uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2);
            SMC_IF_ALL(cb);
            if (dis)
               VG_(printf)("neg%c %s\n", nameISize(sz), dis_buf);
            break;
         case 4: /* MUL */
            codegen_mul_A_D_Temp ( cb, sz, t1, False,
dis?dis_buf:NULL );
            break;
         case 5: /* IMUL */
            codegen_mul_A_D_Temp ( cb, sz, t1, True, dis?dis_buf:NULL );
            break;
         case 6: /* DIV */
            codegen_div ( cb, sz, t1, False );
            if (dis)
               VG_(printf)("div%c %s\n", nameISize(sz), dis_buf);
            break;
         case 7: /* IDIV */
            codegen_div ( cb, sz, t1, True );
            if (dis)
               VG_(printf)("idiv%c %s\n", nameISize(sz), dis_buf);
            break;
         default:
            VG_(printf)(
               "unhandled Grp3(M) case %d\n", (UInt)gregOfRM(modrm));
            VG_(panic)("Grp3");
      }
   }
   return eip;
}


/* Group 4 extended opcodes (byte-sized INC/DEC). */
static
Addr dis_Grp4 ( UCodeBlock* cb, Addr eip )
{
   Int   t1, t2;
   UInt  pair;
   UChar modrm;
   UChar dis_buf[50];
   t1 = t2 = INVALID_TEMPREG;

   modrm = getUChar(eip);
   if (epartIsReg(modrm)) {
      t1 = newTemp(cb);
      uInstr2(cb, GET, 1, ArchReg, eregOfRM(modrm), TempReg, t1);
      switch (gregOfRM(modrm)) {
         case 0: /* INC */
            uInstr1(cb, INC, 1, TempReg, t1);
            setFlagsFromUOpcode(cb, INC);
            uInstr2(cb, PUT, 1, TempReg, t1, ArchReg, eregOfRM(modrm));
            break;
         case 1: /* DEC */
            uInstr1(cb, DEC, 1, TempReg, t1);
            setFlagsFromUOpcode(cb, DEC);
            uInstr2(cb, PUT, 1, TempReg, t1, ArchReg, eregOfRM(modrm));
            break;
         default:
            VG_(printf)(
               "unhandled Grp4(R) case %d\n", (UInt)gregOfRM(modrm));
            VG_(panic)("Grp4");
      }
      eip++;
      if (dis)
         VG_(printf)("%sb %s\n", nameGrp4(gregOfRM(modrm)),
                     nameIReg(1, eregOfRM(modrm)));
   } else {
      pair = disAMode ( cb, eip, dis?dis_buf:NULL );
      t2   = LOW24(pair);
      t1   = newTemp(cb);
      uInstr2(cb, LOAD, 1, TempReg, t2, TempReg, t1);
      switch (gregOfRM(modrm)) {
         case 0: /* INC */
            uInstr1(cb, INC, 1, TempReg, t1);
            setFlagsFromUOpcode(cb, INC);
            uInstr2(cb, STORE, 1, TempReg, t1, TempReg, t2);
            SMC_IF_ALL(cb);
            break;
         case 1: /* DEC */
            uInstr1(cb, DEC, 1, TempReg, t1);
            setFlagsFromUOpcode(cb, DEC);
            uInstr2(cb, STORE, 1, TempReg, t1, TempReg, t2);
            SMC_IF_ALL(cb);
            break;
         default:
            VG_(printf)(
               "unhandled Grp4(M) case %d\n", (UInt)gregOfRM(modrm));
VG_(panic)("Grp4");
      }
      eip += HI8(pair);
      if (dis)
         VG_(printf)("%sb %s\n", nameGrp4(gregOfRM(modrm)), dis_buf);
   }
   return eip;
}


/* Group 5 extended opcodes (INC/DEC/CALL/JMP/PUSH on E).  Sets *isEnd
   when the decoded insn is a control transfer that ends the basic
   block. */
static
Addr dis_Grp5 ( UCodeBlock* cb, Int sz, Addr eip, Bool* isEnd )
{
   Int   t1, t2, t3, t4;
   UInt  pair;
   UChar modrm;
   UChar dis_buf[50];
   t1 = t2 = t3 = t4 = INVALID_TEMPREG;

   modrm = getUChar(eip);
   if (epartIsReg(modrm)) {
      t1 = newTemp(cb);
      uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1);
      switch (gregOfRM(modrm)) {
         case 0: /* INC */
            uInstr1(cb, INC, sz, TempReg, t1);
            setFlagsFromUOpcode(cb, INC);
            uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm));
            break;
         case 1: /* DEC */
            uInstr1(cb, DEC, sz, TempReg, t1);
            setFlagsFromUOpcode(cb, DEC);
            uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm));
            break;
         case 2: /* call Ev */
            /* Push the return address (eip+1, past the modrm byte),
               then jump to the target in t1. */
            t3 = newTemp(cb); t4 = newTemp(cb);
            uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3);
            uInstr2(cb, SUB, 4, Literal, 0, TempReg, t3);
            uLiteral(cb, 4);
            uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ESP);
            uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4);
            uLiteral(cb, eip+1);
            uInstr2(cb, STORE, 4, TempReg, t4, TempReg, t3);
            SMC_IF_ALL(cb);
            uInstr1(cb, JMP, 0, TempReg, t1);
            uCond(cb, CondAlways);
            LAST_UINSTR(cb).call_dispatch = True;
            *isEnd = True;
            break;
         case 4: /* jmp Ev */
            uInstr1(cb, JMP, 0, TempReg, t1);
            uCond(cb, CondAlways);
            *isEnd = True;
            break;
         default:
            VG_(printf)(
               "unhandled Grp5(R) case %d\n", (UInt)gregOfRM(modrm));
            VG_(panic)("Grp5");
      }
      eip++;
      if (dis)
         VG_(printf)("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
                     nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
   } else {
      pair = disAMode ( cb, eip, dis?dis_buf:NULL );
      t2   = LOW24(pair);
      t1   = newTemp(cb);
      uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1);
      switch (gregOfRM(modrm)) {
         case 0: /* INC */
            uInstr1(cb, INC, sz, TempReg, t1);
            setFlagsFromUOpcode(cb, INC);
            uInstr2(cb, STORE, sz, TempReg, t1,
TempReg, t2);
            SMC_IF_ALL(cb);
            break;
         case 1: /* DEC */
            uInstr1(cb, DEC, sz, TempReg, t1);
            setFlagsFromUOpcode(cb, DEC);
            uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2);
            SMC_IF_ALL(cb);
            break;
         case 2: /* call Ev */
            /* Return address is eip+HI8(pair): past modrm + amode bytes. */
            t3 = newTemp(cb); t4 = newTemp(cb);
            uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3);
            uInstr2(cb, SUB, 4, Literal, 0, TempReg, t3);
            uLiteral(cb, 4);
            uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ESP);
            uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4);
            uLiteral(cb, eip+HI8(pair));
            uInstr2(cb, STORE, 4, TempReg, t4, TempReg, t3);
            SMC_IF_ALL(cb);
            uInstr1(cb, JMP, 0, TempReg, t1);
            uCond(cb, CondAlways);
            LAST_UINSTR(cb).call_dispatch = True;
            *isEnd = True;
            break;
         case 4: /* JMP Ev */
            uInstr1(cb, JMP, 0, TempReg, t1);
            uCond(cb, CondAlways);
            *isEnd = True;
            break;
         case 6: /* PUSH Ev */
            t3 = newTemp(cb);
            uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3);
            uInstr2(cb, SUB, 4, Literal, 0, TempReg, t3);
            uLiteral(cb, sz);
            uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ESP);
            uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t3);
            SMC_IF_ALL(cb);
            break;
         default:
            VG_(printf)(
               "unhandled Grp5(M) case %d\n", (UInt)gregOfRM(modrm));
            VG_(panic)("Grp5");
      }
      eip += HI8(pair);
      if (dis)
         VG_(printf)("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
                     nameISize(sz), dis_buf);
   }
   return eip;
}


/* Template for REPE CMPS.  Assumes this insn is the last one in
   the basic block, and so emits a jump to the next insn.
*/
static
void codegen_REPE_CMPS ( UCodeBlock* cb, Int sz, Addr eip, Addr eip_next )
{
   Int tc,  /* ECX */
       td,  /* EDI */   ts,  /* ESI */
       tdv, /* (EDI) */ tsv  /* (ESI) */;

   tdv = newTemp(cb);
   tsv = newTemp(cb);
   td  = newTemp(cb);
   ts  = newTemp(cb);
   tc  = newTemp(cb);

   /* If ECX is zero, skip straight to the next insn. */
   uInstr2(cb, GET,   4, ArchReg, R_ECX, TempReg, tc);
   uInstr2(cb, JIFZ,  4, TempReg, tc,    Literal, 0);
   uLiteral(cb, eip_next);
   uInstr1(cb, DEC,   4, TempReg, tc);
   uInstr2(cb, PUT,   4, TempReg, tc,    ArchReg, R_ECX);

   uInstr2(cb, GET,   4, ArchReg, R_EDI, TempReg, td);
   uInstr2(cb, GET,   4, ArchReg, R_ESI, TempReg, ts);

   uInstr2(cb, LOAD, sz, TempReg, td,    TempReg, tdv);
   uInstr2(cb, LOAD, sz, TempReg, ts,    TempReg, tsv);

   uInstr2(cb, SUB,  sz, TempReg, tdv,   TempReg, tsv);
   setFlagsFromUOpcode(cb, SUB);

   /* Get +1/-1 step from the D flag via a helper; tdv is reused to
      hold it from here on. */
   uInstr0(cb, CALLM_S, 0);
   uInstr2(cb, MOV,   4, Literal, 0,     TempReg, tdv);
   uLiteral(cb, 0);
   uInstr1(cb, PUSH,  4, TempReg, tdv);

   uInstr1(cb, CALLM, 0, Lit16,   VGOFF_(helper_get_dirflag));
   uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);

   uInstr1(cb, POP,   4, TempReg, tdv);
   uInstr0(cb, CALLM_E, 0);
   if (sz == 4 || sz == 2) {
      /* Scale the step by the operand size (shift by 1 or 2). */
      uInstr2(cb, SHL, 4, Literal, 0, TempReg, tdv);
      uLiteral(cb, sz/2);
   }
   uInstr2(cb, ADD,   4, TempReg, tdv,   TempReg, td);
   uInstr2(cb, ADD,   4, TempReg, tdv,   TempReg, ts);

   uInstr2(cb, PUT,   4, TempReg, td,    ArchReg, R_EDI);
   uInstr2(cb, PUT,   4, TempReg, ts,    ArchReg, R_ESI);

   /* Loop back while equal; otherwise fall through to the next insn. */
   uInstr1(cb, JMP,   0, Literal, 0);
   uLiteral(cb, eip);
   uCond(cb, CondZ);
   uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty);
   uInstr1(cb, JMP,   0, Literal, 0);
   uLiteral(cb, eip_next);
   uCond(cb, CondAlways);
}


/* Template for REPNE SCAS.  Assumes this insn is the last one in
   the basic block, and so emits a jump to the next insn.
*/
static
void codegen_REPNE_SCAS ( UCodeBlock* cb, Int sz, Addr eip, Addr eip_next )
{
   Int ta /* EAX */, tc /* ECX */, td /* EDI */, tv;
   ta = newTemp(cb);
   tc = newTemp(cb);
   tv = newTemp(cb);
   td = newTemp(cb);

   /* If ECX is zero, skip straight to the next insn. */
   uInstr2(cb, GET,   4, ArchReg, R_ECX, TempReg, tc);
   uInstr2(cb, JIFZ,  4, TempReg, tc,    Literal, 0);
   uLiteral(cb, eip_next);
   uInstr1(cb, DEC,   4, TempReg, tc);
   uInstr2(cb, PUT,   4, TempReg, tc,    ArchReg, R_ECX);

   uInstr2(cb, GET,  sz, ArchReg, R_EAX, TempReg, ta);
   uInstr2(cb, GET,   4, ArchReg, R_EDI, TempReg, td);
   uInstr2(cb, LOAD, sz, TempReg, td,    TempReg, tv);
   /* next uinstr kills ta, but that's ok -- don't need it again */
   uInstr2(cb, SUB,  sz, TempReg, tv,    TempReg, ta);
   setFlagsFromUOpcode(cb, SUB);

   /* Get the direction-flag step (+1/-1) into tv via helper call. */
   uInstr0(cb, CALLM_S, 0);
   uInstr2(cb, MOV,   4, Literal, 0,     TempReg, tv);
   uLiteral(cb, 0);
   uInstr1(cb, PUSH,  4, TempReg, tv);

   uInstr1(cb, CALLM, 0, Lit16,   VGOFF_(helper_get_dirflag));
   uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);

   uInstr1(cb, POP,   4, TempReg, tv);
   uInstr0(cb, CALLM_E, 0);

   if (sz == 4 || sz == 2) {
      uInstr2(cb, SHL, 4, Literal, 0, TempReg, tv);
      uLiteral(cb, sz/2);
   }
   uInstr2(cb, ADD,   4, TempReg, tv,    TempReg, td);
   uInstr2(cb, PUT,   4, TempReg, td,    ArchReg, R_EDI);
   /* Loop back while not equal; otherwise go to the next insn. */
   uInstr1(cb, JMP,   0, Literal, 0);
   uLiteral(cb, eip);
   uCond(cb, CondNZ);
   uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty);
   uInstr1(cb, JMP,   0, Literal, 0);
   uLiteral(cb, eip_next);
   uCond(cb, CondAlways);
}


/* Template for REPE MOVS.  Assumes this insn is the last one in
   the basic block, and so emits a jump to the next insn.
*/
static
void codegen_REPE_MOVS ( UCodeBlock* cb, Int sz, Addr eip, Addr eip_next )
{
   Int ts /* ESI */, tc /* ECX */, td /* EDI */, tv;
   tc = newTemp(cb);
   td = newTemp(cb);
   ts = newTemp(cb);
   tv = newTemp(cb);

   /* If ECX is zero, skip straight to the next insn. */
   uInstr2(cb, GET,   4, ArchReg, R_ECX, TempReg, tc);
   uInstr2(cb, JIFZ,  4, TempReg, tc,    Literal, 0);
   uLiteral(cb, eip_next);
   uInstr1(cb, DEC,   4, TempReg, tc);
   uInstr2(cb, PUT,   4, TempReg, tc,    ArchReg, R_ECX);

   uInstr2(cb, GET,   4, ArchReg, R_EDI, TempReg, td);
   uInstr2(cb, GET,   4, ArchReg, R_ESI, TempReg, ts);

   /* Copy one element from (ESI) to (EDI). */
   uInstr2(cb, LOAD,  sz, TempReg, ts,   TempReg, tv);
   uInstr2(cb, STORE, sz, TempReg, tv,   TempReg, td);
   SMC_IF_SOME(cb);

   /* Get the direction-flag step (+1/-1) into tv via helper call. */
   uInstr0(cb, CALLM_S, 0);
   uInstr2(cb, MOV,   4, Literal, 0,     TempReg, tv);
   uLiteral(cb, 0);
   uInstr1(cb, PUSH,  4, TempReg, tv);

   uInstr1(cb, CALLM, 0, Lit16,   VGOFF_(helper_get_dirflag));
   uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);

   uInstr1(cb, POP,   4, TempReg, tv);
   uInstr0(cb, CALLM_E, 0);

   if (sz == 4 || sz == 2) {
      uInstr2(cb, SHL, 4, Literal, 0, TempReg, tv);
      uLiteral(cb, sz/2);
   }
   uInstr2(cb, ADD,   4, TempReg, tv,    TempReg, td);
   uInstr2(cb, ADD,   4, TempReg, tv,    TempReg, ts);

   uInstr2(cb, PUT,   4, TempReg, td,    ArchReg, R_EDI);
   uInstr2(cb, PUT,   4, TempReg, ts,    ArchReg, R_ESI);

   /* Unconditional loop back; termination is via the JIFZ above. */
   uInstr1(cb, JMP,   0, Literal, 0);
   uLiteral(cb, eip);
   uCond(cb, CondAlways);
}


/* Template for REPE STOS.  Assumes this insn is the last one in
   the basic block, and so emits a jump to the next insn.
*/
static
void codegen_REPE_STOS ( UCodeBlock* cb, Int sz, Addr eip, Addr eip_next )
{
   Int ta /* EAX */, tc /* ECX */, td /* EDI */;
   ta = newTemp(cb);
   tc = newTemp(cb);
   td = newTemp(cb);

   /* If ECX is zero, skip straight to the next insn. */
   uInstr2(cb, GET,   4, ArchReg, R_ECX, TempReg, tc);
   uInstr2(cb, JIFZ,  4, TempReg, tc,    Literal, 0);
   uLiteral(cb, eip_next);
   uInstr1(cb, DEC,   4, TempReg, tc);
   uInstr2(cb, PUT,   4, TempReg, tc,    ArchReg, R_ECX);

   /* Store EAX/AX/AL to (EDI). */
   uInstr2(cb, GET,   sz, ArchReg, R_EAX, TempReg, ta);
   uInstr2(cb, GET,    4, ArchReg, R_EDI, TempReg, td);
   uInstr2(cb, STORE, sz, TempReg, ta,    TempReg, td);
   SMC_IF_SOME(cb);

   /* Get the direction-flag step (+1/-1) into ta via helper call. */
   uInstr0(cb, CALLM_S, 0);
   uInstr2(cb, MOV,   4, Literal, 0,     TempReg, ta);
   uLiteral(cb, 0);
   uInstr1(cb, PUSH,  4, TempReg, ta);

   uInstr1(cb, CALLM, 0, Lit16,   VGOFF_(helper_get_dirflag));
   uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);

   uInstr1(cb, POP,   4, TempReg, ta);
   uInstr0(cb, CALLM_E, 0);

   if (sz == 4 || sz == 2) {
      uInstr2(cb, SHL, 4, Literal, 0, TempReg, ta);
      uLiteral(cb, sz/2);
   }
   uInstr2(cb, ADD,   4, TempReg, ta,    TempReg, td);
   uInstr2(cb, PUT,   4, TempReg, td,    ArchReg, R_EDI);

   /* Unconditional loop back; termination is via the JIFZ above. */
   uInstr1(cb, JMP,   0, Literal, 0);
   uLiteral(cb, eip);
   uCond(cb, CondAlways);
}


/* Template for CMPS, _not_ preceded by a REP prefix.
*/
static
void codegen_CMPS ( UCodeBlock* cb, Int sz )
{
   Int td,  /* EDI */   ts,  /* ESI */
       tdv, /* (EDI) */ tsv  /* (ESI) */;
   tdv = newTemp(cb);
   tsv = newTemp(cb);
   td  = newTemp(cb);
   ts  = newTemp(cb);

   /* Compare (EDI) with (ESI): flags from SUB, result discarded. */
   uInstr2(cb, GET,   4, ArchReg, R_EDI, TempReg, td);
   uInstr2(cb, GET,   4, ArchReg, R_ESI, TempReg, ts);

   uInstr2(cb, LOAD, sz, TempReg, td,    TempReg, tdv);
   uInstr2(cb, LOAD, sz, TempReg, ts,    TempReg, tsv);

   uInstr2(cb, SUB,  sz, TempReg, tdv,   TempReg, tsv);
   setFlagsFromUOpcode(cb, SUB);

   /* Get the direction-flag step (+1/-1) into tdv via helper call. */
   uInstr0(cb, CALLM_S, 0);
   uInstr2(cb, MOV,   4, Literal, 0,     TempReg, tdv);
   uLiteral(cb, 0);
   uInstr1(cb, PUSH,  4, TempReg, tdv);

   uInstr1(cb, CALLM, 0, Lit16,   VGOFF_(helper_get_dirflag));
   uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);

   uInstr1(cb, POP,   4, TempReg, tdv);
   uInstr0(cb, CALLM_E, 0);

   if (sz == 4 || sz == 2) {
      uInstr2(cb, SHL, 4, Literal, 0, TempReg, tdv);
      uLiteral(cb, sz/2);
   }
   uInstr2(cb, ADD,   4, TempReg, tdv,   TempReg, td);
   uInstr2(cb, ADD,   4, TempReg, tdv,   TempReg, ts);

   uInstr2(cb, PUT,   4, TempReg, td,    ArchReg, R_EDI);
   uInstr2(cb, PUT,   4, TempReg, ts,    ArchReg, R_ESI);
}


/* Template for MOVS, _not_ preceded by a REP prefix.
*/
static
void codegen_MOVS ( UCodeBlock* cb, Int sz )
{
   Int tv, /* the value being copied */
       td, /* EDI */ ts /* ESI */;
   tv = newTemp(cb);
   td = newTemp(cb);
   ts = newTemp(cb);

   /* Copy one element from (ESI) to (EDI). */
   uInstr2(cb, GET,   4, ArchReg, R_EDI, TempReg, td);
   uInstr2(cb, GET,   4, ArchReg, R_ESI, TempReg, ts);

   uInstr2(cb, LOAD,  sz, TempReg, ts,   TempReg, tv);
   uInstr2(cb, STORE, sz, TempReg, tv,   TempReg, td);
   SMC_IF_SOME(cb);

   /* Get the direction-flag step (+1/-1) into tv via helper call. */
   uInstr0(cb, CALLM_S, 0);
   uInstr2(cb, MOV,   4, Literal, 0,     TempReg, tv);
   uLiteral(cb, 0);
   uInstr1(cb, PUSH,  4, TempReg, tv);

   uInstr1(cb, CALLM, 0, Lit16,   VGOFF_(helper_get_dirflag));
   uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);

   uInstr1(cb, POP,   4, TempReg, tv);
   uInstr0(cb, CALLM_E, 0);

   if (sz == 4 || sz == 2) {
      uInstr2(cb, SHL, 4, Literal, 0, TempReg, tv);
      uLiteral(cb, sz/2);
   }
   uInstr2(cb, ADD,   4, TempReg, tv,    TempReg, td);
   uInstr2(cb, ADD,   4, TempReg, tv,    TempReg, ts);

   uInstr2(cb, PUT,   4, TempReg, td,    ArchReg, R_EDI);
   uInstr2(cb, PUT,   4, TempReg, ts,    ArchReg, R_ESI);
}


/* Template for STOS, _not_ preceded by a REP prefix. */
static
void codegen_STOS ( UCodeBlock* cb, Int sz )
{
   Int ta /* EAX */, td /* EDI */;
   ta = newTemp(cb);
   td = newTemp(cb);

   /* Store EAX/AX/AL to (EDI). */
   uInstr2(cb, GET,   sz, ArchReg, R_EAX, TempReg, ta);
   uInstr2(cb, GET,    4, ArchReg, R_EDI, TempReg, td);
   uInstr2(cb, STORE, sz, TempReg, ta,    TempReg, td);
   SMC_IF_SOME(cb);

   /* Get the direction-flag step (+1/-1) into ta via helper call. */
   uInstr0(cb, CALLM_S, 0);
   uInstr2(cb, MOV,   4, Literal, 0,     TempReg, ta);
   uLiteral(cb, 0);
   uInstr1(cb, PUSH,  4, TempReg, ta);

   uInstr1(cb, CALLM, 0, Lit16,   VGOFF_(helper_get_dirflag));
   uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);

   uInstr1(cb, POP,   4, TempReg, ta);
   uInstr0(cb, CALLM_E, 0);

   if (sz == 4 || sz == 2) {
      uInstr2(cb, SHL, 4, Literal, 0, TempReg, ta);
      uLiteral(cb, sz/2);
   }
   uInstr2(cb, ADD,   4, TempReg, ta,    TempReg, td);
   uInstr2(cb, PUT,   4, TempReg, td,    ArchReg, R_EDI);
}


/* Template for LODS, _not_ preceded by a REP prefix.
*/
static
void codegen_LODS ( UCodeBlock* cb, Int sz )
{
   Int ta /* EAX */, ts /* ESI */;
   ta = newTemp(cb);
   ts = newTemp(cb);

   /* Load (ESI) into EAX/AX/AL. */
   uInstr2(cb, GET,  4,  ArchReg, R_ESI, TempReg, ts);
   uInstr2(cb, LOAD, sz, TempReg, ts,    TempReg, ta);
   uInstr2(cb, PUT,  sz, TempReg, ta,    ArchReg, R_EAX);

   /* Get the direction-flag step (+1/-1) into ta via helper call. */
   uInstr0(cb, CALLM_S, 0);
   uInstr2(cb, MOV,   4, Literal, 0,     TempReg, ta);
   uLiteral(cb, 0);
   uInstr1(cb, PUSH,  4, TempReg, ta);

   uInstr1(cb, CALLM, 0, Lit16,   VGOFF_(helper_get_dirflag));
   uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);

   uInstr1(cb, POP,   4, TempReg, ta);
   uInstr0(cb, CALLM_E, 0);

   if (sz == 4 || sz == 2) {
      uInstr2(cb, SHL, 4, Literal, 0, TempReg, ta);
      uLiteral(cb, sz/2);
   }
   uInstr2(cb, ADD,   4, TempReg, ta,    TempReg, ts);
   uInstr2(cb, PUT,   4, TempReg, ts,    ArchReg, R_ESI);
}


/* Template for SCAS, _not_ preceded by a REP prefix. */
static
void codegen_SCAS ( UCodeBlock* cb, Int sz )
{
   Int ta /* EAX */, td /* EDI */, tv;
   ta = newTemp(cb);
   tv = newTemp(cb);
   td = newTemp(cb);

   /* Compare EAX/AX/AL against (EDI): flags from SUB, result discarded. */
   uInstr2(cb, GET,  sz, ArchReg, R_EAX, TempReg, ta);
   uInstr2(cb, GET,   4, ArchReg, R_EDI, TempReg, td);
   uInstr2(cb, LOAD, sz, TempReg, td,    TempReg, tv);
   /* next uinstr kills ta, but that's ok -- don't need it again */
   uInstr2(cb, SUB,  sz, TempReg, tv,    TempReg, ta);
   setFlagsFromUOpcode(cb, SUB);

   /* Get the direction-flag step (+1/-1) into tv via helper call. */
   uInstr0(cb, CALLM_S, 0);
   uInstr2(cb, MOV,   4, Literal, 0,     TempReg, tv);
   uLiteral(cb, 0);
   uInstr1(cb, PUSH,  4, TempReg, tv);

   uInstr1(cb, CALLM, 0, Lit16,   VGOFF_(helper_get_dirflag));
   uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);

   uInstr1(cb, POP,   4, TempReg, tv);
   uInstr0(cb, CALLM_E, 0);

   if (sz == 4 || sz == 2) {
      uInstr2(cb, SHL, 4, Literal, 0, TempReg, tv);
      uLiteral(cb, sz/2);
   }
   uInstr2(cb, ADD,   4, TempReg, tv,    TempReg, td);
   uInstr2(cb, PUT,   4, TempReg, td,    ArchReg, R_EDI);
}


/* (I)MUL E, G.  Supplied eip points to the modR/M byte.
*/ +static +Addr dis_mul_E_G ( UCodeBlock* cb, + Int size, + Addr eip0, + Bool signed_multiply ) +{ + Int ta, tg, te, helper; + UChar dis_buf[50]; + UChar rm = getUChar(eip0); + ta = INVALID_TEMPREG; + te = newTemp(cb); + tg = newTemp(cb); + + switch (size) { + case 4: helper = signed_multiply ? VGOFF_(helper_imul_32_64) + : VGOFF_(helper_mul_32_64); + break; + case 2: helper = signed_multiply ? VGOFF_(helper_imul_16_32) + : VGOFF_(helper_mul_16_32); + break; + case 1: helper = signed_multiply ? VGOFF_(helper_imul_8_16) + : VGOFF_(helper_mul_8_16); + break; + default: VG_(panic)("dis_mul_E_G"); + } + + uInstr0(cb, CALLM_S, 0); + if (epartIsReg(rm)) { + uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, te); + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tg); + uInstr1(cb, PUSH, size, TempReg, te); + uInstr1(cb, PUSH, size, TempReg, tg); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagsOC, FlagsSZAP); + uInstr1(cb, CLEAR, 0, Lit16, 4); + uInstr1(cb, POP, size, TempReg, tg); + uInstr2(cb, PUT, size, TempReg, tg, ArchReg, gregOfRM(rm)); + uInstr0(cb, CALLM_E, 0); + if (dis) VG_(printf)("%smul%c %s, %s\n", + signed_multiply ? "i" : "", + nameISize(size), + nameIReg(size,eregOfRM(rm)), + nameIReg(size,gregOfRM(rm))); + return 1+eip0; + } else { + UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); + ta = LOW24(pair); + uInstr2(cb, LOAD, size, TempReg, ta, TempReg, te); + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tg); + uInstr1(cb, PUSH, size, TempReg, te); + uInstr1(cb, PUSH, size, TempReg, tg); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagsOC, FlagsSZAP); + uInstr1(cb, CLEAR, 0, Lit16, 4); + uInstr1(cb, POP, size, TempReg, tg); + uInstr2(cb, PUT, size, TempReg, tg, ArchReg, gregOfRM(rm)); + uInstr0(cb, CALLM_E, 0); + if (dis) VG_(printf)("%smul%c %s, %s\n", + signed_multiply ? 
"i" : "", + nameISize(size), + dis_buf,nameIReg(size,gregOfRM(rm))); + return HI8(pair)+eip0; + } +} + + +/* IMUL I * E -> G. Supplied eip points to the modR/M byte. */ +static +Addr dis_imul_I_E_G ( UCodeBlock* cb, + Int size, + Addr eip, + Int litsize ) +{ + Int ta, te, tl, helper, d32; + UChar dis_buf[50]; + UChar rm = getUChar(eip); + ta = INVALID_TEMPREG; + te = newTemp(cb); + tl = newTemp(cb); + + switch (size) { + case 4: helper = VGOFF_(helper_imul_32_64); break; + case 2: helper = VGOFF_(helper_imul_16_32); break; + case 1: helper = VGOFF_(helper_imul_8_16); break; + default: VG_(panic)("dis_imul_I_E_G"); + } + + uInstr0(cb, CALLM_S, 0); + if (epartIsReg(rm)) { + uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, te); + uInstr1(cb, PUSH, size, TempReg, te); + eip++; + } else { + UInt pair = disAMode ( cb, eip, dis?dis_buf:NULL); + ta = LOW24(pair); + uInstr2(cb, LOAD, size, TempReg, ta, TempReg, te); + uInstr1(cb, PUSH, size, TempReg, te); + eip += HI8(pair); + } + + d32 = getSDisp(litsize,eip); + eip += litsize; + + uInstr2(cb, MOV, size, Literal, 0, TempReg, tl); + uLiteral(cb, d32); + uInstr1(cb, PUSH, size, TempReg, tl); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagsOC, FlagsSZAP); + uInstr1(cb, CLEAR, 0, Lit16, 4); + uInstr1(cb, POP, size, TempReg, te); + uInstr2(cb, PUT, size, TempReg, te, ArchReg, gregOfRM(rm)); + uInstr0(cb, CALLM_E, 0); + + if (dis) { + if (epartIsReg(rm)) { + VG_(printf)("imul %d, %s, %s\n", d32, nameIReg(size,eregOfRM(rm)), + nameIReg(size,gregOfRM(rm))); + } else { + VG_(printf)("imul %d, %s, %s\n", d32, dis_buf, + nameIReg(size,gregOfRM(rm))); + } + } + + return eip; +} + + +/* Handle FPU insns which read/write memory. On entry, eip points to + the second byte of the insn (the one following D8 .. DF). 
*/ +static +Addr dis_fpu_mem ( UCodeBlock* cb, Int size, Bool is_write, + Addr eip, UChar first_byte ) +{ + Int ta; + UInt pair; + UChar dis_buf[50]; + UChar second_byte = getUChar(eip); + vg_assert(second_byte < 0xC0); + second_byte &= 0x38; + pair = disAMode ( cb, eip, dis?dis_buf:NULL ); + ta = LOW24(pair); + eip += HI8(pair); + uInstr2(cb, is_write ? FPU_W : FPU_R, size, + Lit16, + (((UShort)first_byte) << 8) | ((UShort)second_byte), + TempReg, ta); + if (is_write) SMC_IF_ALL(cb); + if (dis) { + if (is_write) + VG_(printf)("fpu_w_%d 0x%x:0x%x, %s\n", + size, (UInt)first_byte, + (UInt)second_byte, dis_buf ); + else + VG_(printf)("fpu_r_%d %s, 0x%x:0x%x\n", + size, dis_buf, + (UInt)first_byte, + (UInt)second_byte ); + } + return eip; +} + + +/* Handle FPU insns which don't reference memory. On entry, eip points to + the second byte of the insn (the one following D8 .. DF). */ +static +Addr dis_fpu_no_mem ( UCodeBlock* cb, Addr eip, UChar first_byte ) +{ + UChar second_byte = getUChar(eip); eip++; + vg_assert(second_byte >= 0xC0); + uInstr1(cb, FPU, 0, + Lit16, + (((UShort)first_byte) << 8) | ((UShort)second_byte) + ); + if (dis) VG_(printf)("fpu 0x%x:0x%x\n", + (UInt)first_byte, (UInt)second_byte ); + return eip; +} + + +/* Top-level handler for all FPU insns. On entry, eip points to the + second byte of the insn. */ +static +Addr dis_fpu ( UCodeBlock* cb, UChar first_byte, Addr eip ) +{ + const Bool rd = False; + const Bool wr = True; + UChar second_byte = getUChar(eip); + + /* Handle FSTSW %ax specially. 
*/ + if (first_byte == 0xDF && second_byte == 0xE0) { + Int t1 = newTemp(cb); + uInstr0(cb, CALLM_S, 0); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t1); + uLiteral(cb, 0); + uInstr1(cb, PUSH, 4, TempReg, t1); + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_fstsw_AX) ); + uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); + uInstr1(cb, POP, 2, TempReg, t1); + uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX); + uInstr0(cb, CALLM_E, 0); + if (dis) VG_(printf)("fstsw %%ax\n"); + eip++; + return eip; + } + + /* Handle all non-memory FPU ops simply. */ + if (second_byte >= 0xC0) + return dis_fpu_no_mem ( cb, eip, first_byte ); + + /* The insn references memory; need to determine + whether it reads or writes, and at what size. */ + switch (first_byte) { + + case 0xD8: + switch ((second_byte >> 3) & 7) { + case 0: /* FADDs */ + case 1: /* FMULs */ + case 2: /* FCOMs */ + case 3: /* FCOMPs */ + case 4: /* FSUBs */ + case 5: /* FSUBRs */ + case 6: /* FDIVs */ + case 7: /* FDIVRs */ + return dis_fpu_mem(cb, 4, rd, eip, first_byte); + default: + goto unhandled; + } + break; + + case 0xD9: + switch ((second_byte >> 3) & 7) { + case 0: /* FLDs */ + return dis_fpu_mem(cb, 4, rd, eip, first_byte); + case 2: /* FSTs */ + case 3: /* FSTPs */ + return dis_fpu_mem(cb, 4, wr, eip, first_byte); + case 5: /* FLDCW */ + return dis_fpu_mem(cb, 2, rd, eip, first_byte); + case 7: /* FSTCW */ + /* HACK! FSTCW actually writes 2 bytes, not 4. glibc + gets lots of moaning in __floor() if we do the right + thing here. */ + /* Later ... hack disabled .. we do do the Right Thing. 
*/ + return dis_fpu_mem(cb, /*4*/ 2, wr, eip, first_byte); + default: + goto unhandled; + } + break; + + case 0xDA: + switch ((second_byte >> 3) & 7) { + case 0: /* FIADD */ + case 1: /* FIMUL */ + case 2: /* FICOM */ + case 3: /* FICOMP */ + case 4: /* FISUB */ + case 5: /* FISUBR */ + case 6: /* FIDIV */ + case 7: /* FIDIVR */ + return dis_fpu_mem(cb, 4, rd, eip, first_byte); + default: + goto unhandled; + } + break; + + case 0xDB: + switch ((second_byte >> 3) & 7) { + case 0: /* FILD dword-integer */ + return dis_fpu_mem(cb, 4, rd, eip, first_byte); + case 2: /* FIST dword-integer */ + return dis_fpu_mem(cb, 4, wr, eip, first_byte); + case 3: /* FISTPl */ + return dis_fpu_mem(cb, 4, wr, eip, first_byte); + case 5: /* FLD extended-real */ + return dis_fpu_mem(cb, 10, rd, eip, first_byte); + case 7: /* FSTP extended-real */ + return dis_fpu_mem(cb, 10, wr, eip, first_byte); + default: + goto unhandled; + } + break; + + case 0xDC: + switch ((second_byte >> 3) & 7) { + case 0: /* FADD double-real */ + case 1: /* FMUL double-real */ + case 2: /* FCOM double-real */ + case 3: /* FCOMP double-real */ + case 4: /* FSUB double-real */ + case 5: /* FSUBR double-real */ + case 6: /* FDIV double-real */ + case 7: /* FDIVR double-real */ + return dis_fpu_mem(cb, 8, rd, eip, first_byte); + default: + goto unhandled; + } + break; + + case 0xDD: + switch ((second_byte >> 3) & 7) { + case 0: /* FLD double-real */ + return dis_fpu_mem(cb, 8, rd, eip, first_byte); + case 2: /* FST double-real */ + case 3: /* FSTP double-real */ + return dis_fpu_mem(cb, 8, wr, eip, first_byte); + default: + goto unhandled; + } + break; + + case 0xDF: + switch ((second_byte >> 3) & 7) { + case 0: /* FILD word-integer */ + return dis_fpu_mem(cb, 2, rd, eip, first_byte); + case 2: /* FIST word-integer */ + return dis_fpu_mem(cb, 2, wr, eip, first_byte); + case 3: /* FISTP word-integer */ + return dis_fpu_mem(cb, 2, wr, eip, first_byte); + case 5: /* FILD qword-integer */ + return dis_fpu_mem(cb, 8, 
rd, eip, first_byte); + case 7: /* FISTP qword-integer */ + return dis_fpu_mem(cb, 8, wr, eip, first_byte); + default: + goto unhandled; + } + break; + + default: goto unhandled; + } + + unhandled: + VG_(printf)("dis_fpu: unhandled memory case 0x%2x:0x%2x(%d)\n", + (UInt)first_byte, (UInt)second_byte, + (UInt)((second_byte >> 3) & 7) ); + VG_(panic)("dis_fpu: unhandled opcodes"); +} + + +/* Double length left shifts. Apparently only required in v-size (no + b- variant). */ +static +Addr dis_SHLRD_Gv_Ev ( UCodeBlock* cb, Addr eip, UChar modrm, + Int sz, + Tag amt_tag, UInt amt_val, + Bool left_shift ) +{ + /* amt_tag and amt_val denote either ArchReg(%CL) or a Literal. + And eip on entry points at the modrm byte. */ + Int t, t1, t2, ta, helper; + UInt pair; + UChar dis_buf[50]; + + vg_assert(sz == 2 || sz == 4); + + helper = left_shift + ? (sz==4 ? VGOFF_(helper_shldl) + : VGOFF_(helper_shldw)) + : (sz==4 ? VGOFF_(helper_shrdl) + : VGOFF_(helper_shrdw)); + + /* Get the amount to be shifted by onto the stack. */ + t = newTemp(cb); + t1 = newTemp(cb); + t2 = newTemp(cb); + if (amt_tag == ArchReg) { + vg_assert(amt_val == R_CL); + uInstr2(cb, GET, 1, ArchReg, amt_val, TempReg, t); + } else { + uInstr2(cb, MOV, 1, Literal, 0, TempReg, t); + uLiteral(cb, amt_val); + } + + uInstr0(cb, CALLM_S, 0); + uInstr1(cb, PUSH, 1, TempReg, t); + + /* The E-part is the destination; this is shifted. The G-part + supplies bits to be shifted into the E-part, but is not + changed. 
*/ + + uInstr2(cb, GET, sz, ArchReg, gregOfRM(modrm), TempReg, t1); + uInstr1(cb, PUSH, sz, TempReg, t1); + + if (epartIsReg(modrm)) { + eip++; + uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t2); + uInstr1(cb, PUSH, sz, TempReg, t2); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagsOSZACP, FlagsEmpty); + uInstr1(cb, POP, sz, TempReg, t); + uInstr2(cb, PUT, sz, TempReg, t, ArchReg, eregOfRM(modrm)); + if (dis) + VG_(printf)("shld%c %%cl, %s, %s\n", + nameISize(sz), nameIReg(sz, gregOfRM(modrm)), + nameIReg(sz, eregOfRM(modrm))); + } else { + pair = disAMode ( cb, eip, dis?dis_buf:NULL ); + ta = LOW24(pair); + eip += HI8(pair); + uInstr2(cb, LOAD, sz, TempReg, ta, TempReg, t2); + uInstr1(cb, PUSH, sz, TempReg, t2); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagsOSZACP, FlagsEmpty); + uInstr1(cb, POP, sz, TempReg, t); + uInstr2(cb, STORE, sz, TempReg, t, TempReg, ta); + SMC_IF_ALL(cb); + if (dis) + VG_(printf)("shld%c %%cl, %s, %s\n", + nameISize(sz), nameIReg(sz, gregOfRM(modrm)), + dis_buf); + } + + if (amt_tag == Literal) eip++; + uInstr1(cb, CLEAR, 0, Lit16, 8); + + uInstr0(cb, CALLM_E, 0); + return eip; +} + + +/* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not + required. 
*/ + +typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp; + +static Char* nameBtOp ( BtOp op ) +{ + switch (op) { + case BtOpNone: return ""; + case BtOpSet: return "s"; + case BtOpReset: return "r"; + case BtOpComp: return "c"; + default: VG_(panic)("nameBtOp"); + } +} + +static +Addr dis_bt_G_E ( UCodeBlock* cb, Int sz, Addr eip, BtOp op ) +{ + Int t, t2, ta, helper; + UInt pair; + UChar dis_buf[50]; + UChar modrm; + + vg_assert(sz == 2 || sz == 4); + vg_assert(sz == 4); + switch (op) { + case BtOpNone: helper = VGOFF_(helper_bt); break; + case BtOpSet: helper = VGOFF_(helper_bts); break; + case BtOpReset: helper = VGOFF_(helper_btr); break; + case BtOpComp: helper = VGOFF_(helper_btc); break; + default: VG_(panic)("dis_bt_G_E"); + } + + modrm = getUChar(eip); + + t = newTemp(cb); + t2 = newTemp(cb); + uInstr0(cb, CALLM_S, 0); + uInstr2(cb, GET, sz, ArchReg, gregOfRM(modrm), TempReg, t); + uInstr1(cb, PUSH, sz, TempReg, t); + + if (epartIsReg(modrm)) { + eip++; + uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t2); + uInstr1(cb, PUSH, sz, TempReg, t2); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagC, FlagsOSZAP); + uInstr1(cb, POP, sz, TempReg, t); + uInstr2(cb, PUT, sz, TempReg, t, ArchReg, eregOfRM(modrm)); + if (dis) + VG_(printf)("bt%s%c %s, %s\n", + nameBtOp(op), + nameISize(sz), nameIReg(sz, gregOfRM(modrm)), + nameIReg(sz, eregOfRM(modrm))); + } else { + pair = disAMode ( cb, eip, dis?dis_buf:NULL ); + ta = LOW24(pair); + eip += HI8(pair); + uInstr2(cb, LOAD, sz, TempReg, ta, TempReg, t2); + uInstr1(cb, PUSH, sz, TempReg, t2); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagC, FlagsOSZAP); + uInstr1(cb, POP, sz, TempReg, t); + uInstr2(cb, STORE, sz, TempReg, t, TempReg, ta); + SMC_IF_ALL(cb); + if (dis) + VG_(printf)("bt%s%c %s, %s\n", + nameBtOp(op), + nameISize(sz), nameIReg(sz, gregOfRM(modrm)), + dis_buf); + } + + uInstr1(cb, CLEAR, 0, Lit16, 4); + uInstr0(cb, CALLM_E, 0); + + return 
eip; +} + + +/* Handle BSF/BSR. Only v-size seems necessary. */ +static +Addr dis_bs_E_G ( UCodeBlock* cb, Int sz, Addr eip, Bool fwds ) +{ + Int t, ta, helper; + UInt pair; + UChar dis_buf[50]; + UChar modrm; + + vg_assert(sz == 2 || sz == 4); + vg_assert(sz==4); + + helper = fwds ? VGOFF_(helper_bsf) : VGOFF_(helper_bsr); + modrm = getUChar(eip); + t = newTemp(cb); + + if (epartIsReg(modrm)) { + eip++; + uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t); + if (dis) + VG_(printf)("bs%c%c %s, %s\n", + fwds ? 'f' : 'r', + nameISize(sz), nameIReg(sz, eregOfRM(modrm)), + nameIReg(sz, gregOfRM(modrm))); + } else { + pair = disAMode ( cb, eip, dis?dis_buf:NULL ); + ta = LOW24(pair); + eip += HI8(pair); + uInstr2(cb, LOAD, sz, TempReg, ta, TempReg, t); + if (dis) + VG_(printf)("bs%c%c %s, %s\n", + fwds ? 'f' : 'r', + nameISize(sz), dis_buf, + nameIReg(sz, gregOfRM(modrm))); + } + + uInstr0(cb, CALLM_S, 0); + uInstr1(cb, PUSH, sz, TempReg, t); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagZ, FlagsOSACP); + uInstr1(cb, POP, sz, TempReg, t); + uInstr2(cb, PUT, sz, TempReg, t, ArchReg, gregOfRM(modrm)); + uInstr0(cb, CALLM_E, 0); + + return eip; +} + + +static +void codegen_xchg_eAX_Reg ( UCodeBlock* cb, Int sz, Int reg ) +{ + Int t1, t2; + vg_assert(sz == 2 || sz == 4); + t1 = newTemp(cb); + t2 = newTemp(cb); + uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, t1); + uInstr2(cb, GET, sz, ArchReg, reg, TempReg, t2); + uInstr2(cb, PUT, sz, TempReg, t2, ArchReg, R_EAX); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, reg); + if (dis) + VG_(printf)("xchg%c %s, %s\n", nameISize(sz), + nameIReg(sz, R_EAX), nameIReg(sz, reg)); +} + + +static +void codegen_SAHF ( UCodeBlock* cb ) +{ + Int t = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t); + uInstr0(cb, CALLM_S, 0); + uInstr1(cb, PUSH, 4, TempReg, t); + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_SAHF)); + uFlagsRWU(cb, FlagsEmpty, FlagsSZACP, FlagsEmpty); + uInstr1(cb, CLEAR, 0, Lit16, 
4); + uInstr0(cb, CALLM_E, 0); +} + + +static +Addr dis_cmpxchg_G_E ( UCodeBlock* cb, + Int size, + Addr eip0 ) +{ + Int ta, junk, dest, src, acc; + UChar dis_buf[50]; + UChar rm; + + rm = getUChar(eip0); + acc = newTemp(cb); + src = newTemp(cb); + dest = newTemp(cb); + junk = newTemp(cb); + /* Only needed to get gcc's dataflow analyser off my back. */ + ta = INVALID_TEMPREG; + + if (epartIsReg(rm)) { + uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, dest); + eip0++; + if (dis) VG_(printf)("cmpxchg%c %s,%s\n", + nameISize(size), + nameIReg(size,gregOfRM(rm)), + nameIReg(size,eregOfRM(rm)) ); + nameIReg(size,eregOfRM(rm)); + } else { + UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL ); + ta = LOW24(pair); + uInstr2(cb, LOAD, size, TempReg, ta, TempReg, dest); + eip0 += HI8(pair); + if (dis) VG_(printf)("cmpxchg%c %s,%s\n", nameISize(size), + nameIReg(size,gregOfRM(rm)), dis_buf); + } + + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, src); + uInstr2(cb, GET, size, ArchReg, R_EAX, TempReg, acc); + uInstr2(cb, MOV, size, TempReg, acc, TempReg, junk); + uInstr2(cb, SUB, size, TempReg, dest, TempReg, junk); + setFlagsFromUOpcode(cb, SUB); + + uInstr2(cb, CMOV, 4, TempReg, src, TempReg, dest); + uCond(cb, CondZ); + uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); + uInstr2(cb, CMOV, 4, TempReg, dest, TempReg, acc); + uCond(cb, CondNZ); + uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); + + uInstr2(cb, PUT, size, TempReg, acc, ArchReg, R_EAX); + if (epartIsReg(rm)) { + uInstr2(cb, PUT, size, TempReg, dest, ArchReg, eregOfRM(rm)); + } else { + uInstr2(cb, STORE, size, TempReg, dest, TempReg, ta); + } + + return eip0; +} + + +/* Handle conditional move instructions of the form + cmovcc E(reg-or-mem), G(reg) + + E(src) is reg-or-mem + G(dst) is reg. 
+ + If E is reg, --> GET %E, tmps + GET %G, tmpd + CMOVcc tmps, tmpd + PUT tmpd, %G + + If E is mem --> (getAddr E) -> tmpa + LD (tmpa), tmps + GET %G, tmpd + CMOVcc tmps, tmpd + PUT tmpd, %G +*/ +static +Addr dis_cmov_E_G ( UCodeBlock* cb, + Int size, + Condcode cond, + Addr eip0 ) +{ + UChar rm = getUChar(eip0); + UChar dis_buf[50]; + + Int tmps = newTemp(cb); + Int tmpd = newTemp(cb); + + if (epartIsReg(rm)) { + uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, tmps); + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tmpd); + uInstr2(cb, CMOV, 4, TempReg, tmps, TempReg, tmpd); + uCond(cb, cond); + uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); + uInstr2(cb, PUT, size, TempReg, tmpd, ArchReg, gregOfRM(rm)); + if (dis) VG_(printf)("cmov%c%s %s,%s\n", + nameISize(size), + VG_(nameCondcode)(cond), + nameIReg(size,eregOfRM(rm)), + nameIReg(size,gregOfRM(rm))); + return 1+eip0; + } + + /* E refers to memory */ + { + UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); + Int tmpa = LOW24(pair); + uInstr2(cb, LOAD, size, TempReg, tmpa, TempReg, tmps); + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tmpd); + uInstr2(cb, CMOV, 4, TempReg, tmps, TempReg, tmpd); + uCond(cb, cond); + uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); + uInstr2(cb, PUT, size, TempReg, tmpd, ArchReg, gregOfRM(rm)); + if (dis) VG_(printf)("cmov%c%s %s,%s\n", + nameISize(size), + VG_(nameCondcode)(cond), + dis_buf, + nameIReg(size,gregOfRM(rm))); + return HI8(pair)+eip0; + } +} + + +static +Addr dis_xadd_G_E ( UCodeBlock* cb, + Int sz, + Addr eip0 ) +{ + UChar rm = getUChar(eip0); + UChar dis_buf[50]; + + Int tmpd = newTemp(cb); + Int tmpt = newTemp(cb); + + if (epartIsReg(rm)) { + uInstr2(cb, GET, sz, ArchReg, eregOfRM(rm), TempReg, tmpd); + uInstr2(cb, GET, sz, ArchReg, gregOfRM(rm), TempReg, tmpt); + uInstr2(cb, ADD, sz, TempReg, tmpd, TempReg, tmpt); + setFlagsFromUOpcode(cb, ADD); + uInstr2(cb, PUT, sz, TempReg, tmpt, ArchReg, eregOfRM(rm)); + uInstr2(cb, PUT, sz, 
TempReg, tmpd, ArchReg, gregOfRM(rm)); + if (dis) + VG_(printf)("xadd%c %s, %s\n", nameISize(sz), + nameIReg(sz,gregOfRM(rm)), + nameIReg(sz,eregOfRM(rm))); + return 1+eip0; + } else { + UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); + Int tmpa = LOW24(pair); + uInstr2(cb, LOAD, sz, TempReg, tmpa, TempReg, tmpd); + uInstr2(cb, GET, sz, ArchReg, gregOfRM(rm), TempReg, tmpt); + uInstr2(cb, ADD, sz, TempReg, tmpd, TempReg, tmpt); + setFlagsFromUOpcode(cb, ADD); + uInstr2(cb, STORE, sz, TempReg, tmpt, TempReg, tmpa); + SMC_IF_SOME(cb); + uInstr2(cb, PUT, sz, TempReg, tmpd, ArchReg, gregOfRM(rm)); + if (dis) + VG_(printf)("xadd%c %s, %s\n", nameISize(sz), + nameIReg(sz,gregOfRM(rm)), + dis_buf); + return HI8(pair)+eip0; + } +} + + +/* Push %ECX, %EBX and %EAX, call helper_do_client_request, and put + the resulting %EAX value back. */ +static +void dis_ClientRequest ( UCodeBlock* cb ) +{ + Int tmpc = newTemp(cb); + Int tmpb = newTemp(cb); + Int tmpa = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, tmpc); + uInstr2(cb, GET, 4, ArchReg, R_EBX, TempReg, tmpb); + uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, tmpa); + uInstr0(cb, CALLM_S, 0); + uInstr1(cb, PUSH, 4, TempReg, tmpc); + uInstr1(cb, PUSH, 4, TempReg, tmpb); + uInstr1(cb, PUSH, 4, TempReg, tmpa); + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_do_client_request)); + uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); + uInstr1(cb, POP, 4, TempReg, tmpa); + uInstr1(cb, CLEAR, 0, Lit16, 8); + uInstr0(cb, CALLM_E, 0); + uInstr2(cb, PUT, 4, TempReg, tmpa, ArchReg, R_EAX); + if (dis) + VG_(printf)("%%eax = client_request ( %%eax, %%ebx, %%ecx )\n"); +} + + +/*------------------------------------------------------------*/ +/*--- Disassembling entire basic blocks ---*/ +/*------------------------------------------------------------*/ + +/* Disassemble a single instruction into ucode, returning the update + eip, and setting *isEnd to True if this is the last insn in a basic + block. 
Also do debug printing if (dis). */ + +static Addr disInstr ( UCodeBlock* cb, Addr eip, Bool* isEnd ) +{ + UChar opc, modrm, abyte; + UInt d32, pair; + Int t1, t2, t3, t4; + UChar dis_buf[50]; + Int am_sz, d_sz; + + Int sz = 4; + Int first_uinstr = cb->used; + *isEnd = False; + t1 = t2 = t3 = t4 = INVALID_TEMPREG; + + if (dis) VG_(printf)("\t0x%x: ", eip); + + /* Spot the client-request magic sequence, if required. */ + if (VG_(clo_client_perms)) { + UChar* myeip = (UChar*)eip; + /* Spot this: + C1C01D roll $29, %eax + C1C003 roll $3, %eax + C1C01B roll $27, %eax + C1C005 roll $5, %eax + */ + if (myeip[0] == 0xC1 && myeip[1] == 0xC0 && myeip[2] == 0x1D && + myeip[3] == 0xC1 && myeip[4] == 0xC0 && myeip[5] == 0x03 && + myeip[6] == 0xC1 && myeip[7] == 0xC0 && myeip[8] == 0x1B && + myeip[9] == 0xC1 && myeip[10] == 0xC0 && myeip[11] == 0x05) { + vg_assert(VG_(clo_instrument)); + dis_ClientRequest(cb); + eip += 12; + return eip; + } + } + + /* Skip a LOCK prefix. */ + if (getUChar(eip) == 0xF0) eip++; + + /* Crap out if we see a segment override prefix. */ + if (getUChar(eip) == 0x65) { + VG_(message)(Vg_DebugMsg, ""); + VG_(message)(Vg_DebugMsg, "Possible workaround for the following abort: do not use special"); + VG_(message)(Vg_DebugMsg, "PII/PIII-specific pthreads library (possibly in /lib/i686/*.so)."); + VG_(message)(Vg_DebugMsg, "You might be able to kludge around this by renaming /lib/i686 to"); + VG_(message)(Vg_DebugMsg, "/lib/i686-HIDDEN. On RedHat 7.2 this causes ld.so to fall back"); + VG_(message)(Vg_DebugMsg, "to using the less specialised versions in /lib instead, which"); + VG_(message)(Vg_DebugMsg, "valgrind might be able to better deal with."); + VG_(message)(Vg_DebugMsg, ""); + VG_(message)(Vg_DebugMsg, "WARNING. WARNING. WARNING. WARNING. WARNING. WARNING. WARNING."); + VG_(message)(Vg_DebugMsg, "WARNING: The suggested kludge may also render your system unbootable"); + VG_(message)(Vg_DebugMsg, "WARNING: or otherwise totally screw it up. 
Only try this if you"); + VG_(message)(Vg_DebugMsg, "WARNING: know what you are doing, and are prepared to take risks."); + VG_(message)(Vg_DebugMsg, "YOU HAVE BEEN WARNED. YOU HAVE BEEN WARNED. YOU HAVE BEEN WARNED."); + VG_(message)(Vg_DebugMsg, ""); + VG_(message)(Vg_DebugMsg, "Another consideration is that this may well mean your application"); + VG_(message)(Vg_DebugMsg, "uses threads, which valgrind doesn't currently support, so even if"); + VG_(message)(Vg_DebugMsg, "you work around this problem, valgrind may abort later if it sees"); + VG_(message)(Vg_DebugMsg, "a clone() system call."); + VG_(unimplemented)("x86 segment override (SEG=GS) prefix; see above for details"); + } + + /* Detect operand-size overrides. */ + if (getUChar(eip) == 0x66) { sz = 2; eip++; }; + + opc = getUChar(eip); eip++; + + switch (opc) { + + /* ------------------------ Control flow --------------- */ + + case 0xC2: /* RET imm16 */ + d32 = getUDisp16(eip); eip += 2; + goto do_Ret; + case 0xC3: /* RET */ + d32 = 0; + goto do_Ret; + do_Ret: + t1 = newTemp(cb); t2 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t1); + uInstr2(cb, LOAD, 4, TempReg, t1, TempReg, t2); + uInstr2(cb, ADD, 4, Literal, 0, TempReg, t1); + uLiteral(cb, 4+d32); + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ESP); + uInstr1(cb, JMP, 0, TempReg, t2); + uCond(cb, CondAlways); + + if (d32 == 0) + LAST_UINSTR(cb).ret_dispatch = True; + + *isEnd = True; + if (dis) { + if (d32 == 0) VG_(printf)("ret\n"); + else VG_(printf)("ret %d\n", d32); + } + break; + + case 0xE8: /* CALL J4 */ + d32 = getUDisp32(eip); eip += 4; + d32 += eip; /* eip now holds return-to addr, d32 is call-to addr */ + if (d32 == (Addr)&VG_(shutdown)) { + /* Set vg_dispatch_ctr to 1, vg_interrupt_reason to VG_Y_EXIT, + and get back to the dispatch loop. We ask for a jump to this + CALL insn because vg_dispatch will ultimately transfer control + to the real CPU, and we want this call to be the first insn + it does. 
*/ + uInstr0(cb, CALLM_S, 0); + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_request_normal_exit)); + uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); + uInstr0(cb, CALLM_E, 0); + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, eip-5); + uCond(cb, CondAlways); + *isEnd = True; + if (dis) VG_(printf)("call 0x%x\n",d32); + } else + if (d32 == eip && getUChar(eip) >= 0x58 + && getUChar(eip) <= 0x5F) { + /* Specially treat the position-independent-code idiom + call X + X: popl %reg + as + movl %eip, %reg. + since this generates better code, but for no other reason. */ + Int archReg = getUChar(eip) - 0x58; + /* VG_(printf)("-- fPIC thingy\n"); */ + t1 = newTemp(cb); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t1); + uLiteral(cb, eip); + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, archReg); + eip++; /* Step over the POP */ + if (dis) + VG_(printf)("call 0x%x ; popl %s\n",d32,nameIReg(4,archReg)); + } else { + /* The normal sequence for a call. */ + t1 = newTemp(cb); t2 = newTemp(cb); t3 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3); + uInstr2(cb, MOV, 4, TempReg, t3, TempReg, t1); + uInstr2(cb, SUB, 4, Literal, 0, TempReg, t1); + uLiteral(cb, 4); + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ESP); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2); + uLiteral(cb, eip); + uInstr2(cb, STORE, 4, TempReg, t2, TempReg, t1); + SMC_IF_ALL(cb); + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, d32); + uCond(cb, CondAlways); + LAST_UINSTR(cb).call_dispatch = True; + *isEnd = True; + if (dis) VG_(printf)("call 0x%x\n",d32); + } + break; + + case 0xC9: /* LEAVE */ + t1 = newTemp(cb); t2 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_EBP, TempReg, t1); + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ESP); + uInstr2(cb, LOAD, 4, TempReg, t1, TempReg, t2); + uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP); + uInstr2(cb, ADD, 4, Literal, 0, TempReg, t1); + uLiteral(cb, 4); + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ESP); + if (dis) VG_(printf)("leave"); + break; + + /* 
------------------------ CWD/CDQ -------------------- */ + + case 0x98: /* CBW */ + t1 = newTemp(cb); + if (sz == 4) { + uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, t1); + uInstr1(cb, WIDEN, 4, TempReg, t1); /* 4 == dst size */ + LAST_UINSTR(cb).extra4b = 2; /* the source size */ + LAST_UINSTR(cb).signed_widen = True; + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_EAX); + if (dis) VG_(printf)("cwd\n"); + } else { + vg_assert(sz == 2); + uInstr2(cb, GET, 1, ArchReg, R_EAX, TempReg, t1); + uInstr1(cb, WIDEN, 2, TempReg, t1); /* 2 == dst size */ + LAST_UINSTR(cb).extra4b = 1; /* the source size */ + LAST_UINSTR(cb).signed_widen = True; + uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX); + if (dis) VG_(printf)("cbw\n"); + } + break; + + case 0x99: /* CWD/CDQ */ + t1 = newTemp(cb); + uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, t1); + uInstr2(cb, SAR, sz, Literal, 0, TempReg, t1); + uLiteral(cb, sz == 2 ? 15 : 31); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, R_EDX); + if (dis) VG_(printf)(sz == 2 ? "cwdq\n" : "cdqq\n"); + break; + + /* ------------------------ FPU ops -------------------- */ + + case 0x9E: /* SAHF */ + codegen_SAHF ( cb ); + if (dis) VG_(printf)("sahf\n"); + break; + + case 0x9B: /* FWAIT */ + /* ignore? 
*/ + if (dis) VG_(printf)("fwait\n"); + break; + + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: + eip = dis_fpu ( cb, opc, eip ); + break; + + /* ------------------------ INC & DEC ------------------ */ + + case 0x40: /* INC eAX */ + case 0x41: /* INC eCX */ + case 0x42: /* INC eDX */ + case 0x43: /* INC eBX */ + case 0x45: /* INC eBP */ + case 0x46: /* INC eSI */ + case 0x47: /* INC eDI */ + t1 = newTemp(cb); + uInstr2(cb, GET, sz, ArchReg, (UInt)(opc - 0x40), + TempReg, t1); + uInstr1(cb, INC, sz, TempReg, t1); + setFlagsFromUOpcode(cb, INC); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, + (UInt)(opc - 0x40)); + if (dis) + VG_(printf)("inc%c %s\n", nameISize(sz), nameIReg(sz,opc-0x40)); + break; + + case 0x48: /* DEC eAX */ + case 0x49: /* DEC eCX */ + case 0x4A: /* DEC eDX */ + case 0x4B: /* DEC eBX */ + case 0x4D: /* DEC eBP */ + case 0x4E: /* DEC eSI */ + case 0x4F: /* DEC eDI */ + t1 = newTemp(cb); + uInstr2(cb, GET, sz, ArchReg, (UInt)(opc - 0x48), + TempReg, t1); + uInstr1(cb, DEC, sz, TempReg, t1); + setFlagsFromUOpcode(cb, DEC); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, + (UInt)(opc - 0x48)); + if (dis) + VG_(printf)("dec%c %s\n", nameISize(sz), nameIReg(sz,opc-0x48)); + break; + + /* ------------------------ INT ------------------------ */ + + case 0xCD: /* INT imm8 */ + d32 = getUChar(eip); eip++; + if (d32 != 0x80) VG_(panic)("disInstr: INT but not 0x80 !"); + /* It's important that all ArchRegs carry their up-to-date value + at this point. So we declare an end-of-block here, which + forces any TempRegs caching ArchRegs to be flushed. 
*/ + t1 = newTemp(cb); + uInstr0(cb, CALLM_S, 0); + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_do_syscall) ); + uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); + uInstr0(cb, CALLM_E, 0); + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, eip); + uCond(cb, CondAlways); + *isEnd = True; + if (dis) VG_(printf)("int $0x80\n"); + break; + + /* ------------------------ Jcond, byte offset --------- */ + + case 0xEB: /* Jb (jump, byte offset) */ + d32 = (eip+1) + getSDisp8(eip); eip++; + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, d32); + uCond(cb, CondAlways); + *isEnd = True; + if (dis) + VG_(printf)("jmp-8 0x%x\n", d32); + break; + + case 0xE9: /* Jv (jump, 16/32 offset) */ + d32 = (eip+sz) + getSDisp(sz,eip); eip += sz; + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, d32); + uCond(cb, CondAlways); + *isEnd = True; + if (dis) + VG_(printf)("jmp 0x%x\n", d32); + break; + + case 0x70: + case 0x71: + case 0x72: /* JBb/JNAEb (jump below) */ + case 0x73: /* JNBb/JAEb (jump not below) */ + case 0x74: /* JZb/JEb (jump zero) */ + case 0x75: /* JNZb/JNEb (jump not zero) */ + case 0x76: /* JBEb/JNAb (jump below or equal) */ + case 0x77: /* JNBEb/JAb (jump not below or equal) */ + case 0x78: /* JSb (jump negative) */ + case 0x79: /* JSb (jump not negative) */ + case 0x7A: /* JP (jump parity even) */ + case 0x7B: /* JNP/JPO (jump parity odd) */ + case 0x7C: /* JLb/JNGEb (jump less) */ + case 0x7D: /* JGEb/JNLb (jump greater or equal) */ + case 0x7E: /* JLEb/JNGb (jump less or equal) */ + case 0x7F: /* JGb/JNLEb (jump greater) */ + d32 = (eip+1) + getSDisp8(eip); eip++; + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, d32); + uCond(cb, (Condcode)(opc - 0x70)); + uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); + /* It's actually acceptable not to end this basic block at a + control transfer, reducing the number of jumps through + vg_dispatch, at the expense of possibly translating the insns + following this jump twice. 
This does give faster code, but + on the whole I don't think the effort is worth it. */ + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, eip); + uCond(cb, CondAlways); + *isEnd = True; + /* The above 3 lines would be removed if the bb was not to end + here. */ + if (dis) + VG_(printf)("j%s-8 0x%x\n", VG_(nameCondcode)(opc - 0x70), d32); + break; + + case 0xE3: /* JECXZ or perhaps JCXZ, depending on OSO ? Intel + manual says it depends on address size override, + which doesn't sound right to me. */ + d32 = (eip+1) + getSDisp8(eip); eip++; + t1 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, t1); + uInstr2(cb, JIFZ, 4, TempReg, t1, Literal, 0); + uLiteral(cb, d32); + if (dis) + VG_(printf)("j%sz 0x%x\n", nameIReg(sz, R_ECX), d32); + break; + + case 0xE2: /* LOOP disp8 */ + /* Again, the docs say this uses ECX/CX as a count depending on + the address size override, not the operand one. Since we + don't handle address size overrides, I guess that means + ECX. */ + d32 = (eip+1) + getSDisp8(eip); eip++; + t1 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, t1); + uInstr1(cb, DEC, 4, TempReg, t1); + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ECX); + uInstr2(cb, JIFZ, 4, TempReg, t1, Literal, 0); + uLiteral(cb, eip); + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, d32); + uCond(cb, CondAlways); + *isEnd = True; + if (dis) + VG_(printf)("loop 0x%x\n", d32); + break; + + /* ------------------------ IMUL ----------------------- */ + + case 0x69: /* IMUL Iv, Ev, Gv */ + eip = dis_imul_I_E_G ( cb, sz, eip, sz ); + break; + case 0x6B: /* IMUL Ib, Ev, Gv */ + eip = dis_imul_I_E_G ( cb, sz, eip, 1 ); + break; + + /* ------------------------ MOV ------------------------ */ + + case 0x88: /* MOV Gb,Eb */ + eip = dis_mov_G_E(cb, 1, eip); + break; + + case 0x89: /* MOV Gv,Ev */ + eip = dis_mov_G_E(cb, sz, eip); + break; + + case 0x8A: /* MOV Eb,Gb */ + eip = dis_mov_E_G(cb, 1, eip); + break; + + case 0x8B: /* MOV Ev,Gv */ + eip = dis_mov_E_G(cb, sz, 
eip); + break; + + case 0x8D: /* LEA M,Gv */ + modrm = getUChar(eip); + if (epartIsReg(modrm)) + VG_(panic)("LEA M,Gv: modRM refers to register"); + pair = disAMode ( cb, eip, dis?dis_buf:NULL ); + eip += HI8(pair); + t1 = LOW24(pair); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, gregOfRM(modrm)); + if (dis) + VG_(printf)("lea%c %s, %s\n", nameISize(sz), dis_buf, + nameIReg(sz,gregOfRM(modrm))); + break; + + case 0xA0: /* MOV Ob,AL */ + sz = 1; + /* Fall through ... */ + case 0xA1: /* MOV Ov,eAX */ + d32 = getUDisp32(eip); eip += 4; + t1 = newTemp(cb); t2 = newTemp(cb); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2); + uLiteral(cb, d32); + uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, R_EAX); + if (dis) VG_(printf)("mov%c 0x%x,%s\n", nameISize(sz), + d32, nameIReg(sz,R_EAX)); + break; + + case 0xA2: /* MOV AL,Ob */ + sz = 1; + /* Fall through ... */ + case 0xA3: /* MOV eAX,Ov */ + d32 = getUDisp32(eip); eip += 4; + t1 = newTemp(cb); t2 = newTemp(cb); + uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, t1); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2); + uLiteral(cb, d32); + uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); + SMC_IF_SOME(cb); + if (dis) VG_(printf)("mov%c %s,0x%x\n", nameISize(sz), + nameIReg(sz,R_EAX), d32); + break; + + case 0xB0: /* MOV imm,AL */ + case 0xB1: /* MOV imm,CL */ + case 0xB2: /* MOV imm,DL */ + case 0xB3: /* MOV imm,BL */ + case 0xB4: /* MOV imm,AH */ + case 0xB5: /* MOV imm,CH */ + case 0xB6: /* MOV imm,DH */ + case 0xB7: /* MOV imm,BH */ + d32 = getUChar(eip); eip += 1; + t1 = newTemp(cb); + uInstr2(cb, MOV, 1, Literal, 0, TempReg, t1); + uLiteral(cb, d32); + uInstr2(cb, PUT, 1, TempReg, t1, ArchReg, opc-0xB0); + if (dis) VG_(printf)("movb $0x%x,%s\n", d32, + nameIReg(1,opc-0xB0)); + break; + + case 0xB8: /* MOV imm,eAX */ + case 0xB9: /* MOV imm,eCX */ + case 0xBA: /* MOV imm,eDX */ + case 0xBB: /* MOV imm,eBX */ + case 0xBD: /* MOV imm,eBP */ + case 0xBE: /* MOV imm,eSI */ + case 
0xBF: /* MOV imm,eDI */ + d32 = getUDisp(sz,eip); eip += sz; + t1 = newTemp(cb); + uInstr2(cb, MOV, sz, Literal, 0, TempReg, t1); + uLiteral(cb, d32); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, opc-0xB8); + if (dis) VG_(printf)("mov%c $0x%x,%s\n", nameISize(sz), d32, + nameIReg(sz,opc-0xB8)); + break; + + case 0xC6: /* MOV Ib,Eb */ + sz = 1; + goto do_Mov_I_E; + case 0xC7: /* MOV Iv,Ev */ + goto do_Mov_I_E; + + do_Mov_I_E: + modrm = getUChar(eip); + if (epartIsReg(modrm)) { + d32 = getUDisp(sz,eip); eip += sz; + t1 = newTemp(cb); + uInstr2(cb, MOV, sz, Literal, 0, TempReg, t1); + uLiteral(cb, d32); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm)); + if (dis) VG_(printf)("mov%c $0x%x, %s\n", nameISize(sz), d32, + nameIReg(sz,eregOfRM(modrm))); + } else { + pair = disAMode ( cb, eip, dis?dis_buf:NULL ); + eip += HI8(pair); + d32 = getUDisp(sz,eip); eip += sz; + t1 = newTemp(cb); + t2 = LOW24(pair); + uInstr2(cb, MOV, sz, Literal, 0, TempReg, t1); + uLiteral(cb, d32); + uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); + SMC_IF_SOME(cb); + if (dis) VG_(printf)("mov%c $0x%x, %s\n", nameISize(sz), d32, dis_buf); + } + break; + + /* ------------------------ opl imm, A ----------------- */ + + case 0x04: /* ADD Ib, AL */ + eip = dis_op_imm_A(cb, 1, ADD, True, eip, "add" ); + break; + case 0x05: /* ADD Iv, eAX */ + eip = dis_op_imm_A(cb, sz, ADD, True, eip, "add" ); + break; + + case 0x0C: /* OR Ib, AL */ + eip = dis_op_imm_A(cb, 1, OR, True, eip, "or" ); + break; + case 0x0D: /* OR Iv, eAX */ + eip = dis_op_imm_A(cb, sz, OR, True, eip, "or" ); + break; + + case 0x24: /* AND Ib, AL */ + eip = dis_op_imm_A(cb, 1, AND, True, eip, "and" ); + break; + case 0x25: /* AND Iv, eAX */ + eip = dis_op_imm_A(cb, sz, AND, True, eip, "and" ); + break; + + case 0x2C: /* SUB Ib, AL */ + eip = dis_op_imm_A(cb, 1, SUB, True, eip, "sub" ); + break; + case 0x2D: /* SUB Iv, eAX */ + eip = dis_op_imm_A(cb, sz, SUB, True, eip, "sub" ); + break; + + case 0x34: /* XOR Ib, AL */ + 
eip = dis_op_imm_A(cb, 1, XOR, True, eip, "xor" ); + break; + case 0x35: /* XOR Iv, eAX */ + eip = dis_op_imm_A(cb, sz, XOR, True, eip, "xor" ); + break; + + case 0x3C: /* CMP Ib, AL */ + eip = dis_op_imm_A(cb, 1, SUB, False, eip, "cmp" ); + break; + case 0x3D: /* CMP Iv, eAX */ + eip = dis_op_imm_A(cb, sz, SUB, False, eip, "cmp" ); + break; + + case 0xA8: /* TEST Ib, AL */ + eip = dis_op_imm_A(cb, 1, AND, False, eip, "test" ); + break; + case 0xA9: /* TEST Iv, eAX */ + eip = dis_op_imm_A(cb, sz, AND, False, eip, "test" ); + break; + + /* ------------------------ opl Ev, Gv ----------------- */ + + case 0x02: /* ADD Eb,Gb */ + eip = dis_op2_E_G ( cb, ADD, True, 1, eip, "add" ); + break; + case 0x03: /* ADD Ev,Gv */ + eip = dis_op2_E_G ( cb, ADD, True, sz, eip, "add" ); + break; + + case 0x0A: /* OR Eb,Gb */ + eip = dis_op2_E_G ( cb, OR, True, 1, eip, "or" ); + break; + case 0x0B: /* OR Ev,Gv */ + eip = dis_op2_E_G ( cb, OR, True, sz, eip, "or" ); + break; + + case 0x13: /* ADC Ev,Gv */ + eip = dis_op2_E_G ( cb, ADC, True, sz, eip, "adc" ); + break; + + case 0x1B: /* SBB Ev,Gv */ + eip = dis_op2_E_G ( cb, SBB, True, sz, eip, "sbb" ); + break; + + case 0x22: /* AND Eb,Gb */ + eip = dis_op2_E_G ( cb, AND, True, 1, eip, "and" ); + break; + case 0x23: /* AND Ev,Gv */ + eip = dis_op2_E_G ( cb, AND, True, sz, eip, "and" ); + break; + + case 0x2A: /* SUB Eb,Gb */ + eip = dis_op2_E_G ( cb, SUB, True, 1, eip, "sub" ); + break; + case 0x2B: /* SUB Ev,Gv */ + eip = dis_op2_E_G ( cb, SUB, True, sz, eip, "sub" ); + break; + + case 0x32: /* XOR Eb,Gb */ + eip = dis_op2_E_G ( cb, XOR, True, 1, eip, "xor" ); + break; + case 0x33: /* XOR Ev,Gv */ + eip = dis_op2_E_G ( cb, XOR, True, sz, eip, "xor" ); + break; + + case 0x3A: /* CMP Eb,Gb */ + eip = dis_op2_E_G ( cb, SUB, False, 1, eip, "cmp" ); + break; + case 0x3B: /* CMP Ev,Gv */ + eip = dis_op2_E_G ( cb, SUB, False, sz, eip, "cmp" ); + break; + + case 0x84: /* TEST Eb,Gb */ + eip = dis_op2_E_G ( cb, AND, False, 1, eip, "test" ); + 
break; + case 0x85: /* TEST Ev,Gv */ + eip = dis_op2_E_G ( cb, AND, False, sz, eip, "test" ); + break; + + /* ------------------------ opl Gv, Ev ----------------- */ + + case 0x00: /* ADD Gb,Eb */ + eip = dis_op2_G_E ( cb, ADD, True, 1, eip, "add" ); + break; + case 0x01: /* ADD Gv,Ev */ + eip = dis_op2_G_E ( cb, ADD, True, sz, eip, "add" ); + break; + + case 0x08: /* OR Gb,Eb */ + eip = dis_op2_G_E ( cb, OR, True, 1, eip, "or" ); + break; + case 0x09: /* OR Gv,Ev */ + eip = dis_op2_G_E ( cb, OR, True, sz, eip, "or" ); + break; + + case 0x11: /* ADC Gv,Ev */ + eip = dis_op2_G_E ( cb, ADC, True, sz, eip, "adc" ); + break; + + case 0x19: /* SBB Gv,Ev */ + eip = dis_op2_G_E ( cb, SBB, True, sz, eip, "sbb" ); + break; + + case 0x20: /* AND Gb,Eb */ + eip = dis_op2_G_E ( cb, AND, True, 1, eip, "and" ); + break; + case 0x21: /* AND Gv,Ev */ + eip = dis_op2_G_E ( cb, AND, True, sz, eip, "and" ); + break; + + case 0x28: /* SUB Gb,Eb */ + eip = dis_op2_G_E ( cb, SUB, True, 1, eip, "sub" ); + break; + case 0x29: /* SUB Gv,Ev */ + eip = dis_op2_G_E ( cb, SUB, True, sz, eip, "sub" ); + break; + + case 0x30: /* XOR Gb,Eb */ + eip = dis_op2_G_E ( cb, XOR, True, 1, eip, "xor" ); + break; + case 0x31: /* XOR Gv,Ev */ + eip = dis_op2_G_E ( cb, XOR, True, sz, eip, "xor" ); + break; + + case 0x38: /* CMP Gb,Eb */ + eip = dis_op2_G_E ( cb, SUB, False, 1, eip, "cmp" ); + break; + case 0x39: /* CMP Gv,Ev */ + eip = dis_op2_G_E ( cb, SUB, False, sz, eip, "cmp" ); + break; + + /* ------------------------ POP ------------------------ */ + + case 0x58: /* POP eAX */ + case 0x59: /* POP eCX */ + case 0x5A: /* POP eDX */ + case 0x5B: /* POP eBX */ + case 0x5C: /* POP eSP */ + case 0x5D: /* POP eBP */ + case 0x5E: /* POP eSI */ + case 0x5F: /* POP eDI */ + t1 = newTemp(cb); t2 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2); + uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1); + uInstr2(cb, ADD, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + uInstr2(cb, PUT, 4, TempReg, 
t2, ArchReg, R_ESP); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, opc-0x58); + if (dis) + VG_(printf)("pop%c %s\n", nameISize(sz), nameIReg(sz,opc-0x58)); + break; + + case 0x9D: /* POPF */ + vg_assert(sz == 2 || sz == 4); + t1 = newTemp(cb); t2 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2); + uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1); + uInstr2(cb, ADD, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); + uInstr1(cb, PUTF, sz, TempReg, t1); + /* PUTF writes all the flags we are interested in */ + uFlagsRWU(cb, FlagsEmpty, FlagsALL, FlagsEmpty); + if (dis) + VG_(printf)("popf%c\n", nameISize(sz)); + break; + + case 0x61: /* POPA */ + { Int reg; + /* Just to keep things sane, we assert for a size 4. It's + probably OK for size 2 as well, but I'd like to find a test + case; ie, have the assertion fail, before committing to it. + If it fails for you, uncomment the sz == 2 bit, try again, + and let me know whether or not it works. (jseward@acm.org). */ + vg_assert(sz == 4 /* || sz == 2 */); + + /* Eight values are popped, one per register, but the value of + %esp on the stack is ignored and instead incremented (in one + hit at the end) for each of the values. */ + t1 = newTemp(cb); t2 = newTemp(cb); t3 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2); + uInstr2(cb, MOV, 4, TempReg, t2, TempReg, t3); + + /* Do %edi, %esi, %ebp */ + for (reg = 7; reg >= 5; reg--) { + uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1); + uInstr2(cb, ADD, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, reg); + } + /* Ignore (skip) value of %esp on stack. 
*/ + uInstr2(cb, ADD, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + /* Do %ebx, %edx, %ecx, %eax */ + for (reg = 3; reg >= 0; reg--) { + uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1); + uInstr2(cb, ADD, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, reg); + } + uInstr2(cb, ADD, 4, Literal, 0, TempReg, t3); + uLiteral(cb, sz * 8); /* One 'sz' per register */ + uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ESP); + if (dis) + VG_(printf)("popa%c\n", nameISize(sz)); + break; + } + + /* ------------------------ PUSH ----------------------- */ + + case 0x50: /* PUSH eAX */ + case 0x51: /* PUSH eCX */ + case 0x52: /* PUSH eDX */ + case 0x54: /* PUSH eSP */ + case 0x53: /* PUSH eBX */ + case 0x55: /* PUSH eBP */ + case 0x56: /* PUSH eSI */ + case 0x57: /* PUSH eDI */ + /* This is the Right Way, in that the value to be pushed is + established before %esp is changed, so that pushl %esp + correctly pushes the old value. */ + t1 = newTemp(cb); t2 = newTemp(cb); t3 = newTemp(cb); + uInstr2(cb, GET, sz, ArchReg, opc-0x50, TempReg, t1); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3); + uInstr2(cb, MOV, 4, TempReg, t3, TempReg, t2); + uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); + uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); + SMC_IF_ALL(cb); + if (dis) + VG_(printf)("push%c %s\n", nameISize(sz), nameIReg(sz,opc-0x50)); + break; + + case 0x68: /* PUSH Iv */ + d32 = getUDisp(sz,eip); eip += sz; + goto do_push_I; + case 0x6A: /* PUSH Ib, sign-extended to sz */ + d32 = getSDisp8(eip); eip += 1; + goto do_push_I; + do_push_I: + t1 = newTemp(cb); t2 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t1); + uInstr2(cb, SUB, 4, Literal, 0, TempReg, t1); + uLiteral(cb, sz); + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ESP); + uInstr2(cb, MOV, sz, Literal, 0, TempReg, t2); + uLiteral(cb, d32); + uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t1); + 
SMC_IF_ALL(cb); + if (dis) + VG_(printf)("push%c $0x%x\n", nameISize(sz), d32); + break; + + case 0x9C: /* PUSHF */ + vg_assert(sz == 2 || sz == 4); + t1 = newTemp(cb); t2 = newTemp(cb); t3 = newTemp(cb); + uInstr1(cb, GETF, sz, TempReg, t1); + /* GETF reads all the flags we are interested in */ + uFlagsRWU(cb, FlagsALL, FlagsEmpty, FlagsEmpty); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3); + uInstr2(cb, MOV, 4, TempReg, t3, TempReg, t2); + uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); + uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); + SMC_IF_ALL(cb); + if (dis) + VG_(printf)("pushf%c\n", nameISize(sz)); + break; + + case 0x60: /* PUSHA */ + { Int reg; + /* Just to keep things sane, we assert for a size 4. It's + probably OK for size 2 as well, but I'd like to find a test + case; ie, have the assertion fail, before committing to it. + If it fails for you, uncomment the sz == 2 bit, try again, + and let me know whether or not it works. (jseward@acm.org). */ + vg_assert(sz == 4 /* || sz == 2 */); + + /* This is the Right Way, in that the value to be pushed is + established before %esp is changed, so that pusha + correctly pushes the old %esp value. New value of %esp is + pushed at start. */ + t1 = newTemp(cb); t2 = newTemp(cb); t3 = newTemp(cb); + t4 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3); + uInstr2(cb, MOV, 4, TempReg, t3, TempReg, t2); + uInstr2(cb, MOV, 4, TempReg, t3, TempReg, t4); + uInstr2(cb, SUB, 4, Literal, 0, TempReg, t4); + uLiteral(cb, sz * 8); /* One 'sz' per register. 
*/ + uInstr2(cb, PUT, 4, TempReg, t4, ArchReg, R_ESP); + /* Do %eax, %ecx, %edx, %ebx */ + for (reg = 0; reg <= 3; reg++) { + uInstr2(cb, GET, sz, ArchReg, reg, TempReg, t1); + uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); + SMC_IF_ALL(cb); + } + /* Push old value of %esp */ + uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + uInstr2(cb, STORE, sz, TempReg, t3, TempReg, t2); + SMC_IF_ALL(cb); + /* Do %ebp, %esi, %edi */ + for (reg = 5; reg <= 7; reg++) { + uInstr2(cb, GET, sz, ArchReg, reg, TempReg, t1); + uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); + SMC_IF_ALL(cb); + } + if (dis) + VG_(printf)("pusha%c\n", nameISize(sz)); + break; + } + + /* ------------------------ SCAS et al ----------------- */ + + case 0xA4: /* MOVSb, no REP prefix */ + codegen_MOVS ( cb, 1 ); + if (dis) VG_(printf)("movsb\n"); + break; + case 0xA5: /* MOVSv, no REP prefix */ + codegen_MOVS ( cb, sz ); + if (dis) VG_(printf)("movs%c\n", nameISize(sz)); + break; + + case 0xA6: /* CMPSb, no REP prefix */ + codegen_CMPS ( cb, 1 ); + if (dis) VG_(printf)("cmpsb\n"); + break; + + case 0xAA: /* STOSb, no REP prefix */ + codegen_STOS ( cb, 1 ); + if (dis) VG_(printf)("stosb\n"); + break; + case 0xAB: /* STOSv, no REP prefix */ + codegen_STOS ( cb, sz ); + if (dis) VG_(printf)("stos%c\n", nameISize(sz)); + break; + + case 0xAC: /* LODSb, no REP prefix */ + codegen_LODS ( cb, 1 ); + if (dis) VG_(printf)("lodsb\n"); + break; + + case 0xAE: /* SCASb, no REP prefix */ + codegen_SCAS ( cb, 1 ); + if (dis) VG_(printf)("scasb\n"); + break; + + case 0xFC: /* CLD */ + uInstr0(cb, CALLM_S, 0); + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_CLD)); + uFlagsRWU(cb, FlagsEmpty, FlagD, FlagsEmpty); + uInstr0(cb, CALLM_E, 0); + if (dis) VG_(printf)("cld\n"); + break; + + case 0xFD: /* STD */ + uInstr0(cb, CALLM_S, 0); + uInstr1(cb, CALLM, 0, Lit16, 
VGOFF_(helper_STD)); + uFlagsRWU(cb, FlagsEmpty, FlagD, FlagsEmpty); + uInstr0(cb, CALLM_E, 0); + if (dis) VG_(printf)("std\n"); + break; + + case 0xF2: { /* REPNE prefix insn */ + Addr eip_orig = eip - 1; + abyte = getUChar(eip); eip++; + if (abyte == 0x66) { sz = 2; abyte = getUChar(eip); eip++; } + + if (abyte == 0xAE || 0xAF) { /* REPNE SCAS */ + if (abyte == 0xAE) sz = 1; + codegen_REPNE_SCAS ( cb, sz, eip_orig, eip ); + *isEnd = True; + if (dis) VG_(printf)("repne scas%c\n", nameISize(sz)); + } + else { + VG_(printf)("REPNE then 0x%x\n", (UInt)abyte); + VG_(panic)("Unhandled REPNE case"); + } + break; + } + + case 0xF3: { /* REPE prefix insn */ + Addr eip_orig = eip - 1; + abyte = getUChar(eip); eip++; + if (abyte == 0x66) { sz = 2; abyte = getUChar(eip); eip++; } + + if (abyte == 0xA4 || abyte == 0xA5) { /* REPE MOV */ + if (abyte == 0xA4) sz = 1; + codegen_REPE_MOVS ( cb, sz, eip_orig, eip ); + *isEnd = True; + if (dis) VG_(printf)("repe mov%c\n", nameISize(sz)); + } + else + if (abyte == 0xA6 || abyte == 0xA7) { /* REPE CMP */ + if (abyte == 0xA6) sz = 1; + codegen_REPE_CMPS ( cb, sz, eip_orig, eip ); + *isEnd = True; + if (dis) VG_(printf)("repe cmps%c\n", nameISize(sz)); + } + else + if (abyte == 0xAA || abyte == 0xAB) { /* REPE STOS */ + if (abyte == 0xAA) sz = 1; + codegen_REPE_STOS ( cb, sz, eip_orig, eip ); + *isEnd = True; + if (dis) VG_(printf)("repe stos%c\n", nameISize(sz)); + } else { + VG_(printf)("REPE then 0x%x\n", (UInt)abyte); + VG_(panic)("Unhandled REPE case"); + } + break; + } + + /* ------------------------ XCHG ----------------------- */ + + case 0x86: /* XCHG Gb,Eb */ + sz = 1; + /* Fall through ... 
*/ + case 0x87: /* XCHG Gv,Ev */ + modrm = getUChar(eip); + t1 = newTemp(cb); t2 = newTemp(cb); + if (epartIsReg(modrm)) { + uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); + uInstr2(cb, GET, sz, ArchReg, gregOfRM(modrm), TempReg, t2); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, gregOfRM(modrm)); + uInstr2(cb, PUT, sz, TempReg, t2, ArchReg, eregOfRM(modrm)); + eip++; + if (dis) + VG_(printf)("xchg%c %s, %s\n", nameISize(sz), + nameIReg(sz,gregOfRM(modrm)), + nameIReg(sz,eregOfRM(modrm))); + } else { + pair = disAMode ( cb, eip, dis?dis_buf:NULL); + t3 = LOW24(pair); + uInstr2(cb, LOAD, sz, TempReg, t3, TempReg, t1); + uInstr2(cb, GET, sz, ArchReg, gregOfRM(modrm), TempReg, t2); + uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t3); + SMC_IF_SOME(cb); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, gregOfRM(modrm)); + eip += HI8(pair); + if (dis) + VG_(printf)("xchg%c %s, %s\n", nameISize(sz), + nameIReg(sz,gregOfRM(modrm)), + dis_buf); + } + break; + + case 0x90: /* XCHG eAX,eAX */ + if (dis) VG_(printf)("nop\n"); + break; + case 0x91: /* XCHG eCX,eSI */ + case 0x96: /* XCHG eAX,eSI */ + case 0x97: /* XCHG eAX,eDI */ + codegen_xchg_eAX_Reg ( cb, sz, opc - 0x90 ); + break; + + /* ------------------------ (Grp1 extensions) ---------- */ + + case 0x80: /* Grp1 Ib,Eb */ + modrm = getUChar(eip); + am_sz = lengthAMode(eip); + sz = 1; + d_sz = 1; + d32 = getSDisp8(eip + am_sz); + eip = dis_Grp1 ( cb, eip, modrm, am_sz, d_sz, sz, d32 ); + break; + + case 0x81: /* Grp1 Iv,Ev */ + modrm = getUChar(eip); + am_sz = lengthAMode(eip); + d_sz = sz; + d32 = getUDisp(d_sz, eip + am_sz); + eip = dis_Grp1 ( cb, eip, modrm, am_sz, d_sz, sz, d32 ); + break; + + case 0x83: /* Grp1 Ib,Ev */ + modrm = getUChar(eip); + am_sz = lengthAMode(eip); + d_sz = 1; + d32 = getSDisp8(eip + am_sz); + eip = dis_Grp1 ( cb, eip, modrm, am_sz, d_sz, sz, d32 ); + break; + + /* ------------------------ (Grp2 extensions) ---------- */ + + case 0xC0: /* Grp2 Ib,Eb */ + modrm = getUChar(eip); + am_sz 
= lengthAMode(eip); + d_sz = 1; + d32 = getSDisp8(eip + am_sz); + sz = 1; + eip = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, Literal, d32 ); + break; + + case 0xC1: /* Grp2 Ib,Ev */ + modrm = getUChar(eip); + am_sz = lengthAMode(eip); + d_sz = 1; + d32 = getSDisp8(eip + am_sz); + eip = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, Literal, d32 ); + break; + + case 0xD0: /* Grp2 1,Eb */ + modrm = getUChar(eip); + am_sz = lengthAMode(eip); + d_sz = 0; + d32 = 1; + sz = 1; + eip = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, Literal, d32 ); + break; + + case 0xD1: /* Grp2 1,Ev */ + modrm = getUChar(eip); + am_sz = lengthAMode(eip); + d_sz = 0; + d32 = 1; + eip = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, Literal, d32 ); + break; + + case 0xD3: /* Grp2 CL,Ev */ + modrm = getUChar(eip); + am_sz = lengthAMode(eip); + d_sz = 0; + eip = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, ArchReg, R_ECX ); + break; + + /* ------------------------ (Grp3 extensions) ---------- */ + + case 0xF6: /* Grp3 Eb */ + eip = dis_Grp3 ( cb, 1, eip ); + break; + case 0xF7: /* Grp3 Ev */ + eip = dis_Grp3 ( cb, sz, eip ); + break; + + /* ------------------------ (Grp4 extensions) ---------- */ + + case 0xFE: /* Grp4 Eb */ + eip = dis_Grp4 ( cb, eip ); + break; + + /* ------------------------ (Grp5 extensions) ---------- */ + + case 0xFF: /* Grp5 Ev */ + eip = dis_Grp5 ( cb, sz, eip, isEnd ); + break; + + /* ------------------------ Escapes to 2-byte opcodes -- */ + + case 0x0F: { + opc = getUChar(eip); eip++; + switch (opc) { + + /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */ + + case 0xBA: /* Grp8 Ib,Ev */ + modrm = getUChar(eip); + am_sz = lengthAMode(eip); + d32 = getSDisp8(eip + am_sz); + eip = dis_Grp8 ( cb, eip, modrm, am_sz, sz, d32 ); + break; + + /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */ + + case 0xBC: /* BSF Gv,Ev */ + eip = dis_bs_E_G ( cb, sz, eip, True ); + break; + case 0xBD: /* BSR Gv,Ev */ + eip = dis_bs_E_G ( cb, sz, eip, False ); + break; + + /* 
=-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */ + + case 0xC8: /* BSWAP %eax */ + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: /* BSWAP %edi */ + /* AFAICS from the Intel docs, this only exists at size 4. */ + vg_assert(sz == 4); + t1 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, opc-0xC8, TempReg, t1); + uInstr1(cb, BSWAP, 4, TempReg, t1); + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, opc-0xC8); + if (dis) VG_(printf)("bswapl %s\n", nameIReg(4, opc-0xC8)); + break; + + /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */ + + case 0xA3: /* BT Gv,Ev */ + eip = dis_bt_G_E ( cb, sz, eip, BtOpNone ); + break; + case 0xB3: /* BTR Gv,Ev */ + eip = dis_bt_G_E ( cb, sz, eip, BtOpReset ); + break; + case 0xAB: /* BTS Gv,Ev */ + eip = dis_bt_G_E ( cb, sz, eip, BtOpSet ); + break; + case 0xBB: /* BTC Gv,Ev */ + eip = dis_bt_G_E ( cb, sz, eip, BtOpComp ); + break; + + /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */ + + case 0x40: + case 0x41: + case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */ + case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */ + case 0x44: /* CMOVZb/CMOVEb (cmov zero) */ + case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */ + case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */ + case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */ + case 0x48: /* CMOVSb (cmov negative) */ + case 0x49: /* CMOVSb (cmov not negative) */ + case 0x4A: /* CMOVP (cmov parity even) */ + case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */ + case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */ + case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */ + case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */ + eip = dis_cmov_E_G(cb, sz, (Condcode)(opc - 0x40), eip); + break; + + /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */ + + case 0xB1: /* CMPXCHG Gv,Ev */ + eip = dis_cmpxchg_G_E ( cb, sz, eip ); + break; + + /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */ + + case 0xA2: /* CPUID */ + t1 = newTemp(cb); + t2 = newTemp(cb); + t3 = 
newTemp(cb); + t4 = newTemp(cb); + uInstr0(cb, CALLM_S, 0); + + uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t1); + uInstr1(cb, PUSH, 4, TempReg, t1); + + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2); + uLiteral(cb, 0); + uInstr1(cb, PUSH, 4, TempReg, t2); + + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t3); + uLiteral(cb, 0); + uInstr1(cb, PUSH, 4, TempReg, t3); + + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4); + uLiteral(cb, 0); + uInstr1(cb, PUSH, 4, TempReg, t4); + + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_CPUID)); + uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); + + uInstr1(cb, POP, 4, TempReg, t4); + uInstr2(cb, PUT, 4, TempReg, t4, ArchReg, R_EDX); + + uInstr1(cb, POP, 4, TempReg, t3); + uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ECX); + + uInstr1(cb, POP, 4, TempReg, t2); + uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBX); + + uInstr1(cb, POP, 4, TempReg, t1); + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_EAX); + + uInstr0(cb, CALLM_E, 0); + if (dis) VG_(printf)("cpuid\n"); + break; + + /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */ + + case 0xB6: /* MOVZXb Eb,Gv */ + eip = dis_movx_E_G ( cb, eip, 1, 4, False ); + break; + case 0xB7: /* MOVZXw Ew,Gv */ + eip = dis_movx_E_G ( cb, eip, 2, 4, False ); + break; + + case 0xBE: /* MOVSXb Eb,Gv */ + eip = dis_movx_E_G ( cb, eip, 1, 4, True ); + break; + case 0xBF: /* MOVSXw Ew,Gv */ + eip = dis_movx_E_G ( cb, eip, 2, 4, True ); + break; + + /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */ + + case 0xAF: /* IMUL Ev, Gv */ + eip = dis_mul_E_G ( cb, sz, eip, True ); + break; + + /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */ + case 0x80: + case 0x81: + case 0x82: /* JBb/JNAEb (jump below) */ + case 0x83: /* JNBb/JAEb (jump not below) */ + case 0x84: /* JZb/JEb (jump zero) */ + case 0x85: /* JNZb/JNEb (jump not zero) */ + case 0x86: /* JBEb/JNAb (jump below or equal) */ + case 0x87: /* JNBEb/JAb (jump not below or equal) */ + case 0x88: /* JSb (jump negative) */ + case 0x89: /* JSb (jump not 
negative) */ + case 0x8A: /* JP (jump parity even) */ + case 0x8C: /* JLb/JNGEb (jump less) */ + case 0x8D: /* JGEb/JNLb (jump greater or equal) */ + case 0x8E: /* JLEb/JNGb (jump less or equal) */ + case 0x8F: /* JGb/JNLEb (jump greater) */ + d32 = (eip+4) + getUDisp32(eip); eip += 4; + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, d32); + uCond(cb, (Condcode)(opc - 0x80)); + uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, eip); + uCond(cb, CondAlways); + *isEnd = True; + if (dis) + VG_(printf)("j%s-32 0x%x\n", + VG_(nameCondcode)(opc - 0x80), d32); + break; + + /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */ + + case 0x31: /* RDTSC */ + t1 = newTemp(cb); + t2 = newTemp(cb); + t3 = newTemp(cb); + uInstr0(cb, CALLM_S, 0); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t1); + uLiteral(cb, 0); + uInstr1(cb, PUSH, 4, TempReg, t1); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2); + uLiteral(cb, 0); + uInstr1(cb, PUSH, 4, TempReg, t2); + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_RDTSC)); + uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); + uInstr1(cb, POP, 4, TempReg, t3); + uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_EDX); + uInstr1(cb, POP, 4, TempReg, t3); + uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_EAX); + uInstr0(cb, CALLM_E, 0); + if (dis) VG_(printf)("rdtsc\n"); + break; + + /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */ + case 0x90: + case 0x91: + case 0x92: /* set-Bb/set-NAEb (jump below) */ + case 0x93: /* set-NBb/set-AEb (jump not below) */ + case 0x94: /* set-Zb/set-Eb (jump zero) */ + case 0x95: /* set-NZb/set-NEb (jump not zero) */ + case 0x96: /* set-BEb/set-NAb (jump below or equal) */ + case 0x97: /* set-NBEb/set-Ab (jump not below or equal) */ + case 0x98: /* set-Sb (jump negative) */ + case 0x99: /* set-Sb (jump not negative) */ + case 0x9A: /* set-P (jump parity even) */ + case 0x9B: /* set-NP (jump parity odd) */ + case 0x9C: /* set-Lb/set-NGEb (jump less) */ + case 0x9D: /* 
set-GEb/set-NLb (jump greater or equal) */ + case 0x9E: /* set-LEb/set-NGb (jump less or equal) */ + case 0x9F: /* set-Gb/set-NLEb (jump greater) */ + modrm = getUChar(eip); + t1 = newTemp(cb); + if (epartIsReg(modrm)) { + eip++; + uInstr1(cb, CC2VAL, 1, TempReg, t1); + uCond(cb, (Condcode)(opc-0x90)); + uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); + uInstr2(cb, PUT, 1, TempReg, t1, ArchReg, eregOfRM(modrm)); + if (dis) VG_(printf)("set%s %s\n", + VG_(nameCondcode)(opc-0x90), + nameIReg(1,eregOfRM(modrm))); + } else { + pair = disAMode ( cb, eip, dis?dis_buf:NULL ); + t2 = LOW24(pair); + eip += HI8(pair); + uInstr1(cb, CC2VAL, 1, TempReg, t1); + uCond(cb, (Condcode)(opc-0x90)); + uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); + uInstr2(cb, STORE, 1, TempReg, t1, TempReg, t2); + SMC_IF_ALL(cb); + if (dis) VG_(printf)("set%s %s\n", + VG_(nameCondcode)(opc-0x90), + dis_buf); + } + break; + + /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */ + + case 0xA4: /* SHLDv imm8,Gv,Ev */ + modrm = getUChar(eip); + eip = dis_SHLRD_Gv_Ev ( + cb, eip, modrm, sz, + Literal, getUChar(eip + lengthAMode(eip)), + True ); + break; + case 0xA5: /* SHLDv %cl,Gv,Ev */ + modrm = getUChar(eip); + eip = dis_SHLRD_Gv_Ev ( + cb, eip, modrm, sz, ArchReg, R_CL, True ); + break; + + case 0xAC: /* SHRDv imm8,Gv,Ev */ + modrm = getUChar(eip); + eip = dis_SHLRD_Gv_Ev ( + cb, eip, modrm, sz, + Literal, getUChar(eip + lengthAMode(eip)), + False ); + break; + case 0xAD: /* SHRDv %cl,Gv,Ev */ + modrm = getUChar(eip); + eip = dis_SHLRD_Gv_Ev ( + cb, eip, modrm, sz, ArchReg, R_CL, False ); + break; + + /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */ + + case 0xC1: /* XADD Gv,Ev */ + eip = dis_xadd_G_E ( cb, sz, eip ); + break; + + /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */ + + default: + VG_(printf)("disInstr: unhandled 2-byte opcode 0x%x\n", + (UInt)opc); + VG_(unimplemented)("unhandled x86 0x0F 2-byte opcode"); + } + + break; + } + + /* ------------------------ ??? 
------------------------ */

      default:
         VG_(printf)("disInstr: unhandled opcode 0x%x then 0x%x\n",
                     (UInt)opc, (UInt)getUChar(eip));
         VG_(panic)("unhandled x86 opcode");
   }

   if (dis)
      VG_(printf)("\n");
   for (; first_uinstr < cb->used; first_uinstr++) {
      Bool sane = VG_(saneUInstr)(True, &cb->instrs[first_uinstr]);
      if (dis || !sane)
         VG_(ppUInstr)(sane ? first_uinstr : -1,
                       &cb->instrs[first_uinstr]);
      vg_assert(sane);
   }

   return eip;
}


/* Disassemble a complete basic block, starting at eip, and dumping
   the ucode into cb.  Returns the size, in bytes, of the basic
   block. */

Int VG_(disBB) ( UCodeBlock* cb, Addr eip0 )
{
   Addr eip   = eip0;
   Bool isEnd = False;   /* set True by disInstr at a control transfer */
   Bool block_sane;
   if (dis) VG_(printf)("\n");

   if (VG_(clo_single_step)) {
      /* Single-step mode: translate exactly one instruction, then
         close the block with an unconditional jump to the next
         instruction's address. */
      eip = disInstr ( cb, eip, &isEnd );
      uInstr1(cb, JMP, 0, Literal, 0);
      uLiteral(cb, eip);
      uCond(cb, CondAlways);
      if (dis) VG_(ppUInstr)(cb->used-1, &cb->instrs[cb->used-1]);
   } else {
      Int  delta = 0;   /* original-code bytes covered since the last
                           INCEIP was emitted */
      Addr eip2;
      while (True) {
         if (isEnd) break;
         eip2 = disInstr ( cb, eip, &isEnd );
         delta += (eip2 - eip);
         eip = eip2;
         /* Emit an INCEIP (carrying the byte count as a Lit16) once
            more than 4 bytes have accumulated and the block is not
            about to end anyway, so later phases can track the
            simulated %eip. */
         if (delta > 4 && !isEnd) {
            uInstr1(cb, INCEIP, 0, Lit16, delta);
            if (dis) VG_(ppUInstr)(cb->used-1, &cb->instrs[cb->used-1]);
            delta = 0;
         }
         /* Split up giant basic blocks into pieces, so the
            translations fall within 64k. */
         if (eip - eip0 > 2000) {
            if (VG_(clo_verbosity) > 0)
               VG_(message)(Vg_DebugMsg,
                  "Warning: splitting giant basic block into pieces");
            uInstr1(cb, JMP, 0, Literal, 0);
            uLiteral(cb, eip);
            uCond(cb, CondAlways);
            if (dis) VG_(ppUInstr)(cb->used-1, &cb->instrs[cb->used-1]);
            if (dis) VG_(printf)("\n");
            break;
         }
         if (dis) VG_(printf)("\n");
      }
   }

   /* Refuse to hand back an insane ucode block; dump it first so the
      failure can be diagnosed. */
   block_sane = VG_(saneUCodeBlock)(cb);
   if (!block_sane) {
      VG_(ppUCodeBlock)(cb, "block failing sanity check");
      vg_assert(block_sane);
   }

   return eip - eip0;
}


/*--------------------------------------------------------------------*/
/*--- end                                           vg_to_ucode.c ---*/
/*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_translate.c b/coregrind/vg_translate.c
new file mode 100644
index 000000000..430aebab8
--- /dev/null
+++ b/coregrind/vg_translate.c
@@ -0,0 +1,3096 @@

/*--------------------------------------------------------------------*/
/*--- The JITter proper: register allocation & code improvement   ---*/
/*---                                              vg_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, an x86 protected-mode emulator
   designed for debugging and profiling binaries on x86-Unixes.

   Copyright (C) 2000-2002 Julian Seward
      jseward@acm.org
      Julian_Seward@muraroa.demon.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" + + +/*------------------------------------------------------------*/ +/*--- Renamings of frequently-used global functions. ---*/ +/*------------------------------------------------------------*/ + +#define uInstr1 VG_(newUInstr1) +#define uInstr2 VG_(newUInstr2) +#define uInstr3 VG_(newUInstr3) +#define dis VG_(disassemble) +#define nameIReg VG_(nameOfIntReg) +#define nameISize VG_(nameOfIntSize) +#define uLiteral VG_(setLiteralField) +#define newTemp VG_(getNewTemp) +#define newShadow VG_(getNewShadow) + + +/*------------------------------------------------------------*/ +/*--- Memory management for the translater. ---*/ +/*------------------------------------------------------------*/ + +#define N_JITBLOCKS 4 +#define N_JITBLOCK_SZ 5000 + +static UChar jitstorage[N_JITBLOCKS][N_JITBLOCK_SZ]; +static Bool jitstorage_inuse[N_JITBLOCKS]; +static Bool jitstorage_initdone = False; + +static __inline__ void jitstorage_initialise ( void ) +{ + Int i; + if (jitstorage_initdone) return; + jitstorage_initdone = True; + for (i = 0; i < N_JITBLOCKS; i++) + jitstorage_inuse[i] = False; +} + +void* VG_(jitmalloc) ( Int nbytes ) +{ + Int i; + jitstorage_initialise(); + if (nbytes > N_JITBLOCK_SZ) { + /* VG_(printf)("too large: %d\n", nbytes); */ + return VG_(malloc)(VG_AR_PRIVATE, nbytes); + } + for (i = 0; i < N_JITBLOCKS; i++) { + if (!jitstorage_inuse[i]) { + jitstorage_inuse[i] = True; + /* VG_(printf)("alloc %d -> %d\n", nbytes, i ); */ + return & jitstorage[i][0]; + } + } + VG_(panic)("out of slots in vg_jitmalloc\n"); + return VG_(malloc)(VG_AR_PRIVATE, nbytes); +} + +void VG_(jitfree) ( void* ptr ) +{ + Int i; + jitstorage_initialise(); + for (i = 0; i < 
N_JITBLOCKS; i++) { + if (ptr == & jitstorage[i][0]) { + vg_assert(jitstorage_inuse[i]); + jitstorage_inuse[i] = False; + return; + } + } + VG_(free)(VG_AR_PRIVATE, ptr); +} + +/*------------------------------------------------------------*/ +/*--- Basics ---*/ +/*------------------------------------------------------------*/ + +static UCodeBlock* allocCodeBlock ( void ) +{ + UCodeBlock* cb = VG_(malloc)(VG_AR_PRIVATE, sizeof(UCodeBlock)); + cb->used = cb->size = cb->nextTemp = 0; + cb->instrs = NULL; + return cb; +} + + +static void freeCodeBlock ( UCodeBlock* cb ) +{ + if (cb->instrs) VG_(free)(VG_AR_PRIVATE, cb->instrs); + VG_(free)(VG_AR_PRIVATE, cb); +} + + +/* Ensure there's enough space in a block to add one uinstr. */ +static __inline__ +void ensureUInstr ( UCodeBlock* cb ) +{ + if (cb->used == cb->size) { + if (cb->instrs == NULL) { + vg_assert(cb->size == 0); + vg_assert(cb->used == 0); + cb->size = 8; + cb->instrs = VG_(malloc)(VG_AR_PRIVATE, 8 * sizeof(UInstr)); + } else { + Int i; + UInstr* instrs2 = VG_(malloc)(VG_AR_PRIVATE, + 2 * sizeof(UInstr) * cb->size); + for (i = 0; i < cb->used; i++) + instrs2[i] = cb->instrs[i]; + cb->size *= 2; + VG_(free)(VG_AR_PRIVATE, cb->instrs); + cb->instrs = instrs2; + } + } + + vg_assert(cb->used < cb->size); +} + + +__inline__ +void VG_(emptyUInstr) ( UInstr* u ) +{ + u->val1 = u->val2 = u->val3 = 0; + u->tag1 = u->tag2 = u->tag3 = NoValue; + u->flags_r = u->flags_w = FlagsEmpty; + u->call_dispatch = False; + u->smc_check = u->signed_widen = u->ret_dispatch = False; + u->lit32 = 0; + u->opcode = 0; + u->size = 0; + u->cond = 0; + u->extra4b = 0; +} + + +/* Add an instruction to a ucode block, and return the index of the + instruction. 
*/
__inline__
void VG_(newUInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
                       Tag tag1, UInt val1,
                       Tag tag2, UInt val2,
                       Tag tag3, UInt val3 )
{
   UInstr* ui;
   /* Grow cb->instrs if it is full, then claim the next free slot. */
   ensureUInstr(cb);
   ui = & cb->instrs[cb->used];
   cb->used++;
   /* Start from a blank uinstr so every field has a defined value. */
   VG_(emptyUInstr)(ui);
   ui->val1   = val1;
   ui->val2   = val2;
   ui->val3   = val3;
   ui->opcode = opcode;
   ui->tag1   = tag1;
   ui->tag2   = tag2;
   ui->tag3   = tag3;
   ui->size   = sz;
   /* A TempReg operand must refer to a genuinely-allocated temporary,
      never the invalid-marker value. */
   if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
   if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
   if (tag3 == TempReg) vg_assert(val3 != INVALID_TEMPREG);
}


/* As VG_(newUInstr3), but append a two-operand uinstr; operand 3 is
   left as NoValue by VG_(emptyUInstr). */
__inline__
void VG_(newUInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
                       Tag tag1, UInt val1,
                       Tag tag2, UInt val2 )
{
   UInstr* ui;
   ensureUInstr(cb);
   ui = & cb->instrs[cb->used];
   cb->used++;
   VG_(emptyUInstr)(ui);
   ui->val1   = val1;
   ui->val2   = val2;
   ui->opcode = opcode;
   ui->tag1   = tag1;
   ui->tag2   = tag2;
   ui->size   = sz;
   if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
   if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
}


/* As VG_(newUInstr3), but append a one-operand uinstr. */
__inline__
void VG_(newUInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
                       Tag tag1, UInt val1 )
{
   UInstr* ui;
   ensureUInstr(cb);
   ui = & cb->instrs[cb->used];
   cb->used++;
   VG_(emptyUInstr)(ui);
   ui->val1   = val1;
   ui->opcode = opcode;
   ui->tag1   = tag1;
   ui->size   = sz;
   if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
}


/* As VG_(newUInstr3), but append a zero-operand uinstr (opcode and
   size only). */
__inline__
void VG_(newUInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz )
{
   UInstr* ui;
   ensureUInstr(cb);
   ui = & cb->instrs[cb->used];
   cb->used++;
   VG_(emptyUInstr)(ui);
   ui->opcode = opcode;
   ui->size   = sz;
}


/* Copy an instruction into the given codeblock.  Appends a complete
   copy of *instr (all fields, including the auxiliary ones) at the
   end of cb. */
static __inline__
void copyUInstr ( UCodeBlock* cb, UInstr* instr )
{
   ensureUInstr(cb);
   cb->instrs[cb->used] = *instr;
   cb->used++;
}


/* Copy auxiliary info from one uinstr to another. 
*/
static __inline__
void copyAuxInfoFromTo ( UInstr* src, UInstr* dst )
{
   /* Copies only the "auxiliary" fields -- condition code, widening
      info, dispatch hints and flag sets -- leaving dst's opcode, tags
      and values untouched. */
   dst->cond          = src->cond;
   dst->extra4b       = src->extra4b;
   dst->smc_check     = src->smc_check;
   dst->signed_widen  = src->signed_widen;
   dst->ret_dispatch  = src->ret_dispatch;
   dst->call_dispatch = src->call_dispatch;
   dst->flags_r       = src->flags_r;
   dst->flags_w       = src->flags_w;
}


/* Set the flag R/W sets on a uinstr.  Both sets must be subsets of
   FlagsALL. */
void VG_(setFlagRW) ( UInstr* u, FlagSet fr, FlagSet fw )
{
   /* VG_(ppUInstr)(-1,u); */
   vg_assert(fr == (fr & FlagsALL));
   vg_assert(fw == (fw & FlagsALL));
   u->flags_r = fr;
   u->flags_w = fw;
}


/* Set the lit32 field of the most recent uinsn.  Used to attach the
   32-bit literal to an instruction just emitted with uInstrN. */
void VG_(setLiteralField) ( UCodeBlock* cb, UInt lit32 )
{
   LAST_UINSTR(cb).lit32 = lit32;
}


/* True iff u reads or writes any condition-code flags. */
Bool VG_(anyFlagUse) ( UInstr* u )
{
   return (u->flags_r != FlagsEmpty 
           || u->flags_w != FlagsEmpty);
}




/* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
   register number.  This effectively defines the order in which real
   registers are allocated.  %ebp is excluded since it is permanently
   reserved for pointing at VG_(baseBlock).  %edi is a general spare
   temp used for Left4 and various misc tag ops.

   Important!  If you change the set of allocatable registers from
   %eax, %ebx, %ecx, %edx, %esi you must change the
   save/restore sequences in vg_helper_smc_check4 to match!
*/
__inline__ Int VG_(rankToRealRegNo) ( Int rank )
{
   switch (rank) {
#  if 1
      /* Probably the best allocation ordering. */
      case 0: return R_EAX;
      case 1: return R_EBX;
      case 2: return R_ECX;
      case 3: return R_EDX;
      case 4: return R_ESI;
#  else
      /* Contrary; probably the worst.  Helpful for debugging, tho. */
      case 4: return R_EAX;
      case 3: return R_EBX;
      case 2: return R_ECX;
      case 1: return R_EDX;
      case 0: return R_ESI;
#  endif
      default: VG_(panic)("rankToRealRegNo");
   }
}


/*------------------------------------------------------------*/
/*--- Sanity checking uinstrs. 
---*/ +/*------------------------------------------------------------*/ + +/* This seems as good a place as any to record some important stuff + about ucode semantics. + + * TempRegs are 32 bits wide. LOADs of 8/16 bit values into a + TempReg are defined to zero-extend the loaded value to 32 bits. + This is needed to make the translation of movzbl et al work + properly. + + * Similarly, GETs of a 8/16 bit ArchRegs are zero-extended. + + * Arithmetic on TempRegs is at the specified size. For example, + SUBW t1, t2 has to result in a real 16 bit x86 subtraction + being emitted -- not a 32 bit one. + + * On some insns we allow the cc bit to be set. If so, the + intention is that the simulated machine's %eflags register + is copied into that of the real machine before the insn, + and copied back again afterwards. This means that the + code generated for that insn must be very careful only to + update %eflags in the intended way. This is particularly + important for the routines referenced by CALL insns. +*/ + +/* Meaning of operand kinds is as follows: + + ArchReg is a register of the simulated CPU, stored in memory, + in vg_m_state.m_eax .. m_edi. These values are stored + using the Intel register encoding. + + RealReg is a register of the real CPU. There are VG_MAX_REALREGS + available for allocation. As with ArchRegs, these values + are stored using the Intel register encoding. + + TempReg is a temporary register used to express the results of + disassembly. There is an unlimited supply of them -- + register allocation and spilling eventually assigns them + to RealRegs. + + SpillNo is a spill slot number. The number of required spill + slots is VG_MAX_PSEUDOS, in general. Only allowed + as the ArchReg operand of GET and PUT. + + Lit16 is a signed 16-bit literal value. + + Literal is a 32-bit literal value. Each uinstr can only hold + one of these. + + The disassembled code is expressed purely in terms of ArchReg, + TempReg and Literal operands. 
Eventually, register allocation + removes all the TempRegs, giving a result using ArchRegs, RealRegs, + and Literals. New x86 code can easily be synthesised from this. + There are carefully designed restrictions on which insns can have + which operands, intended to make it possible to generate x86 code + from the result of register allocation on the ucode efficiently and + without need of any further RealRegs. + + Restrictions on insns (as generated by the disassembler) are as + follows: + + A=ArchReg S=SpillNo T=TempReg L=Literal R=RealReg + N=NoValue + + GETF T N N + PUTF T N N + + GET A,S T N + PUT T A,S N + LOAD T T N + STORE T T N + MOV T,L T N + CMOV T T N + WIDEN T N N + JMP T,L N N + CALLM L N N + CALLM_S N N N + CALLM_E N N N + PUSH,POP T N N + CLEAR L N N + + AND, OR + T T N + + ADD, ADC, XOR, SUB, SBB + A,L,T T N + + SHL, SHR, SAR, ROL, ROR, RCL, RCR + L,T T N + + NOT, NEG, INC, DEC, CC2VAL, BSWAP + T N N + + JIFZ T L N + + FPU_R L T N + FPU_W L T N + FPU L T N + + LEA1 T T (const in a seperate field) + LEA2 T T T (const & shift ditto) + + INCEIP L N N + + and for instrumentation insns: + + LOADV T T N + STOREV T,L T N + GETV A T N + PUTV T,L A N + GETVF T N N + PUTVF T N N + WIDENV T N N + TESTV A,T N N + SETV A,T N N + TAG1 T N N + TAG2 T T N + + Before register allocation, S operands should not appear anywhere. + After register allocation, all T operands should have been + converted into Rs, and S operands are allowed in GET and PUT -- + denoting spill saves/restores. + + The size field should be 0 for insns for which it is meaningless, + ie those which do not directly move/operate on data. +*/ +Bool VG_(saneUInstr) ( Bool beforeRA, UInstr* u ) +{ +# define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg)) +# define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg)) +# define TR3 (beforeRA ? 
(u->tag3 == TempReg) : (u->tag3 == RealReg)) +# define A1 (u->tag1 == ArchReg) +# define A2 (u->tag2 == ArchReg) +# define AS1 ((u->tag1 == ArchReg) || ((!beforeRA && (u->tag1 == SpillNo)))) +# define AS2 ((u->tag2 == ArchReg) || ((!beforeRA && (u->tag2 == SpillNo)))) +# define AS3 ((u->tag3 == ArchReg) || ((!beforeRA && (u->tag3 == SpillNo)))) +# define L1 (u->tag1 == Literal && u->val1 == 0) +# define L2 (u->tag2 == Literal && u->val2 == 0) +# define Ls1 (u->tag1 == Lit16) +# define Ls3 (u->tag3 == Lit16) +# define N1 (u->tag1 == NoValue) +# define N2 (u->tag2 == NoValue) +# define N3 (u->tag3 == NoValue) +# define SZ4 (u->size == 4) +# define SZ2 (u->size == 2) +# define SZ1 (u->size == 1) +# define SZ0 (u->size == 0) +# define CC0 (u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty) +# define FLG_RD (u->flags_r == FlagsALL && u->flags_w == FlagsEmpty) +# define FLG_WR (u->flags_r == FlagsEmpty && u->flags_w == FlagsALL) +# define CC1 (!(CC0)) +# define SZ4_IF_TR1 ((u->tag1 == TempReg || u->tag1 == RealReg) \ + ? (u->size == 4) : True) + + Int n_lits = 0; + if (u->tag1 == Literal) n_lits++; + if (u->tag2 == Literal) n_lits++; + if (u->tag3 == Literal) n_lits++; + if (n_lits > 1) + return False; + + switch (u->opcode) { + case GETF: + return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_RD; + case PUTF: + return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_WR; + case CALLM_S: case CALLM_E: + return SZ0 && N1 && N2 && N3; + case INCEIP: + return SZ0 && CC0 && Ls1 && N2 && N3; + case LEA1: + return CC0 && TR1 && TR2 && N3 && SZ4; + case LEA2: + return CC0 && TR1 && TR2 && TR3 && SZ4; + case NOP: + return SZ0 && CC0 && N1 && N2 && N3; + case GET: + return CC0 && AS1 && TR2 && N3; + case PUT: + return CC0 && TR1 && AS2 && N3; + case LOAD: case STORE: + return CC0 && TR1 && TR2 && N3; + case MOV: + return CC0 && (TR1 || L1) && TR2 && N3 && SZ4_IF_TR1; + case CMOV: + return CC1 && TR1 && TR2 && N3 && SZ4; + case JMP: + return (u->cond==CondAlways ? 
CC0 : CC1) + && (TR1 || L1) && N2 && SZ0 && N3; + case CLEAR: + return CC0 && Ls1 && N2 && SZ0 && N3; + case CALLM: + return SZ0 && Ls1 && N2 && N3; + case PUSH: case POP: + return CC0 && TR1 && N2 && N3; + case AND: case OR: + return TR1 && TR2 && N3; + case ADD: case ADC: case XOR: case SUB: case SBB: + return (A1 || TR1 || L1) && TR2 && N3; + case SHL: case SHR: case SAR: case ROL: case ROR: case RCL: case RCR: + return (TR1 || L1) && TR2 && N3; + case NOT: case NEG: case INC: case DEC: + return TR1 && N2 && N3; + case BSWAP: + return TR1 && N2 && N3 && CC0 && SZ4; + case CC2VAL: + return CC1 && SZ1 && TR1 && N2 && N3; + case JIFZ: + return CC0 && SZ4 && TR1 && L2 && N3; + case FPU_R: case FPU_W: + return CC0 && Ls1 && TR2 && N3; + case FPU: + return SZ0 && CC0 && Ls1 && N2 && N3; + case LOADV: + return CC0 && TR1 && TR2 && N3; + case STOREV: + return CC0 && (TR1 || L1) && TR2 && N3; + case GETV: + return CC0 && A1 && TR2 && N3; + case PUTV: + return CC0 && (TR1 || L1) && A2 && N3; + case GETVF: + return CC0 && TR1 && N2 && N3 && SZ0; + case PUTVF: + return CC0 && TR1 && N2 && N3 && SZ0; + case WIDEN: + return CC0 && TR1 && N2 && N3; + case TESTV: + return CC0 && (A1 || TR1) && N2 && N3; + case SETV: + return CC0 && (A1 || TR1) && N2 && N3; + case TAG1: + return CC0 && TR1 && N2 && Ls3 && SZ0; + case TAG2: + return CC0 && TR1 && TR2 && Ls3 && SZ0; + default: + VG_(panic)("vg_saneUInstr: unhandled opcode"); + } +# undef SZ4_IF_TR1 +# undef CC0 +# undef CC1 +# undef SZ4 +# undef SZ2 +# undef SZ1 +# undef SZ0 +# undef TR1 +# undef TR2 +# undef TR3 +# undef A1 +# undef A2 +# undef AS1 +# undef AS2 +# undef AS3 +# undef L1 +# undef Ls1 +# undef L2 +# undef Ls3 +# undef N1 +# undef N2 +# undef N3 +# undef FLG_RD +# undef FLG_WR +} + + +/* Sanity checks to do with CALLMs in UCodeBlocks. 
*/ +Bool VG_(saneUCodeBlock) ( UCodeBlock* cb ) +{ + Int callm = 0; + Int callm_s = 0; + Int callm_e = 0; + Int callm_ptr, calls_ptr; + Int i, j, t; + Bool incall = False; + + /* Ensure the number of CALLM, CALLM_S and CALLM_E are the same. */ + + for (i = 0; i < cb->used; i++) { + switch (cb->instrs[i].opcode) { + case CALLM: + if (!incall) return False; + callm++; + break; + case CALLM_S: + if (incall) return False; + incall = True; + callm_s++; + break; + case CALLM_E: + if (!incall) return False; + incall = False; + callm_e++; + break; + case PUSH: case POP: case CLEAR: + if (!incall) return False; + break; + default: + break; + } + } + if (incall) return False; + if (callm != callm_s || callm != callm_e) return False; + + /* Check the sections between CALLM_S and CALLM's. Ensure that no + PUSH uinsn pushes any TempReg that any other PUSH in the same + section pushes. Ie, check that the TempReg args to PUSHes in + the section are unique. If not, the instrumenter generates + incorrect code for CALLM insns. */ + + callm_ptr = 0; + + find_next_CALLM: + /* Search for the next interval, making calls_ptr .. callm_ptr + bracket it. */ + while (callm_ptr < cb->used + && cb->instrs[callm_ptr].opcode != CALLM) + callm_ptr++; + if (callm_ptr == cb->used) + return True; + vg_assert(cb->instrs[callm_ptr].opcode == CALLM); + + calls_ptr = callm_ptr - 1; + while (cb->instrs[calls_ptr].opcode != CALLM_S) + calls_ptr--; + vg_assert(cb->instrs[calls_ptr].opcode == CALLM_S); + vg_assert(calls_ptr >= 0); + + /* VG_(printf)("interval from %d to %d\n", calls_ptr, callm_ptr ); */ + + /* For each PUSH insn in the interval ... */ + for (i = calls_ptr + 1; i < callm_ptr; i++) { + if (cb->instrs[i].opcode != PUSH) continue; + t = cb->instrs[i].val1; + /* Ensure no later PUSH insns up to callm_ptr push the same + TempReg. Return False if any such are found. 
*/
      /* Scan forward from the PUSH at i: a second PUSH of the same
         TempReg before the CALLM makes the block insane. */
      for (j = i+1; j < callm_ptr; j++) {
         if (cb->instrs[j].opcode == PUSH &&
             cb->instrs[j].val1 == t)
            return False;
      }
   }

   /* This interval is clean.  Keep going ... */
   callm_ptr++;
   goto find_next_CALLM;
}


/*------------------------------------------------------------*/
/*--- Printing uinstrs.                                    ---*/
/*------------------------------------------------------------*/

/* Return the x86 mnemonic suffix for a condition code, as used when
   printing JMP/CC2VAL uinstrs. */
Char* VG_(nameCondcode) ( Condcode cond )
{
   switch (cond) {
      case CondO:      return "o";
      case CondNO:     return "no";
      case CondB:      return "b";
      case CondNB:     return "nb";
      case CondZ:      return "z";
      case CondNZ:     return "nz";
      case CondBE:     return "be";
      case CondNBE:    return "nbe";
      case CondS:      return "s";
      /* NOTE(review): 'ConsNS' looks like a typo for 'CondNS', but it
         must match the Condcode enum spelling declared in
         vg_include.h -- verify there before renaming. */
      case ConsNS:     return "ns";
      case CondP:      return "p";
      case CondNP:     return "np";
      case CondL:      return "l";
      case CondNL:     return "nl";
      case CondLE:     return "le";
      case CondNLE:    return "nle";
      /* "MP" because the opcode prints as "J", giving "JMP" for an
         unconditional jump. */
      case CondAlways: return "MP"; /* hack! */
      default: VG_(panic)("nameCondcode");
   }
}


/* Print prefix followed by one letter per flag present in set. */
static void vg_ppFlagSet ( Char* prefix, FlagSet set )
{
   VG_(printf)("%s", prefix);
   if (set & FlagD) VG_(printf)("D");
   if (set & FlagO) VG_(printf)("O");
   if (set & FlagS) VG_(printf)("S");
   if (set & FlagZ) VG_(printf)("Z");
   if (set & FlagA) VG_(printf)("A");
   if (set & FlagC) VG_(printf)("C");
   if (set & FlagP) VG_(printf)("P");
}


/* Print a temporary: even numbers display as "tN", odd ones as
   "qN" using the preceding even number. */
static void ppTempReg ( Int tt )
{
   if ((tt & 1) == 0)
      VG_(printf)("t%d", tt);
   else
      VG_(printf)("q%d", tt-1);
}


/* Print operand 1, 2 or 3 of u at the given size, optionally wrapped
   in parentheses (used for memory-ish operands). */
static void ppUOperand ( UInstr* u, Int operandNo, Int sz, Bool parens )
{
   UInt tag, val;
   switch (operandNo) {
      case 1: tag = u->tag1; val = u->val1; break;
      case 2: tag = u->tag2; val = u->val2; break;
      case 3: tag = u->tag3; val = u->val3; break;
      default: VG_(panic)("ppUOperand(1)");
   }
   /* Literals live in the shared lit32 field, not valN. */
   if (tag == Literal) val = u->lit32;

   if (parens) VG_(printf)("(");
   switch (tag) {
      case TempReg: ppTempReg(val); break;
      case RealReg: VG_(printf)("%s",nameIReg(sz==0 ? 
4 : sz,val)); break; + case Literal: VG_(printf)("$0x%x", val); break; + case Lit16: VG_(printf)("$0x%x", val); break; + case NoValue: VG_(printf)("NoValue"); break; + case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break; + case SpillNo: VG_(printf)("spill%d", val); break; + default: VG_(panic)("ppUOperand(2)"); + } + if (parens) VG_(printf)(")"); +} + + +Char* VG_(nameUOpcode) ( Bool upper, Opcode opc ) +{ + switch (opc) { + case ADD: return (upper ? "ADD" : "add"); + case ADC: return (upper ? "ADC" : "adc"); + case AND: return (upper ? "AND" : "and"); + case OR: return (upper ? "OR" : "or"); + case XOR: return (upper ? "XOR" : "xor"); + case SUB: return (upper ? "SUB" : "sub"); + case SBB: return (upper ? "SBB" : "sbb"); + case SHL: return (upper ? "SHL" : "shl"); + case SHR: return (upper ? "SHR" : "shr"); + case SAR: return (upper ? "SAR" : "sar"); + case ROL: return (upper ? "ROL" : "rol"); + case ROR: return (upper ? "ROR" : "ror"); + case RCL: return (upper ? "RCL" : "rcl"); + case RCR: return (upper ? "RCR" : "rcr"); + case NOT: return (upper ? "NOT" : "not"); + case NEG: return (upper ? "NEG" : "neg"); + case INC: return (upper ? "INC" : "inc"); + case DEC: return (upper ? "DEC" : "dec"); + case BSWAP: return (upper ? 
"BSWAP" : "bswap"); + default: break; + } + if (!upper) VG_(panic)("vg_nameUOpcode: invalid !upper"); + switch (opc) { + case GETVF: return "GETVF"; + case PUTVF: return "PUTVF"; + case TAG1: return "TAG1"; + case TAG2: return "TAG2"; + case CALLM_S: return "CALLM_S"; + case CALLM_E: return "CALLM_E"; + case INCEIP: return "INCEIP"; + case LEA1: return "LEA1"; + case LEA2: return "LEA2"; + case NOP: return "NOP"; + case GET: return "GET"; + case PUT: return "PUT"; + case GETF: return "GETF"; + case PUTF: return "PUTF"; + case LOAD: return "LD" ; + case STORE: return "ST" ; + case MOV: return "MOV"; + case CMOV: return "CMOV"; + case WIDEN: return "WIDEN"; + case JMP: return "J" ; + case JIFZ: return "JIFZ" ; + case CALLM: return "CALLM"; + case PUSH: return "PUSH" ; + case POP: return "POP" ; + case CLEAR: return "CLEAR"; + case CC2VAL: return "CC2VAL"; + case FPU_R: return "FPU_R"; + case FPU_W: return "FPU_W"; + case FPU: return "FPU" ; + case LOADV: return "LOADV"; + case STOREV: return "STOREV"; + case GETV: return "GETV"; + case PUTV: return "PUTV"; + case TESTV: return "TESTV"; + case SETV: return "SETV"; + default: VG_(panic)("nameUOpcode: unhandled case"); + } +} + + +void VG_(ppUInstr) ( Int instrNo, UInstr* u ) +{ + VG_(printf)("\t%4d: %s", instrNo, + VG_(nameUOpcode)(True, u->opcode)); + if (u->opcode == JMP || u->opcode == CC2VAL) + VG_(printf)("%s", VG_(nameCondcode(u->cond))); + + switch (u->size) { + case 0: VG_(printf)("o"); break; + case 1: VG_(printf)("B"); break; + case 2: VG_(printf)("W"); break; + case 4: VG_(printf)("L"); break; + case 8: VG_(printf)("Q"); break; + default: VG_(printf)("%d", (Int)u->size); break; + } + + switch (u->opcode) { + + case TAG1: + VG_(printf)("\t"); + ppUOperand(u, 1, 4, False); + VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 )); + ppUOperand(u, 1, 4, False); + VG_(printf)(" )"); + break; + + case TAG2: + VG_(printf)("\t"); + ppUOperand(u, 2, 4, False); + VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 )); + 
ppUOperand(u, 1, 4, False); + VG_(printf)(", "); + ppUOperand(u, 2, 4, False); + VG_(printf)(" )"); + break; + + case CALLM_S: case CALLM_E: + break; + + case INCEIP: + VG_(printf)("\t$%d", u->val1); + break; + + case LEA2: + VG_(printf)("\t%d(" , u->lit32); + ppUOperand(u, 1, 4, False); + VG_(printf)(","); + ppUOperand(u, 2, 4, False); + VG_(printf)(",%d), ", (Int)u->extra4b); + ppUOperand(u, 3, 4, False); + break; + + case LEA1: + VG_(printf)("\t%d" , u->lit32); + ppUOperand(u, 1, 4, True); + VG_(printf)(", "); + ppUOperand(u, 2, 4, False); + break; + + case NOP: + break; + + case FPU_W: + VG_(printf)("\t0x%x:0x%x, ", + (u->val1 >> 8) & 0xFF, u->val1 & 0xFF ); + ppUOperand(u, 2, 4, True); + break; + + case FPU_R: + VG_(printf)("\t"); + ppUOperand(u, 2, 4, True); + VG_(printf)(", 0x%x:0x%x", + (u->val1 >> 8) & 0xFF, u->val1 & 0xFF ); + break; + + case FPU: + VG_(printf)("\t0x%x:0x%x", + (u->val1 >> 8) & 0xFF, u->val1 & 0xFF ); + break; + + case STOREV: case LOADV: + case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV: + VG_(printf)("\t"); + ppUOperand(u, 1, u->size, u->opcode==LOAD || u->opcode==LOADV); + VG_(printf)(", "); + ppUOperand(u, 2, u->size, u->opcode==STORE || u->opcode==STOREV); + break; + + case GETF: case PUTF: + VG_(printf)("\t"); + ppUOperand(u, 1, u->size, False); + break; + + case JMP: case CC2VAL: + case PUSH: case POP: case CLEAR: case CALLM: + if (u->opcode == JMP && u->ret_dispatch) + VG_(printf)("-r"); + if (u->opcode == JMP && u->call_dispatch) + VG_(printf)("-c"); + VG_(printf)("\t"); + ppUOperand(u, 1, u->size, False); + break; + + case JIFZ: + VG_(printf)("\t"); + ppUOperand(u, 1, u->size, False); + VG_(printf)(", "); + ppUOperand(u, 2, u->size, False); + break; + + case PUTVF: case GETVF: + VG_(printf)("\t"); + ppUOperand(u, 1, 0, False); + break; + + case NOT: case NEG: case INC: case DEC: case BSWAP: + VG_(printf)("\t"); + ppUOperand(u, 1, u->size, False); + break; + + case ADD: case ADC: case AND: case OR: + case XOR: case 
SUB: case SBB: + case SHL: case SHR: case SAR: + case ROL: case ROR: case RCL: case RCR: + VG_(printf)("\t"); + ppUOperand(u, 1, u->size, False); + VG_(printf)(", "); + ppUOperand(u, 2, u->size, False); + break; + + case GETV: case PUTV: + VG_(printf)("\t"); + ppUOperand(u, 1, u->opcode==PUTV ? 4 : u->size, False); + VG_(printf)(", "); + ppUOperand(u, 2, u->opcode==GETV ? 4 : u->size, False); + break; + + case WIDEN: + VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)), + u->signed_widen?'s':'z'); + VG_(printf)("\t"); + ppUOperand(u, 1, u->size, False); + break; + + case TESTV: case SETV: + VG_(printf)("\t"); + ppUOperand(u, 1, u->size, False); + break; + + default: VG_(panic)("ppUInstr: unhandled opcode"); + } + + if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) { + VG_(printf)(" ("); + if (u->flags_r != FlagsEmpty) + vg_ppFlagSet("-r", u->flags_r); + if (u->flags_w != FlagsEmpty) + vg_ppFlagSet("-w", u->flags_w); + VG_(printf)(")"); + } + VG_(printf)("\n"); +} + + +void VG_(ppUCodeBlock) ( UCodeBlock* cb, Char* title ) +{ + Int i; + VG_(printf)("\n%s\n", title); + for (i = 0; i < cb->used; i++) + if (0 || cb->instrs[i].opcode != NOP) + VG_(ppUInstr) ( i, &cb->instrs[i] ); + VG_(printf)("\n"); +} + + +/*------------------------------------------------------------*/ +/*--- uinstr helpers for register allocation ---*/ +/*--- and code improvement. ---*/ +/*------------------------------------------------------------*/ + +/* A structure for communicating temp uses, and for indicating + temp->real register mappings for patchUInstr. */ +typedef + struct { + Int realNo; + Int tempNo; + Bool isWrite; + } + TempUse; + + +/* Get the temp use of a uinstr, parking them in an array supplied by + the caller, which is assumed to be big enough. Return the number + of entries. Insns which read _and_ write a register wind up + mentioning it twice. 
Entries are placed in the array in program
   order, so that if a reg is read-modified-written, it appears first
   as a read and then as a write.
*/
static __inline__
Int getTempUsage ( UInstr* u, TempUse* arr )
{

   /* RD/WR record a read (resp. write) of operand `ono` in arr[n]
      when that operand is a TempReg.  Both expand in terms of the
      local `n` below; `mycat` presumably token-pastes to form
      u->tagN / u->valN (defined in vg_include.h -- verify). */
#  define RD(ono)                                  \
      if (mycat(u->tag,ono) == TempReg)            \
         { arr[n].tempNo  = mycat(u->val,ono);     \
           arr[n].isWrite = False; n++; }
#  define WR(ono)                                  \
      if (mycat(u->tag,ono) == TempReg)            \
         { arr[n].tempNo  = mycat(u->val,ono);     \
           arr[n].isWrite = True; n++; }

   Int n = 0;
   switch (u->opcode) {
      case LEA1: RD(1); WR(2); break;
      case LEA2: RD(1); RD(2); WR(3); break;

      /* No TempReg operands at all. */
      case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E: break;
      case FPU_R: case FPU_W: RD(2); break;

      case GETF:  WR(1); break;
      case PUTF:  RD(1); break;

      case GET:   WR(2); break;
      case PUT:   RD(1); break;
      case LOAD:  RD(1); WR(2); break;
      case STORE: RD(1); RD(2); break;
      case MOV:   RD(1); WR(2); break;

      case JMP:   RD(1); break;
      case CLEAR: case CALLM: break;

      case PUSH: RD(1); break;
      case POP:  WR(1); break;

      /* Two-operand ALU forms read both operands and write the
         second (read-modify-write on operand 2). */
      case TAG2:
      case CMOV:
      case ADD: case ADC: case AND: case OR:  
      case XOR: case SUB: case SBB:   
         RD(1); RD(2); WR(2); break;

      case SHL: case SHR: case SAR: 
      case ROL: case ROR: case RCL: case RCR:
         RD(1); RD(2); WR(2); break;

      /* One-operand read-modify-write forms. */
      case NOT: case NEG: case INC: case DEC: case TAG1: case BSWAP:
         RD(1); WR(1); break;

      case WIDEN: RD(1); WR(1); break;

      case CC2VAL: WR(1); break;
      case JIFZ: RD(1); break;

      /* These sizes are only ever consulted when the instrumentation
         code is being added, so the following can return
         manifestly-bogus sizes. 
*/
      case LOADV:  RD(1); WR(2); break;
      case STOREV: RD(1); RD(2); break;
      case GETV:   WR(2); break;
      case PUTV:   RD(1); break;
      case TESTV:  RD(1); break;
      case SETV:   WR(1); break;
      case PUTVF:  RD(1); break;
      case GETVF:  WR(1); break;

      default: VG_(panic)("getTempUsage: unhandled opcode");
   }
   return n;

#  undef RD
#  undef WR
}


/* Change temp regs in u into real regs, as directed by tmap.
   Each TempReg operand is looked up in tmap[0..n_tmap-1] by tempNo
   and rewritten to the corresponding realNo; an unmapped temp is a
   fatal error (panic). */
static __inline__ 
void patchUInstr ( UInstr* u, TempUse* tmap, Int n_tmap )
{
   Int i;
   if (u->tag1 == TempReg) {
      for (i = 0; i < n_tmap; i++)
         if (tmap[i].tempNo == u->val1) break;
      if (i == n_tmap) VG_(panic)("patchUInstr(1)");
      u->tag1 = RealReg;
      u->val1 = tmap[i].realNo;
   }
   if (u->tag2 == TempReg) {
      for (i = 0; i < n_tmap; i++)
         if (tmap[i].tempNo == u->val2) break;
      if (i == n_tmap) VG_(panic)("patchUInstr(2)");
      u->tag2 = RealReg;
      u->val2 = tmap[i].realNo;
   }
   if (u->tag3 == TempReg) {
      for (i = 0; i < n_tmap; i++)
         if (tmap[i].tempNo == u->val3) break;
      if (i == n_tmap) VG_(panic)("patchUInstr(3)");
      u->tag3 = RealReg;
      u->val3 = tmap[i].realNo;
   }
}


/* Tedious x86-specific hack which compensates for the fact that the
   register numbers for %ah .. %dh do not correspond to those for %eax
   .. %edx.  It maps a (reg size, reg no) pair to the number of the
   containing 32-bit reg. */
static __inline__ 
Int containingArchRegOf ( Int sz, Int aregno )
{
   switch (sz) {
      case 4: return aregno;
      case 2: return aregno;
      /* Size-1 regs 4..7 are %ah..%dh, whose containing 32-bit regs
         are %eax..%edx, i.e. numbers 0..3. */
      case 1: return aregno >= 4 ? aregno-4 : aregno;
      default: VG_(panic)("containingArchRegOf");
   }
}


/* If u reads an ArchReg, return the number of the containing arch
   reg.  Otherwise return -1.  Used in redundant-PUT elimination. 
*/ +static __inline__ +Int maybe_uinstrReadsArchReg ( UInstr* u ) +{ + switch (u->opcode) { + case GET: + case ADD: case ADC: case AND: case OR: + case XOR: case SUB: case SBB: + case SHL: case SHR: case SAR: case ROL: + case ROR: case RCL: case RCR: + if (u->tag1 == ArchReg) + return containingArchRegOf ( u->size, u->val1 ); + else + return -1; + + case GETF: case PUTF: + case CALLM_S: case CALLM_E: + case INCEIP: + case LEA1: + case LEA2: + case NOP: + case PUT: + case LOAD: + case STORE: + case MOV: + case CMOV: + case JMP: + case CALLM: case CLEAR: case PUSH: case POP: + case NOT: case NEG: case INC: case DEC: case BSWAP: + case CC2VAL: + case JIFZ: + case FPU: case FPU_R: case FPU_W: + case WIDEN: + return -1; + + default: + VG_(ppUInstr)(0,u); + VG_(panic)("maybe_uinstrReadsArchReg: unhandled opcode"); + } +} + +static __inline__ +Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg ) +{ + Int i, k; + TempUse tempUse[3]; + k = getTempUsage ( u, &tempUse[0] ); + for (i = 0; i < k; i++) + if (tempUse[i].tempNo == tempreg) + return True; + return False; +} + + +/*------------------------------------------------------------*/ +/*--- ucode improvement. ---*/ +/*------------------------------------------------------------*/ + +/* Improve the code in cb by doing + -- Redundant ArchReg-fetch elimination + -- Redundant PUT elimination + -- Redundant cond-code restore/save elimination + The overall effect of these is to allow target registers to be + cached in host registers over multiple target insns. +*/ +static void vg_improve ( UCodeBlock* cb ) +{ + Int i, j, k, m, n, ar, tr, told, actual_areg; + Int areg_map[8]; + Bool annul_put[8]; + TempUse tempUse[3]; + UInstr* u; + Bool wr; + Int* last_live_before; + FlagSet future_dead_flags; + + if (cb->nextTemp > 0) + last_live_before = VG_(jitmalloc) ( cb->nextTemp * sizeof(Int) ); + else + last_live_before = NULL; + + + /* PASS 1: redundant GET elimination. 
(Actually, more general than + that -- eliminates redundant fetches of ArchRegs). */ + + /* Find the live-range-ends for all temporaries. Duplicates code + in the register allocator :-( */ + + for (i = 0; i < cb->nextTemp; i++) last_live_before[i] = -1; + + for (i = cb->used-1; i >= 0; i--) { + u = &cb->instrs[i]; + + k = getTempUsage(u, &tempUse[0]); + + /* For each temp usage ... bwds in program order. */ + for (j = k-1; j >= 0; j--) { + tr = tempUse[j].tempNo; + wr = tempUse[j].isWrite; + if (last_live_before[tr] == -1) { + vg_assert(tr >= 0 && tr < cb->nextTemp); + last_live_before[tr] = wr ? (i+1) : i; + } + } + + } + +# define BIND_ARCH_TO_TEMP(archreg,tempreg)\ + { Int q; \ + /* Invalidate any old binding(s) to tempreg. */ \ + for (q = 0; q < 8; q++) \ + if (areg_map[q] == tempreg) areg_map[q] = -1; \ + /* Add the new binding. */ \ + areg_map[archreg] = (tempreg); \ + } + + /* Set up the A-reg map. */ + for (i = 0; i < 8; i++) areg_map[i] = -1; + + /* Scan insns. */ + for (i = 0; i < cb->used; i++) { + u = &cb->instrs[i]; + if (u->opcode == GET && u->size == 4) { + /* GET; see if it can be annulled. */ + vg_assert(u->tag1 == ArchReg); + vg_assert(u->tag2 == TempReg); + ar = u->val1; + tr = u->val2; + told = areg_map[ar]; + if (told != -1 && last_live_before[told] <= i) { + /* ar already has an old mapping to told, but that runs + out here. Annul this GET, rename tr to told for the + rest of the block, and extend told's live range to that + of tr. */ + u->opcode = NOP; + u->tag1 = u->tag2 = NoValue; + n = last_live_before[tr] + 1; + if (n > cb->used) n = cb->used; + last_live_before[told] = last_live_before[tr]; + last_live_before[tr] = i-1; + if (VG_(disassemble)) + VG_(printf)( + "at %d: delete GET, rename t%d to t%d in (%d .. 
%d)\n", + i, tr, told,i+1, n-1); + for (m = i+1; m < n; m++) { + if (cb->instrs[m].tag1 == TempReg + && cb->instrs[m].val1 == tr) + cb->instrs[m].val1 = told; + if (cb->instrs[m].tag2 == TempReg + && cb->instrs[m].val2 == tr) + cb->instrs[m].val2 = told; + } + BIND_ARCH_TO_TEMP(ar,told); + } + else + BIND_ARCH_TO_TEMP(ar,tr); + } + else if (u->opcode == GET && u->size != 4) { + /* Invalidate any mapping for this archreg. */ + actual_areg = containingArchRegOf ( u->size, u->val1 ); + areg_map[actual_areg] = -1; + } + else if (u->opcode == PUT && u->size == 4) { + /* PUT; re-establish t -> a binding */ + vg_assert(u->tag1 == TempReg); + vg_assert(u->tag2 == ArchReg); + BIND_ARCH_TO_TEMP(u->val2, u->val1); + } + else if (u->opcode == PUT && u->size != 4) { + /* Invalidate any mapping for this archreg. */ + actual_areg = containingArchRegOf ( u->size, u->val2 ); + areg_map[actual_areg] = -1; + } else { + + /* see if insn has an archreg as a read operand; if so try to + map it. */ + if (u->tag1 == ArchReg && u->size == 4 + && areg_map[u->val1] != -1) { + switch (u->opcode) { + case ADD: case SUB: case AND: case OR: case XOR: + case ADC: case SBB: + case SHL: case SHR: case SAR: case ROL: case ROR: + case RCL: case RCR: + if (VG_(disassemble)) + VG_(printf)( + "at %d: change ArchReg %S to TempReg t%d\n", + i, nameIReg(4,u->val1), areg_map[u->val1]); + u->tag1 = TempReg; + u->val1 = areg_map[u->val1]; + /* Remember to extend the live range of the TempReg, + if necessary. */ + if (last_live_before[u->val1] < i) + last_live_before[u->val1] = i; + break; + default: + break; + } + } + + /* boring insn; invalidate any mappings to temps it writes */ + k = getTempUsage(u, &tempUse[0]); + + for (j = 0; j < k; j++) { + wr = tempUse[j].isWrite; + if (!wr) continue; + tr = tempUse[j].tempNo; + for (m = 0; m < 8; m++) + if (areg_map[m] == tr) areg_map[m] = -1; + } + } + + } + +# undef BIND_ARCH_TO_TEMP + + /* PASS 2: redundant PUT elimination. 
If doing instrumentation, + don't annul (delay) puts of %ESP, since the memory check + machinery always requires the in-memory value of %ESP to be up + to date. + */ + for (j = 0; j < 8; j++) + annul_put[j] = False; + + for (i = cb->used-1; i >= 0; i--) { + u = &cb->instrs[i]; + if (u->opcode == NOP) continue; + + if (u->opcode == PUT && u->size == 4) { + vg_assert(u->tag2 == ArchReg); + actual_areg = containingArchRegOf ( 4, u->val2 ); + if (annul_put[actual_areg]) { + u->opcode = NOP; + u->tag1 = u->tag2 = NoValue; + if (VG_(disassemble)) + VG_(printf)("at %d: delete PUT\n", i ); + } else { + if (!(VG_(clo_instrument) && actual_areg == R_ESP)) + annul_put[actual_areg] = True; + } + } + else if (u->opcode == PUT && u->size != 4) { + actual_areg = containingArchRegOf ( u->size, u->val2 ); + annul_put[actual_areg] = False; + } + else if (u->opcode == JMP || u->opcode == JIFZ + || u->opcode == CALLM) { + for (j = 0; j < 8; j++) + annul_put[j] = False; + } + else { + /* If an instruction reads an ArchReg, the immediately + preceding PUT cannot be annulled. */ + actual_areg = maybe_uinstrReadsArchReg ( u ); + if (actual_areg != -1) + annul_put[actual_areg] = False; + } + } + + /* PASS 2a: redundant-move elimination. Given MOV t1, t2 and t1 is + dead after this point, annul the MOV insn and rename t2 to t1. + Further modifies the last_live_before map. */ + +# if 0 + VG_(ppUCodeBlock)(cb, "Before MOV elimination" ); + for (i = 0; i < cb->nextTemp; i++) + VG_(printf)("llb[t%d]=%d ", i, last_live_before[i]); + VG_(printf)("\n"); +# endif + + for (i = 0; i < cb->used-1; i++) { + u = &cb->instrs[i]; + if (u->opcode != MOV) continue; + if (u->tag1 == Literal) continue; + vg_assert(u->tag1 == TempReg); + vg_assert(u->tag2 == TempReg); + if (last_live_before[u->val1] == i) { + if (VG_(disassemble)) + VG_(printf)( + "at %d: delete MOV, rename t%d to t%d in (%d .. 
%d)\n", + i, u->val2, u->val1, i+1, last_live_before[u->val2] ); + for (j = i+1; j <= last_live_before[u->val2]; j++) { + if (cb->instrs[j].tag1 == TempReg + && cb->instrs[j].val1 == u->val2) + cb->instrs[j].val1 = u->val1; + if (cb->instrs[j].tag2 == TempReg + && cb->instrs[j].val2 == u->val2) + cb->instrs[j].val2 = u->val1; + } + last_live_before[u->val1] = last_live_before[u->val2]; + last_live_before[u->val2] = i-1; + u->opcode = NOP; + u->tag1 = u->tag2 = NoValue; + } + } + + /* PASS 3: redundant condition-code restore/save elimination. + Scan backwards from the end. future_dead_flags records the set + of flags which are dead at this point, that is, will be written + before they are next read. Earlier uinsns which write flags + already in future_dead_flags can have their writes annulled. + */ + future_dead_flags = FlagsEmpty; + + for (i = cb->used-1; i >= 0; i--) { + u = &cb->instrs[i]; + + /* We might never make it to insns beyond this one, so be + conservative. */ + if (u->opcode == JIFZ || u->opcode == JMP) { + future_dead_flags = FlagsEmpty; + continue; + } + + /* We can annul the flags written by this insn if it writes a + subset (or eq) of the set of flags known to be dead after + this insn. If not, just record the flags also written by + this insn.*/ + if (u->flags_w != FlagsEmpty + && VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) { + if (VG_(disassemble)) { + VG_(printf)("at %d: annul flag write ", i); + vg_ppFlagSet("", u->flags_w); + VG_(printf)(" due to later "); + vg_ppFlagSet("", future_dead_flags); + VG_(printf)("\n"); + } + u->flags_w = FlagsEmpty; + } else { + future_dead_flags + = VG_UNION_FLAG_SETS ( u->flags_w, future_dead_flags ); + } + + /* If this insn also reads flags, empty out future_dead_flags so + as to force preceding writes not to be annulled. 
*/ + if (u->flags_r != FlagsEmpty) + future_dead_flags = FlagsEmpty; + } + + if (last_live_before) + VG_(jitfree) ( last_live_before ); +} + + +/*------------------------------------------------------------*/ +/*--- The new register allocator. ---*/ +/*------------------------------------------------------------*/ + +typedef + struct { + /* Becomes live for the first time after this insn ... */ + Int live_after; + /* Becomes dead for the last time after this insn ... */ + Int dead_before; + /* The "home" spill slot, if needed. Never changes. */ + Int spill_no; + /* Where is it? VG_NOVALUE==in a spill slot; else in reg. */ + Int real_no; + } + TempInfo; + + +/* Take a ucode block and allocate its TempRegs to RealRegs, or put + them in spill locations, and add spill code, if there are not + enough real regs. The usual register allocation deal, in short. + + Important redundancy of representation: + + real_to_temp maps real reg ranks (RRRs) to TempReg nos, or + to VG_NOVALUE if the real reg has no currently assigned TempReg. + + The .real_no field of a TempInfo gives the current RRR for + this TempReg, or VG_NOVALUE if the TempReg is currently + in memory, in which case it is in the SpillNo denoted by + spillno. + + These pieces of information (a fwds-bwds mapping, really) must + be kept consistent! + + This allocator uses the so-called Second Chance Bin Packing + algorithm, as described in "Quality and Speed in Linear-scan + Register Allocation" (Traub, Holloway and Smith, ACM PLDI98, + pp142-151). It is simple and fast and remarkably good at + minimising the amount of spill code introduced. +*/ + +static +UCodeBlock* vg_do_register_allocation ( UCodeBlock* c1 ) +{ + TempInfo* temp_info; + Int real_to_temp[VG_MAX_REALREGS]; + Bool is_spill_cand[VG_MAX_REALREGS]; + Int ss_busy_until_before[VG_MAX_SPILLSLOTS]; + Int i, j, k, m, r, tno, max_ss_no; + Bool wr, defer, isRead, spill_reqd; + TempUse tempUse[3]; + UCodeBlock* c2; + + /* Used to denote ... 
well, "no value" in this fn. */ +# define VG_NOTHING (-2) + + /* Initialise the TempReg info. */ + if (c1->nextTemp > 0) + temp_info = VG_(jitmalloc)(c1->nextTemp * sizeof(TempInfo) ); + else + temp_info = NULL; + + for (i = 0; i < c1->nextTemp; i++) { + temp_info[i].live_after = VG_NOTHING; + temp_info[i].dead_before = VG_NOTHING; + temp_info[i].spill_no = VG_NOTHING; + /* temp_info[i].real_no is not yet relevant. */ + } + + spill_reqd = False; + + /* Scan fwds to establish live ranges. */ + + for (i = 0; i < c1->used; i++) { + k = getTempUsage(&c1->instrs[i], &tempUse[0]); + vg_assert(k >= 0 && k <= 3); + + /* For each temp usage ... fwds in program order */ + for (j = 0; j < k; j++) { + tno = tempUse[j].tempNo; + wr = tempUse[j].isWrite; + if (wr) { + /* Writes hold a reg live until after this insn. */ + if (temp_info[tno].live_after == VG_NOTHING) + temp_info[tno].live_after = i; + if (temp_info[tno].dead_before < i + 1) + temp_info[tno].dead_before = i + 1; + } else { + /* First use of a tmp should be a write. */ + vg_assert(temp_info[tno].live_after != VG_NOTHING); + /* Reads only hold it live until before this insn. */ + if (temp_info[tno].dead_before < i) + temp_info[tno].dead_before = i; + } + } + } + +# if 0 + /* Sanity check on live ranges. Expensive but correct. */ + for (i = 0; i < c1->nextTemp; i++) { + vg_assert( (temp_info[i].live_after == VG_NOTHING + && temp_info[i].dead_before == VG_NOTHING) + || (temp_info[i].live_after != VG_NOTHING + && temp_info[i].dead_before != VG_NOTHING) ); + } +# endif + + /* Do a rank-based allocation of TempRegs to spill slot numbers. + We put as few as possible values in spill slots, but + nevertheless need to have an assignment to them just in case. */ + + max_ss_no = -1; + + for (i = 0; i < VG_MAX_SPILLSLOTS; i++) + ss_busy_until_before[i] = 0; + + for (i = 0; i < c1->nextTemp; i++) { + + /* True iff this temp is unused. 
*/ + if (temp_info[i].live_after == VG_NOTHING) + continue; + + /* Find the lowest-numbered spill slot which is available at the + start point of this interval, and assign the interval to + it. */ + for (j = 0; j < VG_MAX_SPILLSLOTS; j++) + if (ss_busy_until_before[j] <= temp_info[i].live_after) + break; + if (j == VG_MAX_SPILLSLOTS) { + VG_(printf)("VG_MAX_SPILLSLOTS is too low; increase and recompile.\n"); + VG_(panic)("register allocation failed -- out of spill slots"); + } + ss_busy_until_before[j] = temp_info[i].dead_before; + temp_info[i].spill_no = j; + if (j > max_ss_no) + max_ss_no = j; + } + + VG_(total_reg_rank) += (max_ss_no+1); + + /* Show live ranges and assigned spill slot nos. */ + + if (VG_(disassemble)) { + VG_(printf)("Live Range Assignments\n"); + + for (i = 0; i < c1->nextTemp; i++) { + if (temp_info[i].live_after == VG_NOTHING) + continue; + VG_(printf)( + " LR %d is after %d to before %d spillno %d\n", + i, + temp_info[i].live_after, + temp_info[i].dead_before, + temp_info[i].spill_no + ); + } + } + + /* Now that we've established a spill slot number for each used + temporary, we can go ahead and do the core of the "Second-chance + binpacking" allocation algorithm. */ + + /* Resulting code goes here. We generate it all in a forwards + pass. */ + c2 = allocCodeBlock(); + + /* At the start, no TempRegs are assigned to any real register. + Correspondingly, all temps claim to be currently resident in + their spill slots, as computed by the previous two passes. */ + for (i = 0; i < VG_MAX_REALREGS; i++) + real_to_temp[i] = VG_NOTHING; + for (i = 0; i < c1->nextTemp; i++) + temp_info[i].real_no = VG_NOTHING; + + if (VG_(disassemble)) + VG_(printf)("\n"); + + /* Process each insn in turn. */ + for (i = 0; i < c1->used; i++) { + + if (c1->instrs[i].opcode == NOP) continue; + VG_(uinstrs_prealloc)++; + +# if 0 + /* Check map consistency. Expensive but correct. 
*/ + for (r = 0; r < VG_MAX_REALREGS; r++) { + if (real_to_temp[r] != VG_NOTHING) { + tno = real_to_temp[r]; + vg_assert(tno >= 0 && tno < c1->nextTemp); + vg_assert(temp_info[tno].real_no == r); + } + } + for (tno = 0; tno < c1->nextTemp; tno++) { + if (temp_info[tno].real_no != VG_NOTHING) { + r = temp_info[tno].real_no; + vg_assert(r >= 0 && r < VG_MAX_REALREGS); + vg_assert(real_to_temp[r] == tno); + } + } +# endif + + if (VG_(disassemble)) + VG_(ppUInstr)(i, &c1->instrs[i]); + + /* First, free up enough real regs for this insn. This may + generate spill stores since we may have to evict some TempRegs + currently in real regs. Also generates spill loads. */ + + k = getTempUsage(&c1->instrs[i], &tempUse[0]); + vg_assert(k >= 0 && k <= 3); + + /* For each ***different*** temp mentioned in the insn .... */ + for (j = 0; j < k; j++) { + + /* First check if the temp is mentioned again later; if so, + ignore this mention. We only want to process each temp + used by the insn once, even if it is mentioned more than + once. */ + defer = False; + tno = tempUse[j].tempNo; + for (m = j+1; m < k; m++) + if (tempUse[m].tempNo == tno) + defer = True; + if (defer) + continue; + + /* Now we're trying to find a register for tempUse[j].tempNo. + First of all, if it already has a register assigned, we + don't need to do anything more. */ + if (temp_info[tno].real_no != VG_NOTHING) + continue; + + /* No luck. The next thing to do is see if there is a + currently unassigned register available. If so, bag it. */ + for (r = 0; r < VG_MAX_REALREGS; r++) { + if (real_to_temp[r] == VG_NOTHING) + break; + } + if (r < VG_MAX_REALREGS) { + real_to_temp[r] = tno; + temp_info[tno].real_no = r; + continue; + } + + /* Unfortunately, that didn't pan out either. So we'll have + to eject some other unfortunate TempReg into a spill slot + in order to free up a register. Of course, we need to be + careful not to eject some other TempReg needed by this + insn. + + Select r in 0 .. 
VG_MAX_REALREGS-1 such that + real_to_temp[r] is not mentioned in + tempUse[0 .. k-1].tempNo, since it would be just plain + wrong to eject some other TempReg which we need to use in + this insn. + + It is here that it is important to make a good choice of + register to spill. */ + + /* First, mark those regs which are not spill candidates. */ + for (r = 0; r < VG_MAX_REALREGS; r++) { + is_spill_cand[r] = True; + for (m = 0; m < k; m++) { + if (real_to_temp[r] == tempUse[m].tempNo) { + is_spill_cand[r] = False; + break; + } + } + } + + /* We can choose any r satisfying is_spill_cand[r]. However, + try to make a good choice. First, try and find r such + that the associated TempReg is already dead. */ + for (r = 0; r < VG_MAX_REALREGS; r++) { + if (is_spill_cand[r] && + temp_info[real_to_temp[r]].dead_before <= i) + goto have_spill_cand; + } + + /* No spill cand is mapped to a dead TempReg. Now we really + _do_ have to generate spill code. Choose r so that the + next use of its associated TempReg is as far ahead as + possible, in the hope that this will minimise the number of + consequent reloads required. This is a bit expensive, but + we don't have to do it very often. */ + { + Int furthest_r = VG_MAX_REALREGS; + Int furthest = 0; + for (r = 0; r < VG_MAX_REALREGS; r++) { + if (!is_spill_cand[r]) continue; + for (m = i+1; m < c1->used; m++) + if (uInstrMentionsTempReg(&c1->instrs[m], + real_to_temp[r])) + break; + if (m > furthest) { + furthest = m; + furthest_r = r; + } + } + r = furthest_r; + goto have_spill_cand; + } + + have_spill_cand: + if (r == VG_MAX_REALREGS) + VG_(panic)("new reg alloc: out of registers ?!"); + + /* Eject r. Important refinement: don't bother if the + associated TempReg is now dead. 
*/ + vg_assert(real_to_temp[r] != VG_NOTHING); + vg_assert(real_to_temp[r] != tno); + temp_info[real_to_temp[r]].real_no = VG_NOTHING; + if (temp_info[real_to_temp[r]].dead_before > i) { + uInstr2(c2, PUT, 4, + RealReg, VG_(rankToRealRegNo)(r), + SpillNo, temp_info[real_to_temp[r]].spill_no); + VG_(uinstrs_spill)++; + spill_reqd = True; + if (VG_(disassemble)) + VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2)); + } + + /* Decide if tno is read. */ + isRead = False; + for (m = 0; m < k; m++) + if (tempUse[m].tempNo == tno && !tempUse[m].isWrite) + isRead = True; + + /* If so, generate a spill load. */ + if (isRead) { + uInstr2(c2, GET, 4, + SpillNo, temp_info[tno].spill_no, + RealReg, VG_(rankToRealRegNo)(r) ); + VG_(uinstrs_spill)++; + spill_reqd = True; + if (VG_(disassemble)) + VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2)); + } + + /* Update the forwards and backwards maps. */ + real_to_temp[r] = tno; + temp_info[tno].real_no = r; + } + + /* By this point, all TempRegs mentioned by the insn have been + bought into real regs. We now copy the insn to the output + and use patchUInstr to convert its rTempRegs into + realregs. */ + for (j = 0; j < k; j++) + tempUse[j].realNo + = VG_(rankToRealRegNo)(temp_info[tempUse[j].tempNo].real_no); + copyUInstr(c2, &c1->instrs[i]); + patchUInstr(&LAST_UINSTR(c2), &tempUse[0], k); + + if (VG_(disassemble)) { + VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2)); + VG_(printf)("\n"); + } + } + + if (temp_info != NULL) + VG_(jitfree)(temp_info); + + freeCodeBlock(c1); + + if (spill_reqd) + VG_(translations_needing_spill)++; + + return c2; + +# undef VG_NOTHING + +} + + +/*------------------------------------------------------------*/ +/*--- New instrumentation machinery. 
---*/
/*------------------------------------------------------------*/

/* Size-dispatch helper: map an operand size in bytes (4, 2 or 1)
   to the corresponding ImproveOR tag op. */
static
VgTagOp get_VgT_ImproveOR_TQ ( Int sz )
{
   switch (sz) {
      case 4: return VgT_ImproveOR4_TQ;
      case 2: return VgT_ImproveOR2_TQ;
      case 1: return VgT_ImproveOR1_TQ;
      default: VG_(panic)("get_VgT_ImproveOR_TQ");
   }
}


/* As above, but for the ImproveAND tag op. */
static
VgTagOp get_VgT_ImproveAND_TQ ( Int sz )
{
   switch (sz) {
      case 4: return VgT_ImproveAND4_TQ;
      case 2: return VgT_ImproveAND2_TQ;
      case 1: return VgT_ImproveAND1_TQ;
      default: VG_(panic)("get_VgT_ImproveAND_TQ");
   }
}


/* Size-dispatch for the Left tag op (sizes 4, 2, 1). */
static
VgTagOp get_VgT_Left ( Int sz )
{
   switch (sz) {
      case 4: return VgT_Left4;
      case 2: return VgT_Left2;
      case 1: return VgT_Left1;
      default: VG_(panic)("get_VgT_Left");
   }
}


/* Size-dispatch for the UifU (undefined-if-either-undefined) tag op.
   Note size 0 is also allowed here, unlike the other dispatchers. */
static
VgTagOp get_VgT_UifU ( Int sz )
{
   switch (sz) {
      case 4: return VgT_UifU4;
      case 2: return VgT_UifU2;
      case 1: return VgT_UifU1;
      case 0: return VgT_UifU0;
      default: VG_(panic)("get_VgT_UifU");
   }
}


/* Size-dispatch for the DifD (defined-if-either-defined) tag op. */
static
VgTagOp get_VgT_DifD ( Int sz )
{
   switch (sz) {
      case 4: return VgT_DifD4;
      case 2: return VgT_DifD2;
      case 1: return VgT_DifD1;
      default: VG_(panic)("get_VgT_DifD");
   }
}


/* Map a (source size, dest size) pair to the matching pessimising-cast
   tag op.  Only the listed combinations are legal; anything else is
   an internal error. */
static
VgTagOp get_VgT_PCast ( Int szs, Int szd )
{
   if (szs == 4 && szd == 0) return VgT_PCast40;
   if (szs == 2 && szd == 0) return VgT_PCast20;
   if (szs == 1 && szd == 0) return VgT_PCast10;
   if (szs == 0 && szd == 1) return VgT_PCast01;
   if (szs == 0 && szd == 2) return VgT_PCast02;
   if (szs == 0 && szd == 4) return VgT_PCast04;
   if (szs == 1 && szd == 4) return VgT_PCast14;
   if (szs == 1 && szd == 2) return VgT_PCast12;
   if (szs == 1 && szd == 1) return VgT_PCast11;
   VG_(printf)("get_VgT_PCast(%d,%d)\n", szs, szd);
   VG_(panic)("get_VgT_PCast");
}


/* Map (signedness, source size, dest size) to the matching widening
   tag op.  Legal transitions are 1->2, 1->4 and 2->4 only. */
static
VgTagOp get_VgT_Widen ( Bool syned, Int szs, Int szd )
{
   if (szs == 1 && szd == 2 && syned)  return VgT_SWiden12;
   if (szs == 1 && szd == 2 && !syned) return VgT_ZWiden12;

   if (szs == 1 && szd == 4 && syned)  return VgT_SWiden14;
   if (szs == 1 && szd
== 4 && !syned) return VgT_ZWiden14;

   if (szs == 2 && szd == 4 && syned)  return VgT_SWiden24;
   if (szs == 2 && szd == 4 && !syned) return VgT_ZWiden24;

   VG_(printf)("get_VgT_Widen(%d,%d,%d)\n", (Int)syned, szs, szd);
   VG_(panic)("get_VgT_Widen");
}

/* Pessimally cast the spec'd shadow from one size to another. */
static
void create_PCast ( UCodeBlock* cb, Int szs, Int szd, Int tempreg )
{
   /* 0 -> 0 is the identity; emit nothing. */
   if (szs == 0 && szd == 0)
      return;
   uInstr3(cb, TAG1, 0, TempReg, tempreg,
               NoValue, 0,
               Lit16,   get_VgT_PCast(szs,szd));
}


/* Create a signed or unsigned widen of the spec'd shadow from one
   size to another.  The only allowed size transitions are 1->2, 1->4
   and 2->4. */
static
void create_Widen ( UCodeBlock* cb, Bool signed_widen,
                    Int szs, Int szd, Int tempreg )
{
   /* Same-size "widen" is the identity; emit nothing. */
   if (szs == szd) return;
   uInstr3(cb, TAG1, 0, TempReg, tempreg,
               NoValue, 0,
               Lit16,   get_VgT_Widen(signed_widen,szs,szd));
}


/* Get the condition codes into a new shadow, at the given size. */
static
Int create_GETVF ( UCodeBlock* cb, Int sz )
{
   Int tt = newShadow(cb);
   uInstr1(cb, GETVF, 0, TempReg, tt);
   /* GETVF produces a size-0 shadow; cast it up to the caller's
      requested size. */
   create_PCast(cb, 0, sz, tt);
   return tt;
}


/* Save the condition codes from the spec'd shadow. */
static
void create_PUTVF ( UCodeBlock* cb, Int sz, Int tempreg )
{
   if (sz == 0) {
      uInstr1(cb, PUTVF, 0, TempReg, tempreg);
   } else {
      /* Don't clobber the caller's shadow: copy it into a fresh
         shadow, pessimise the copy down to size 0, and write that. */
      Int tt = newShadow(cb);
      uInstr2(cb, MOV, 4, TempReg, tempreg, TempReg, tt);
      create_PCast(cb, sz, 0, tt);
      uInstr1(cb, PUTVF, 0, TempReg, tt);
   }
}


/* Do Left on the spec'd shadow. */
static
void create_Left ( UCodeBlock* cb, Int sz, Int tempreg )
{
   uInstr3(cb, TAG1, 0,
               TempReg, tempreg,
               NoValue, 0,
               Lit16,   get_VgT_Left(sz));
}


/* Do UifU on ts and td, putting the result in td. */
static
void create_UifU ( UCodeBlock* cb, Int sz, Int ts, Int td )
{
   uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
               Lit16, get_VgT_UifU(sz));
}


/* Do DifD on ts and td, putting the result in td.
*/
static
void create_DifD ( UCodeBlock* cb, Int sz, Int ts, Int td )
{
   uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
               Lit16, get_VgT_DifD(sz));
}


/* Do HelpAND on value tval and tag tqqq, putting the result in
   tqqq. */
static
void create_ImproveAND_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
{
   uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
               Lit16, get_VgT_ImproveAND_TQ(sz));
}


/* Do HelpOR on value tval and tag tqqq, putting the result in
   tqqq. */
static
void create_ImproveOR_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
{
   uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
               Lit16, get_VgT_ImproveOR_TQ(sz));
}


/* Get the shadow for an operand described by (tag, val).  Emit code
   to do this and return the identity of the shadow holding the
   result.  The result tag is always copied into a new shadow, so it
   can be modified without trashing the original.*/
static
Int /* TempReg */ getOperandShadow ( UCodeBlock* cb,
                                     Int sz, Int tag, Int val )
{
   Int sh;
   sh = newShadow(cb);
   if (tag == TempReg) {
      /* Operand is a temp: copy its existing shadow into the fresh
         one, so the caller may scribble on the result. */
      uInstr2(cb, MOV, 4, TempReg, SHADOW(val), TempReg, sh);
      return sh;
   }
   if (tag == Literal) {
      /* Literal operand: initialise the fresh shadow with SETV
         (a literal has no pre-existing shadow to copy). */
      uInstr1(cb, SETV, sz, TempReg, sh);
      return sh;
   }
   if (tag == ArchReg) {
      /* Arch reg operand: fetch its shadow with GETV. */
      uInstr2(cb, GETV, sz, ArchReg, val, TempReg, sh);
      return sh;
   }
   VG_(panic)("getOperandShadow");
}



/* Create and return an instrumented version of cb_in.  Free cb_in
   before returning. */
static UCodeBlock* vg_instrument ( UCodeBlock* cb_in )
{
   UCodeBlock* cb;
   Int         i, j;
   UInstr*     u_in;
   /* Scratch shadow-temp numbers for the insn currently being
      instrumented; reset to INVALID_TEMPREG at the top of each
      iteration. */
   Int         qs, qd, qt, qtt;
   cb = allocCodeBlock();
   cb->nextTemp = cb_in->nextTemp;

   for (i = 0; i < cb_in->used; i++) {
      qs = qd = qt = qtt = INVALID_TEMPREG;
      u_in = &cb_in->instrs[i];

      /* if (i > 0) uInstr1(cb, NOP, 0, NoValue, 0); */

      /* VG_(ppUInstr)(0, u_in); */
      switch (u_in->opcode) {

         case NOP:
            break;

         case INCEIP:
            copyUInstr(cb, u_in);
            break;

         /* Loads and stores.  Test the V bits for the address.
+ The LOADV/STOREV does an addressibility check for the + address. */ + case LOAD: + uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1)); + uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1)); + uInstr2(cb, LOADV, u_in->size, + TempReg, u_in->val1, + TempReg, SHADOW(u_in->val2)); + copyUInstr(cb, u_in); + break; + case STORE: + uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2)); + uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val2)); + uInstr2(cb, STOREV, u_in->size, + TempReg, SHADOW(u_in->val1), + TempReg, u_in->val2); + copyUInstr(cb, u_in); + break; + + /* Moving stuff around. Make the V bits follow accordingly, + but don't do anything else. */ + + case GET: + uInstr2(cb, GETV, u_in->size, + ArchReg, u_in->val1, + TempReg, SHADOW(u_in->val2)); + copyUInstr(cb, u_in); + break; + case PUT: + uInstr2(cb, PUTV, u_in->size, + TempReg, SHADOW(u_in->val1), + ArchReg, u_in->val2); + copyUInstr(cb, u_in); + break; + + case GETF: + /* This is not the smartest way to do it, but should work. */ + qd = create_GETVF(cb, u_in->size); + uInstr2(cb, MOV, 4, TempReg, qd, TempReg, SHADOW(u_in->val1)); + copyUInstr(cb, u_in); + break; + case PUTF: + create_PUTVF(cb, u_in->size, SHADOW(u_in->val1)); + copyUInstr(cb, u_in); + break; + + case MOV: + switch (u_in->tag1) { + case TempReg: + uInstr2(cb, MOV, 4, + TempReg, SHADOW(u_in->val1), + TempReg, SHADOW(u_in->val2)); + break; + case Literal: + uInstr1(cb, SETV, u_in->size, + TempReg, SHADOW(u_in->val2)); + break; + default: + VG_(panic)("vg_instrument: MOV"); + } + copyUInstr(cb, u_in); + break; + + /* Special case of add, where one of the operands is a literal. + lea1(t) = t + some literal. + Therefore: lea1#(qa) = left(qa) + */ + case LEA1: + vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in)); + qs = SHADOW(u_in->val1); + qd = SHADOW(u_in->val2); + uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qd); + create_Left(cb, u_in->size, qd); + copyUInstr(cb, u_in); + break; + + /* Another form of add. 
+ lea2(ts,tt,shift) = ts + (tt << shift); shift is a literal + and is 0,1,2 or 3. + lea2#(qs,qt) = left(qs `UifU` (qt << shift)). + Note, subtly, that the shift puts zeroes at the bottom of qt, + meaning Valid, since the corresponding shift of tt puts + zeroes at the bottom of tb. + */ + case LEA2: { + Int shift; + vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in)); + switch (u_in->extra4b) { + case 1: shift = 0; break; + case 2: shift = 1; break; + case 4: shift = 2; break; + case 8: shift = 3; break; + default: VG_(panic)( "vg_instrument(LEA2)" ); + } + qs = SHADOW(u_in->val1); + qt = SHADOW(u_in->val2); + qd = SHADOW(u_in->val3); + uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qd); + if (shift > 0) { + uInstr2(cb, SHL, 4, Literal, 0, TempReg, qd); + uLiteral(cb, shift); + } + create_UifU(cb, 4, qs, qd); + create_Left(cb, u_in->size, qd); + copyUInstr(cb, u_in); + break; + } + + /* inc#/dec#(qd) = q `UifU` left(qd) = left(qd) */ + case INC: case DEC: + qd = SHADOW(u_in->val1); + create_Left(cb, u_in->size, qd); + if (u_in->flags_w != FlagsEmpty) + create_PUTVF(cb, u_in->size, qd); + copyUInstr(cb, u_in); + break; + + /* This is a HACK (approximation :-) */ + /* rcl#/rcr#(qs,qd) + = let q0 = pcast-sz-0(qd) `UifU` pcast-sz-0(qs) `UifU` eflags# + eflags# = q0 + qd =pcast-0-sz(q0) + Ie, cast everything down to a single bit, then back up. + This assumes that any bad bits infect the whole word and + the eflags. + */ + case RCL: case RCR: + vg_assert(u_in->flags_r != FlagsEmpty); + /* The following assertion looks like it makes sense, but is + actually wrong. Consider this: + rcll %eax + imull %eax, %eax + The rcll writes O and C but so does the imull, so the O and C + write of the rcll is annulled by the prior improvement pass. + Noticed by Kevin Ryde + */ + /* vg_assert(u_in->flags_w != FlagsEmpty); */ + qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1); + /* We can safely modify qs; cast it to 0-size. 
*/ + create_PCast(cb, u_in->size, 0, qs); + qd = SHADOW(u_in->val2); + create_PCast(cb, u_in->size, 0, qd); + /* qs is cast-to-0(shift count#), and qd is cast-to-0(value#). */ + create_UifU(cb, 0, qs, qd); + /* qs is now free; reuse it for the flag definedness. */ + qs = create_GETVF(cb, 0); + create_UifU(cb, 0, qs, qd); + create_PUTVF(cb, 0, qd); + create_PCast(cb, 0, u_in->size, qd); + copyUInstr(cb, u_in); + break; + + /* for OP in shl shr sar rol ror + (qs is shift count#, qd is value to be OP#d) + OP(ts,td) + OP#(qs,qd) + = pcast-1-sz(qs) `UifU` OP(ts,qd) + So we apply OP to the tag bits too, and then UifU with + the shift count# to take account of the possibility of it + being undefined. + + A bit subtle: + ROL/ROR rearrange the tag bits as per the value bits. + SHL/SHR shifts zeroes into the value, and corresponding + zeroes indicating Definedness into the tag. + SAR copies the top bit of the value downwards, and therefore + SAR also copies the definedness of the top bit too. + So in all five cases, we just apply the same op to the tag + bits as is applied to the value bits. Neat! + */ + case SHL: + case SHR: case SAR: + case ROL: case ROR: { + Int t_amount = INVALID_TEMPREG; + vg_assert(u_in->tag1 == TempReg || u_in->tag1 == Literal); + vg_assert(u_in->tag2 == TempReg); + qd = SHADOW(u_in->val2); + + /* Make qs hold shift-count# and make + t_amount be a TempReg holding the shift count. */ + if (u_in->tag1 == Literal) { + t_amount = newTemp(cb); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_amount); + uLiteral(cb, u_in->lit32); + qs = SHADOW(t_amount); + uInstr1(cb, SETV, 1, TempReg, qs); + } else { + t_amount = u_in->val1; + qs = SHADOW(u_in->val1); + } + + uInstr2(cb, u_in->opcode, + u_in->size, + TempReg, t_amount, + TempReg, qd); + qt = newShadow(cb); + uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt); + create_PCast(cb, 1, u_in->size, qt); + create_UifU(cb, u_in->size, qt, qd); + copyUInstr(cb, u_in); + break; + } + + /* One simple tag operation. 
*/ + case WIDEN: + vg_assert(u_in->tag1 == TempReg); + create_Widen(cb, u_in->signed_widen, u_in->extra4b, u_in->size, + SHADOW(u_in->val1)); + copyUInstr(cb, u_in); + break; + + /* not#(x) = x (since bitwise independent) */ + case NOT: + vg_assert(u_in->tag1 == TempReg); + copyUInstr(cb, u_in); + break; + + /* neg#(x) = left(x) (derivable from case for SUB) */ + case NEG: + vg_assert(u_in->tag1 == TempReg); + create_Left(cb, u_in->size, SHADOW(u_in->val1)); + copyUInstr(cb, u_in); + break; + + /* bswap#(x) = bswap(x) */ + case BSWAP: + vg_assert(u_in->tag1 == TempReg); + vg_assert(u_in->size == 4); + qd = SHADOW(u_in->val1); + uInstr1(cb, BSWAP, 4, TempReg, qd); + copyUInstr(cb, u_in); + break; + + /* cc2val#(qd) = pcast-0-to-size(eflags#) */ + case CC2VAL: + vg_assert(u_in->tag1 == TempReg); + vg_assert(u_in->flags_r != FlagsEmpty); + qt = create_GETVF(cb, u_in->size); + uInstr2(cb, MOV, 4, TempReg, qt, TempReg, SHADOW(u_in->val1)); + copyUInstr(cb, u_in); + break; + + /* cmov#(qs,qd) = cmov(qs,qd) + That is, do the cmov of tags using the same flags as for + the data (obviously). However, first do a test on the + validity of the flags. + */ + case CMOV: + vg_assert(u_in->size == 4); + vg_assert(u_in->tag1 == TempReg); + vg_assert(u_in->tag2 == TempReg); + vg_assert(u_in->flags_r != FlagsEmpty); + vg_assert(u_in->flags_w == FlagsEmpty); + qs = SHADOW(u_in->val1); + qd = SHADOW(u_in->val2); + qt = create_GETVF(cb, 0); + uInstr1(cb, TESTV, 0, TempReg, qt); + /* qt should never be referred to again. Nevertheless + ... */ + uInstr1(cb, SETV, 0, TempReg, qt); + + uInstr2(cb, CMOV, 4, TempReg, qs, TempReg, qd); + LAST_UINSTR(cb).cond = u_in->cond; + LAST_UINSTR(cb).flags_r = u_in->flags_r; + + copyUInstr(cb, u_in); + break; + + /* add#/sub#(qs,qd) + = qs `UifU` qd `UifU` left(qs) `UifU` left(qd) + = left(qs) `UifU` left(qd) + = left(qs `UifU` qd) + adc#/sbb#(qs,qd) + = left(qs `UifU` qd) `UifU` pcast(eflags#) + Second arg (dest) is TempReg. 
+ First arg (src) is Literal or TempReg or ArchReg. + */ + case ADD: case SUB: + case ADC: case SBB: + qd = SHADOW(u_in->val2); + qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1); + create_UifU(cb, u_in->size, qs, qd); + create_Left(cb, u_in->size, qd); + if (u_in->opcode == ADC || u_in->opcode == SBB) { + vg_assert(u_in->flags_r != FlagsEmpty); + qt = create_GETVF(cb, u_in->size); + create_UifU(cb, u_in->size, qt, qd); + } + if (u_in->flags_w != FlagsEmpty) { + create_PUTVF(cb, u_in->size, qd); + } + copyUInstr(cb, u_in); + break; + + /* xor#(qs,qd) = qs `UifU` qd */ + case XOR: + qd = SHADOW(u_in->val2); + qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1); + create_UifU(cb, u_in->size, qs, qd); + if (u_in->flags_w != FlagsEmpty) { + create_PUTVF(cb, u_in->size, qd); + } + copyUInstr(cb, u_in); + break; + + /* and#/or#(qs,qd) + = (qs `UifU` qd) `DifD` improve(vs,qs) + `DifD` improve(vd,qd) + where improve is the relevant one of + Improve{AND,OR}_TQ + Use the following steps, with qt as a temp: + qt = improve(vd,qd) + qd = qs `UifU` qd + qd = qt `DifD` qd + qt = improve(vs,qs) + qd = qt `DifD` qd + */ + case AND: case OR: + vg_assert(u_in->tag1 == TempReg); + vg_assert(u_in->tag2 == TempReg); + qd = SHADOW(u_in->val2); + qs = SHADOW(u_in->val1); + qt = newShadow(cb); + + /* qt = improve(vd,qd) */ + uInstr2(cb, MOV, 4, TempReg, qd, TempReg, qt); + if (u_in->opcode == AND) + create_ImproveAND_TQ(cb, u_in->size, u_in->val2, qt); + else + create_ImproveOR_TQ(cb, u_in->size, u_in->val2, qt); + /* qd = qs `UifU` qd */ + create_UifU(cb, u_in->size, qs, qd); + /* qd = qt `DifD` qd */ + create_DifD(cb, u_in->size, qt, qd); + /* qt = improve(vs,qs) */ + uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt); + if (u_in->opcode == AND) + create_ImproveAND_TQ(cb, u_in->size, u_in->val1, qt); + else + create_ImproveOR_TQ(cb, u_in->size, u_in->val1, qt); + /* qd = qt `DifD` qd */ + create_DifD(cb, u_in->size, qt, qd); + /* So, finally qd is the result tag. 
*/ + if (u_in->flags_w != FlagsEmpty) { + create_PUTVF(cb, u_in->size, qd); + } + copyUInstr(cb, u_in); + break; + + /* Machinery to do with supporting CALLM. Copy the start and + end markers only to make the result easier to read + (debug); they generate no code and have no effect. + */ + case CALLM_S: case CALLM_E: + copyUInstr(cb, u_in); + break; + + /* Copy PUSH and POP verbatim. Arg/result absval + calculations are done when the associated CALL is + processed. CLEAR has no effect on absval calculations but + needs to be copied. + */ + case PUSH: case POP: case CLEAR: + copyUInstr(cb, u_in); + break; + + /* In short: + callm#(a1# ... an#) = (a1# `UifU` ... `UifU` an#) + We have to decide on a size to do the computation at, + although the choice doesn't affect correctness. We will + do a pcast to the final size anyway, so the only important + factor is to choose a size which minimises the total + number of casts needed. Valgrind: just use size 0, + regardless. It may not be very good for performance + but does simplify matters, mainly by reducing the number + of different pessimising casts which have to be implemented. + */ + case CALLM: { + UInstr* uu; + Bool res_used; + + /* Now generate the code. Get the final result absval + into qt. */ + qt = newShadow(cb); + qtt = newShadow(cb); + uInstr1(cb, SETV, 0, TempReg, qt); + for (j = i-1; cb_in->instrs[j].opcode != CALLM_S; j--) { + uu = & cb_in->instrs[j]; + if (uu->opcode != PUSH) continue; + /* cast via a temporary */ + uInstr2(cb, MOV, 4, TempReg, SHADOW(uu->val1), + TempReg, qtt); + create_PCast(cb, uu->size, 0, qtt); + create_UifU(cb, 0, qtt, qt); + } + /* Remembering also that flags read count as inputs. */ + if (u_in->flags_r != FlagsEmpty) { + qtt = create_GETVF(cb, 0); + create_UifU(cb, 0, qtt, qt); + } + + /* qt now holds the result tag. If any results from the + call are used, either by fetching with POP or + implicitly by writing the flags, we copy the result + absval to the relevant location. 
If not used, the call + must have been for its side effects, so we test qt here + and now. Note that this assumes that all values + removed by POP continue to be live. So dead args + *must* be removed with CLEAR, not by POPping them into + a dummy tempreg. + */ + res_used = False; + for (j = i+1; cb_in->instrs[j].opcode != CALLM_E; j++) { + uu = & cb_in->instrs[j]; + if (uu->opcode != POP) continue; + /* Cast via a temp. */ + uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qtt); + create_PCast(cb, 0, uu->size, qtt); + uInstr2(cb, MOV, 4, TempReg, qtt, + TempReg, SHADOW(uu->val1)); + res_used = True; + } + if (u_in->flags_w != FlagsEmpty) { + create_PUTVF(cb, 0, qt); + res_used = True; + } + if (!res_used) { + uInstr1(cb, TESTV, 0, TempReg, qt); + /* qt should never be referred to again. Nevertheless + ... */ + uInstr1(cb, SETV, 0, TempReg, qt); + } + copyUInstr(cb, u_in); + break; + } + /* Whew ... */ + + case JMP: + if (u_in->tag1 == TempReg) { + uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1)); + uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1)); + } else { + vg_assert(u_in->tag1 == Literal); + } + if (u_in->cond != CondAlways) { + vg_assert(u_in->flags_r != FlagsEmpty); + qt = create_GETVF(cb, 0); + uInstr1(cb, TESTV, 0, TempReg, qt); + /* qt should never be referred to again. Nevertheless + ... */ + uInstr1(cb, SETV, 0, TempReg, qt); + } + copyUInstr(cb, u_in); + break; + + case JIFZ: + uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1)); + uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1)); + copyUInstr(cb, u_in); + break; + + /* Emit a check on the address used. For FPU_R, the value + loaded into the FPU is checked at the time it is read from + memory (see synth_fpu_mem_check_actions). */ + case FPU_R: case FPU_W: + vg_assert(u_in->tag2 == TempReg); + uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2)); + uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val2)); + copyUInstr(cb, u_in); + break; + + /* For FPU insns not referencing memory, just copy thru. 
*/ + case FPU: + copyUInstr(cb, u_in); + break; + + default: + VG_(ppUInstr)(0, u_in); + VG_(panic)( "vg_instrument: unhandled case"); + + } /* end of switch (u_in->opcode) */ + + } /* end of for loop */ + + freeCodeBlock(cb_in); + return cb; +} + +/*------------------------------------------------------------*/ +/*--- Clean up mem check instrumentation. ---*/ +/*------------------------------------------------------------*/ + +#define VGC_IS_SHADOW(tempreg) ((tempreg % 2) == 1) +#define VGC_UNDEF ((UChar)100) +#define VGC_VALUE ((UChar)101) + +#define NOP_no_msg(uu) \ + do { uu->opcode = NOP; } while (False) + +#define NOP_tag1_op(uu) \ + do { uu->opcode = NOP; \ + if (VG_(disassemble)) \ + VG_(printf)("at %d: delete %s due to defd arg\n", \ + i, VG_(nameOfTagOp(u->val3))); \ + } while (False) + +#define SETV_tag1_op(uu,newsz) \ + do { uu->opcode = SETV; \ + uu->size = newsz; \ + uu->tag2 = uu->tag3 = NoValue; \ + if (VG_(disassemble)) \ + VG_(printf)("at %d: convert %s to SETV%d " \ + "due to defd arg\n", \ + i, VG_(nameOfTagOp(u->val3)), newsz); \ + } while (False) + + + +/* Run backwards and delete SETVs on shadow temps for which the next + action is a write. Needs an env saying whether or not the next + action is a write. The supplied UCodeBlock is destructively + modified. +*/ +static void vg_delete_redundant_SETVs ( UCodeBlock* cb ) +{ + Bool* next_is_write; + Int i, j, k, n_temps; + UInstr* u; + TempUse tempUse[3]; + + n_temps = cb->nextTemp; + if (n_temps == 0) return; + + next_is_write = VG_(jitmalloc)(n_temps * sizeof(Bool)); + + for (i = 0; i < n_temps; i++) next_is_write[i] = True; + + for (i = cb->used-1; i >= 0; i--) { + u = &cb->instrs[i]; + + if (u->opcode == MOV && VGC_IS_SHADOW(u->val2) + && next_is_write[u->val2]) { + /* This MOV is pointless because the target is dead at this + point. Delete it. 
*/ + u->opcode = NOP; + u->size = 0; + if (VG_(disassemble)) + VG_(printf)("at %d: delete MOV\n", i); + } else + + if (u->opcode == SETV) { + if (u->tag1 == TempReg) { + vg_assert(VGC_IS_SHADOW(u->val1)); + if (next_is_write[u->val1]) { + /* This write is pointless, so annul it. */ + u->opcode = NOP; + u->size = 0; + if (VG_(disassemble)) + VG_(printf)("at %d: delete SETV\n", i); + } else { + /* This write has a purpose; don't annul it, but do + notice that we did it. */ + next_is_write[u->val1] = True; + } + + } + + } else { + /* Find out what this insn does to the temps. */ + k = getTempUsage(u, &tempUse[0]); + vg_assert(k <= 3); + for (j = k-1; j >= 0; j--) { + next_is_write[ tempUse[j].tempNo ] + = tempUse[j].isWrite; + } + } + + } + + VG_(jitfree)(next_is_write); +} + + +/* Run forwards, propagating and using the is-completely-defined + property. This removes a lot of redundant tag-munging code. + Unfortunately it requires intimate knowledge of how each uinstr and + tagop modifies its arguments. This duplicates knowledge of uinstr + tempreg uses embodied in getTempUsage(), which is unfortunate. + The supplied UCodeBlock* is modified in-place. + + For each value temp, def[] should hold VGC_VALUE. + + For each shadow temp, def[] may hold 4,2,1 or 0 iff that shadow is + definitely known to be fully defined at that size. In all other + circumstances a shadow's def[] entry is VGC_UNDEF, meaning possibly + undefined. In cases of doubt, VGC_UNDEF is always safe. +*/ +static void vg_propagate_definedness ( UCodeBlock* cb ) +{ + UChar* def; + Int i, j, k, t, n_temps; + UInstr* u; + TempUse tempUse[3]; + + n_temps = cb->nextTemp; + if (n_temps == 0) return; + + def = VG_(jitmalloc)(n_temps * sizeof(UChar)); + for (i = 0; i < n_temps; i++) + def[i] = VGC_IS_SHADOW(i) ? VGC_UNDEF : VGC_VALUE; + + /* Run forwards, detecting and using the all-defined property. */ + + for (i = 0; i < cb->used; i++) { + u = &cb->instrs[i]; + switch (u->opcode) { + + /* Tag-handling uinstrs. 
*/ + + /* Deal with these quickly. */ + case NOP: + case INCEIP: + break; + + /* Make a tag defined. */ + case SETV: + vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); + def[u->val1] = u->size; + break; + + /* Check definedness of a tag. */ + case TESTV: + vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); + if (def[u->val1] <= 4) { + vg_assert(def[u->val1] == u->size); + NOP_no_msg(u); + if (VG_(disassemble)) + VG_(printf)("at %d: delete TESTV on defd arg\n", i); + } + break; + + /* Applies to both values and tags. Propagate Definedness + property through copies. Note that this isn't optional; + we *have* to do this to keep def[] correct. */ + case MOV: + vg_assert(u->tag2 == TempReg); + if (u->tag1 == TempReg) { + if (VGC_IS_SHADOW(u->val1)) { + vg_assert(VGC_IS_SHADOW(u->val2)); + def[u->val2] = def[u->val1]; + } + } + break; + + case PUTV: + vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); + if (def[u->val1] <= 4) { + vg_assert(def[u->val1] == u->size); + u->tag1 = Literal; + u->val1 = 0; + switch (u->size) { + case 4: u->lit32 = 0x00000000; break; + case 2: u->lit32 = 0xFFFF0000; break; + case 1: u->lit32 = 0xFFFFFF00; break; + default: VG_(panic)("vg_cleanup(PUTV)"); + } + if (VG_(disassemble)) + VG_(printf)( + "at %d: propagate definedness into PUTV\n", i); + } + break; + + case STOREV: + vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); + if (def[u->val1] <= 4) { + vg_assert(def[u->val1] == u->size); + u->tag1 = Literal; + u->val1 = 0; + switch (u->size) { + case 4: u->lit32 = 0x00000000; break; + case 2: u->lit32 = 0xFFFF0000; break; + case 1: u->lit32 = 0xFFFFFF00; break; + default: VG_(panic)("vg_cleanup(STOREV)"); + } + if (VG_(disassemble)) + VG_(printf)( + "at %d: propagate definedness into STandV\n", i); + } + break; + + /* Nothing interesting we can do with this, I think. */ + case PUTVF: + break; + + /* Tag handling operations. 
*/ + case TAG2: + vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2)); + vg_assert(u->tag3 == Lit16); + /* Ultra-paranoid "type" checking. */ + switch (u->val3) { + case VgT_ImproveAND4_TQ: case VgT_ImproveAND2_TQ: + case VgT_ImproveAND1_TQ: case VgT_ImproveOR4_TQ: + case VgT_ImproveOR2_TQ: case VgT_ImproveOR1_TQ: + vg_assert(u->tag1 == TempReg && !VGC_IS_SHADOW(u->val1)); + break; + default: + vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); + break; + } + switch (u->val3) { + Int sz; + case VgT_UifU4: + sz = 4; goto do_UifU; + case VgT_UifU2: + sz = 2; goto do_UifU; + case VgT_UifU1: + sz = 1; goto do_UifU; + case VgT_UifU0: + sz = 0; goto do_UifU; + do_UifU: + vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); + vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2)); + if (def[u->val1] <= 4) { + /* UifU. The first arg is defined, so result is + simply second arg. Delete this operation. */ + vg_assert(def[u->val1] == sz); + NOP_no_msg(u); + if (VG_(disassemble)) + VG_(printf)( + "at %d: delete UifU%d due to defd arg1\n", + i, sz); + } + else + if (def[u->val2] <= 4) { + /* UifU. The second arg is defined, so result is + simply first arg. Copy to second. */ + vg_assert(def[u->val2] == sz); + u->opcode = MOV; + u->size = 4; + u->tag3 = NoValue; + def[u->val2] = def[u->val1]; + if (VG_(disassemble)) + VG_(printf)( + "at %d: change UifU%d to MOV due to defd" + " arg2\n", + i, sz); + } + break; + case VgT_ImproveAND4_TQ: + sz = 4; goto do_ImproveAND; + case VgT_ImproveAND1_TQ: + sz = 1; goto do_ImproveAND; + do_ImproveAND: + /* Implements Q = T OR Q. So if Q is entirely defined, + ie all 0s, we get MOV T, Q. 
*/ + if (def[u->val2] <= 4) { + vg_assert(def[u->val2] == sz); + u->size = 4; /* Regardless of sz */ + u->opcode = MOV; + u->tag3 = NoValue; + def[u->val2] = VGC_UNDEF; + if (VG_(disassemble)) + VG_(printf)( + "at %d: change ImproveAND%d_TQ to MOV due " + "to defd arg2\n", + i, sz); + } + break; + default: + goto unhandled; + } + break; + + case TAG1: + vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); + if (def[u->val1] > 4) break; + /* We now know that the arg to the op is entirely defined. + If the op changes the size of the arg, we must replace + it with a SETV at the new size. If it doesn't change + the size, we can delete it completely. */ + switch (u->val3) { + /* Maintain the same size ... */ + case VgT_Left4: + vg_assert(def[u->val1] == 4); + NOP_tag1_op(u); + break; + case VgT_PCast11: + vg_assert(def[u->val1] == 1); + NOP_tag1_op(u); + break; + /* Change size ... */ + case VgT_PCast40: + vg_assert(def[u->val1] == 4); + SETV_tag1_op(u,0); + def[u->val1] = 0; + break; + case VgT_PCast14: + vg_assert(def[u->val1] == 1); + SETV_tag1_op(u,4); + def[u->val1] = 4; + break; + case VgT_PCast12: + vg_assert(def[u->val1] == 1); + SETV_tag1_op(u,2); + def[u->val1] = 2; + break; + case VgT_PCast10: + vg_assert(def[u->val1] == 1); + SETV_tag1_op(u,0); + def[u->val1] = 0; + break; + case VgT_PCast02: + vg_assert(def[u->val1] == 0); + SETV_tag1_op(u,2); + def[u->val1] = 2; + break; + default: + goto unhandled; + } + if (VG_(disassemble)) + VG_(printf)( + "at %d: delete TAG1 %s due to defd arg\n", + i, VG_(nameOfTagOp(u->val3))); + break; + + default: + unhandled: + /* We don't know how to handle this uinstr. Be safe, and + set to VGC_VALUE or VGC_UNDEF all temps written by it. */ + k = getTempUsage(u, &tempUse[0]); + vg_assert(k <= 3); + for (j = 0; j < k; j++) { + t = tempUse[j].tempNo; + vg_assert(t >= 0 && t < n_temps); + if (!tempUse[j].isWrite) { + /* t is read; ignore it. 
*/ + if (0&& VGC_IS_SHADOW(t) && def[t] <= 4) + VG_(printf)("ignoring def %d at %s %s\n", + def[t], + VG_(nameUOpcode)(True, u->opcode), + (u->opcode == TAG1 || u->opcode == TAG2) + ? VG_(nameOfTagOp)(u->val3) + : (Char*)""); + } else { + /* t is written; better nullify it. */ + def[t] = VGC_IS_SHADOW(t) ? VGC_UNDEF : VGC_VALUE; + } + } + } + } + + VG_(jitfree)(def); +} + + +/* Top level post-instrumentation cleanup function. */ +static void vg_cleanup ( UCodeBlock* cb ) +{ + vg_propagate_definedness ( cb ); + vg_delete_redundant_SETVs ( cb ); +} + + +/*------------------------------------------------------------*/ +/*--- Main entry point for the JITter. ---*/ +/*------------------------------------------------------------*/ + +/* Translate the basic block beginning at orig_addr, placing the + translation in a vg_malloc'd block, the address and size of which + are returned in trans_addr and trans_size. Length of the original + block is also returned in orig_size. If the latter three are NULL, + this call is being done for debugging purposes, in which case (a) + throw away the translation once it is made, and (b) produce a load + of debugging output. +*/ +void VG_(translate) ( Addr orig_addr, + UInt* orig_size, + Addr* trans_addr, + UInt* trans_size ) +{ + Int n_disassembled_bytes, final_code_size; + Bool debugging_translation; + UChar* final_code; + UCodeBlock* cb; + + VGP_PUSHCC(VgpTranslate); + debugging_translation + = orig_size == NULL || trans_addr == NULL || trans_size == NULL; + + dis = True; + dis = debugging_translation; + + /* Check if we're being asked to jump to a silly address, and if so + record an error message before potentially crashing the entire + system. 
*/ + if (VG_(clo_instrument) && !debugging_translation && !dis) { + Addr bad_addr; + Bool ok = VGM_(check_readable) ( orig_addr, 1, &bad_addr ); + if (!ok) { + VG_(record_jump_error)(bad_addr); + } + } + + /* if (VG_(overall_in_count) >= 4800) dis=True; */ + if (VG_(disassemble)) + VG_(printf)("\n"); + if (0 || dis + || (VG_(overall_in_count) > 0 && + (VG_(overall_in_count) % 1000 == 0))) { + if (0&& (VG_(clo_verbosity) > 1 || dis)) + VG_(message)(Vg_UserMsg, + "trans# %d, bb# %lu, in %d, out %d", + VG_(overall_in_count), + VG_(bbs_done), + VG_(overall_in_osize), VG_(overall_in_tsize), + orig_addr ); + } + cb = allocCodeBlock(); + + /* Disassemble this basic block into cb. */ + VGP_PUSHCC(VgpToUCode); + n_disassembled_bytes = VG_(disBB) ( cb, orig_addr ); + VGP_POPCC; + /* dis=True; */ + /* if (0&& VG_(translations_done) < 617) */ + /* dis=False; */ + /* Try and improve the code a bit. */ + if (VG_(clo_optimise)) { + VGP_PUSHCC(VgpImprove); + vg_improve ( cb ); + if (VG_(disassemble)) + VG_(ppUCodeBlock) ( cb, "Improved code:" ); + VGP_POPCC; + } + /* dis=False; */ + /* Add instrumentation code. */ + if (VG_(clo_instrument)) { + VGP_PUSHCC(VgpInstrument); + cb = vg_instrument(cb); + VGP_POPCC; + if (VG_(disassemble)) + VG_(ppUCodeBlock) ( cb, "Instrumented code:" ); + if (VG_(clo_cleanup)) { + VGP_PUSHCC(VgpCleanup); + vg_cleanup(cb); + VGP_POPCC; + if (VG_(disassemble)) + VG_(ppUCodeBlock) ( cb, "Cleaned-up instrumented code:" ); + } + } + + /* Allocate registers. */ + VGP_PUSHCC(VgpRegAlloc); + cb = vg_do_register_allocation ( cb ); + VGP_POPCC; + /* dis=False; */ + /* + if (VG_(disassemble)) + VG_(ppUCodeBlock) ( cb, "After Register Allocation:"); + */ + + VGP_PUSHCC(VgpFromUcode); + /* NB final_code is allocated with VG_(jitmalloc), not VG_(malloc) + and so must be VG_(jitfree)'d. */ + final_code = VG_(emit_code)(cb, &final_code_size ); + VGP_POPCC; + freeCodeBlock(cb); + + if (debugging_translation) { + /* Only done for debugging -- throw away final result. 
*/ + VG_(jitfree)(final_code); + } else { + /* Doing it for real -- return values to caller. */ + *orig_size = n_disassembled_bytes; + *trans_addr = (Addr)final_code; + *trans_size = final_code_size; + } + VGP_POPCC; +} + +/*--------------------------------------------------------------------*/ +/*--- end vg_translate.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_transtab.c b/coregrind/vg_transtab.c new file mode 100644 index 000000000..b93fe40df --- /dev/null +++ b/coregrind/vg_transtab.c @@ -0,0 +1,693 @@ + +/*--------------------------------------------------------------------*/ +/*--- Management of the translation table and cache. ---*/ +/*--- vg_transtab.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. 
+*/ + +#include "vg_include.h" +#include "vg_constants.h" + + +/*------------------------------------------------------------*/ +/*--- Management of the LRU-based translation table+cache. ---*/ +/*------------------------------------------------------------*/ + +/* These sizes were set up so as to be able to debug large KDE 3 + applications (are there any small ones?) without excessive amounts + of code retranslation. */ + +/* Size of the translation cache, in bytes. */ +#define VG_TC_SIZE 16000000 + +/* Do a LRU pass when the translation cache becomes this full. */ +#define VG_TC_LIMIT_PERCENT 95 + +/* When doing an LRU pass, reduce TC fullness to this level. */ +#define VG_TC_TARGET_PERCENT 85 + +/* Number of entries in the translation table. This must be a prime + number in order to make the hashing work properly. */ +#define VG_TT_SIZE /*19997*/ /*29989*/ /*50497*/ /*75083*/ 100129 + +/* Do an LRU pass when the translation table becomes this full. */ +#define VG_TT_LIMIT_PERCENT /*67*/ 80 + +/* When doing an LRU pass, reduce TT fullness to this level. */ +#define VG_TT_TARGET_PERCENT /*60*/ 70 + +/* The number of age steps we track. 0 means the current epoch, + N_EPOCHS-1 means used the epoch N_EPOCHS-1 or more ago. */ +#define VG_N_EPOCHS /*2000*/ 4000 + +/* This TT entry is empty. */ +#define VG_TTE_EMPTY ((Addr)1) +/* This TT entry has been deleted. */ +#define VG_TTE_DELETED ((Addr)3) + +/* The TC. This used to be statically allocated, but that forces many + SecMap arrays to be pointlessly allocated at startup, bloating the + process size by about 22M and making startup slow. So now we + dynamically allocate it at startup time. + was: static UChar vg_tc[VG_TC_SIZE]; +*/ +static UChar* vg_tc = NULL; + +/* Count of bytes used in the TC. */ +static Int vg_tc_used = 0; + +/* The TT. Like TC, for the same reason, is dynamically allocated at + startup. 
+ was: static TTEntry vg_tt[VG_TT_SIZE]; +*/ +static TTEntry* vg_tt = NULL; + +/* Count of non-empty, non-deleted TT entries. */ +static Int vg_tt_used = 0; + +/* Fast helper for the TT. A direct-mapped cache which holds a + pointer to a TT entry which may or may not be the correct one, but + which we hope usually is. This array is referred to directly from + vg_dispatch.S. */ +Addr VG_(tt_fast)[VG_TT_FAST_SIZE]; + +/* For reading/writing the misaligned TT-index word at immediately + preceding every translation in TC. */ +#define VG_READ_MISALIGNED_WORD(aaa) (*((UInt*)(aaa))) +#define VG_WRITE_MISALIGNED_WORD(aaa,vvv) *((UInt*)(aaa)) = ((UInt)(vvv)) + +/* Used for figuring out an age threshold for translations. */ +static Int vg_bytes_in_epoch[VG_N_EPOCHS]; +static Int vg_entries_in_epoch[VG_N_EPOCHS]; + + +/* Just so these counts can be queried without making them globally + visible. */ +void VG_(get_tt_tc_used) ( UInt* tt_used, UInt* tc_used ) +{ + *tt_used = vg_tt_used; + *tc_used = vg_tc_used; +} + + +/* Do the LRU thing on TT/TC, clearing them back to the target limits + if they are over the threshold limits. +*/ +void VG_(maybe_do_lru_pass) ( void ) +{ + Int i, j, r, w, thresh, ttno; + TTEntry* tte; + + const Int tc_limit = (Int)((VG_TC_SIZE * VG_TC_LIMIT_PERCENT) / 100.0); + const Int tt_limit = (Int)((VG_TT_SIZE * VG_TT_LIMIT_PERCENT) / 100.0); + const Int tc_target = (Int)((VG_TC_SIZE * VG_TC_TARGET_PERCENT) / 100.0); + const Int tt_target = (Int)((VG_TT_SIZE * VG_TT_TARGET_PERCENT) / 100.0); + + /* Decide quickly if we need to do an LRU pass ? */ + if (vg_tc_used <= tc_limit && vg_tt_used <= tt_limit) + return; + + VGP_PUSHCC(VgpDoLRU); + /* + VG_(printf)( + "limits: tc_limit %d, tt_limit %d, tc_target %d, tt_target %d\n", + tc_limit, tt_limit, tc_target, tt_target); + */ + + if (VG_(clo_verbosity) > 2) + VG_(printf)(" pre-LRU: tc %d (target %d), tt %d (target %d)\n", + vg_tc_used, tc_target, vg_tt_used, tt_target); + + /* Yes we do. 
Figure out what threshold age is required in order to + shrink both the TC and TT occupancy below TC_TARGET_PERCENT and + TT_TARGET_PERCENT respectively. */ + + VG_(number_of_lrus)++; + + /* Count the number of TC bytes and TT entries in each epoch. */ + for (i = 0; i < VG_N_EPOCHS; i++) + vg_bytes_in_epoch[i] = vg_entries_in_epoch[i] = 0; + + for (i = 0; i < VG_TT_SIZE; i++) { + if (vg_tt[i].orig_addr == VG_TTE_EMPTY || + vg_tt[i].orig_addr == VG_TTE_DELETED) continue; + j = vg_tt[i].mru_epoch; + vg_assert(j <= VG_(current_epoch)); + j = VG_(current_epoch) - j; + if (j >= VG_N_EPOCHS) j = VG_N_EPOCHS-1; + vg_assert(0 <= j && j < VG_N_EPOCHS); + /* Greater j now means older. */ + vg_entries_in_epoch[j]++; + vg_bytes_in_epoch[j] += 4+vg_tt[i].trans_size; + } + + /* + for (i = 0; i < VG_N_EPOCHS; i++) + VG_(printf)("epoch %d: ents %d, bytes %d\n", + i, vg_entries_in_epoch[i], vg_bytes_in_epoch[i]); + */ + + /* Cumulatise. Make vg_{bytes,entries}_in_epoch[n] contain the + counts for itself and all younger epochs. */ + for (i = 1; i < VG_N_EPOCHS; i++) { + vg_entries_in_epoch[i] += vg_entries_in_epoch[i-1]; + vg_bytes_in_epoch[i] += vg_bytes_in_epoch[i-1]; + } + + for (thresh = 0; thresh < VG_N_EPOCHS; thresh++) { + if (vg_entries_in_epoch[thresh] > tt_target + || vg_bytes_in_epoch[thresh] >= tc_target) + break; + } + + if (VG_(clo_verbosity) > 2) + VG_(printf)( + " LRU: discard translations %d or more epochs since last use\n", + thresh + ); + + thresh = VG_(current_epoch) - thresh; + + /* Ok, so we will hit our targets if we retain all entries most + recently used at most thresh epochs ago. Traverse the TT and + mark such entries as deleted. 
*/ + for (i = 0; i < VG_TT_SIZE; i++) { + if (vg_tt[i].orig_addr == VG_TTE_EMPTY || + vg_tt[i].orig_addr == VG_TTE_DELETED) continue; + if (vg_tt[i].mru_epoch <= thresh) { + vg_tt[i].orig_addr = VG_TTE_DELETED; + vg_tt_used--; + VG_(this_epoch_out_count) ++; + VG_(this_epoch_out_osize) += vg_tt[i].orig_size; + VG_(this_epoch_out_tsize) += vg_tt[i].trans_size; + VG_(overall_out_count) ++; + VG_(overall_out_osize) += vg_tt[i].orig_size; + VG_(overall_out_tsize) += vg_tt[i].trans_size; + } + } + + vg_assert(vg_tt_used >= 0); + vg_assert(vg_tt_used <= tt_target); + + /* Now compact the TC, sliding live entries downwards to fill spaces + left by deleted entries. In this loop, r is the offset in TC of + the current translation under consideration, and w is the next + allocation point. */ + r = w = 0; + while (True) { + if (r >= vg_tc_used) break; + /* The first four bytes of every translation contain the index + of its TT entry. The TT entry's .trans_addr field points at + the start of the code proper, not at this 4-byte index, so + that we don't constantly have to keep adding 4 in the main + lookup/dispatch loop. */ + ttno = VG_READ_MISALIGNED_WORD(&vg_tc[r]); + vg_assert(ttno >= 0 && ttno < VG_TT_SIZE); + tte = & vg_tt[ ttno ]; + vg_assert(tte->orig_addr != VG_TTE_EMPTY); + if (tte->orig_addr != VG_TTE_DELETED) { + /* We want to keep this one alive. */ + /* Sanity check the pointer back to TC. */ + vg_assert(tte->trans_addr == (Addr)&vg_tc[r+4]); + for (i = 0; i < 4+tte->trans_size; i++) + vg_tc[w+i] = vg_tc[r+i]; + tte->trans_addr = (Addr)&vg_tc[w+4]; + w += 4+tte->trans_size; + } + r += 4+tte->trans_size; + } + /* should have traversed an exact number of translations, with no + slop at the end. */ + vg_assert(w <= r); + vg_assert(r == vg_tc_used); + vg_assert(w <= r); + vg_assert(w <= tc_target); + vg_tc_used = w; + + /* Invalidate the fast cache, since it is now out of date. It will get + reconstructed incrementally when the client resumes. 
*/ + VG_(invalidate_tt_fast)(); + + if (VG_(clo_verbosity) > 2) + VG_(printf)("post-LRU: tc %d (target %d), tt %d (target %d)\n", + vg_tc_used, tc_target, vg_tt_used, tt_target); + + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg, + "epoch %d (bb %luk): thresh %d, " + "out %d (%dk -> %dk), new TT %d, TC %dk", + VG_(current_epoch), + VG_(bbs_done) / 1000, + VG_(current_epoch) - thresh, + VG_(this_epoch_out_count), + VG_(this_epoch_out_osize) / 1000, + VG_(this_epoch_out_tsize) / 1000, + vg_tt_used, vg_tc_used / 1000 + ); + + /* Reconstruct the SMC detection structures. */ + + VGP_POPCC; +} + + +/* Do a sanity check on TT/TC. +*/ +void VG_(sanity_check_tc_tt) ( void ) +{ + Int i, counted_entries, counted_bytes; + TTEntry* tte; + counted_entries = 0; + counted_bytes = 0; + for (i = 0; i < VG_TT_SIZE; i++) { + tte = &vg_tt[i]; + if (tte->orig_addr == VG_TTE_EMPTY) continue; + if (tte->orig_addr == VG_TTE_DELETED) continue; + vg_assert(tte->mru_epoch >= 0); + vg_assert(tte->mru_epoch <= VG_(current_epoch)); + counted_entries++; + counted_bytes += 4+tte->trans_size; + vg_assert(tte->trans_addr >= (Addr)&vg_tc[4]); + vg_assert(tte->trans_addr < (Addr)&vg_tc[vg_tc_used]); + vg_assert(VG_READ_MISALIGNED_WORD(tte->trans_addr-4) == i); + } + vg_assert(counted_entries == vg_tt_used); + vg_assert(counted_bytes == vg_tc_used); +} + + +/* Add this already-filled-in entry to the TT. Assumes that the + relevant code chunk has been placed in TC, along with a dummy back + pointer, which is inserted here. +*/ +extern void VG_(add_to_trans_tab) ( TTEntry* tte ) +{ + Int i; + /* + VG_(printf)("add_to_trans_tab(%d) %x %d %x %d\n", + vg_tt_used, tte->orig_addr, tte->orig_size, + tte->trans_addr, tte->trans_size); + */ + vg_assert(tte->orig_addr != VG_TTE_DELETED + && tte->orig_addr != VG_TTE_EMPTY); + /* Hash to get initial probe point. 
*/ + i = ((UInt)(tte->orig_addr)) % VG_TT_SIZE; + while (True) { + if (vg_tt[i].orig_addr == tte->orig_addr) + VG_(panic)("add_to_trans_tab: duplicate"); + if (vg_tt[i].orig_addr == VG_TTE_DELETED || + vg_tt[i].orig_addr == VG_TTE_EMPTY) { + /* Put it here, and set the back pointer. */ + vg_tt[i] = *tte; + VG_WRITE_MISALIGNED_WORD(tte->trans_addr-4, i); + vg_tt_used++; + return; + } + i++; + if (i == VG_TT_SIZE) i = 0; + } +} + + +/* Copy a new translation's code into TC, leaving a 4-byte hole for + the back pointer, and returning a pointer to the code proper (not + the hole) in TC. +*/ +Addr VG_(copy_to_transcache) ( Addr trans_addr, Int trans_size ) +{ + Int i; + Addr ret_addr; + if (4+trans_size > VG_TC_SIZE-vg_tc_used) + VG_(panic)("copy_to_transcache: not enough free space?!"); + /* Leave a hole for the back pointer to the TT entry. */ + vg_tc_used += 4; + ret_addr = (Addr)&vg_tc[vg_tc_used]; + for (i = 0; i < trans_size; i++) + vg_tc[vg_tc_used+i] = ((UChar*)trans_addr)[i]; + vg_tc_used += trans_size; + return ret_addr; +} + + +/* Invalidate the tt_fast cache, for whatever reason. Tricky. We + have to find a TTE_EMPTY slot to point all entries at. */ +void VG_(invalidate_tt_fast)( void ) +{ + Int i, j; + for (i = 0; i < VG_TT_SIZE && vg_tt[i].orig_addr != VG_TTE_EMPTY; i++) + ; + vg_assert(i < VG_TT_SIZE + && vg_tt[i].orig_addr == VG_TTE_EMPTY); + for (j = 0; j < VG_TT_FAST_SIZE; j++) + VG_(tt_fast)[j] = (Addr)&vg_tt[i]; +} + + +/* Search TT to find the translated address of the supplied original, + or NULL if not found. This routine is used when we miss in + VG_(tt_fast). +*/ +static __inline__ TTEntry* search_trans_table ( Addr orig_addr ) +{ + //static Int queries = 0; + //static Int probes = 0; + Int i; + /* Hash to get initial probe point. 
*/ + // if (queries == 10000) { + // VG_(printf)("%d queries, %d probes\n", queries, probes); + // queries = probes = 0; + //} + //queries++; + i = ((UInt)orig_addr) % VG_TT_SIZE; + while (True) { + //probes++; + if (vg_tt[i].orig_addr == orig_addr) + return &vg_tt[i]; + if (vg_tt[i].orig_addr == VG_TTE_EMPTY) + return NULL; + i++; + if (i == VG_TT_SIZE) i = 0; + } +} + + +/* Find the translation address for a given (original) code address. + If found, update VG_(tt_fast) so subsequent lookups are fast. If + no translation can be found, return zero. This routine is (the + only one) called from vg_run_innerloop. */ +Addr VG_(search_transtab) ( Addr original_addr ) +{ + TTEntry* tte; + VGP_PUSHCC(VgpSlowFindT); + tte = search_trans_table ( original_addr ); + if (tte == NULL) { + /* We didn't find it. vg_run_innerloop will have to request a + translation. */ + VGP_POPCC; + return (Addr)0; + } else { + /* Found it. Put the search result into the fast cache now. + Also set the mru_epoch to mark this translation as used. */ + UInt cno = (UInt)original_addr & VG_TT_FAST_MASK; + VG_(tt_fast)[cno] = (Addr)tte; + VG_(tt_fast_misses)++; + tte->mru_epoch = VG_(current_epoch); + VGP_POPCC; + return tte->trans_addr; + } +} + + +/*------------------------------------------------------------*/ +/*--- Detecting and handling self-modifying code. ---*/ +/*------------------------------------------------------------*/ + +/* This mechanism uses two data structures: + + vg_oldmap -- array[64k] of Bool, which approximately records + parts of the address space corresponding to code for which + a translation exists in the translation table. vg_oldmap is + consulted at each write, to determine whether that write might + be writing a code address; if so, the program is stopped at + the next jump, and the corresponding translations are invalidated. + + Precise semantics: vg_oldmap[(a >> 8) & 0xFFFF] is true for all + addresses a containing a code byte which has been translated. 
So + it acts kind-of like a direct-mapped cache with 64k entries. + + The second structure is vg_CAW, a small array of addresses at which + vg_oldmap indicates a code write may have happened. This is + (effectively) checked at each control transfer (jump), so that + translations can be discarded before going on. An array is + somewhat overkill, since it strikes me as very unlikely that a + single basic block will do more than one code write. Nevertheless + ... + + ToDo: make this comment up-to-date. +*/ + + +/* Definitions for the self-modifying-code detection cache, intended + as a fast check which clears the vast majority of writes. */ + +#define VG_SMC_CACHE_HASH(aaa) \ + ((((UInt)a) >> VG_SMC_CACHE_SHIFT) & VG_SMC_CACHE_MASK) + +Bool VG_(smc_cache)[VG_SMC_CACHE_SIZE]; + + +/* Definitions for the fallback mechanism, which, more slowly, + provides a precise record of which words in the address space + belong to original code. */ + +typedef struct { UChar chars[2048]; } VgSmcSecondary; + +static VgSmcSecondary* vg_smc_primary[65536]; + +static VgSmcSecondary* vg_smc_new_secondary ( void ) +{ + Int i; + VgSmcSecondary* sec + = VG_(malloc) ( VG_AR_PRIVATE, sizeof(VgSmcSecondary) ); + for (i = 0; i < 2048; i++) + sec->chars[i] = 0; + return sec; +} + +#define GET_BIT_ARRAY(arr,indx) \ + (1 & ( ((UChar*)arr)[((UInt)indx) / 8] \ + >> ( ((UInt)indx) % 8) ) ) + +#define SET_BIT_ARRAY(arr,indx) \ + ((UChar*)arr)[((UInt)indx) / 8] |= (1 << ((UInt)indx) % 8) + + +/* Finally, a place to record the original-code-write addresses + detected in a basic block. */ + +#define VG_ORIGWRITES_SIZE 10 + +static Addr vg_origwrites[VG_ORIGWRITES_SIZE]; +static Int vg_origwrites_used; + + +/* Call here to check a written address. */ + +void VG_(smc_check4) ( Addr a ) +{ + UInt bit_index; + VgSmcSecondary* smc_secondary; + +# if VG_SMC_FASTCHECK_IN_C + VG_(smc_total_check4s)++; + + /* Try the fast check first. 
*/ + if (VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] == False) return; +# endif + + VG_(smc_cache_passed)++; + + /* Need to do a slow check. */ + smc_secondary = vg_smc_primary[a >> 16]; + if (smc_secondary == NULL) return; + + bit_index = (a & 0xFFFF) >> 2; + if (GET_BIT_ARRAY(smc_secondary->chars, bit_index) == 0) return; + + VG_(smc_fancy_passed)++; + + /* Detected a Real Live write to code which has been translated. + Note it. */ + if (vg_origwrites_used == VG_ORIGWRITES_SIZE) + VG_(panic)("VG_ORIGWRITES_SIZE is too small; " + "increase and recompile."); + vg_origwrites[vg_origwrites_used] = a; + vg_origwrites_used++; + + VG_(message)(Vg_DebugMsg, "self-modifying-code write at %p", a); + + /* Force an exit before the next basic block, so the translation + cache can be flushed appropriately. */ + VG_(dispatch_ctr_SAVED) = VG_(dispatch_ctr); + VG_(dispatch_ctr) = 1; + VG_(interrupt_reason) = VG_Y_SMC; +} + + +/* Mark an address range as containing an original translation, + updating both the fast-check cache and the slow-but-correct data + structure. +*/ +void VG_(smc_mark_original) ( Addr orig_addr, Int orig_size ) +{ + Addr a; + VgSmcSecondary* smc_secondary; + UInt bit_index; + + for (a = orig_addr; a < orig_addr+orig_size; a++) { + + VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] = True; + + smc_secondary = vg_smc_primary[a >> 16]; + if (smc_secondary == NULL) + smc_secondary = + vg_smc_primary[a >> 16] = vg_smc_new_secondary(); + + bit_index = (a & 0xFFFF) >> 2; + SET_BIT_ARRAY(smc_secondary->chars, bit_index); + } +} + + +/* Discard any translations whose original code overlaps with the + range w_addr .. w_addr+3 inclusive. 
+*/ +__attribute__ ((unused)) +static void discard_translations_bracketing ( Addr w_addr ) +{ +# if 0 + Int i, rd, wr; + Addr o_start, o_end; + TTEntry* tt; + + for (i = 0; i < VG_TRANSTAB_SLOW_SIZE; i++) { + tt = vg_transtab[i]; + wr = 0; + for (rd = 0; rd < vg_transtab_used[i]; rd++) { + o_start = tt[rd].orig_addr; + o_end = o_start + tt[rd].orig_size; + if (w_addr > o_end || (w_addr+3) < o_start) { + /* No collision possible; keep this translation */ + VG_(smc_mark_original) ( tt[rd].orig_addr, tt[rd].orig_size ); + if (wr < rd) vg_transtab[wr] = vg_transtab[rd]; + wr++; + } else { + /* Possible collision; discard. */ + vg_smc_discards++; + VG_(message) (Vg_DebugMsg, + "discarding translation of %p .. %p", + tt[rd].orig_addr, + tt[rd].orig_addr + tt[rd].orig_size - 1); + VG_(free)((void*)tt[rd].trans_addr); + } + } + vg_transtab_used[i] = wr; + } +# endif +} + + +/* Top-level function in charge of discarding out-of-date translations + following the discovery of a (potential) original-code-write. +*/ +void VG_(flush_transtab) ( void ) +{ +# if 0 + Addr w_addr; + Int i, j; + + /* We shouldn't be here unless a code write was detected. */ + vg_assert(vg_origwrites_used > 0); + + /* Instead of incrementally fixing up the translation table cache, + just invalidate the whole darn thing. Pray this doesn't happen + very often :) */ + for (i = 0; i < VG_TRANSTAB_CACHE_SIZE; i++) + VG_(transtab_cache_orig)[i] = + VG_(transtab_cache_trans)[i] = (Addr)0; + + /* Clear out the fast cache; discard_translations_bracketing + reconstructs it. */ + for (i = 0; i < VG_SMC_CACHE_SIZE; i++) + VG_(smc_cache)[i] = False; + + /* And also clear the slow-but-correct table. */ + for (i = 0; i < 65536; i++) { + VgSmcSecondary* sec = vg_smc_primary[i]; + if (sec) + for (j = 0; j < 2048; j++) + sec->chars[j] = 0; + } + + /* This doesn't need to be particularly fast, since we (presumably) + don't have to handle particularly frequent writes to code + addresses. 
*/ + while (vg_origwrites_used > 0) { + vg_origwrites_used--; + w_addr = vg_origwrites[vg_origwrites_used]; + discard_translations_bracketing ( w_addr ); + } + + vg_assert(vg_origwrites_used == 0); +# endif +} + + +/*------------------------------------------------------------*/ +/*--- Initialisation. ---*/ +/*------------------------------------------------------------*/ + +void VG_(init_transtab_and_SMC) ( void ) +{ + Int i; + + /* Allocate the translation table and translation cache. */ + vg_assert(vg_tc == NULL); + vg_tc = VG_(get_memory_from_mmap) ( VG_TC_SIZE * sizeof(UChar) ); + vg_assert(vg_tc != NULL); + + vg_assert(vg_tt == NULL); + vg_tt = VG_(get_memory_from_mmap) ( VG_TT_SIZE * sizeof(TTEntry) ); + vg_assert(vg_tt != NULL); + + /* The main translation table is empty. */ + vg_tt_used = 0; + for (i = 0; i < VG_TT_SIZE; i++) { + vg_tt[i].orig_addr = VG_TTE_EMPTY; + } + + /* The translation table's fast cache is empty. Point all entries + at the first TT entry, which is, of course, empty. */ + for (i = 0; i < VG_TT_FAST_SIZE; i++) + VG_(tt_fast)[i] = (Addr)(&vg_tt[0]); + + /* No part of the address space has any translations. */ + for (i = 0; i < 65536; i++) + vg_smc_primary[i] = NULL; + + /* ... and the associated fast-check cache reflects this. */ + for (i = 0; i < VG_SMC_CACHE_SIZE; i++) + VG_(smc_cache)[i] = False; + + /* Finally, no original-code-writes have been recorded. */ + vg_origwrites_used = 0; +} + +/*--------------------------------------------------------------------*/ +/*--- end vg_transtab.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_unsafe.h b/coregrind/vg_unsafe.h new file mode 100644 index 000000000..9fa91539a --- /dev/null +++ b/coregrind/vg_unsafe.h @@ -0,0 +1,86 @@ + +/*--------------------------------------------------------------------*/ +/*--- A header file for making sense of syscalls. Unsafe in the ---*/ +/*--- sense that we don't call any functions mentioned herein. 
---*/ +/*--- vg_unsafe.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + + +/* These includes are only used for making sense of the args for + system calls. 
*/ +#include /* for system call numbers */ +#include /* for PROT_* */ +#include /* for uname */ +#include /* for struct timeval & struct timezone */ +#include /* for the SYS_* constants */ +#include /* for struct rlimit */ +#include /* for struct shmid_ds & struct ipc_perm */ +#include /* for struct msghdr */ +#include /* for sockaddr_un */ +#include /* for struct ifreq et al */ +#include /* for struct arpreq */ +#include /* for struct rtentry */ + +#include /* for ISDN ioctls */ +#include /* for struct module */ +#include /* for the SG_* ioctls */ +#include /* for struct sched_param */ +#include /* for struct __sysctl_args */ + + +#define __USE_LARGEFILE64 +#include /* for struct stat */ +#undef __USE_LARGEFILE64 + +#include /* for stuff for dealing with ioctl :( */ +#include /* for various soundcard ioctl constants :( */ + +#include +#include + +/* 2.2 stuff ... */ +#include + +/* Both */ +#include +#include /* for struct tms */ + +/* 2.0 at least, for gid_t and loff_t */ +#include + +#include + +#include + +#include + + +/*--------------------------------------------------------------------*/ +/*--- end vg_unsafe.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_valgrinq_dummy.c b/coregrind/vg_valgrinq_dummy.c new file mode 100644 index 000000000..5b09ddb0f --- /dev/null +++ b/coregrind/vg_valgrinq_dummy.c @@ -0,0 +1,44 @@ + +/*--------------------------------------------------------------------*/ +/*--- Used to make a dummy valgrinq.so, which does nothing at all. ---*/ +/*--- vg_valgrinq_dummy.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. 
+ + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +/* For the rationale behind this file, look at + VG_(mash_LD_PRELOAD_string) in vg_main.c. */ + +/* Remember not to use a variable of this name in any program you want + to debug :-) */ +int dont_mess_with_the_RSCDS = 0; + +/* If you are bored, perhaps have a look at http://www.rscds.org. */ + +/*--------------------------------------------------------------------*/ +/*--- end vg_valgrinq_dummy.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/demangle/Makefile.am b/demangle/Makefile.am new file mode 100644 index 000000000..1188b7c89 --- /dev/null +++ b/demangle/Makefile.am @@ -0,0 +1,23 @@ +INCLUDES += -I$(top_srcdir) + +noinst_HEADERS = \ + ansidecl.h \ + dyn-string.h \ + demangle.h \ + safe-ctype.h + +noinst_LIBRARIES = libdemangle.a + +libdemangle_a_SOURCES = \ + cp-demangle.c cplus-dem.c dyn-string.c safe-ctype.c + +# some files don't like my config.h, so just pretend it does not exist... 
+ +cp-demangle.o: + $(COMPILE) -Wno-unused -Wno-shadow -c $< -UHAVE_CONFIG_H + +dyn-string.o: + $(COMPILE) -c $< -UHAVE_CONFIG_H + +cplus-dem.o: + $(COMPILE) -Wno-unused -c $< diff --git a/demangle/Makefile.in b/demangle/Makefile.in new file mode 100644 index 000000000..323cd67d0 --- /dev/null +++ b/demangle/Makefile.in @@ -0,0 +1,291 @@ +# Makefile.in generated automatically by automake 1.4-p4 from Makefile.am + +# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + + +SHELL = @SHELL@ + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ + +bindir = @bindir@ +sbindir = @sbindir@ +libexecdir = @libexecdir@ +datadir = @datadir@ +sysconfdir = @sysconfdir@ +sharedstatedir = @sharedstatedir@ +localstatedir = @localstatedir@ +libdir = @libdir@ +infodir = @infodir@ +mandir = @mandir@ +includedir = @includedir@ +oldincludedir = /usr/include + +DESTDIR = + +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ + +top_builddir = .. 
+ +ACLOCAL = @ACLOCAL@ +AUTOCONF = @AUTOCONF@ +AUTOMAKE = @AUTOMAKE@ +AUTOHEADER = @AUTOHEADER@ + +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS) +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +transform = @program_transform_name@ + +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +host_alias = @host_alias@ +host_triplet = @host@ +CC = @CC@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +DEFAULT_SUPP = @DEFAULT_SUPP@ +LN_S = @LN_S@ +MAKEINFO = @MAKEINFO@ +PACKAGE = @PACKAGE@ +RANLIB = @RANLIB@ +VERSION = @VERSION@ + +INCLUDES = -I$(top_srcdir) + +noinst_HEADERS = ansidecl.h dyn-string.h demangle.h safe-ctype.h + + +noinst_LIBRARIES = libdemangle.a + +libdemangle_a_SOURCES = cp-demangle.c cplus-dem.c dyn-string.c safe-ctype.c + +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = ../config.h +CONFIG_CLEAN_FILES = +LIBRARIES = $(noinst_LIBRARIES) + + +DEFS = @DEFS@ -I. -I$(srcdir) -I.. 
+CPPFLAGS = @CPPFLAGS@ +LDFLAGS = @LDFLAGS@ +LIBS = @LIBS@ +libdemangle_a_LIBADD = +libdemangle_a_OBJECTS = cp-demangle.o cplus-dem.o dyn-string.o \ +safe-ctype.o +AR = ar +COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ +HEADERS = $(noinst_HEADERS) + +DIST_COMMON = Makefile.am Makefile.in + + +DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) + +TAR = tar +GZIP_ENV = --best +SOURCES = $(libdemangle_a_SOURCES) +OBJECTS = $(libdemangle_a_OBJECTS) + +all: all-redirect +.SUFFIXES: +.SUFFIXES: .S .c .o .s +$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) + cd $(top_srcdir) && $(AUTOMAKE) --gnu --include-deps demangle/Makefile + +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) \ + && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status + + +mostlyclean-noinstLIBRARIES: + +clean-noinstLIBRARIES: + -test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES) + +distclean-noinstLIBRARIES: + +maintainer-clean-noinstLIBRARIES: + +.c.o: + $(COMPILE) -c $< + +.s.o: + $(COMPILE) -c $< + +.S.o: + $(COMPILE) -c $< + +mostlyclean-compile: + -rm -f *.o core *.core + +clean-compile: + +distclean-compile: + -rm -f *.tab.c + +maintainer-clean-compile: + +libdemangle.a: $(libdemangle_a_OBJECTS) $(libdemangle_a_DEPENDENCIES) + -rm -f libdemangle.a + $(AR) cru libdemangle.a $(libdemangle_a_OBJECTS) $(libdemangle_a_LIBADD) + $(RANLIB) libdemangle.a + +tags: TAGS + +ID: $(HEADERS) $(SOURCES) $(LISP) + list='$(SOURCES) $(HEADERS)'; \ + unique=`for i in $$list; do echo $$i; done | \ + awk ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + here=`pwd` && cd $(srcdir) \ + && mkid -f$$here/ID $$unique $(LISP) + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS)'; \ + unique=`for i in $$list; do echo $$i; done | \ 
+ awk ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(ETAGS_ARGS)$$unique$(LISP)$$tags" \ + || (cd $(srcdir) && etags $(ETAGS_ARGS) $$tags $$unique $(LISP) -o $$here/TAGS) + +mostlyclean-tags: + +clean-tags: + +distclean-tags: + -rm -f TAGS ID + +maintainer-clean-tags: + +distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) + +subdir = demangle + +distdir: $(DISTFILES) + @for file in $(DISTFILES); do \ + d=$(srcdir); \ + if test -d $$d/$$file; then \ + cp -pr $$d/$$file $(distdir)/$$file; \ + else \ + test -f $(distdir)/$$file \ + || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ + || cp -p $$d/$$file $(distdir)/$$file || :; \ + fi; \ + done +cp-demangle.o: cp-demangle.c ../config.h ../vg_include.h \ + ../vg_constants.h ../vg_kerneliface.h ansidecl.h dyn-string.h \ + demangle.h +cplus-dem.o: cplus-dem.c ../config.h safe-ctype.h ../vg_include.h \ + ../vg_constants.h ../vg_kerneliface.h demangle.h ansidecl.h \ + dyn-string.h +dyn-string.o: dyn-string.c ../config.h ../vg_include.h ../vg_constants.h \ + ../vg_kerneliface.h ansidecl.h dyn-string.h +safe-ctype.o: safe-ctype.c ansidecl.h safe-ctype.h + +info-am: +info: info-am +dvi-am: +dvi: dvi-am +check-am: all-am +check: check-am +installcheck-am: +installcheck: installcheck-am +install-exec-am: +install-exec: install-exec-am + +install-data-am: +install-data: install-data-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am +install: install-am +uninstall-am: +uninstall: uninstall-am +all-am: Makefile $(LIBRARIES) $(HEADERS) +all-redirect: all-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install +installdirs: + + +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + -rm -f config.cache config.log stamp-h stamp-h[0-9]* + +maintainer-clean-generic: +mostlyclean-am: mostlyclean-noinstLIBRARIES mostlyclean-compile \ + mostlyclean-tags mostlyclean-generic + +mostlyclean: 
mostlyclean-am + +clean-am: clean-noinstLIBRARIES clean-compile clean-tags clean-generic \ + mostlyclean-am + +clean: clean-am + +distclean-am: distclean-noinstLIBRARIES distclean-compile \ + distclean-tags distclean-generic clean-am + +distclean: distclean-am + +maintainer-clean-am: maintainer-clean-noinstLIBRARIES \ + maintainer-clean-compile maintainer-clean-tags \ + maintainer-clean-generic distclean-am + @echo "This command is intended for maintainers to use;" + @echo "it deletes files that may require special tools to rebuild." + +maintainer-clean: maintainer-clean-am + +.PHONY: mostlyclean-noinstLIBRARIES distclean-noinstLIBRARIES \ +clean-noinstLIBRARIES maintainer-clean-noinstLIBRARIES \ +mostlyclean-compile distclean-compile clean-compile \ +maintainer-clean-compile tags mostlyclean-tags distclean-tags \ +clean-tags maintainer-clean-tags distdir info-am info dvi-am dvi check \ +check-am installcheck-am installcheck install-exec-am install-exec \ +install-data-am install-data install-am install uninstall-am uninstall \ +all-redirect all-am all installdirs mostlyclean-generic \ +distclean-generic clean-generic maintainer-clean-generic clean \ +mostlyclean distclean maintainer-clean + + +# some files don't like my config.h, so just pretend it does not exist... + +cp-demangle.o: + $(COMPILE) -Wno-unused -Wno-shadow -c $< -UHAVE_CONFIG_H + +dyn-string.o: + $(COMPILE) -c $< -UHAVE_CONFIG_H + +cplus-dem.o: + $(COMPILE) -Wno-unused -c $< + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/demangle/ansidecl.h b/demangle/ansidecl.h new file mode 100644 index 000000000..9a7c5777f --- /dev/null +++ b/demangle/ansidecl.h @@ -0,0 +1,295 @@ +/* ANSI and traditional C compatability macros + Copyright 1991, 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001 + Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +/* ANSI and traditional C compatibility macros + + ANSI C is assumed if __STDC__ is #defined. + + Macro ANSI C definition Traditional C definition + ----- ---- - ---------- ----------- - ---------- + ANSI_PROTOTYPES 1 not defined + PTR `void *' `char *' + PTRCONST `void *const' `char *' + LONG_DOUBLE `long double' `double' + const not defined `' + volatile not defined `' + signed not defined `' + VA_START(ap, var) va_start(ap, var) va_start(ap) + + Note that it is safe to write "void foo();" indicating a function + with no return value, in all K+R compilers we have been able to test. + + For declaring functions with prototypes, we also provide these: + + PARAMS ((prototype)) + -- for functions which take a fixed number of arguments. Use this + when declaring the function. When defining the function, write a + K+R style argument list. For example: + + char *strcpy PARAMS ((char *dest, char *source)); + ... + char * + strcpy (dest, source) + char *dest; + char *source; + { ... } + + + VPARAMS ((prototype, ...)) + -- for functions which take a variable number of arguments. Use + PARAMS to declare the function, VPARAMS to define it. For example: + + int printf PARAMS ((const char *format, ...)); + ... + int + printf VPARAMS ((const char *format, ...)) + { + ... 
+ } + + For writing functions which take variable numbers of arguments, we + also provide the VA_OPEN, VA_CLOSE, and VA_FIXEDARG macros. These + hide the differences between K+R and C89 more + thoroughly than the simple VA_START() macro mentioned above. + + VA_OPEN and VA_CLOSE are used *instead of* va_start and va_end. + Immediately after VA_OPEN, put a sequence of VA_FIXEDARG calls + corresponding to the list of fixed arguments. Then use va_arg + normally to get the variable arguments, or pass your va_list object + around. You do not declare the va_list yourself; VA_OPEN does it + for you. + + Here is a complete example: + + int + printf VPARAMS ((const char *format, ...)) + { + int result; + + VA_OPEN (ap, format); + VA_FIXEDARG (ap, const char *, format); + + result = vfprintf (stdout, format, ap); + VA_CLOSE (ap); + + return result; + } + + + You can declare variables either before or after the VA_OPEN, + VA_FIXEDARG sequence. Also, VA_OPEN and VA_CLOSE are the beginning + and end of a block. They must appear at the same nesting level, + and any variables declared after VA_OPEN go out of scope at + VA_CLOSE. Unfortunately, with a K+R compiler, that includes the + argument list. You can have multiple instances of VA_OPEN/VA_CLOSE + pairs in a single function in case you need to traverse the + argument list more than once. + + For ease of writing code which uses GCC extensions but needs to be + portable to other compilers, we provide the GCC_VERSION macro that + simplifies testing __GNUC__ and __GNUC_MINOR__ together, and various + wrappers around __attribute__. Also, __extension__ will be #defined + to nothing if it doesn't work. See below. + + This header also defines a lot of obsolete macros: + CONST, VOLATILE, SIGNED, PROTO, EXFUN, DEFUN, DEFUN_VOID, + AND, DOTS, NOARGS. Don't use them. */ + +#ifndef _ANSIDECL_H +#define _ANSIDECL_H 1 + +/* Every source file includes this file, + so they will all get the switch for lint. 
*/ +/* LINTLIBRARY */ + +/* Using MACRO(x,y) in cpp #if conditionals does not work with some + older preprocessors. Thus we can't define something like this: + +#define HAVE_GCC_VERSION(MAJOR, MINOR) \ + (__GNUC__ > (MAJOR) || (__GNUC__ == (MAJOR) && __GNUC_MINOR__ >= (MINOR))) + +and then test "#if HAVE_GCC_VERSION(2,7)". + +So instead we use the macro below and test it against specific values. */ + +/* This macro simplifies testing whether we are using gcc, and if it + is of a particular minimum version. (Both major & minor numbers are + significant.) This macro will evaluate to 0 if we are not using + gcc at all. */ +#ifndef GCC_VERSION +#define GCC_VERSION (__GNUC__ * 1000 + __GNUC_MINOR__) +#endif /* GCC_VERSION */ + +#if defined (__STDC__) || defined (_AIX) || (defined (__mips) && defined (_SYSTYPE_SVR4)) || defined(_WIN32) +/* All known AIX compilers implement these things (but don't always + define __STDC__). The RISC/OS MIPS compiler defines these things + in SVR4 mode, but does not define __STDC__. */ + +#define ANSI_PROTOTYPES 1 +#define PTR void * +#define PTRCONST void *const +#define LONG_DOUBLE long double + +#define PARAMS(ARGS) ARGS +#define VPARAMS(ARGS) ARGS +#define VA_START(VA_LIST, VAR) va_start(VA_LIST, VAR) + +/* variadic function helper macros */ +/* "struct Qdmy" swallows the semicolon after VA_OPEN/VA_FIXEDARG's + use without inhibiting further decls and without declaring an + actual variable. */ +#define VA_OPEN(AP, VAR) { va_list AP; va_start(AP, VAR); { struct Qdmy +#define VA_CLOSE(AP) } va_end(AP); } +#define VA_FIXEDARG(AP, T, N) struct Qdmy + +#undef const +#undef volatile +#undef signed + +/* inline requires special treatment; it's in C99, and GCC >=2.7 supports + it too, but it's not in C89. 
*/ +#undef inline +#if __STDC_VERSION__ > 199901L +/* it's a keyword */ +#else +# if GCC_VERSION >= 2007 +# define inline __inline__ /* __inline__ prevents -pedantic warnings */ +# else +# define inline /* nothing */ +# endif +#endif + +/* These are obsolete. Do not use. */ +#ifndef IN_GCC +#define CONST const +#define VOLATILE volatile +#define SIGNED signed + +#define PROTO(type, name, arglist) type name arglist +#define EXFUN(name, proto) name proto +#define DEFUN(name, arglist, args) name(args) +#define DEFUN_VOID(name) name(void) +#define AND , +#define DOTS , ... +#define NOARGS void +#endif /* ! IN_GCC */ + +#else /* Not ANSI C. */ + +#undef ANSI_PROTOTYPES +#define PTR char * +#define PTRCONST PTR +#define LONG_DOUBLE double + +#define PARAMS(args) () +#define VPARAMS(args) (va_alist) va_dcl +#define VA_START(va_list, var) va_start(va_list) + +#define VA_OPEN(AP, VAR) { va_list AP; va_start(AP); { struct Qdmy +#define VA_CLOSE(AP) } va_end(AP); } +#define VA_FIXEDARG(AP, TYPE, NAME) TYPE NAME = va_arg(AP, TYPE) + +/* some systems define these in header files for non-ansi mode */ +#undef const +#undef volatile +#undef signed +#undef inline +#define const +#define volatile +#define signed +#define inline + +#ifndef IN_GCC +#define CONST +#define VOLATILE +#define SIGNED + +#define PROTO(type, name, arglist) type name () +#define EXFUN(name, proto) name() +#define DEFUN(name, arglist, args) name arglist args; +#define DEFUN_VOID(name) name() +#define AND ; +#define DOTS +#define NOARGS +#endif /* ! IN_GCC */ + +#endif /* ANSI C. */ + +/* Define macros for some gcc attributes. This permits us to use the + macros freely, and know that they will come into play for the + version of gcc in which they are supported. */ + +#if (GCC_VERSION < 2007) +# define __attribute__(x) +#endif + +/* Attribute __malloc__ on functions was valid as of gcc 2.96. 
*/ +#ifndef ATTRIBUTE_MALLOC +# if (GCC_VERSION >= 2096) +# define ATTRIBUTE_MALLOC __attribute__ ((__malloc__)) +# else +# define ATTRIBUTE_MALLOC +# endif /* GNUC >= 2.96 */ +#endif /* ATTRIBUTE_MALLOC */ + +/* Attributes on labels were valid as of gcc 2.93. */ +#ifndef ATTRIBUTE_UNUSED_LABEL +# if (GCC_VERSION >= 2093) +# define ATTRIBUTE_UNUSED_LABEL ATTRIBUTE_UNUSED +# else +# define ATTRIBUTE_UNUSED_LABEL +# endif /* GNUC >= 2.93 */ +#endif /* ATTRIBUTE_UNUSED_LABEL */ + +#ifndef ATTRIBUTE_UNUSED +#define ATTRIBUTE_UNUSED __attribute__ ((__unused__)) +#endif /* ATTRIBUTE_UNUSED */ + +#ifndef ATTRIBUTE_NORETURN +#define ATTRIBUTE_NORETURN __attribute__ ((__noreturn__)) +#endif /* ATTRIBUTE_NORETURN */ + +#ifndef ATTRIBUTE_PRINTF +#define ATTRIBUTE_PRINTF(m, n) __attribute__ ((__format__ (__printf__, m, n))) +#define ATTRIBUTE_PRINTF_1 ATTRIBUTE_PRINTF(1, 2) +#define ATTRIBUTE_PRINTF_2 ATTRIBUTE_PRINTF(2, 3) +#define ATTRIBUTE_PRINTF_3 ATTRIBUTE_PRINTF(3, 4) +#define ATTRIBUTE_PRINTF_4 ATTRIBUTE_PRINTF(4, 5) +#define ATTRIBUTE_PRINTF_5 ATTRIBUTE_PRINTF(5, 6) +#endif /* ATTRIBUTE_PRINTF */ + +/* We use __extension__ in some places to suppress -pedantic warnings + about GCC extensions. This feature didn't work properly before + gcc 2.8. */ +#if GCC_VERSION < 2008 +#define __extension__ +#endif + +/* Bootstrap support: Adjust certain macros defined by Autoconf, + which are only valid for the stage1 compiler. If we detect + a modern version of GCC, we are probably in stage2 or beyond, + so unconditionally reset the values. Note that const, inline, + etc. have been dealt with above. */ +#if (GCC_VERSION >= 2007) +# ifndef HAVE_LONG_DOUBLE +# define HAVE_LONG_DOUBLE 1 +# endif +#endif /* GCC >= 2.7 */ + +#endif /* ansidecl.h */ diff --git a/demangle/cp-demangle.c b/demangle/cp-demangle.c new file mode 100644 index 000000000..76c669a1a --- /dev/null +++ b/demangle/cp-demangle.c @@ -0,0 +1,4170 @@ +/* Demangler for IA64 / g++ V3 ABI. 
+ Copyright (C) 2000, 2001 Free Software Foundation, Inc. + Written by Alex Samuel . + + This file is part of GNU CC. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +*/ + +/* This file implements demangling of C++ names mangled according to + the IA64 / g++ V3 ABI. Use the cp_demangle function to + demangle a mangled name, or compile with the preprocessor macro + STANDALONE_DEMANGLER defined to create a demangling filter + executable (functionally similar to c++filt, but includes this + demangler only). */ + +#include + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_STDLIB_H +#include +#endif + +#ifdef HAVE_STRING_H +#include +#endif + +#include "vg_include.h" +#include "ansidecl.h" +#include "dyn-string.h" +#include "demangle.h" + +#ifndef STANDALONE +#define malloc(s) VG_(malloc)(VG_AR_DEMANGLE, s) +#define free(p) VG_(free)(VG_AR_DEMANGLE, p) +#define realloc(p,s) VG_(realloc)(VG_AR_DEMANGLE, p, s) +#endif + +/* If CP_DEMANGLE_DEBUG is defined, a trace of the grammar evaluation, + and other debugging output, will be generated. 
*/ +#ifdef CP_DEMANGLE_DEBUG +#define DEMANGLE_TRACE(PRODUCTION, DM) \ + fprintf (stderr, " -> %-24s at position %3d\n", \ + (PRODUCTION), current_position (DM)); +#else +#define DEMANGLE_TRACE(PRODUCTION, DM) +#endif + +/* Don't include , to prevent additional unresolved symbols + from being dragged into the C++ runtime library. */ +#define IS_DIGIT(CHAR) ((CHAR) >= '0' && (CHAR) <= '9') +#define IS_ALPHA(CHAR) \ + (((CHAR) >= 'a' && (CHAR) <= 'z') \ + || ((CHAR) >= 'A' && (CHAR) <= 'Z')) + +/* The prefix prepended by GCC to an identifier represnting the + anonymous namespace. */ +#define ANONYMOUS_NAMESPACE_PREFIX "_GLOBAL_" + +/* Character(s) to use for namespace separation in demangled output */ +#define NAMESPACE_SEPARATOR (dm->style == DMGL_JAVA ? "." : "::") + +/* If flag_verbose is zero, some simplifications will be made to the + output to make it easier to read and supress details that are + generally not of interest to the average C++ programmer. + Otherwise, the demangled representation will attempt to convey as + much information as the mangled form. */ +static int flag_verbose; + +/* If flag_strict is non-zero, demangle strictly according to the + specification -- don't demangle special g++ manglings. */ +static int flag_strict; + +/* String_list_t is an extended form of dyn_string_t which provides a + link field and a caret position for additions to the string. A + string_list_t may safely be cast to and used as a dyn_string_t. */ + +struct string_list_def +{ + /* The dyn_string; must be first. */ + struct dyn_string string; + + /* The position at which additional text is added to this string + (using the result_add* macros). This value is an offset from the + end of the string, not the beginning (and should be + non-positive). */ + int caret_position; + + /* The next string in the list. */ + struct string_list_def *next; +}; + +typedef struct string_list_def *string_list_t; + +/* Data structure representing a potential substitution. 
*/ + +struct substitution_def +{ + /* The demangled text of the substitution. */ + dyn_string_t text; + + /* Whether this substitution represents a template item. */ + int template_p : 1; +}; + +/* Data structure representing a template argument list. */ + +struct template_arg_list_def +{ + /* The next (lower) template argument list in the stack of currently + active template arguments. */ + struct template_arg_list_def *next; + + /* The first element in the list of template arguments in + left-to-right order. */ + string_list_t first_argument; + + /* The last element in the arguments lists. */ + string_list_t last_argument; +}; + +typedef struct template_arg_list_def *template_arg_list_t; + +/* Data structure to maintain the state of the current demangling. */ + +struct demangling_def +{ + /* The full mangled name being mangled. */ + const char *name; + + /* Pointer into name at the current position. */ + const char *next; + + /* Stack for strings containing demangled result generated so far. + Text is emitted to the topmost (first) string. */ + string_list_t result; + + /* The number of presently available substitutions. */ + int num_substitutions; + + /* The allocated size of the substitutions array. */ + int substitutions_allocated; + + /* An array of available substitutions. The number of elements in + the array is given by num_substitions, and the allocated array + size in substitutions_size. + + The most recent substition is at the end, so + + - `S_' corresponds to substititutions[num_substitutions - 1] + - `S0_' corresponds to substititutions[num_substitutions - 2] + + etc. */ + struct substitution_def *substitutions; + + /* The stack of template argument lists. */ + template_arg_list_t template_arg_lists; + + /* The most recently demangled source-name. */ + dyn_string_t last_source_name; + + /* Language style to use for demangled output. */ + int style; + + /* Set to non-zero iff this name is a constructor. 
The actual value + indicates what sort of constructor this is; see demangle.h. */ + enum gnu_v3_ctor_kinds is_constructor; + + /* Set to non-zero iff this name is a destructor. The actual value + indicates what sort of destructor this is; see demangle.h. */ + enum gnu_v3_dtor_kinds is_destructor; + +}; + +typedef struct demangling_def *demangling_t; + +/* This type is the standard return code from most functions. Values + other than STATUS_OK contain descriptive messages. */ +typedef const char *status_t; + +/* Special values that can be used as a status_t. */ +#define STATUS_OK NULL +#define STATUS_ERROR "Error." +#define STATUS_UNIMPLEMENTED "Unimplemented." +#define STATUS_INTERNAL_ERROR "Internal error." + +/* This status code indicates a failure in malloc or realloc. */ +static const char *const status_allocation_failed = "Allocation failed."; +#define STATUS_ALLOCATION_FAILED status_allocation_failed + +/* Non-zero if STATUS indicates that no error has occurred. */ +#define STATUS_NO_ERROR(STATUS) ((STATUS) == STATUS_OK) + +/* Evaluate EXPR, which must produce a status_t. If the status code + indicates an error, return from the current function with that + status code. 
*/ +#define RETURN_IF_ERROR(EXPR) \ + do \ + { \ + status_t s = EXPR; \ + if (!STATUS_NO_ERROR (s)) \ + return s; \ + } \ + while (0) + +static status_t int_to_dyn_string + PARAMS ((int, dyn_string_t)); +static string_list_t string_list_new + PARAMS ((int)); +static void string_list_delete + PARAMS ((string_list_t)); +static status_t result_add_separated_char + PARAMS ((demangling_t, int)); +static status_t result_push + PARAMS ((demangling_t)); +static string_list_t result_pop + PARAMS ((demangling_t)); +static int substitution_start + PARAMS ((demangling_t)); +static status_t substitution_add + PARAMS ((demangling_t, int, int)); +static dyn_string_t substitution_get + PARAMS ((demangling_t, int, int *)); +#ifdef CP_DEMANGLE_DEBUG +static void substitutions_print + PARAMS ((demangling_t, FILE *)); +#endif +static template_arg_list_t template_arg_list_new + PARAMS ((void)); +static void template_arg_list_delete + PARAMS ((template_arg_list_t)); +static void template_arg_list_add_arg + PARAMS ((template_arg_list_t, string_list_t)); +static string_list_t template_arg_list_get_arg + PARAMS ((template_arg_list_t, int)); +static void push_template_arg_list + PARAMS ((demangling_t, template_arg_list_t)); +static void pop_to_template_arg_list + PARAMS ((demangling_t, template_arg_list_t)); +#ifdef CP_DEMANGLE_DEBUG +static void template_arg_list_print + PARAMS ((template_arg_list_t, FILE *)); +#endif +static template_arg_list_t current_template_arg_list + PARAMS ((demangling_t)); +static demangling_t demangling_new + PARAMS ((const char *, int)); +static void demangling_delete + PARAMS ((demangling_t)); + +/* The last character of DS. Warning: DS is evaluated twice. */ +#define dyn_string_last_char(DS) \ + (dyn_string_buf (DS)[dyn_string_length (DS) - 1]) + +/* Append a space character (` ') to DS if it does not already end + with one. Evaluates to 1 on success, or 0 on allocation failure. 
*/ +#define dyn_string_append_space(DS) \ + ((dyn_string_length (DS) > 0 \ + && dyn_string_last_char (DS) != ' ') \ + ? dyn_string_append_char ((DS), ' ') \ + : 1) + +/* Returns the index of the current position in the mangled name. */ +#define current_position(DM) ((DM)->next - (DM)->name) + +/* Returns the character at the current position of the mangled name. */ +#define peek_char(DM) (*((DM)->next)) + +/* Returns the character one past the current position of the mangled + name. */ +#define peek_char_next(DM) \ + (peek_char (DM) == '\0' ? '\0' : (*((DM)->next + 1))) + +/* Returns the character at the current position, and advances the + current position to the next character. */ +#define next_char(DM) (*((DM)->next)++) + +/* Returns non-zero if the current position is the end of the mangled + name, i.e. one past the last character. */ +#define end_of_name_p(DM) (peek_char (DM) == '\0') + +/* Advances the current position by one character. */ +#define advance_char(DM) (++(DM)->next) + +/* Returns the string containing the current demangled result. */ +#define result_string(DM) (&(DM)->result->string) + +/* Returns the position at which new text is inserted into the + demangled result. */ +#define result_caret_pos(DM) \ + (result_length (DM) + \ + ((string_list_t) result_string (DM))->caret_position) + +/* Adds a dyn_string_t to the demangled result. */ +#define result_add_string(DM, STRING) \ + (dyn_string_insert (&(DM)->result->string, \ + result_caret_pos (DM), (STRING)) \ + ? STATUS_OK : STATUS_ALLOCATION_FAILED) + +/* Adds NUL-terminated string CSTR to the demangled result. */ +#define result_add(DM, CSTR) \ + (dyn_string_insert_cstr (&(DM)->result->string, \ + result_caret_pos (DM), (CSTR)) \ + ? STATUS_OK : STATUS_ALLOCATION_FAILED) + +/* Adds character CHAR to the demangled result. */ +#define result_add_char(DM, CHAR) \ + (dyn_string_insert_char (&(DM)->result->string, \ + result_caret_pos (DM), (CHAR)) \ + ? 
STATUS_OK : STATUS_ALLOCATION_FAILED) + +/* Inserts a dyn_string_t to the demangled result at position POS. */ +#define result_insert_string(DM, POS, STRING) \ + (dyn_string_insert (&(DM)->result->string, (POS), (STRING)) \ + ? STATUS_OK : STATUS_ALLOCATION_FAILED) + +/* Inserts NUL-terminated string CSTR to the demangled result at + position POS. */ +#define result_insert(DM, POS, CSTR) \ + (dyn_string_insert_cstr (&(DM)->result->string, (POS), (CSTR)) \ + ? STATUS_OK : STATUS_ALLOCATION_FAILED) + +/* Inserts character CHAR to the demangled result at position POS. */ +#define result_insert_char(DM, POS, CHAR) \ + (dyn_string_insert_char (&(DM)->result->string, (POS), (CHAR)) \ + ? STATUS_OK : STATUS_ALLOCATION_FAILED) + +/* The length of the current demangled result. */ +#define result_length(DM) \ + dyn_string_length (&(DM)->result->string) + +/* Appends a (less-than, greater-than) character to the result in DM + to (open, close) a template argument or parameter list. Appends a + space first if necessary to prevent spurious elision of angle + brackets with the previous character. */ +#define result_open_template_list(DM) result_add_separated_char(DM, '<') +#define result_close_template_list(DM) result_add_separated_char(DM, '>') + +/* Appends a base 10 representation of VALUE to DS. STATUS_OK on + success. On failure, deletes DS and returns an error code. */ + +static status_t +int_to_dyn_string (value, ds) + int value; + dyn_string_t ds; +{ + int i; + int mask = 1; + + /* Handle zero up front. */ + if (value == 0) + { + if (!dyn_string_append_char (ds, '0')) + return STATUS_ALLOCATION_FAILED; + return STATUS_OK; + } + + /* For negative numbers, emit a minus sign. */ + if (value < 0) + { + if (!dyn_string_append_char (ds, '-')) + return STATUS_ALLOCATION_FAILED; + value = -value; + } + + /* Find the power of 10 of the first digit. */ + i = value; + while (i > 9) + { + mask *= 10; + i /= 10; + } + + /* Write the digits. 
*/ + while (mask > 0) + { + int digit = value / mask; + + if (!dyn_string_append_char (ds, '0' + digit)) + return STATUS_ALLOCATION_FAILED; + + value -= digit * mask; + mask /= 10; + } + + return STATUS_OK; +} + +/* Creates a new string list node. The contents of the string are + empty, but the initial buffer allocation is LENGTH. The string + list node should be deleted with string_list_delete. Returns NULL + if allocation fails. */ + +static string_list_t +string_list_new (length) + int length; +{ + string_list_t s = (string_list_t) malloc (sizeof (struct string_list_def)); + s->caret_position = 0; + if (s == NULL) + return NULL; + if (!dyn_string_init ((dyn_string_t) s, length)) + return NULL; + return s; +} + +/* Deletes the entire string list starting at NODE. */ + +static void +string_list_delete (node) + string_list_t node; +{ + while (node != NULL) + { + string_list_t next = node->next; + dyn_string_delete ((dyn_string_t) node); + node = next; + } +} + +/* Appends CHARACTER to the demangled result. If the current trailing + character of the result is CHARACTER, a space is inserted first. */ + +static status_t +result_add_separated_char (dm, character) + demangling_t dm; + int character; +{ + char *result = dyn_string_buf (result_string (dm)); + int caret_pos = result_caret_pos (dm); + + /* Add a space if the last character is already the character we + want to add. */ + if (caret_pos > 0 && result[caret_pos - 1] == character) + RETURN_IF_ERROR (result_add_char (dm, ' ')); + /* Add the character. */ + RETURN_IF_ERROR (result_add_char (dm, character)); + + return STATUS_OK; +} + +/* Allocates and pushes a new string onto the demangled results stack + for DM. Subsequent demangling with DM will emit to the new string. + Returns STATUS_OK on success, STATUS_ALLOCATION_FAILED on + allocation failure. */ + +static status_t +result_push (dm) + demangling_t dm; +{ + string_list_t new_string = string_list_new (0); + if (new_string == NULL) + /* Allocation failed. 
*/ + return STATUS_ALLOCATION_FAILED; + + /* Link the new string to the front of the list of result strings. */ + new_string->next = (string_list_t) dm->result; + dm->result = new_string; + return STATUS_OK; +} + +/* Removes and returns the topmost element on the demangled results + stack for DM. The caller assumes ownership for the returned + string. */ + +static string_list_t +result_pop (dm) + demangling_t dm; +{ + string_list_t top = dm->result; + dm->result = top->next; + return top; +} + +/* Returns the current value of the caret for the result string. The + value is an offet from the end of the result string. */ + +static int +result_get_caret (dm) + demangling_t dm; +{ + return ((string_list_t) result_string (dm))->caret_position; +} + +/* Sets the value of the caret for the result string, counted as an + offet from the end of the result string. */ + +static void +result_set_caret (dm, position) + demangling_t dm; + int position; +{ + ((string_list_t) result_string (dm))->caret_position = position; +} + +/* Shifts the position of the next addition to the result by + POSITION_OFFSET. A negative value shifts the caret to the left. */ + +static void +result_shift_caret (dm, position_offset) + demangling_t dm; + int position_offset; +{ + ((string_list_t) result_string (dm))->caret_position += position_offset; +} + +/* Returns non-zero if the character that comes right before the place + where text will be added to the result is a space. In this case, + the caller should supress adding another space. */ + +static int +result_previous_char_is_space (dm) + demangling_t dm; +{ + char *result = dyn_string_buf (result_string (dm)); + int pos = result_caret_pos (dm); + return pos > 0 && result[pos - 1] == ' '; +} + +/* Returns the start position of a fragment of the demangled result + that will be a substitution candidate. Should be called at the + start of productions that can add substitutions. 
*/ + +static int +substitution_start (dm) + demangling_t dm; +{ + return result_caret_pos (dm); +} + +/* Adds the suffix of the current demangled result of DM starting at + START_POSITION as a potential substitution. If TEMPLATE_P is + non-zero, this potential substitution is a template-id. */ + +static status_t +substitution_add (dm, start_position, template_p) + demangling_t dm; + int start_position; + int template_p; +{ + dyn_string_t result = result_string (dm); + dyn_string_t substitution = dyn_string_new (0); + int i; + + if (substitution == NULL) + return STATUS_ALLOCATION_FAILED; + + /* Extract the substring of the current demangling result that + represents the subsitution candidate. */ + if (!dyn_string_substring (substitution, + result, start_position, result_caret_pos (dm))) + { + dyn_string_delete (substitution); + return STATUS_ALLOCATION_FAILED; + } + + /* If there's no room for the new entry, grow the array. */ + if (dm->substitutions_allocated == dm->num_substitutions) + { + size_t new_array_size; + if (dm->substitutions_allocated > 0) + dm->substitutions_allocated *= 2; + else + dm->substitutions_allocated = 2; + new_array_size = + sizeof (struct substitution_def) * dm->substitutions_allocated; + + dm->substitutions = (struct substitution_def *) + realloc (dm->substitutions, new_array_size); + if (dm->substitutions == NULL) + /* Realloc failed. */ + { + dyn_string_delete (substitution); + return STATUS_ALLOCATION_FAILED; + } + } + + /* Add the substitution to the array. */ + i = dm->num_substitutions++; + dm->substitutions[i].text = substitution; + dm->substitutions[i].template_p = template_p; + +#ifdef CP_DEMANGLE_DEBUG + substitutions_print (dm, stderr); +#endif + + return STATUS_OK; +} + +/* Returns the Nth-most-recent substitution. Sets *TEMPLATE_P to + non-zero if the substitution is a template-id, zero otherwise. + N is numbered from zero. DM retains ownership of the returned + string. 
If N is negative, or equal to or greater than the current + number of substitution candidates, returns NULL. */ + +static dyn_string_t +substitution_get (dm, n, template_p) + demangling_t dm; + int n; + int *template_p; +{ + struct substitution_def *sub; + + /* Make sure N is in the valid range. */ + if (n < 0 || n >= dm->num_substitutions) + return NULL; + + sub = &(dm->substitutions[n]); + *template_p = sub->template_p; + return sub->text; +} + +#ifdef CP_DEMANGLE_DEBUG +/* Debugging routine to print the current substitutions to FP. */ + +static void +substitutions_print (dm, fp) + demangling_t dm; + FILE *fp; +{ + int seq_id; + int num = dm->num_substitutions; + + fprintf (fp, "SUBSTITUTIONS:\n"); + for (seq_id = -1; seq_id < num - 1; ++seq_id) + { + int template_p; + dyn_string_t text = substitution_get (dm, seq_id + 1, &template_p); + + if (seq_id == -1) + fprintf (fp, " S_ "); + else + fprintf (fp, " S%d_", seq_id); + fprintf (fp, " %c: %s\n", template_p ? '*' : ' ', dyn_string_buf (text)); + } +} + +#endif /* CP_DEMANGLE_DEBUG */ + +/* Creates a new template argument list. Returns NULL if allocation + fails. */ + +static template_arg_list_t +template_arg_list_new () +{ + template_arg_list_t new_list = + (template_arg_list_t) malloc (sizeof (struct template_arg_list_def)); + if (new_list == NULL) + return NULL; + /* Initialize the new list to have no arguments. */ + new_list->first_argument = NULL; + new_list->last_argument = NULL; + /* Return the new list. */ + return new_list; +} + +/* Deletes a template argument list and the template arguments it + contains. */ + +static void +template_arg_list_delete (list) + template_arg_list_t list; +{ + /* If there are any arguments on LIST, delete them. */ + if (list->first_argument != NULL) + string_list_delete (list->first_argument); + /* Delete LIST. */ + free (list); +} + +/* Adds ARG to the template argument list ARG_LIST. 
*/ + +static void +template_arg_list_add_arg (arg_list, arg) + template_arg_list_t arg_list; + string_list_t arg; +{ + if (arg_list->first_argument == NULL) + /* If there were no arguments before, ARG is the first one. */ + arg_list->first_argument = arg; + else + /* Make ARG the last argument on the list. */ + arg_list->last_argument->next = arg; + /* Make ARG the last on the list. */ + arg_list->last_argument = arg; + arg->next = NULL; +} + +/* Returns the template arugment at position INDEX in template + argument list ARG_LIST. */ + +static string_list_t +template_arg_list_get_arg (arg_list, index) + template_arg_list_t arg_list; + int index; +{ + string_list_t arg = arg_list->first_argument; + /* Scan down the list of arguments to find the one at position + INDEX. */ + while (index--) + { + arg = arg->next; + if (arg == NULL) + /* Ran out of arguments before INDEX hit zero. That's an + error. */ + return NULL; + } + /* Return the argument at position INDEX. */ + return arg; +} + +/* Pushes ARG_LIST onto the top of the template argument list stack. */ + +static void +push_template_arg_list (dm, arg_list) + demangling_t dm; + template_arg_list_t arg_list; +{ + arg_list->next = dm->template_arg_lists; + dm->template_arg_lists = arg_list; +#ifdef CP_DEMANGLE_DEBUG + fprintf (stderr, " ** pushing template arg list\n"); + template_arg_list_print (arg_list, stderr); +#endif +} + +/* Pops and deletes elements on the template argument list stack until + arg_list is the topmost element. If arg_list is NULL, all elements + are popped and deleted. */ + +static void +pop_to_template_arg_list (dm, arg_list) + demangling_t dm; + template_arg_list_t arg_list; +{ + while (dm->template_arg_lists != arg_list) + { + template_arg_list_t top = dm->template_arg_lists; + /* Disconnect the topmost element from the list. */ + dm->template_arg_lists = top->next; + /* Delete the popped element. 
*/ + template_arg_list_delete (top); +#ifdef CP_DEMANGLE_DEBUG + fprintf (stderr, " ** removing template arg list\n"); +#endif + } +} + +#ifdef CP_DEMANGLE_DEBUG + +/* Prints the contents of ARG_LIST to FP. */ + +static void +template_arg_list_print (arg_list, fp) + template_arg_list_t arg_list; + FILE *fp; +{ + string_list_t arg; + int index = -1; + + fprintf (fp, "TEMPLATE ARGUMENT LIST:\n"); + for (arg = arg_list->first_argument; arg != NULL; arg = arg->next) + { + if (index == -1) + fprintf (fp, " T_ : "); + else + fprintf (fp, " T%d_ : ", index); + ++index; + fprintf (fp, "%s\n", dyn_string_buf ((dyn_string_t) arg)); + } +} + +#endif /* CP_DEMANGLE_DEBUG */ + +/* Returns the topmost element on the stack of template argument + lists. If there is no list of template arguments, returns NULL. */ + +static template_arg_list_t +current_template_arg_list (dm) + demangling_t dm; +{ + return dm->template_arg_lists; +} + +/* Allocates a demangling_t object for demangling mangled NAME. A new + result must be pushed before the returned object can be used. + Returns NULL if allocation fails. */ + +static demangling_t +demangling_new (name, style) + const char *name; + int style; +{ + demangling_t dm; + dm = (demangling_t) malloc (sizeof (struct demangling_def)); + if (dm == NULL) + return NULL; + + dm->name = name; + dm->next = name; + dm->result = NULL; + dm->num_substitutions = 0; + dm->substitutions_allocated = 10; + dm->template_arg_lists = NULL; + dm->last_source_name = dyn_string_new (0); + if (dm->last_source_name == NULL) + return NULL; + dm->substitutions = (struct substitution_def *) + malloc (dm->substitutions_allocated * sizeof (struct substitution_def)); + if (dm->substitutions == NULL) + { + dyn_string_delete (dm->last_source_name); + return NULL; + } + dm->style = style; + dm->is_constructor = 0; + dm->is_destructor = 0; + + return dm; +} + +/* Deallocates a demangling_t object and all memory associated with + it. 
*/ + +static void +demangling_delete (dm) + demangling_t dm; +{ + int i; + template_arg_list_t arg_list = dm->template_arg_lists; + + /* Delete the stack of template argument lists. */ + while (arg_list != NULL) + { + template_arg_list_t next = arg_list->next; + template_arg_list_delete (arg_list); + arg_list = next; + } + /* Delete the list of substitutions. */ + for (i = dm->num_substitutions; --i >= 0; ) + dyn_string_delete (dm->substitutions[i].text); + free (dm->substitutions); + /* Delete the demangled result. */ + string_list_delete (dm->result); + /* Delete the stored identifier name. */ + dyn_string_delete (dm->last_source_name); + /* Delete the context object itself. */ + free (dm); +} + +/* These functions demangle an alternative of the corresponding + production in the mangling spec. The first argument of each is a + demangling context structure for the current demangling + operation. Most emit demangled text directly to the topmost result + string on the result string stack in the demangling context + structure. 
*/ + +static status_t demangle_char + PARAMS ((demangling_t, int)); +static status_t demangle_mangled_name + PARAMS ((demangling_t)); +static status_t demangle_encoding + PARAMS ((demangling_t)); +static status_t demangle_name + PARAMS ((demangling_t, int *)); +static status_t demangle_nested_name + PARAMS ((demangling_t, int *)); +static status_t demangle_prefix_v3 + PARAMS ((demangling_t, int *)); +static status_t demangle_unqualified_name + PARAMS ((demangling_t, int *)); +static status_t demangle_source_name + PARAMS ((demangling_t)); +static status_t demangle_number + PARAMS ((demangling_t, int *, int, int)); +static status_t demangle_number_literally + PARAMS ((demangling_t, dyn_string_t, int, int)); +static status_t demangle_identifier + PARAMS ((demangling_t, int, dyn_string_t)); +static status_t demangle_operator_name + PARAMS ((demangling_t, int, int *)); +static status_t demangle_nv_offset + PARAMS ((demangling_t)); +static status_t demangle_v_offset + PARAMS ((demangling_t)); +static status_t demangle_call_offset + PARAMS ((demangling_t)); +static status_t demangle_special_name + PARAMS ((demangling_t)); +static status_t demangle_ctor_dtor_name + PARAMS ((demangling_t)); +static status_t demangle_type_ptr + PARAMS ((demangling_t, int *, int)); +static status_t demangle_type + PARAMS ((demangling_t)); +static status_t demangle_CV_qualifiers + PARAMS ((demangling_t, dyn_string_t)); +static status_t demangle_builtin_type + PARAMS ((demangling_t)); +static status_t demangle_function_type + PARAMS ((demangling_t, int *)); +static status_t demangle_bare_function_type + PARAMS ((demangling_t, int *)); +static status_t demangle_class_enum_type + PARAMS ((demangling_t, int *)); +static status_t demangle_array_type + PARAMS ((demangling_t, int *)); +static status_t demangle_template_param + PARAMS ((demangling_t)); +static status_t demangle_template_args_1 + PARAMS ((demangling_t, template_arg_list_t)); +static status_t demangle_template_args + PARAMS 
((demangling_t)); +static status_t demangle_literal + PARAMS ((demangling_t)); +static status_t demangle_template_arg + PARAMS ((demangling_t)); +static status_t demangle_expression_v3 + PARAMS ((demangling_t)); +static status_t demangle_scope_expression + PARAMS ((demangling_t)); +static status_t demangle_expr_primary + PARAMS ((demangling_t)); +static status_t demangle_substitution + PARAMS ((demangling_t, int *)); +static status_t demangle_local_name + PARAMS ((demangling_t)); +static status_t demangle_discriminator + PARAMS ((demangling_t, int)); +static status_t cp_demangle + PARAMS ((const char *, dyn_string_t, int)); +#ifdef IN_LIBGCC2 +static status_t cp_demangle_type + PARAMS ((const char*, dyn_string_t)); +#endif + +/* When passed to demangle_bare_function_type, indicates that the + function's return type is not encoded before its parameter types. */ +#define BFT_NO_RETURN_TYPE NULL + +/* Check that the next character is C. If so, consume it. If not, + return an error. */ + +static status_t +demangle_char (dm, c) + demangling_t dm; + int c; +{ + static char *error_message = NULL; + + if (peek_char (dm) == c) + { + advance_char (dm); + return STATUS_OK; + } + else + { + vg_assert (0); + /* + if (error_message == NULL) + error_message = strdup ("Expected ?"); + error_message[9] = c; + return error_message; + */ + } +} + +/* Demangles and emits a . + + ::= _Z */ + +static status_t +demangle_mangled_name (dm) + demangling_t dm; +{ + DEMANGLE_TRACE ("mangled-name", dm); + RETURN_IF_ERROR (demangle_char (dm, '_')); + RETURN_IF_ERROR (demangle_char (dm, 'Z')); + RETURN_IF_ERROR (demangle_encoding (dm)); + return STATUS_OK; +} + +/* Demangles and emits an . + + ::= + ::= + ::= */ + +static status_t +demangle_encoding (dm) + demangling_t dm; +{ + int encode_return_type; + int start_position; + template_arg_list_t old_arg_list = current_template_arg_list (dm); + char peek = peek_char (dm); + + DEMANGLE_TRACE ("encoding", dm); + + /* Remember where the name starts. 
If it turns out to be a template + function, we'll have to insert the return type here. */ + start_position = result_caret_pos (dm); + + if (peek == 'G' || peek == 'T') + RETURN_IF_ERROR (demangle_special_name (dm)); + else + { + /* Now demangle the name. */ + RETURN_IF_ERROR (demangle_name (dm, &encode_return_type)); + + /* If there's anything left, the name was a function name, with + maybe its return type, and its parameter types, following. */ + if (!end_of_name_p (dm) + && peek_char (dm) != 'E') + { + if (encode_return_type) + /* Template functions have their return type encoded. The + return type should be inserted at start_position. */ + RETURN_IF_ERROR + (demangle_bare_function_type (dm, &start_position)); + else + /* Non-template functions don't have their return type + encoded. */ + RETURN_IF_ERROR + (demangle_bare_function_type (dm, BFT_NO_RETURN_TYPE)); + } + } + + /* Pop off template argument lists that were built during the + mangling of this name, to restore the old template context. */ + pop_to_template_arg_list (dm, old_arg_list); + + return STATUS_OK; +} + +/* Demangles and emits a . + + ::= + ::= + ::= + ::= + + ::= + ::= St # ::std:: + + + ::= + ::= */ + +static status_t +demangle_name (dm, encode_return_type) + demangling_t dm; + int *encode_return_type; +{ + int start = substitution_start (dm); + char peek = peek_char (dm); + int is_std_substitution = 0; + + /* Generally, the return type is encoded if the function is a + template-id, and suppressed otherwise. There are a few cases, + though, in which the return type is not encoded even for a + templated function. In these cases, this flag is set. */ + int suppress_return_type = 0; + + DEMANGLE_TRACE ("name", dm); + + switch (peek) + { + case 'N': + /* This is a . 
*/ + RETURN_IF_ERROR (demangle_nested_name (dm, encode_return_type)); + break; + + case 'Z': + RETURN_IF_ERROR (demangle_local_name (dm)); + *encode_return_type = 0; + break; + + case 'S': + /* The `St' substitution allows a name nested in std:: to appear + without being enclosed in a nested name. */ + if (peek_char_next (dm) == 't') + { + (void) next_char (dm); + (void) next_char (dm); + RETURN_IF_ERROR (result_add (dm, "std::")); + RETURN_IF_ERROR + (demangle_unqualified_name (dm, &suppress_return_type)); + is_std_substitution = 1; + } + else + RETURN_IF_ERROR (demangle_substitution (dm, encode_return_type)); + /* Check if a template argument list immediately follows. + If so, then we just demangled an . */ + if (peek_char (dm) == 'I') + { + /* A template name of the form std:: is a + substitution candidate. */ + if (is_std_substitution) + RETURN_IF_ERROR (substitution_add (dm, start, 0)); + /* Demangle the here. */ + RETURN_IF_ERROR (demangle_template_args (dm)); + *encode_return_type = !suppress_return_type; + } + else + *encode_return_type = 0; + + break; + + default: + /* This is an or . */ + RETURN_IF_ERROR (demangle_unqualified_name (dm, &suppress_return_type)); + + /* If the is followed by template args, this + is an . */ + if (peek_char (dm) == 'I') + { + /* Add a substitution for the unqualified template name. */ + RETURN_IF_ERROR (substitution_add (dm, start, 0)); + + RETURN_IF_ERROR (demangle_template_args (dm)); + *encode_return_type = !suppress_return_type; + } + else + *encode_return_type = 0; + + break; + } + + return STATUS_OK; +} + +/* Demangles and emits a . + + ::= N [] E */ + +static status_t +demangle_nested_name (dm, encode_return_type) + demangling_t dm; + int *encode_return_type; +{ + char peek; + + DEMANGLE_TRACE ("nested-name", dm); + + RETURN_IF_ERROR (demangle_char (dm, 'N')); + + peek = peek_char (dm); + if (peek == 'r' || peek == 'V' || peek == 'K') + { + dyn_string_t cv_qualifiers; + status_t status; + + /* Snarf up CV qualifiers. 
*/ + cv_qualifiers = dyn_string_new (24); + if (cv_qualifiers == NULL) + return STATUS_ALLOCATION_FAILED; + demangle_CV_qualifiers (dm, cv_qualifiers); + + /* Emit them, preceded by a space. */ + status = result_add_char (dm, ' '); + if (STATUS_NO_ERROR (status)) + status = result_add_string (dm, cv_qualifiers); + /* The CV qualifiers that occur in a will be + qualifiers for member functions. These are placed at the end + of the function. Therefore, shift the caret to the left by + the length of the qualifiers, so other text is inserted + before them and they stay at the end. */ + result_shift_caret (dm, -dyn_string_length (cv_qualifiers) - 1); + /* Clean up. */ + dyn_string_delete (cv_qualifiers); + RETURN_IF_ERROR (status); + } + + RETURN_IF_ERROR (demangle_prefix_v3 (dm, encode_return_type)); + /* No need to demangle the final ; demangle_prefix + will handle it. */ + RETURN_IF_ERROR (demangle_char (dm, 'E')); + + return STATUS_OK; +} + +/* Demangles and emits a . + + ::= + ::= + ::= # empty + ::= + + ::= + ::= */ + +static status_t +demangle_prefix_v3 (dm, encode_return_type) + demangling_t dm; + int *encode_return_type; +{ + int start = substitution_start (dm); + int nested = 0; + + /* ENCODE_RETURN_TYPE is updated as we decend the nesting chain. + After , it is set to non-zero; after everything + else it is set to zero. */ + + /* Generally, the return type is encoded if the function is a + template-id, and suppressed otherwise. There are a few cases, + though, in which the return type is not encoded even for a + templated function. In these cases, this flag is set. */ + int suppress_return_type = 0; + + DEMANGLE_TRACE ("prefix", dm); + + while (1) + { + char peek; + + if (end_of_name_p (dm)) + return "Unexpected end of name in ."; + + peek = peek_char (dm); + + /* We'll initialize suppress_return_type to false, and set it to true + if we end up demangling a constructor name. 
However, make + sure we're not actually about to demangle template arguments + -- if so, this is the following a + , so we'll want the previous flag value + around. */ + if (peek != 'I') + suppress_return_type = 0; + + if (IS_DIGIT ((unsigned char) peek) + || (peek >= 'a' && peek <= 'z') + || peek == 'C' || peek == 'D' + || peek == 'S') + { + /* We have another level of scope qualification. */ + if (nested) + RETURN_IF_ERROR (result_add (dm, NAMESPACE_SEPARATOR)); + else + nested = 1; + + if (peek == 'S') + /* The substitution determines whether this is a + template-id. */ + RETURN_IF_ERROR (demangle_substitution (dm, encode_return_type)); + else + { + /* It's just a name. */ + RETURN_IF_ERROR + (demangle_unqualified_name (dm, &suppress_return_type)); + *encode_return_type = 0; + } + } + else if (peek == 'Z') + RETURN_IF_ERROR (demangle_local_name (dm)); + else if (peek == 'I') + { + RETURN_IF_ERROR (demangle_template_args (dm)); + + /* Now we want to indicate to the caller that we've + demangled template arguments, thus the prefix was a + . That's so that the caller knows to + demangle the function's return type, if this turns out to + be a function name. But, if it's a member template + constructor or a templated conversion operator, report it + as untemplated. Those never get encoded return types. */ + *encode_return_type = !suppress_return_type; + } + else if (peek == 'E') + /* All done. */ + return STATUS_OK; + else + return "Unexpected character in ."; + + if (peek != 'S' + && peek_char (dm) != 'E') + /* Add a new substitution for the prefix thus far. */ + RETURN_IF_ERROR (substitution_add (dm, start, *encode_return_type)); + } +} + +/* Demangles and emits an . If this + is for a special function type that should never + have its return type encoded (particularly, a constructor or + conversion operator), *SUPPRESS_RETURN_TYPE is set to 1; otherwise, + it is set to zero. 
+ + ::= + ::= + ::= */ + +static status_t +demangle_unqualified_name (dm, suppress_return_type) + demangling_t dm; + int *suppress_return_type; +{ + char peek = peek_char (dm); + + DEMANGLE_TRACE ("unqualified-name", dm); + + /* By default, don't force suppression of the return type (though + non-template functions still don't get a return type encoded). */ + *suppress_return_type = 0; + + if (IS_DIGIT ((unsigned char) peek)) + RETURN_IF_ERROR (demangle_source_name (dm)); + else if (peek >= 'a' && peek <= 'z') + { + int num_args; + + /* Conversion operators never have a return type encoded. */ + if (peek == 'c' && peek_char_next (dm) == 'v') + *suppress_return_type = 1; + + RETURN_IF_ERROR (demangle_operator_name (dm, 0, &num_args)); + } + else if (peek == 'C' || peek == 'D') + { + /* Constructors never have a return type encoded. */ + if (peek == 'C') + *suppress_return_type = 1; + + RETURN_IF_ERROR (demangle_ctor_dtor_name (dm)); + } + else + return "Unexpected character in ."; + + return STATUS_OK; +} + +/* Demangles and emits . + + ::= */ + +static status_t +demangle_source_name (dm) + demangling_t dm; +{ + int length; + + DEMANGLE_TRACE ("source-name", dm); + + /* Decode the length of the identifier. */ + RETURN_IF_ERROR (demangle_number (dm, &length, 10, 0)); + if (length == 0) + return "Zero length in ."; + + /* Now the identifier itself. It's placed into last_source_name, + where it can be used to build a constructor or destructor name. */ + RETURN_IF_ERROR (demangle_identifier (dm, length, + dm->last_source_name)); + + /* Emit it. */ + RETURN_IF_ERROR (result_add_string (dm, dm->last_source_name)); + + return STATUS_OK; +} + +/* Demangles a number, either a or a at the + current position, consuming all consecutive digit characters. Sets + *VALUE to the resulting numberand returns STATUS_OK. The number is + interpreted as BASE, which must be either 10 or 36. If IS_SIGNED + is non-zero, negative numbers -- prefixed with `n' -- are accepted. 
+ + ::= [n] + + ::= */ + +static status_t +demangle_number (dm, value, base, is_signed) + demangling_t dm; + int *value; + int base; + int is_signed; +{ + dyn_string_t number = dyn_string_new (10); + + DEMANGLE_TRACE ("number", dm); + + if (number == NULL) + return STATUS_ALLOCATION_FAILED; + + demangle_number_literally (dm, number, base, is_signed); + /* + *value = strtol (dyn_string_buf (number), NULL, base); + */ + /* vg_assert( base == 10 ); */ + if ( base != 10 ) { + dyn_string_delete(number); + return STATUS_UNIMPLEMENTED; + } + + *value = VG_(atoll) (dyn_string_buf (number)); + dyn_string_delete (number); + + return STATUS_OK; +} + +/* Demangles a number at the current position. The digits (and minus + sign, if present) that make up the number are appended to STR. + Only base-BASE digits are accepted; BASE must be either 10 or 36. + If IS_SIGNED, negative numbers -- prefixed with `n' -- are + accepted. Does not consume a trailing underscore or other + terminating character. */ + +static status_t +demangle_number_literally (dm, str, base, is_signed) + demangling_t dm; + dyn_string_t str; + int base; + int is_signed; +{ + DEMANGLE_TRACE ("number*", dm); + + if (base != 10 && base != 36) + return STATUS_INTERNAL_ERROR; + + /* An `n' denotes a negative number. */ + if (is_signed && peek_char (dm) == 'n') + { + /* Skip past the n. */ + advance_char (dm); + /* The normal way to write a negative number is with a minus + sign. */ + if (!dyn_string_append_char (str, '-')) + return STATUS_ALLOCATION_FAILED; + } + + /* Loop until we hit a non-digit. */ + while (1) + { + char peek = peek_char (dm); + if (IS_DIGIT ((unsigned char) peek) + || (base == 36 && peek >= 'A' && peek <= 'Z')) + { + /* Accumulate digits. */ + if (!dyn_string_append_char (str, next_char (dm))) + return STATUS_ALLOCATION_FAILED; + } + else + /* Not a digit? All done. 
*/ + break; + } + + return STATUS_OK; +} + +/* Demangles an identifier at the current position of LENGTH + characters and places it in IDENTIFIER. */ + +static status_t +demangle_identifier (dm, length, identifier) + demangling_t dm; + int length; + dyn_string_t identifier; +{ + DEMANGLE_TRACE ("identifier", dm); + + dyn_string_clear (identifier); + if (!dyn_string_resize (identifier, length)) + return STATUS_ALLOCATION_FAILED; + + while (length-- > 0) + { + if (end_of_name_p (dm)) + return "Unexpected end of name in ."; + if (!dyn_string_append_char (identifier, next_char (dm))) + return STATUS_ALLOCATION_FAILED; + } + + /* GCC encodes anonymous namespaces using a `_GLOBAL_[_.$]N.' + followed by the source file name and some random characters. + Unless we're in strict mode, decipher these names appropriately. */ + if (!flag_strict) + { + char *name = dyn_string_buf (identifier); + int prefix_length = VG_(strlen) (ANONYMOUS_NAMESPACE_PREFIX); + + /* Compare the first, fixed part. */ + if (VG_(strncmp) (name, ANONYMOUS_NAMESPACE_PREFIX, prefix_length) == 0) + { + name += prefix_length; + /* The next character might be a period, an underscore, or + dollar sign, depending on the target architecture's + assembler's capabilities. After that comes an `N'. */ + if ((*name == '.' || *name == '_' || *name == '$') + && *(name + 1) == 'N') + /* This looks like the anonymous namespace identifier. + Replace it with something comprehensible. */ + dyn_string_copy_cstr (identifier, "(anonymous namespace)"); + } + } + + return STATUS_OK; +} + +/* Demangles and emits an . If SHORT_NAME is non-zero, + the short form is emitted; otherwise the full source form + (`operator +' etc.) is emitted. *NUM_ARGS is set to the number of + operands that the operator takes. 
+ + + ::= nw # new + ::= na # new[] + ::= dl # delete + ::= da # delete[] + ::= ps # + (unary) + ::= ng # - (unary) + ::= ad # & (unary) + ::= de # * (unary) + ::= co # ~ + ::= pl # + + ::= mi # - + ::= ml # * + ::= dv # / + ::= rm # % + ::= an # & + ::= or # | + ::= eo # ^ + ::= aS # = + ::= pL # += + ::= mI # -= + ::= mL # *= + ::= dV # /= + ::= rM # %= + ::= aN # &= + ::= oR # |= + ::= eO # ^= + ::= ls # << + ::= rs # >> + ::= lS # <<= + ::= rS # >>= + ::= eq # == + ::= ne # != + ::= lt # < + ::= gt # > + ::= le # <= + ::= ge # >= + ::= nt # ! + ::= aa # && + ::= oo # || + ::= pp # ++ + ::= mm # -- + ::= cm # , + ::= pm # ->* + ::= pt # -> + ::= cl # () + ::= ix # [] + ::= qu # ? + ::= sz # sizeof + ::= cv # cast + ::= v [0-9] # vendor extended operator */ + +static status_t +demangle_operator_name (dm, short_name, num_args) + demangling_t dm; + int short_name; + int *num_args; +{ + struct operator_code + { + /* The mangled code for this operator. */ + const char *const code; + /* The source name of this operator. */ + const char *const name; + /* The number of arguments this operator takes. */ + const int num_args; + }; + + static const struct operator_code operators[] = + { + { "aN", "&=" , 2 }, + { "aS", "=" , 2 }, + { "aa", "&&" , 2 }, + { "ad", "&" , 1 }, + { "an", "&" , 2 }, + { "cl", "()" , 0 }, + { "cm", "," , 2 }, + { "co", "~" , 1 }, + { "dV", "/=" , 2 }, + { "da", " delete[]", 1 }, + { "de", "*" , 1 }, + { "dl", " delete" , 1 }, + { "dv", "/" , 2 }, + { "eO", "^=" , 2 }, + { "eo", "^" , 2 }, + { "eq", "==" , 2 }, + { "ge", ">=" , 2 }, + { "gt", ">" , 2 }, + { "ix", "[]" , 2 }, + { "lS", "<<=" , 2 }, + { "le", "<=" , 2 }, + { "ls", "<<" , 2 }, + { "lt", "<" , 2 }, + { "mI", "-=" , 2 }, + { "mL", "*=" , 2 }, + { "mi", "-" , 2 }, + { "ml", "*" , 2 }, + { "mm", "--" , 1 }, + { "na", " new[]" , 1 }, + { "ne", "!=" , 2 }, + { "ng", "-" , 1 }, + { "nt", "!" 
, 1 }, + { "nw", " new" , 1 }, + { "oR", "|=" , 2 }, + { "oo", "||" , 2 }, + { "or", "|" , 2 }, + { "pL", "+=" , 2 }, + { "pl", "+" , 2 }, + { "pm", "->*" , 2 }, + { "pp", "++" , 1 }, + { "ps", "+" , 1 }, + { "pt", "->" , 2 }, + { "qu", "?" , 3 }, + { "rM", "%=" , 2 }, + { "rS", ">>=" , 2 }, + { "rm", "%" , 2 }, + { "rs", ">>" , 2 }, + { "sz", " sizeof" , 1 } + }; + + const int num_operators = + sizeof (operators) / sizeof (struct operator_code); + + int c0 = next_char (dm); + int c1 = next_char (dm); + const struct operator_code* p1 = operators; + const struct operator_code* p2 = operators + num_operators; + + DEMANGLE_TRACE ("operator-name", dm); + + /* Is this a vendor-extended operator? */ + if (c0 == 'v' && IS_DIGIT (c1)) + { + RETURN_IF_ERROR (result_add (dm, "operator ")); + RETURN_IF_ERROR (demangle_source_name (dm)); + *num_args = 0; + return STATUS_OK; + } + + /* Is this a conversion operator? */ + if (c0 == 'c' && c1 == 'v') + { + RETURN_IF_ERROR (result_add (dm, "operator ")); + /* Demangle the converted-to type. */ + RETURN_IF_ERROR (demangle_type (dm)); + *num_args = 0; + return STATUS_OK; + } + + /* Perform a binary search for the operator code. */ + while (1) + { + const struct operator_code* p = p1 + (p2 - p1) / 2; + char match0 = p->code[0]; + char match1 = p->code[1]; + + if (c0 == match0 && c1 == match1) + /* Found it. */ + { + if (!short_name) + RETURN_IF_ERROR (result_add (dm, "operator")); + RETURN_IF_ERROR (result_add (dm, p->name)); + *num_args = p->num_args; + + return STATUS_OK; + } + + if (p == p1) + /* Couldn't find it. */ + return "Unknown code in ."; + + /* Try again. */ + if (c0 < match0 || (c0 == match0 && c1 < match1)) + p2 = p; + else + p1 = p; + } +} + +/* Demangles and omits an . + + ::= # non-virtual base override */ + +static status_t +demangle_nv_offset (dm) + demangling_t dm; +{ + dyn_string_t number; + status_t status = STATUS_OK; + + DEMANGLE_TRACE ("h-offset", dm); + + /* Demangle the offset. 
*/ + number = dyn_string_new (4); + if (number == NULL) + return STATUS_ALLOCATION_FAILED; + demangle_number_literally (dm, number, 10, 1); + + /* Don't display the offset unless in verbose mode. */ + if (flag_verbose) + { + status = result_add (dm, " [nv:"); + if (STATUS_NO_ERROR (status)) + status = result_add_string (dm, number); + if (STATUS_NO_ERROR (status)) + status = result_add_char (dm, ']'); + } + + /* Clean up. */ + dyn_string_delete (number); + RETURN_IF_ERROR (status); + return STATUS_OK; +} + +/* Demangles and emits a . + + ::= _ + # virtual base override, with vcall offset */ + +static status_t +demangle_v_offset (dm) + demangling_t dm; +{ + dyn_string_t number; + status_t status = STATUS_OK; + + DEMANGLE_TRACE ("v-offset", dm); + + /* Demangle the offset. */ + number = dyn_string_new (4); + if (number == NULL) + return STATUS_ALLOCATION_FAILED; + demangle_number_literally (dm, number, 10, 1); + + /* Don't display the offset unless in verbose mode. */ + if (flag_verbose) + { + status = result_add (dm, " [v:"); + if (STATUS_NO_ERROR (status)) + status = result_add_string (dm, number); + if (STATUS_NO_ERROR (status)) + result_add_char (dm, ','); + } + dyn_string_delete (number); + RETURN_IF_ERROR (status); + + /* Demangle the separator. */ + RETURN_IF_ERROR (demangle_char (dm, '_')); + + /* Demangle the vcall offset. */ + number = dyn_string_new (4); + if (number == NULL) + return STATUS_ALLOCATION_FAILED; + demangle_number_literally (dm, number, 10, 1); + + /* Don't display the vcall offset unless in verbose mode. */ + if (flag_verbose) + { + status = result_add_string (dm, number); + if (STATUS_NO_ERROR (status)) + status = result_add_char (dm, ']'); + } + dyn_string_delete (number); + RETURN_IF_ERROR (status); + + return STATUS_OK; +} + +/* Demangles and emits a . 
+ + ::= h _ + ::= v _ */ + +static status_t +demangle_call_offset (dm) + demangling_t dm; +{ + DEMANGLE_TRACE ("call-offset", dm); + + switch (peek_char (dm)) + { + case 'h': + advance_char (dm); + /* Demangle the offset. */ + RETURN_IF_ERROR (demangle_nv_offset (dm)); + /* Demangle the separator. */ + RETURN_IF_ERROR (demangle_char (dm, '_')); + break; + + case 'v': + advance_char (dm); + /* Demangle the offset. */ + RETURN_IF_ERROR (demangle_v_offset (dm)); + /* Demangle the separator. */ + RETURN_IF_ERROR (demangle_char (dm, '_')); + break; + + default: + return "Unrecognized ."; + } + + return STATUS_OK; +} + +/* Demangles and emits a . + + ::= GV # Guard variable + ::= TV # virtual table + ::= TT # VTT + ::= TI # typeinfo structure + ::= TS # typeinfo name + + Other relevant productions include thunks: + + ::= T + # base is the nominal target function of thunk + + ::= Tc + # base is the nominal target function of thunk + # first call-offset is 'this' adjustment + # second call-offset is result adjustment + + where + + ::= h _ + ::= v _ + + Also demangles the special g++ manglings, + + ::= TC _ + # construction vtable + ::= TF # typeinfo function (old ABI only) + ::= TJ # java Class structure */ + +static status_t +demangle_special_name (dm) + demangling_t dm; +{ + dyn_string_t number; + int unused; + char peek = peek_char (dm); + + DEMANGLE_TRACE ("special-name", dm); + + if (peek == 'G') + { + /* Consume the G. */ + advance_char (dm); + switch (peek_char (dm)) + { + case 'V': + /* A guard variable name. */ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "guard variable for ")); + RETURN_IF_ERROR (demangle_name (dm, &unused)); + break; + + case 'R': + /* A reference temporary. 
*/ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "reference temporary for ")); + RETURN_IF_ERROR (demangle_name (dm, &unused)); + break; + + default: + return "Unrecognized ."; + } + } + else if (peek == 'T') + { + status_t status = STATUS_OK; + + /* Other C++ implementation miscellania. Consume the T. */ + advance_char (dm); + + switch (peek_char (dm)) + { + case 'V': + /* Virtual table. */ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "vtable for ")); + RETURN_IF_ERROR (demangle_type (dm)); + break; + + case 'T': + /* VTT structure. */ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "VTT for ")); + RETURN_IF_ERROR (demangle_type (dm)); + break; + + case 'I': + /* Typeinfo structure. */ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "typeinfo for ")); + RETURN_IF_ERROR (demangle_type (dm)); + break; + + case 'F': + /* Typeinfo function. Used only in old ABI with new mangling. */ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "typeinfo fn for ")); + RETURN_IF_ERROR (demangle_type (dm)); + break; + + case 'S': + /* Character string containing type name, used in typeinfo. */ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "typeinfo name for ")); + RETURN_IF_ERROR (demangle_type (dm)); + break; + + case 'J': + /* The java Class variable corresponding to a C++ class. */ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "java Class for ")); + RETURN_IF_ERROR (demangle_type (dm)); + break; + + case 'h': + /* Non-virtual thunk. */ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "non-virtual thunk")); + RETURN_IF_ERROR (demangle_nv_offset (dm)); + /* Demangle the separator. */ + RETURN_IF_ERROR (demangle_char (dm, '_')); + /* Demangle and emit the target name and function type. */ + RETURN_IF_ERROR (result_add (dm, " to ")); + RETURN_IF_ERROR (demangle_encoding (dm)); + break; + + case 'v': + /* Virtual thunk. 
*/ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "virtual thunk")); + RETURN_IF_ERROR (demangle_v_offset (dm)); + /* Demangle the separator. */ + RETURN_IF_ERROR (demangle_char (dm, '_')); + /* Demangle and emit the target function. */ + RETURN_IF_ERROR (result_add (dm, " to ")); + RETURN_IF_ERROR (demangle_encoding (dm)); + break; + + case 'c': + /* Covariant return thunk. */ + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "covariant return thunk")); + RETURN_IF_ERROR (demangle_call_offset (dm)); + RETURN_IF_ERROR (demangle_call_offset (dm)); + /* Demangle and emit the target function. */ + RETURN_IF_ERROR (result_add (dm, " to ")); + RETURN_IF_ERROR (demangle_encoding (dm)); + break; + + case 'C': + /* TC is a special g++ mangling for a construction vtable. */ + if (!flag_strict) + { + dyn_string_t derived_type; + + advance_char (dm); + RETURN_IF_ERROR (result_add (dm, "construction vtable for ")); + + /* Demangle the derived type off to the side. */ + RETURN_IF_ERROR (result_push (dm)); + RETURN_IF_ERROR (demangle_type (dm)); + derived_type = (dyn_string_t) result_pop (dm); + + /* Demangle the offset. */ + number = dyn_string_new (4); + if (number == NULL) + { + dyn_string_delete (derived_type); + return STATUS_ALLOCATION_FAILED; + } + demangle_number_literally (dm, number, 10, 1); + /* Demangle the underscore separator. */ + status = demangle_char (dm, '_'); + + /* Demangle the base type. */ + if (STATUS_NO_ERROR (status)) + status = demangle_type (dm); + + /* Emit the derived type. */ + if (STATUS_NO_ERROR (status)) + status = result_add (dm, "-in-"); + if (STATUS_NO_ERROR (status)) + status = result_add_string (dm, derived_type); + dyn_string_delete (derived_type); + + /* Don't display the offset unless in verbose mode. 
*/ + if (flag_verbose) + { + status = result_add_char (dm, ' '); + if (STATUS_NO_ERROR (status)) + result_add_string (dm, number); + } + dyn_string_delete (number); + RETURN_IF_ERROR (status); + break; + } + /* If flag_strict, fall through. */ + + default: + return "Unrecognized ."; + } + } + else + return STATUS_ERROR; + + return STATUS_OK; +} + +/* Demangles and emits a . + + + ::= C1 # complete object (in-charge) ctor + ::= C2 # base object (not-in-charge) ctor + ::= C3 # complete object (in-charge) allocating ctor + ::= D0 # deleting (in-charge) dtor + ::= D1 # complete object (in-charge) dtor + ::= D2 # base object (not-in-charge) dtor */ + +static status_t +demangle_ctor_dtor_name (dm) + demangling_t dm; +{ + static const char *const ctor_flavors[] = + { + "in-charge", + "not-in-charge", + "allocating" + }; + static const char *const dtor_flavors[] = + { + "in-charge deleting", + "in-charge", + "not-in-charge" + }; + + int flavor; + char peek = peek_char (dm); + + DEMANGLE_TRACE ("ctor-dtor-name", dm); + + if (peek == 'C') + { + /* A constructor name. Consume the C. */ + advance_char (dm); + flavor = next_char (dm); + if (flavor < '1' || flavor > '3') + return "Unrecognized constructor."; + RETURN_IF_ERROR (result_add_string (dm, dm->last_source_name)); + switch (flavor) + { + case '1': dm->is_constructor = gnu_v3_complete_object_ctor; + break; + case '2': dm->is_constructor = gnu_v3_base_object_ctor; + break; + case '3': dm->is_constructor = gnu_v3_complete_object_allocating_ctor; + break; + } + /* Print the flavor of the constructor if in verbose mode. */ + if (flag_verbose) + { + RETURN_IF_ERROR (result_add (dm, "[")); + RETURN_IF_ERROR (result_add (dm, ctor_flavors[flavor - '1'])); + RETURN_IF_ERROR (result_add_char (dm, ']')); + } + } + else if (peek == 'D') + { + /* A destructor name. Consume the D. 
*/ + advance_char (dm); + flavor = next_char (dm); + if (flavor < '0' || flavor > '2') + return "Unrecognized destructor."; + RETURN_IF_ERROR (result_add_char (dm, '~')); + RETURN_IF_ERROR (result_add_string (dm, dm->last_source_name)); + switch (flavor) + { + case '0': dm->is_destructor = gnu_v3_deleting_dtor; + break; + case '1': dm->is_destructor = gnu_v3_complete_object_dtor; + break; + case '2': dm->is_destructor = gnu_v3_base_object_dtor; + break; + } + /* Print the flavor of the destructor if in verbose mode. */ + if (flag_verbose) + { + RETURN_IF_ERROR (result_add (dm, " [")); + RETURN_IF_ERROR (result_add (dm, dtor_flavors[flavor - '0'])); + RETURN_IF_ERROR (result_add_char (dm, ']')); + } + } + else + return STATUS_ERROR; + + return STATUS_OK; +} + +/* Handle pointer, reference, and pointer-to-member cases for + demangle_type. All consecutive `P's, `R's, and 'M's are joined to + build a pointer/reference type. We snarf all these, plus the + following , all at once since we need to know whether we have + a pointer to data or pointer to function to construct the right + output syntax. C++'s pointer syntax is hairy. + + This function adds substitution candidates for every nested + pointer/reference type it processes, including the outermost, final + type, assuming the substitution starts at SUBSTITUTION_START in the + demangling result. For example, if this function demangles + `PP3Foo', it will add a substitution for `Foo', `Foo*', and + `Foo**', in that order. + + *INSERT_POS is a quantity used internally, when this function calls + itself recursively, to figure out where to insert pointer + punctuation on the way up. On entry to this function, INSERT_POS + should point to a temporary value, but that value need not be + initialized. 
+ + ::= P + ::= R + ::= + + ::= M */ + +static status_t +demangle_type_ptr (dm, insert_pos, substitution_start) + demangling_t dm; + int *insert_pos; + int substitution_start; +{ + status_t status; + int is_substitution_candidate = 1; + + DEMANGLE_TRACE ("type*", dm); + + /* Scan forward, collecting pointers and references into symbols, + until we hit something else. Then emit the type. */ + switch (peek_char (dm)) + { + case 'P': + /* A pointer. Snarf the `P'. */ + advance_char (dm); + /* Demangle the underlying type. */ + RETURN_IF_ERROR (demangle_type_ptr (dm, insert_pos, + substitution_start)); + /* Insert an asterisk where we're told to; it doesn't + necessarily go at the end. If we're doing Java style output, + there is no pointer symbol. */ + if (dm->style != DMGL_JAVA) + RETURN_IF_ERROR (result_insert_char (dm, *insert_pos, '*')); + /* The next (outermost) pointer or reference character should go + after this one. */ + ++(*insert_pos); + break; + + case 'R': + /* A reference. Snarf the `R'. */ + advance_char (dm); + /* Demangle the underlying type. */ + RETURN_IF_ERROR (demangle_type_ptr (dm, insert_pos, + substitution_start)); + /* Insert an ampersand where we're told to; it doesn't + necessarily go at the end. */ + RETURN_IF_ERROR (result_insert_char (dm, *insert_pos, '&')); + /* The next (outermost) pointer or reference character should go + after this one. */ + ++(*insert_pos); + break; + + case 'M': + { + /* A pointer-to-member. */ + dyn_string_t class_type; + + /* Eat the 'M'. */ + advance_char (dm); + + /* Capture the type of which this is a pointer-to-member. */ + RETURN_IF_ERROR (result_push (dm)); + RETURN_IF_ERROR (demangle_type (dm)); + class_type = (dyn_string_t) result_pop (dm); + + if (peek_char (dm) == 'F') + /* A pointer-to-member function. We want output along the + lines of `void (C::*) (int, int)'. 
Demangle the function + type, which would in this case give `void () (int, int)' + and set *insert_pos to the spot between the first + parentheses. */ + status = demangle_type_ptr (dm, insert_pos, substitution_start); + else if (peek_char (dm) == 'A') + /* A pointer-to-member array variable. We want output that + looks like `int (Klass::*) [10]'. Demangle the array type + as `int () [10]', and set *insert_pos to the spot between + the parentheses. */ + status = demangle_array_type (dm, insert_pos); + else + { + /* A pointer-to-member variable. Demangle the type of the + pointed-to member. */ + status = demangle_type (dm); + /* Make it pretty. */ + if (STATUS_NO_ERROR (status) + && !result_previous_char_is_space (dm)) + status = result_add_char (dm, ' '); + /* The pointer-to-member notation (e.g. `C::*') follows the + member's type. */ + *insert_pos = result_caret_pos (dm); + } + + /* Build the pointer-to-member notation. */ + if (STATUS_NO_ERROR (status)) + status = result_insert (dm, *insert_pos, "::*"); + if (STATUS_NO_ERROR (status)) + status = result_insert_string (dm, *insert_pos, class_type); + /* There may be additional levels of (pointer or reference) + indirection in this type. If so, the `*' and `&' should be + added after the pointer-to-member notation (e.g. `C::*&' for + a reference to a pointer-to-member of class C). */ + *insert_pos += dyn_string_length (class_type) + 3; + + /* Clean up. */ + dyn_string_delete (class_type); + + RETURN_IF_ERROR (status); + } + break; + + case 'F': + /* Ooh, tricky, a pointer-to-function. When we demangle the + function type, the return type should go at the very + beginning. */ + *insert_pos = result_caret_pos (dm); + /* The parentheses indicate this is a function pointer or + reference type. */ + RETURN_IF_ERROR (result_add (dm, "()")); + /* Now demangle the function type. The return type will be + inserted before the `()', and the argument list will go after + it. 
*/ + RETURN_IF_ERROR (demangle_function_type (dm, insert_pos)); + /* We should now have something along the lines of + `void () (int, int)'. The pointer or reference characters + have to inside the first set of parentheses. *insert_pos has + already been updated to point past the end of the return + type. Move it one character over so it points inside the + `()'. */ + ++(*insert_pos); + break; + + case 'A': + /* An array pointer or reference. demangle_array_type will figure + out where the asterisks and ampersands go. */ + RETURN_IF_ERROR (demangle_array_type (dm, insert_pos)); + break; + + default: + /* No more pointer or reference tokens; this is therefore a + pointer to data. Finish up by demangling the underlying + type. */ + RETURN_IF_ERROR (demangle_type (dm)); + /* The pointer or reference characters follow the underlying + type, as in `int*&'. */ + *insert_pos = result_caret_pos (dm); + /* Because of the production ::= , + demangle_type will already have added the underlying type as + a substitution candidate. Don't do it again. */ + is_substitution_candidate = 0; + break; + } + + if (is_substitution_candidate) + RETURN_IF_ERROR (substitution_add (dm, substitution_start, 0)); + + return STATUS_OK; +} + +/* Demangles and emits a . + + ::= + ::= + ::= + ::= + ::= + ::= + ::= + ::= + ::= P # pointer-to + ::= R # reference-to + ::= C # complex pair (C 2000) + ::= G # imaginary (C 2000) + ::= U # vendor extended type qualifier + ::= */ + +static status_t +demangle_type (dm) + demangling_t dm; +{ + int start = substitution_start (dm); + char peek = peek_char (dm); + char peek_next; + int encode_return_type = 0; + template_arg_list_t old_arg_list = current_template_arg_list (dm); + int insert_pos; + + /* A can be a ; therefore, this is a + substitution candidate unless a special condition holds (see + below). */ + int is_substitution_candidate = 1; + + DEMANGLE_TRACE ("type", dm); + + /* A can start with a digit (a ), an + N (a ), or a Z (a ). 
*/ + if (IS_DIGIT ((unsigned char) peek) || peek == 'N' || peek == 'Z') + RETURN_IF_ERROR (demangle_class_enum_type (dm, &encode_return_type)); + /* Lower-case letters begin s, except for `r', which + denotes restrict. */ + else if (peek >= 'a' && peek <= 'z' && peek != 'r') + { + RETURN_IF_ERROR (demangle_builtin_type (dm)); + /* Built-in types are not substitution candidates. */ + is_substitution_candidate = 0; + } + else + switch (peek) + { + case 'r': + case 'V': + case 'K': + /* CV-qualifiers (including restrict). We have to demangle + them off to the side, since C++ syntax puts them in a funny + place for qualified pointer and reference types. */ + { + status_t status; + dyn_string_t cv_qualifiers = dyn_string_new (24); + int old_caret_position = result_get_caret (dm); + + if (cv_qualifiers == NULL) + return STATUS_ALLOCATION_FAILED; + + /* Decode all adjacent CV qualifiers. */ + demangle_CV_qualifiers (dm, cv_qualifiers); + /* Emit them, and shift the caret left so that the + underlying type will be emitted before the qualifiers. */ + status = result_add_string (dm, cv_qualifiers); + result_shift_caret (dm, -dyn_string_length (cv_qualifiers)); + /* Clean up. */ + dyn_string_delete (cv_qualifiers); + RETURN_IF_ERROR (status); + /* Also prepend a blank, if needed. */ + RETURN_IF_ERROR (result_add_char (dm, ' ')); + result_shift_caret (dm, -1); + + /* Demangle the underlying type. It will be emitted before + the CV qualifiers, since we moved the caret. */ + RETURN_IF_ERROR (demangle_type (dm)); + + /* Put the caret back where it was previously. */ + result_set_caret (dm, old_caret_position); + } + break; + + case 'F': + return "Non-pointer or -reference function type."; + + case 'A': + RETURN_IF_ERROR (demangle_array_type (dm, NULL)); + break; + + case 'T': + /* It's either a or a + . In either case, demangle the + `T' token first. 
*/ + RETURN_IF_ERROR (demangle_template_param (dm)); + + /* Check for a template argument list; if one is found, it's a + ::= + ::= */ + if (peek_char (dm) == 'I') + { + /* Add a substitution candidate. The template parameter + `T' token is a substitution candidate by itself, + without the template argument list. */ + RETURN_IF_ERROR (substitution_add (dm, start, encode_return_type)); + + /* Now demangle the template argument list. */ + RETURN_IF_ERROR (demangle_template_args (dm)); + /* The entire type, including the template template + parameter and its argument list, will be added as a + substitution candidate below. */ + } + + break; + + case 'S': + /* First check if this is a special substitution. If it is, + this is a . Special substitutions have a + letter following the `S'; other substitutions have a digit + or underscore. */ + peek_next = peek_char_next (dm); + if (IS_DIGIT (peek_next) || peek_next == '_') + { + RETURN_IF_ERROR (demangle_substitution (dm, &encode_return_type)); + + /* The substituted name may have been a template name. + Check if template arguments follow, and if so, demangle + them. */ + if (peek_char (dm) == 'I') + RETURN_IF_ERROR (demangle_template_args (dm)); + else + /* A substitution token is not itself a substitution + candidate. (However, if the substituted template is + instantiated, the resulting type is.) */ + is_substitution_candidate = 0; + } + else + { + /* Now some trickiness. We have a special substitution + here. Often, the special substitution provides the + name of a template that's subsequently instantiated, + for instance `SaIcE' => std::allocator. In these + cases we need to add a substitution candidate for the + entire and thus don't want to clear + the is_substitution_candidate flag. + + However, it's possible that what we have here is a + substitution token representing an entire type, such as + `Ss' => std::string. In this case, we mustn't add a + new substitution candidate for this substitution token. 
+ To detect this case, remember where the start of the + substitution token is. */ + const char *next = dm->next; + /* Now demangle the . */ + RETURN_IF_ERROR + (demangle_class_enum_type (dm, &encode_return_type)); + /* If all that was just demangled is the two-character + special substitution token, supress the addition of a + new candidate for it. */ + if (dm->next == next + 2) + is_substitution_candidate = 0; + } + + break; + + case 'P': + case 'R': + case 'M': + RETURN_IF_ERROR (demangle_type_ptr (dm, &insert_pos, start)); + /* demangle_type_ptr adds all applicable substitution + candidates. */ + is_substitution_candidate = 0; + break; + + case 'C': + /* A C99 complex type. */ + RETURN_IF_ERROR (result_add (dm, "complex ")); + advance_char (dm); + RETURN_IF_ERROR (demangle_type (dm)); + break; + + case 'G': + /* A C99 imaginary type. */ + RETURN_IF_ERROR (result_add (dm, "imaginary ")); + advance_char (dm); + RETURN_IF_ERROR (demangle_type (dm)); + break; + + case 'U': + /* Vendor-extended type qualifier. */ + advance_char (dm); + RETURN_IF_ERROR (demangle_source_name (dm)); + RETURN_IF_ERROR (result_add_char (dm, ' ')); + RETURN_IF_ERROR (demangle_type (dm)); + break; + + default: + return "Unexpected character in ."; + } + + if (is_substitution_candidate) + /* Add a new substitution for the type. If this type was a + , pass its index since from the point of + substitutions; a token is a substitution + candidate distinct from the type that is substituted for it. */ + RETURN_IF_ERROR (substitution_add (dm, start, encode_return_type)); + + /* Pop off template argument lists added during mangling of this + type. */ + pop_to_template_arg_list (dm, old_arg_list); + + return STATUS_OK; +} + +/* C++ source names of builtin types, indexed by the mangled code + letter's position in the alphabet ('a' -> 0, 'b' -> 1, etc). 
 */
static const char *const builtin_type_names[26] =
{
  "signed char",              /* a */
  "bool",                     /* b */
  "char",                     /* c */
  "double",                   /* d */
  "long double",              /* e */
  "float",                    /* f */
  "__float128",               /* g */
  "unsigned char",            /* h */
  "int",                      /* i */
  "unsigned",                 /* j */
  NULL,                       /* k -- unassigned code letter */
  "long",                     /* l */
  "unsigned long",            /* m */
  "__int128",                 /* n */
  "unsigned __int128",        /* o */
  NULL,                       /* p -- unassigned code letter */
  NULL,                       /* q -- unassigned code letter */
  NULL,                       /* r -- unassigned code letter */
  "short",                    /* s */
  "unsigned short",           /* t */
  NULL,                       /* u -- vendor-extended; handled separately */
  "void",                     /* v */
  "wchar_t",                  /* w */
  "long long",                /* x */
  "unsigned long long",       /* y */
  "..."                       /* z */
};

/* Java source names of builtin types.  Types that aren't valid in Java
   are also included here - we don't fail if someone attempts to demangle a
   C++ symbol in Java style.  */
static const char *const java_builtin_type_names[26] =
{
  "signed char",              /* a */
  "boolean",                  /* C++ "bool" */       /* b */
  "byte",                     /* C++ "char" */       /* c */
  "double",                   /* d */
  "long double",              /* e */
  "float",                    /* f */
  "__float128",               /* g */
  "unsigned char",            /* h */
  "int",                      /* i */
  "unsigned",                 /* j */
  NULL,                       /* k */
  "long",                     /* l */
  "unsigned long",            /* m */
  "__int128",                 /* n */
  "unsigned __int128",        /* o */
  NULL,                       /* p */
  NULL,                       /* q */
  NULL,                       /* r */
  "short",                    /* s */
  "unsigned short",           /* t */
  NULL,                       /* u */
  "void",                     /* v */
  "char",                     /* C++ "wchar_t" */    /* w */
  "long",                     /* C++ "long long" */  /* x */
  "unsigned long long",       /* y */
  "..."                       /* z */
};

/* Demangles and emits a <builtin-type>.
+ + ::= v # void + ::= w # wchar_t + ::= b # bool + ::= c # char + ::= a # signed char + ::= h # unsigned char + ::= s # short + ::= t # unsigned short + ::= i # int + ::= j # unsigned int + ::= l # long + ::= m # unsigned long + ::= x # long long, __int64 + ::= y # unsigned long long, __int64 + ::= n # __int128 + ::= o # unsigned __int128 + ::= f # float + ::= d # double + ::= e # long double, __float80 + ::= g # __float128 + ::= z # ellipsis + ::= u # vendor extended type */ + +static status_t +demangle_builtin_type (dm) + demangling_t dm; +{ + + char code = peek_char (dm); + + DEMANGLE_TRACE ("builtin-type", dm); + + if (code == 'u') + { + advance_char (dm); + RETURN_IF_ERROR (demangle_source_name (dm)); + return STATUS_OK; + } + else if (code >= 'a' && code <= 'z') + { + const char *type_name; + /* Java uses different names for some built-in types. */ + if (dm->style == DMGL_JAVA) + type_name = java_builtin_type_names[code - 'a']; + else + type_name = builtin_type_names[code - 'a']; + if (type_name == NULL) + return "Unrecognized code."; + + RETURN_IF_ERROR (result_add (dm, type_name)); + advance_char (dm); + return STATUS_OK; + } + else + return "Non-alphabetic code."; +} + +/* Demangles all consecutive CV-qualifiers (const, volatile, and + restrict) at the current position. The qualifiers are appended to + QUALIFIERS. Returns STATUS_OK. 
 */

static status_t
demangle_CV_qualifiers (dm, qualifiers)
     demangling_t dm;
     dyn_string_t qualifiers;
{
  DEMANGLE_TRACE ("CV-qualifiers", dm);

  /* Keep consuming qualifier codes until something else is seen; each
     recognized qualifier is appended to QUALIFIERS preceded by a
     space.  */
  while (1)
    {
      switch (peek_char (dm))
        {
        case 'r':
          if (!dyn_string_append_space (qualifiers))
            return STATUS_ALLOCATION_FAILED;
          if (!dyn_string_append_cstr (qualifiers, "restrict"))
            return STATUS_ALLOCATION_FAILED;
          break;

        case 'V':
          if (!dyn_string_append_space (qualifiers))
            return STATUS_ALLOCATION_FAILED;
          if (!dyn_string_append_cstr (qualifiers, "volatile"))
            return STATUS_ALLOCATION_FAILED;
          break;

        case 'K':
          if (!dyn_string_append_space (qualifiers))
            return STATUS_ALLOCATION_FAILED;
          if (!dyn_string_append_cstr (qualifiers, "const"))
            return STATUS_ALLOCATION_FAILED;
          break;

        default:
          /* Not a CV-qualifier code; we're done.  */
          return STATUS_OK;
        }

      /* Consume the qualifier character just processed.  */
      advance_char (dm);
    }
}

/* Demangles and emits a <function-type>.  *FUNCTION_NAME_POS is the
   position in the result string of the start of the function
   identifier, at which the function's return type will be inserted;
   *FUNCTION_NAME_POS is updated to position past the end of the
   function's return type.

    <function-type> ::= F [Y] <bare-function-type> E  */

static status_t
demangle_function_type (dm, function_name_pos)
     demangling_t dm;
     int *function_name_pos;
{
  DEMANGLE_TRACE ("function-type", dm);
  RETURN_IF_ERROR (demangle_char (dm, 'F'));
  if (peek_char (dm) == 'Y')
    {
      /* Indicate this function has C linkage if in verbose mode.  */
      if (flag_verbose)
        RETURN_IF_ERROR (result_add (dm, " [extern \"C\"] "));
      advance_char (dm);
    }
  RETURN_IF_ERROR (demangle_bare_function_type (dm, function_name_pos));
  RETURN_IF_ERROR (demangle_char (dm, 'E'));
  return STATUS_OK;
}

/* Demangles and emits a <bare-function-type>.  RETURN_TYPE_POS is the
   position in the result string at which the function return type
   should be inserted.  If RETURN_TYPE_POS is BFT_NO_RETURN_TYPE, the
   function's return type is assumed not to be encoded.
+ + ::= + */ + +static status_t +demangle_bare_function_type (dm, return_type_pos) + demangling_t dm; + int *return_type_pos; +{ + /* Sequence is the index of the current function parameter, counting + from zero. The value -1 denotes the return type. */ + int sequence = + (return_type_pos == BFT_NO_RETURN_TYPE ? 0 : -1); + + DEMANGLE_TRACE ("bare-function-type", dm); + + RETURN_IF_ERROR (result_add_char (dm, '(')); + while (!end_of_name_p (dm) && peek_char (dm) != 'E') + { + if (sequence == -1) + /* We're decoding the function's return type. */ + { + dyn_string_t return_type; + status_t status = STATUS_OK; + + /* Decode the return type off to the side. */ + RETURN_IF_ERROR (result_push (dm)); + RETURN_IF_ERROR (demangle_type (dm)); + return_type = (dyn_string_t) result_pop (dm); + + /* Add a space to the end of the type. Insert the return + type where we've been asked to. */ + if (!dyn_string_append_space (return_type)) + status = STATUS_ALLOCATION_FAILED; + if (STATUS_NO_ERROR (status)) + { + if (!dyn_string_insert (result_string (dm), *return_type_pos, + return_type)) + status = STATUS_ALLOCATION_FAILED; + else + *return_type_pos += dyn_string_length (return_type); + } + + dyn_string_delete (return_type); + RETURN_IF_ERROR (status); + } + else + { + /* Skip `void' parameter types. One should only occur as + the only type in a parameter list; in that case, we want + to print `foo ()' instead of `foo (void)'. */ + if (peek_char (dm) == 'v') + /* Consume the v. */ + advance_char (dm); + else + { + /* Separate parameter types by commas. */ + if (sequence > 0) + RETURN_IF_ERROR (result_add (dm, ", ")); + /* Demangle the type. */ + RETURN_IF_ERROR (demangle_type (dm)); + } + } + + ++sequence; + } + RETURN_IF_ERROR (result_add_char (dm, ')')); + + /* We should have demangled at least one parameter type (which would + be void, for a function that takes no parameters), plus the + return type, if we were supposed to demangle that. 
*/ + if (sequence == -1) + return "Missing function return type."; + else if (sequence == 0) + return "Missing function parameter."; + + return STATUS_OK; +} + +/* Demangles and emits a . *ENCODE_RETURN_TYPE is set to + non-zero if the type is a template-id, zero otherwise. + + ::= */ + +static status_t +demangle_class_enum_type (dm, encode_return_type) + demangling_t dm; + int *encode_return_type; +{ + DEMANGLE_TRACE ("class-enum-type", dm); + + RETURN_IF_ERROR (demangle_name (dm, encode_return_type)); + return STATUS_OK; +} + +/* Demangles and emits an . + + If PTR_INSERT_POS is not NULL, the array type is formatted as a + pointer or reference to an array, except that asterisk and + ampersand punctuation is omitted (since it's not know at this + point). *PTR_INSERT_POS is set to the position in the demangled + name at which this punctuation should be inserted. For example, + `A10_i' is demangled to `int () [10]' and *PTR_INSERT_POS points + between the parentheses. + + If PTR_INSERT_POS is NULL, the array type is assumed not to be + pointer- or reference-qualified. Then, for example, `A10_i' is + demangled simply as `int[10]'. + + ::= A [] _ + ::= A _ */ + +static status_t +demangle_array_type (dm, ptr_insert_pos) + demangling_t dm; + int *ptr_insert_pos; +{ + status_t status = STATUS_OK; + dyn_string_t array_size = NULL; + char peek; + + DEMANGLE_TRACE ("array-type", dm); + + RETURN_IF_ERROR (demangle_char (dm, 'A')); + + /* Demangle the array size into array_size. */ + peek = peek_char (dm); + if (peek == '_') + /* Array bound is omitted. This is a C99-style VLA. */ + ; + else if (IS_DIGIT (peek_char (dm))) + { + /* It looks like a constant array bound. */ + array_size = dyn_string_new (10); + if (array_size == NULL) + return STATUS_ALLOCATION_FAILED; + status = demangle_number_literally (dm, array_size, 10, 0); + } + else + { + /* Anything is must be an expression for a nont-constant array + bound. 
This happens if the array type occurs in a template + and the array bound references a template parameter. */ + RETURN_IF_ERROR (result_push (dm)); + RETURN_IF_ERROR (demangle_expression_v3 (dm)); + array_size = (dyn_string_t) result_pop (dm); + } + /* array_size may have been allocated by now, so we can't use + RETURN_IF_ERROR until it's been deallocated. */ + + /* Demangle the base type of the array. */ + if (STATUS_NO_ERROR (status)) + status = demangle_char (dm, '_'); + if (STATUS_NO_ERROR (status)) + status = demangle_type (dm); + + if (ptr_insert_pos != NULL) + { + /* This array is actually part of an pointer- or + reference-to-array type. Format appropriately, except we + don't know which and how much punctuation to use. */ + if (STATUS_NO_ERROR (status)) + status = result_add (dm, " () "); + /* Let the caller know where to insert the punctuation. */ + *ptr_insert_pos = result_caret_pos (dm) - 2; + } + + /* Emit the array dimension syntax. */ + if (STATUS_NO_ERROR (status)) + status = result_add_char (dm, '['); + if (STATUS_NO_ERROR (status) && array_size != NULL) + status = result_add_string (dm, array_size); + if (STATUS_NO_ERROR (status)) + status = result_add_char (dm, ']'); + if (array_size != NULL) + dyn_string_delete (array_size); + + RETURN_IF_ERROR (status); + + return STATUS_OK; +} + +/* Demangles and emits a . + + ::= T_ # first template parameter + ::= T _ */ + +static status_t +demangle_template_param (dm) + demangling_t dm; +{ + int parm_number; + template_arg_list_t current_arg_list = current_template_arg_list (dm); + string_list_t arg; + + DEMANGLE_TRACE ("template-param", dm); + + /* Make sure there is a template argmust list in which to look up + this parameter reference. 
*/ + if (current_arg_list == NULL) + return "Template parameter outside of template."; + + RETURN_IF_ERROR (demangle_char (dm, 'T')); + if (peek_char (dm) == '_') + parm_number = 0; + else + { + RETURN_IF_ERROR (demangle_number (dm, &parm_number, 10, 0)); + ++parm_number; + } + RETURN_IF_ERROR (demangle_char (dm, '_')); + + arg = template_arg_list_get_arg (current_arg_list, parm_number); + if (arg == NULL) + /* parm_number exceeded the number of arguments in the current + template argument list. */ + return "Template parameter number out of bounds."; + RETURN_IF_ERROR (result_add_string (dm, (dyn_string_t) arg)); + + return STATUS_OK; +} + +/* Demangles and emits a . + + ::= I + E */ + +static status_t +demangle_template_args_1 (dm, arg_list) + demangling_t dm; + template_arg_list_t arg_list; +{ + int first = 1; + + DEMANGLE_TRACE ("template-args", dm); + + RETURN_IF_ERROR (demangle_char (dm, 'I')); + RETURN_IF_ERROR (result_open_template_list (dm)); + do + { + string_list_t arg; + + if (first) + first = 0; + else + RETURN_IF_ERROR (result_add (dm, ", ")); + + /* Capture the template arg. */ + RETURN_IF_ERROR (result_push (dm)); + RETURN_IF_ERROR (demangle_template_arg (dm)); + arg = result_pop (dm); + + /* Emit it in the demangled name. */ + RETURN_IF_ERROR (result_add_string (dm, (dyn_string_t) arg)); + + /* Save it for use in expanding s. */ + template_arg_list_add_arg (arg_list, arg); + } + while (peek_char (dm) != 'E'); + /* Append the '>'. */ + RETURN_IF_ERROR (result_close_template_list (dm)); + + /* Consume the 'E'. */ + advance_char (dm); + + return STATUS_OK; +} + +static status_t +demangle_template_args (dm) + demangling_t dm; +{ + int first = 1; + dyn_string_t old_last_source_name; + dyn_string_t new_name; + template_arg_list_t arg_list = template_arg_list_new (); + status_t status; + + if (arg_list == NULL) + return STATUS_ALLOCATION_FAILED; + + /* Preserve the most recently demangled source name. 
*/ + old_last_source_name = dm->last_source_name; + new_name = dyn_string_new (0); + + if (new_name == NULL) + { + template_arg_list_delete (arg_list); + return STATUS_ALLOCATION_FAILED; + } + + dm->last_source_name = new_name; + + status = demangle_template_args_1 (dm, arg_list); + /* Restore the most recent demangled source name. */ + dyn_string_delete (dm->last_source_name); + dm->last_source_name = old_last_source_name; + + if (!STATUS_NO_ERROR (status)) + { + template_arg_list_delete (arg_list); + return status; + } + + /* Push the list onto the top of the stack of template argument + lists, so that arguments from it are used from now on when + expanding s. */ + push_template_arg_list (dm, arg_list); + + return STATUS_OK; +} + +/* This function, which does not correspond to a production in the + mangling spec, handles the `literal' production for both + and . It does not expect or consume + the initial `L' or final `E'. The demangling is given by: + + ::= + + and the emitted output is `(type)number'. */ + +static status_t +demangle_literal (dm) + demangling_t dm; +{ + char peek = peek_char (dm); + dyn_string_t value_string; + status_t status; + + DEMANGLE_TRACE ("literal", dm); + + if (!flag_verbose && peek >= 'a' && peek <= 'z') + { + /* If not in verbose mode and this is a builtin type, see if we + can produce simpler numerical output. In particular, for + integer types shorter than `long', just write the number + without type information; for bools, write `true' or `false'. + Other refinements could be made here too. */ + + /* This constant string is used to map from codes + (26 letters of the alphabet) to codes that determine how the + value will be displayed. The codes are: + b: display as bool + i: display as int + l: display as long + A space means the value will be represented using cast + notation. */ + static const char *const code_map = "ibi iii ll ii i "; + + char code = code_map[peek - 'a']; + /* FIXME: Implement demangling of floats and doubles. 
*/ + if (code == 'u') + return STATUS_UNIMPLEMENTED; + if (code == 'b') + { + /* It's a boolean. */ + char value; + + /* Consume the b. */ + advance_char (dm); + /* Look at the next character. It should be 0 or 1, + corresponding to false or true, respectively. */ + value = peek_char (dm); + if (value == '0') + RETURN_IF_ERROR (result_add (dm, "false")); + else if (value == '1') + RETURN_IF_ERROR (result_add (dm, "true")); + else + return "Unrecognized bool constant."; + /* Consume the 0 or 1. */ + advance_char (dm); + return STATUS_OK; + } + else if (code == 'i' || code == 'l') + { + /* It's an integer or long. */ + + /* Consume the type character. */ + advance_char (dm); + + /* Demangle the number and write it out. */ + value_string = dyn_string_new (0); + status = demangle_number_literally (dm, value_string, 10, 1); + if (STATUS_NO_ERROR (status)) + status = result_add_string (dm, value_string); + /* For long integers, append an l. */ + if (code == 'l' && STATUS_NO_ERROR (status)) + status = result_add_char (dm, code); + dyn_string_delete (value_string); + + RETURN_IF_ERROR (status); + return STATUS_OK; + } + /* ...else code == ' ', so fall through to represent this + literal's type explicitly using cast syntax. */ + } + + RETURN_IF_ERROR (result_add_char (dm, '(')); + RETURN_IF_ERROR (demangle_type (dm)); + RETURN_IF_ERROR (result_add_char (dm, ')')); + + value_string = dyn_string_new (0); + if (value_string == NULL) + return STATUS_ALLOCATION_FAILED; + + status = demangle_number_literally (dm, value_string, 10, 1); + if (STATUS_NO_ERROR (status)) + status = result_add_string (dm, value_string); + dyn_string_delete (value_string); + RETURN_IF_ERROR (status); + + return STATUS_OK; +} + +/* Demangles and emits a . 
+ + ::= # type + ::= L E # literal + ::= LZ E # external name + ::= X E # expression */ + +static status_t +demangle_template_arg (dm) + demangling_t dm; +{ + DEMANGLE_TRACE ("template-arg", dm); + + switch (peek_char (dm)) + { + case 'L': + advance_char (dm); + + if (peek_char (dm) == 'Z') + { + /* External name. */ + advance_char (dm); + /* FIXME: Standard is contradictory here. */ + RETURN_IF_ERROR (demangle_encoding (dm)); + } + else + RETURN_IF_ERROR (demangle_literal (dm)); + RETURN_IF_ERROR (demangle_char (dm, 'E')); + break; + + case 'X': + /* Expression. */ + advance_char (dm); + RETURN_IF_ERROR (demangle_expression_v3 (dm)); + RETURN_IF_ERROR (demangle_char (dm, 'E')); + break; + + default: + RETURN_IF_ERROR (demangle_type (dm)); + break; + } + + return STATUS_OK; +} + +/* Demangles and emits an . + + ::= + ::= + ::= + ::= */ + +static status_t +demangle_expression_v3 (dm) + demangling_t dm; +{ + char peek = peek_char (dm); + + DEMANGLE_TRACE ("expression", dm); + + if (peek == 'L' || peek == 'T') + RETURN_IF_ERROR (demangle_expr_primary (dm)); + else if (peek == 's' && peek_char_next (dm) == 'r') + RETURN_IF_ERROR (demangle_scope_expression (dm)); + else + /* An operator expression. */ + { + int num_args; + status_t status = STATUS_OK; + dyn_string_t operator_name; + + /* We have an operator name. Since we want to output binary + operations in infix notation, capture the operator name + first. */ + RETURN_IF_ERROR (result_push (dm)); + RETURN_IF_ERROR (demangle_operator_name (dm, 1, &num_args)); + operator_name = (dyn_string_t) result_pop (dm); + + /* If it's binary, do an operand first. */ + if (num_args > 1) + { + status = result_add_char (dm, '('); + if (STATUS_NO_ERROR (status)) + status = demangle_expression_v3 (dm); + if (STATUS_NO_ERROR (status)) + status = result_add_char (dm, ')'); + } + + /* Emit the operator. 
*/ + if (STATUS_NO_ERROR (status)) + status = result_add_string (dm, operator_name); + dyn_string_delete (operator_name); + RETURN_IF_ERROR (status); + + /* Emit its second (if binary) or only (if unary) operand. */ + RETURN_IF_ERROR (result_add_char (dm, '(')); + RETURN_IF_ERROR (demangle_expression_v3 (dm)); + RETURN_IF_ERROR (result_add_char (dm, ')')); + + /* The ternary operator takes a third operand. */ + if (num_args == 3) + { + RETURN_IF_ERROR (result_add (dm, ":(")); + RETURN_IF_ERROR (demangle_expression_v3 (dm)); + RETURN_IF_ERROR (result_add_char (dm, ')')); + } + } + + return STATUS_OK; +} + +/* Demangles and emits a . + + ::= sr + ::= sr */ + +static status_t +demangle_scope_expression (dm) + demangling_t dm; +{ + RETURN_IF_ERROR (demangle_char (dm, 's')); + RETURN_IF_ERROR (demangle_char (dm, 'r')); + RETURN_IF_ERROR (demangle_type (dm)); + RETURN_IF_ERROR (result_add (dm, "::")); + RETURN_IF_ERROR (demangle_encoding (dm)); + return STATUS_OK; +} + +/* Demangles and emits an . + + ::= + ::= L E # literal + ::= L E # external name */ + +static status_t +demangle_expr_primary (dm) + demangling_t dm; +{ + char peek = peek_char (dm); + + DEMANGLE_TRACE ("expr-primary", dm); + + if (peek == 'T') + RETURN_IF_ERROR (demangle_template_param (dm)); + else if (peek == 'L') + { + /* Consume the `L'. */ + advance_char (dm); + peek = peek_char (dm); + + if (peek == '_') + RETURN_IF_ERROR (demangle_mangled_name (dm)); + else + RETURN_IF_ERROR (demangle_literal (dm)); + + RETURN_IF_ERROR (demangle_char (dm, 'E')); + } + else + return STATUS_ERROR; + + return STATUS_OK; +} + +/* Demangles and emits a . Sets *TEMPLATE_P to non-zero + if the substitution is the name of a template, zero otherwise. 
+ + ::= S _ + ::= S_ + + ::= St # ::std:: + ::= Sa # ::std::allocator + ::= Sb # ::std::basic_string + ::= Ss # ::std::basic_string, + ::std::allocator > + ::= Si # ::std::basic_istream > + ::= So # ::std::basic_ostream > + ::= Sd # ::std::basic_iostream > +*/ + +static status_t +demangle_substitution (dm, template_p) + demangling_t dm; + int *template_p; +{ + int seq_id; + int peek; + dyn_string_t text; + + DEMANGLE_TRACE ("substitution", dm); + + RETURN_IF_ERROR (demangle_char (dm, 'S')); + + /* Scan the substitution sequence index. A missing number denotes + the first index. */ + peek = peek_char (dm); + if (peek == '_') + seq_id = -1; + /* If the following character is 0-9 or a capital letter, interpret + the sequence up to the next underscore as a base-36 substitution + index. */ + else if (IS_DIGIT ((unsigned char) peek) + || (peek >= 'A' && peek <= 'Z')) + RETURN_IF_ERROR (demangle_number (dm, &seq_id, 36, 0)); + else + { + const char *new_last_source_name = NULL; + + switch (peek) + { + case 't': + RETURN_IF_ERROR (result_add (dm, "std")); + break; + + case 'a': + RETURN_IF_ERROR (result_add (dm, "std::allocator")); + new_last_source_name = "allocator"; + *template_p = 1; + break; + + case 'b': + RETURN_IF_ERROR (result_add (dm, "std::basic_string")); + new_last_source_name = "basic_string"; + *template_p = 1; + break; + + case 's': + if (!flag_verbose) + { + RETURN_IF_ERROR (result_add (dm, "std::string")); + new_last_source_name = "string"; + } + else + { + RETURN_IF_ERROR (result_add (dm, "std::basic_string, std::allocator >")); + new_last_source_name = "basic_string"; + } + *template_p = 0; + break; + + case 'i': + if (!flag_verbose) + { + RETURN_IF_ERROR (result_add (dm, "std::istream")); + new_last_source_name = "istream"; + } + else + { + RETURN_IF_ERROR (result_add (dm, "std::basic_istream >")); + new_last_source_name = "basic_istream"; + } + *template_p = 0; + break; + + case 'o': + if (!flag_verbose) + { + RETURN_IF_ERROR (result_add (dm, 
"std::ostream")); + new_last_source_name = "ostream"; + } + else + { + RETURN_IF_ERROR (result_add (dm, "std::basic_ostream >")); + new_last_source_name = "basic_ostream"; + } + *template_p = 0; + break; + + case 'd': + if (!flag_verbose) + { + RETURN_IF_ERROR (result_add (dm, "std::iostream")); + new_last_source_name = "iostream"; + } + else + { + RETURN_IF_ERROR (result_add (dm, "std::basic_iostream >")); + new_last_source_name = "basic_iostream"; + } + *template_p = 0; + break; + + default: + return "Unrecognized ."; + } + + /* Consume the character we just processed. */ + advance_char (dm); + + if (new_last_source_name != NULL) + { + if (!dyn_string_copy_cstr (dm->last_source_name, + new_last_source_name)) + return STATUS_ALLOCATION_FAILED; + } + + return STATUS_OK; + } + + /* Look up the substitution text. Since `S_' is the most recent + substitution, `S0_' is the second-most-recent, etc., shift the + numbering by one. */ + text = substitution_get (dm, seq_id + 1, template_p); + if (text == NULL) + return "Substitution number out of range."; + + /* Emit the substitution text. */ + RETURN_IF_ERROR (result_add_string (dm, text)); + + RETURN_IF_ERROR (demangle_char (dm, '_')); + return STATUS_OK; +} + +/* Demangles and emits a . + + := Z E [] + := Z E s [] */ + +static status_t +demangle_local_name (dm) + demangling_t dm; +{ + DEMANGLE_TRACE ("local-name", dm); + + RETURN_IF_ERROR (demangle_char (dm, 'Z')); + RETURN_IF_ERROR (demangle_encoding (dm)); + RETURN_IF_ERROR (demangle_char (dm, 'E')); + RETURN_IF_ERROR (result_add (dm, "::")); + + if (peek_char (dm) == 's') + { + /* Local character string literal. */ + RETURN_IF_ERROR (result_add (dm, "string literal")); + /* Consume the s. */ + advance_char (dm); + RETURN_IF_ERROR (demangle_discriminator (dm, 0)); + } + else + { + int unused; + /* Local name for some other entity. Demangle its name. 
*/ + RETURN_IF_ERROR (demangle_name (dm, &unused)); + RETURN_IF_ERROR (demangle_discriminator (dm, 1)); + } + + return STATUS_OK; + } + + /* Optimonally demangles and emits a . If there is no + at the current position in the mangled string, the + descriminator is assumed to be zero. Emit the discriminator number + in parentheses, unless SUPPRESS_FIRST is non-zero and the + discriminator is zero. + + ::= _ */ + +static status_t +demangle_discriminator (dm, suppress_first) + demangling_t dm; + int suppress_first; +{ + /* Output for s to the demangled name is completely + suppressed if not in verbose mode. */ + + if (peek_char (dm) == '_') + { + /* Consume the underscore. */ + advance_char (dm); + if (flag_verbose) + RETURN_IF_ERROR (result_add (dm, " [#")); + /* Check if there's a number following the underscore. */ + if (IS_DIGIT ((unsigned char) peek_char (dm))) + { + int discriminator; + /* Demangle the number. */ + RETURN_IF_ERROR (demangle_number (dm, &discriminator, 10, 0)); + if (flag_verbose) + /* Write the discriminator. The mangled number is two + less than the discriminator ordinal, counting from + zero. */ + RETURN_IF_ERROR (int_to_dyn_string (discriminator + 1, + (dyn_string_t) dm->result)); + } + else + return STATUS_ERROR; + if (flag_verbose) + RETURN_IF_ERROR (result_add_char (dm, ']')); + } + else if (!suppress_first) + { + if (flag_verbose) + RETURN_IF_ERROR (result_add (dm, " [#0]")); + } + + return STATUS_OK; +} + +/* Demangle NAME into RESULT, which must be an initialized + dyn_string_t. On success, returns STATUS_OK. On failure, returns + an error message, and the contents of RESULT are unchanged. 
 */

static status_t
cp_demangle (name, result, style)
     const char *name;
     dyn_string_t result;
     int style;
{
  status_t status;
  int length = VG_(strlen) (name);

  if (length > 2 && name[0] == '_' && name[1] == 'Z')
    {
      demangling_t dm = demangling_new (name, style);
      if (dm == NULL)
        return STATUS_ALLOCATION_FAILED;

      status = result_push (dm);
      if (status != STATUS_OK)
        {
          demangling_delete (dm);
          return status;
        }

      status = demangle_mangled_name (dm);
      if (STATUS_NO_ERROR (status))
        {
          /* Copy the demangled text into the caller's RESULT.
             NOTE(review): DEMANGLED appears to leak on the copy-failure
             path below -- confirm whether demangling_delete reclaims
             popped results.  */
          dyn_string_t demangled = (dyn_string_t) result_pop (dm);
          if (!dyn_string_copy (result, demangled))
            {
              demangling_delete (dm);
              return STATUS_ALLOCATION_FAILED;
            }
          dyn_string_delete (demangled);
        }

      demangling_delete (dm);
    }
  else
    {
      /* It's evidently not a mangled C++ name.  It could be the name
         of something with C linkage, though, so just copy NAME into
         RESULT.  */
      if (!dyn_string_copy_cstr (result, name))
        return STATUS_ALLOCATION_FAILED;
      status = STATUS_OK;
    }

  return status;
}

/* Demangle TYPE_NAME into RESULT, which must be an initialized
   dyn_string_t.  On success, returns STATUS_OK.  On failure, returns
   an error message, and the contents of RESULT are unchanged.  */

#ifdef IN_LIBGCC2
static status_t
cp_demangle_type (type_name, result)
     const char* type_name;
     dyn_string_t result;
{
  status_t status;
  demangling_t dm = demangling_new (type_name);

  if (dm == NULL)
    return STATUS_ALLOCATION_FAILED;

  /* Demangle the type name.  The demangled name is stored in dm.  */
  status = result_push (dm);
  if (status != STATUS_OK)
    {
      demangling_delete (dm);
      return status;
    }

  status = demangle_type (dm);

  if (STATUS_NO_ERROR (status))
    {
      /* The demangling succeeded.  Pop the result out of dm and copy
         it into RESULT.  */
      dyn_string_t demangled = (dyn_string_t) result_pop (dm);
      if (!dyn_string_copy (result, demangled))
        return STATUS_ALLOCATION_FAILED;
      dyn_string_delete (demangled);
    }

  /* Clean up.  */
  demangling_delete (dm);

  return status;
}

extern char *__cxa_demangle PARAMS ((const char *, char *, size_t *, int *));

/* ia64 ABI-mandated entry point in the C++ runtime library for performing
   demangling.  MANGLED_NAME is a NUL-terminated character string
   containing the name to be demangled.

   OUTPUT_BUFFER is a region of memory, allocated with malloc, of
   *LENGTH bytes, into which the demangled name is stored.  If
   OUTPUT_BUFFER is not long enough, it is expanded using realloc.
   OUTPUT_BUFFER may instead be NULL; in that case, the demangled name
   is placed in a region of memory allocated with malloc.

   If LENGTH is non-NULL, the length of the buffer containing the
   demangled name, is placed in *LENGTH.

   The return value is a pointer to the start of the NUL-terminated
   demangled name, or NULL if the demangling fails.  The caller is
   responsible for deallocating this memory using free.

   *STATUS is set to one of the following values:
      0: The demangling operation succeeded.
     -1: A memory allocation failure occurred.
     -2: MANGLED_NAME is not a valid name under the C++ ABI mangling rules.
     -3: One of the arguments is invalid.

   The demangling is performed using the C++ ABI mangling rules, with
   GNU extensions.  */

char *
__cxa_demangle (mangled_name, output_buffer, length, status)
     const char *mangled_name;
     char *output_buffer;
     size_t *length;
     int *status;
{
  struct dyn_string demangled_name;
  status_t result;

  if (status == NULL)
    return NULL;

  if (mangled_name == NULL) {
    *status = -3;
    return NULL;
  }

  /* Did the caller provide a buffer for the demangled name?  */
  if (output_buffer == NULL) {
    /* No; dyn_string will malloc a buffer for us.  */
    if (!dyn_string_init (&demangled_name, 0))
      {
        *status = -1;
        return NULL;
      }
  }
  else {
    /* Yes.  Check that the length was provided.  */
    if (length == NULL) {
      *status = -3;
      return NULL;
    }
    /* Install the buffer into a dyn_string.  */
    demangled_name.allocated = *length;
    demangled_name.length = 0;
    demangled_name.s = output_buffer;
  }

  if (mangled_name[0] == '_' && mangled_name[1] == 'Z')
    /* MANGLED_NAME appears to be a function or variable name.
       Demangle it accordingly.  */
    result = cp_demangle (mangled_name, &demangled_name, 0);
  else
    /* Try to demangle MANGLED_NAME as the name of a type.  */
    result = cp_demangle_type (mangled_name, &demangled_name);

  if (result == STATUS_OK)
    /* The demangling succeeded.  */
    {
      /* If LENGTH isn't NULL, store the allocated buffer length
         there; the buffer may have been realloced by dyn_string
         functions.  */
      if (length != NULL)
        *length = demangled_name.allocated;
      /* The operation was a success.  */
      *status = 0;
      return dyn_string_buf (&demangled_name);
    }
  else if (result == STATUS_ALLOCATION_FAILED)
    /* A call to malloc or realloc failed during the demangling
       operation.  */
    {
      *status = -1;
      return NULL;
    }
  else
    /* The demangling failed for another reason, most probably because
       MANGLED_NAME isn't a valid mangled name.  */
    {
      /* If the buffer containing the demangled name wasn't provided
         by the caller, free it.  */
      if (output_buffer == NULL)
        free (dyn_string_buf (&demangled_name));
      *status = -2;
      return NULL;
    }
}

#else /* !IN_LIBGCC2 */

/* Variant entry point for integration with the existing cplus-dem
   demangler.  Attempts to demangle MANGLED.  If the demangling
   succeeds, returns a buffer, allocated with malloc, containing the
   demangled name.  The caller must deallocate the buffer using free.
   If the demangling fails, returns NULL.
*/ + +char * +VG_(cplus_demangle_v3) (mangled) + const char* mangled; +{ + dyn_string_t demangled; + status_t status; + + /* If this isn't a mangled name, don't pretend to demangle it. */ + if (VG_(strncmp) (mangled, "_Z", 2) != 0) + return NULL; + + /* Create a dyn_string to hold the demangled name. */ + demangled = dyn_string_new (0); + /* Attempt the demangling. */ + status = cp_demangle ((char *) mangled, demangled, 0); + + if (STATUS_NO_ERROR (status)) + /* Demangling succeeded. */ + { + /* Grab the demangled result from the dyn_string. It was + allocated with malloc, so we can return it directly. */ + char *return_value = dyn_string_release (demangled); + /* Hand back the demangled name. */ + return return_value; + } + else if (status == STATUS_ALLOCATION_FAILED) + { + vg_assert (0); + /* + fprintf (stderr, "Memory allocation failed.\n"); + abort (); + */ + } + else + /* Demangling failed. */ + { + dyn_string_delete (demangled); + return NULL; + } +} + +/* Demangle a Java symbol. Java uses a subset of the V3 ABI C++ mangling + conventions, but the output formatting is a little different. + This instructs the C++ demangler not to emit pointer characters ("*"), and + to use Java's namespace separator symbol ("." instead of "::"). It then + does an additional pass over the demangled output to replace instances + of JArray with TYPE[]. */ + +char * +VG_(java_demangle_v3) (mangled) + const char* mangled; +{ + dyn_string_t demangled; + char *next; + char *end; + int len; + status_t status; + int nesting = 0; + char *cplus_demangled; + char *return_value; + + /* Create a dyn_string to hold the demangled name. */ + demangled = dyn_string_new (0); + + /* Attempt the demangling. */ + status = cp_demangle ((char *) mangled, demangled, DMGL_JAVA); + + if (STATUS_NO_ERROR (status)) + /* Demangling succeeded. */ + { + /* Grab the demangled result from the dyn_string. 
*/ + cplus_demangled = dyn_string_release (demangled); + } + else if (status == STATUS_ALLOCATION_FAILED) + { + vg_assert (0); + /* + fprintf (stderr, "Memory allocation failed.\n"); + abort (); + */ + } + else + /* Demangling failed. */ + { + dyn_string_delete (demangled); + return NULL; + } + + len = VG_(strlen) (cplus_demangled); + next = cplus_demangled; + end = next + len; + demangled = NULL; + + /* Replace occurances of JArray with TYPE[]. */ + while (next < end) + { + char *open_str = VG_(strstr) (next, "JArray<"); + char *close_str = NULL; + if (nesting > 0) + close_str = VG_(strchr) (next, '>'); + + if (open_str != NULL && (close_str == NULL || close_str > open_str)) + { + ++nesting; + + if (!demangled) + demangled = dyn_string_new(len); + + /* Copy prepending symbols, if any. */ + if (open_str > next) + { + open_str[0] = 0; + dyn_string_append_cstr (demangled, next); + } + next = open_str + 7; + } + else if (close_str != NULL) + { + --nesting; + + /* Copy prepending type symbol, if any. Squash any spurious + whitespace. */ + if (close_str > next && next[0] != ' ') + { + close_str[0] = 0; + dyn_string_append_cstr (demangled, next); + } + dyn_string_append_cstr (demangled, "[]"); + next = close_str + 1; + } + else + { + /* There are no more arrays. Copy the rest of the symbol, or + simply return the original symbol if no changes were made. */ + if (next == cplus_demangled) + return cplus_demangled; + + dyn_string_append_cstr (demangled, next); + next = end; + } + } + + free (cplus_demangled); + + return_value = dyn_string_release (demangled); + return return_value; +} + +#endif /* IN_LIBGCC2 */ + + +/* Demangle NAME in the G++ V3 ABI demangling style, and return either + zero, indicating that some error occurred, or a demangling_t + holding the results. 
*/ +static demangling_t +demangle_v3_with_details (name) + const char *name; +{ + demangling_t dm; + status_t status; + + if (VG_(strncmp) (name, "_Z", 2)) + return 0; + + dm = demangling_new (name, DMGL_GNU_V3); + if (dm == NULL) + { + vg_assert (0); + /* + fprintf (stderr, "Memory allocation failed.\n"); + abort (); + */ + } + + status = result_push (dm); + if (! STATUS_NO_ERROR (status)) + { + demangling_delete (dm); + vg_assert (0); + /* + fprintf (stderr, "%s\n", status); + abort (); + */ + } + + status = demangle_mangled_name (dm); + if (STATUS_NO_ERROR (status)) + return dm; + + demangling_delete (dm); + return 0; +} + + +/* Return non-zero iff NAME is the mangled form of a constructor name + in the G++ V3 ABI demangling style. Specifically, return: + - '1' if NAME is a complete object constructor, + - '2' if NAME is a base object constructor, or + - '3' if NAME is a complete object allocating constructor. */ +/* +enum gnu_v3_ctor_kinds +is_gnu_v3_mangled_ctor (name) + const char *name; +{ + demangling_t dm = demangle_v3_with_details (name); + + if (dm) + { + enum gnu_v3_ctor_kinds result = dm->is_constructor; + demangling_delete (dm); + return result; + } + else + return 0; +} +*/ + + +/* Return non-zero iff NAME is the mangled form of a destructor name + in the G++ V3 ABI demangling style. Specifically, return: + - '0' if NAME is a deleting destructor, + - '1' if NAME is a complete object destructor, or + - '2' if NAME is a base object destructor. */ +/* +enum gnu_v3_dtor_kinds +is_gnu_v3_mangled_dtor (name) + const char *name; +{ + demangling_t dm = demangle_v3_with_details (name); + + if (dm) + { + enum gnu_v3_dtor_kinds result = dm->is_destructor; + demangling_delete (dm); + return result; + } + else + return 0; +} +*/ + +#ifdef STANDALONE_DEMANGLER + +#include "getopt.h" + +static void print_usage + PARAMS ((FILE* fp, int exit_value)); + +/* Non-zero if CHAR is a character than can occur in a mangled name. 
*/ +#define is_mangled_char(CHAR) \ + (IS_ALPHA (CHAR) || IS_DIGIT (CHAR) \ + || (CHAR) == '_' || (CHAR) == '.' || (CHAR) == '$') + +/* The name of this program, as invoked. */ +const char* program_name; + +/* Prints usage summary to FP and then exits with EXIT_VALUE. */ + +static void +print_usage (fp, exit_value) + FILE* fp; + int exit_value; +{ + fprintf (fp, "Usage: %s [options] [names ...]\n", program_name); + fprintf (fp, "Options:\n"); + fprintf (fp, " -h,--help Display this message.\n"); + fprintf (fp, " -s,--strict Demangle standard names only.\n"); + fprintf (fp, " -v,--verbose Produce verbose demanglings.\n"); + fprintf (fp, "If names are provided, they are demangled. Otherwise filters standard input.\n"); + + exit (exit_value); +} + +/* Option specification for getopt_long. */ +static const struct option long_options[] = +{ + { "help", no_argument, NULL, 'h' }, + { "strict", no_argument, NULL, 's' }, + { "verbose", no_argument, NULL, 'v' }, + { NULL, no_argument, NULL, 0 }, +}; + +/* Main entry for a demangling filter executable. It will demangle + its command line arguments, if any. If none are provided, it will + filter stdin to stdout, replacing any recognized mangled C++ names + with their demangled equivalents. */ + +int +main (argc, argv) + int argc; + char *argv[]; +{ + status_t status; + int i; + int opt_char; + + /* Use the program name of this program, as invoked. */ + program_name = argv[0]; + + /* Parse options. */ + do + { + opt_char = getopt_long (argc, argv, "hsv", long_options, NULL); + switch (opt_char) + { + case '?': /* Unrecognized option. */ + print_usage (stderr, 1); + break; + + case 'h': + print_usage (stdout, 0); + break; + + case 's': + flag_strict = 1; + break; + + case 'v': + flag_verbose = 1; + break; + } + } + while (opt_char != -1); + + if (optind == argc) + /* No command line arguments were provided. Filter stdin. 
*/ + { + dyn_string_t mangled = dyn_string_new (3); + dyn_string_t demangled = dyn_string_new (0); + status_t status; + + /* Read all of input. */ + while (!feof (stdin)) + { + char c = getchar (); + + /* The first character of a mangled name is an underscore. */ + if (feof (stdin)) + break; + if (c != '_') + { + /* It's not a mangled name. Print the character and go + on. */ + putchar (c); + continue; + } + c = getchar (); + + /* The second character of a mangled name is a capital `Z'. */ + if (feof (stdin)) + break; + if (c != 'Z') + { + /* It's not a mangled name. Print the previous + underscore, the `Z', and go on. */ + putchar ('_'); + putchar (c); + continue; + } + + /* Start keeping track of the candidate mangled name. */ + dyn_string_append_char (mangled, '_'); + dyn_string_append_char (mangled, 'Z'); + + /* Pile characters into mangled until we hit one that can't + occur in a mangled name. */ + c = getchar (); + while (!feof (stdin) && is_mangled_char (c)) + { + dyn_string_append_char (mangled, c); + if (feof (stdin)) + break; + c = getchar (); + } + + /* Attempt to demangle the name. */ + status = cp_demangle (dyn_string_buf (mangled), demangled, 0); + + /* If the demangling succeeded, great! Print out the + demangled version. */ + if (STATUS_NO_ERROR (status)) + fputs (dyn_string_buf (demangled), stdout); + /* Abort on allocation failures. */ + else if (status == STATUS_ALLOCATION_FAILED) + { + fprintf (stderr, "Memory allocation failed.\n"); + abort (); + } + /* Otherwise, it might not have been a mangled name. Just + print out the original text. */ + else + fputs (dyn_string_buf (mangled), stdout); + + /* If we haven't hit EOF yet, we've read one character that + can't occur in a mangled name, so print it out. */ + if (!feof (stdin)) + putchar (c); + + /* Clear the candidate mangled name, to start afresh next + time we hit a `_Z'. 
*/ + dyn_string_clear (mangled); + } + + dyn_string_delete (mangled); + dyn_string_delete (demangled); + } + else + /* Demangle command line arguments. */ + { + dyn_string_t result = dyn_string_new (0); + + /* Loop over command line arguments. */ + for (i = optind; i < argc; ++i) + { + /* Attempt to demangle. */ + status = cp_demangle (argv[i], result, 0); + + /* If it worked, print the demangled name. */ + if (STATUS_NO_ERROR (status)) + printf ("%s\n", dyn_string_buf (result)); + /* Abort on allocaiton failures. */ + else if (status == STATUS_ALLOCATION_FAILED) + { + fprintf (stderr, "Memory allocation failed.\n"); + abort (); + } + /* If not, print the error message to stderr instead. */ + else + fprintf (stderr, "%s\n", status); + } + dyn_string_delete (result); + } + + return 0; +} + +#endif /* STANDALONE_DEMANGLER */ diff --git a/demangle/cplus-dem.c b/demangle/cplus-dem.c new file mode 100644 index 000000000..56c326139 --- /dev/null +++ b/demangle/cplus-dem.c @@ -0,0 +1,5264 @@ +/* Demangler for GNU C++ + Copyright 1989, 1991, 1994, 1995, 1996, 1997, 1998, 1999, + 2000, 2001 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.uucp) + Rewritten by Fred Fish (fnf@cygnus.com) for ARM and Lucid demangling + Modified by Satish Pai (pai@apollo.hp.com) for HP demangling + +This file is part of the libiberty library. +Libiberty is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public +License as published by the Free Software Foundation; either +version 2 of the License, or (at your option) any later version. + +Libiberty is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with libiberty; see the file COPYING.LIB. 
If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* This file exports two functions; cplus_mangle_opname and cplus_demangle. + + This file imports xmalloc and xrealloc, which are like malloc and + realloc except that they generate a fatal error if there is no + available memory. */ + +/* This file lives in both GCC and libiberty. When making changes, please + try not to break either. */ + +#define __NO_STRING_INLINES + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "safe-ctype.h" +#include "vg_include.h" + +#include +#include +#include + +#ifdef HAVE_STDLIB_H +#include +#else +char * malloc (); +char * realloc (); +#endif + +#include +#include "dyn-string.h" +#undef CURRENT_DEMANGLING_STYLE +#define CURRENT_DEMANGLING_STYLE work->options + +/*#include "libiberty.h"*/ + +static char *ada_demangle PARAMS ((const char *, int)); + +#define min(X,Y) (((X) < (Y)) ? (X) : (Y)) + +/* A value at least one greater than the maximum number of characters + that will be output when using the `%d' format with `printf'. 
*/ +#define INTBUF_SIZE 32 + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0])) +#endif + +#ifndef STANDALONE +#define xstrdup(ptr) VG_(strdup)(VG_AR_DEMANGLE, ptr) +#define free(ptr) VG_(free)(VG_AR_DEMANGLE, ptr) +#define xmalloc(size) VG_(malloc)(VG_AR_DEMANGLE, size) +#define xrealloc(ptr, size) VG_(realloc)(VG_AR_DEMANGLE, ptr, size) +#define abort() vg_assert(0) +#undef strstr +#define strstr VG_(strstr) +#define sprintf VG_(sprintf) +#define strncpy VG_(strncpy) +#define strncat VG_(strncat) +#define strchr VG_(strchr) +#define strpbrk VG_(strpbrk) +#endif + +extern void fancy_abort PARAMS ((void)) ATTRIBUTE_NORETURN; + +/* In order to allow a single demangler executable to demangle strings + using various common values of CPLUS_MARKER, as well as any specific + one set at compile time, we maintain a string containing all the + commonly used ones, and check to see if the marker we are looking for + is in that string. CPLUS_MARKER is usually '$' on systems where the + assembler can deal with that. Where the assembler can't, it's usually + '.' (but on many systems '.' is used for other things). We put the + current defined CPLUS_MARKER first (which defaults to '$'), followed + by the next most common value, followed by an explicit '$' in case + the value of CPLUS_MARKER is not '$'. + + We could avoid this if we could just get g++ to tell us what the actual + cplus marker character is as part of the debug information, perhaps by + ensuring that it is the character that terminates the gcc_compiled + marker symbol (FIXME). 
*/ + +#if !defined (CPLUS_MARKER) +#define CPLUS_MARKER '$' +#endif + +enum demangling_styles current_demangling_style = auto_demangling; + +static char cplus_markers[] = { CPLUS_MARKER, '.', '$', '\0' }; + +static char char_str[2] = { '\000', '\000' }; + +/* +void +set_cplus_marker_for_demangling (ch) + int ch; +{ + cplus_markers[0] = ch; +} +*/ + +typedef struct string /* Beware: these aren't required to be */ +{ /* '\0' terminated. */ + char *b; /* pointer to start of string */ + char *p; /* pointer after last character */ + char *e; /* pointer after end of allocated space */ +} string; + +/* Stuff that is shared between sub-routines. + Using a shared structure allows cplus_demangle to be reentrant. */ + +struct work_stuff +{ + int options; + char **typevec; + char **ktypevec; + char **btypevec; + int numk; + int numb; + int ksize; + int bsize; + int ntypes; + int typevec_size; + int constructor; + int destructor; + int static_type; /* A static member function */ + int temp_start; /* index in demangled to start of template args */ + int type_quals; /* The type qualifiers. */ + int dllimported; /* Symbol imported from a PE DLL */ + char **tmpl_argvec; /* Template function arguments. */ + int ntmpl_args; /* The number of template function arguments. */ + int forgetting_types; /* Nonzero if we are not remembering the types + we see. */ + string* previous_argument; /* The last function argument demangled. */ + int nrepeats; /* The number of times to repeat the previous + argument. 
*/ +}; + +#define PRINT_ANSI_QUALIFIERS (work -> options & DMGL_ANSI) +#define PRINT_ARG_TYPES (work -> options & DMGL_PARAMS) + +static const struct optable +{ + const char *const in; + const char *const out; + const int flags; +} optable[] = { + {"nw", " new", DMGL_ANSI}, /* new (1.92, ansi) */ + {"dl", " delete", DMGL_ANSI}, /* new (1.92, ansi) */ + {"new", " new", 0}, /* old (1.91, and 1.x) */ + {"delete", " delete", 0}, /* old (1.91, and 1.x) */ + {"vn", " new []", DMGL_ANSI}, /* GNU, pending ansi */ + {"vd", " delete []", DMGL_ANSI}, /* GNU, pending ansi */ + {"as", "=", DMGL_ANSI}, /* ansi */ + {"ne", "!=", DMGL_ANSI}, /* old, ansi */ + {"eq", "==", DMGL_ANSI}, /* old, ansi */ + {"ge", ">=", DMGL_ANSI}, /* old, ansi */ + {"gt", ">", DMGL_ANSI}, /* old, ansi */ + {"le", "<=", DMGL_ANSI}, /* old, ansi */ + {"lt", "<", DMGL_ANSI}, /* old, ansi */ + {"plus", "+", 0}, /* old */ + {"pl", "+", DMGL_ANSI}, /* ansi */ + {"apl", "+=", DMGL_ANSI}, /* ansi */ + {"minus", "-", 0}, /* old */ + {"mi", "-", DMGL_ANSI}, /* ansi */ + {"ami", "-=", DMGL_ANSI}, /* ansi */ + {"mult", "*", 0}, /* old */ + {"ml", "*", DMGL_ANSI}, /* ansi */ + {"amu", "*=", DMGL_ANSI}, /* ansi (ARM/Lucid) */ + {"aml", "*=", DMGL_ANSI}, /* ansi (GNU/g++) */ + {"convert", "+", 0}, /* old (unary +) */ + {"negate", "-", 0}, /* old (unary -) */ + {"trunc_mod", "%", 0}, /* old */ + {"md", "%", DMGL_ANSI}, /* ansi */ + {"amd", "%=", DMGL_ANSI}, /* ansi */ + {"trunc_div", "/", 0}, /* old */ + {"dv", "/", DMGL_ANSI}, /* ansi */ + {"adv", "/=", DMGL_ANSI}, /* ansi */ + {"truth_andif", "&&", 0}, /* old */ + {"aa", "&&", DMGL_ANSI}, /* ansi */ + {"truth_orif", "||", 0}, /* old */ + {"oo", "||", DMGL_ANSI}, /* ansi */ + {"truth_not", "!", 0}, /* old */ + {"nt", "!", DMGL_ANSI}, /* ansi */ + {"postincrement","++", 0}, /* old */ + {"pp", "++", DMGL_ANSI}, /* ansi */ + {"postdecrement","--", 0}, /* old */ + {"mm", "--", DMGL_ANSI}, /* ansi */ + {"bit_ior", "|", 0}, /* old */ + {"or", "|", DMGL_ANSI}, /* ansi */ + 
{"aor", "|=", DMGL_ANSI}, /* ansi */ + {"bit_xor", "^", 0}, /* old */ + {"er", "^", DMGL_ANSI}, /* ansi */ + {"aer", "^=", DMGL_ANSI}, /* ansi */ + {"bit_and", "&", 0}, /* old */ + {"ad", "&", DMGL_ANSI}, /* ansi */ + {"aad", "&=", DMGL_ANSI}, /* ansi */ + {"bit_not", "~", 0}, /* old */ + {"co", "~", DMGL_ANSI}, /* ansi */ + {"call", "()", 0}, /* old */ + {"cl", "()", DMGL_ANSI}, /* ansi */ + {"alshift", "<<", 0}, /* old */ + {"ls", "<<", DMGL_ANSI}, /* ansi */ + {"als", "<<=", DMGL_ANSI}, /* ansi */ + {"arshift", ">>", 0}, /* old */ + {"rs", ">>", DMGL_ANSI}, /* ansi */ + {"ars", ">>=", DMGL_ANSI}, /* ansi */ + {"component", "->", 0}, /* old */ + {"pt", "->", DMGL_ANSI}, /* ansi; Lucid C++ form */ + {"rf", "->", DMGL_ANSI}, /* ansi; ARM/GNU form */ + {"indirect", "*", 0}, /* old */ + {"method_call", "->()", 0}, /* old */ + {"addr", "&", 0}, /* old (unary &) */ + {"array", "[]", 0}, /* old */ + {"vc", "[]", DMGL_ANSI}, /* ansi */ + {"compound", ", ", 0}, /* old */ + {"cm", ", ", DMGL_ANSI}, /* ansi */ + {"cond", "?:", 0}, /* old */ + {"cn", "?:", DMGL_ANSI}, /* pseudo-ansi */ + {"max", ">?", 0}, /* old */ + {"mx", ">?", DMGL_ANSI}, /* pseudo-ansi */ + {"min", "*", DMGL_ANSI}, /* ansi */ + {"sz", "sizeof ", DMGL_ANSI} /* pseudo-ansi */ +}; + +/* These values are used to indicate the various type varieties. + They are all non-zero so that they can be used as `success' + values. 
*/ +typedef enum type_kind_t +{ + tk_none, + tk_pointer, + tk_reference, + tk_integral, + tk_bool, + tk_char, + tk_real +} type_kind_t; + +const struct demangler_engine libiberty_demanglers[] = +{ + { + NO_DEMANGLING_STYLE_STRING, + no_demangling, + "Demangling disabled" + } + , + { + AUTO_DEMANGLING_STYLE_STRING, + auto_demangling, + "Automatic selection based on executable" + } + , + { + GNU_DEMANGLING_STYLE_STRING, + gnu_demangling, + "GNU (g++) style demangling" + } + , + { + LUCID_DEMANGLING_STYLE_STRING, + lucid_demangling, + "Lucid (lcc) style demangling" + } + , + { + ARM_DEMANGLING_STYLE_STRING, + arm_demangling, + "ARM style demangling" + } + , + { + HP_DEMANGLING_STYLE_STRING, + hp_demangling, + "HP (aCC) style demangling" + } + , + { + EDG_DEMANGLING_STYLE_STRING, + edg_demangling, + "EDG style demangling" + } + , + { + GNU_V3_DEMANGLING_STYLE_STRING, + gnu_v3_demangling, + "GNU (g++) V3 ABI-style demangling" + } + , + { + JAVA_DEMANGLING_STYLE_STRING, + java_demangling, + "Java style demangling" + } + , + { + GNAT_DEMANGLING_STYLE_STRING, + gnat_demangling, + "GNAT style demangling" + } + , + { + NULL, unknown_demangling, NULL + } +}; + +#define STRING_EMPTY(str) ((str) -> b == (str) -> p) +#define PREPEND_BLANK(str) {if (!STRING_EMPTY(str)) \ + string_prepend(str, " ");} +#define APPEND_BLANK(str) {if (!STRING_EMPTY(str)) \ + string_append(str, " ");} +#define LEN_STRING(str) ( (STRING_EMPTY(str))?0:((str)->p - (str)->b)) + +/* The scope separator appropriate for the language being demangled. */ + +#define SCOPE_STRING(work) ((work->options & DMGL_JAVA) ? "." 
: "::") + +#define ARM_VTABLE_STRING "__vtbl__" /* Lucid/ARM virtual table prefix */ +#define ARM_VTABLE_STRLEN 8 /* strlen (ARM_VTABLE_STRING) */ + +/* Prototypes for local functions */ + +static void +delete_work_stuff PARAMS ((struct work_stuff *)); + +static void +delete_non_B_K_work_stuff PARAMS ((struct work_stuff *)); + +static char * +mop_up PARAMS ((struct work_stuff *, string *, int)); + +static void +squangle_mop_up PARAMS ((struct work_stuff *)); + +static void +work_stuff_copy_to_from PARAMS ((struct work_stuff *, struct work_stuff *)); + +#if 0 +static int +demangle_method_args PARAMS ((struct work_stuff *, const char **, string *)); +#endif + +static char * +internal_cplus_demangle PARAMS ((struct work_stuff *, const char *)); + +static int +demangle_template_template_parm PARAMS ((struct work_stuff *work, + const char **, string *)); + +static int +demangle_template PARAMS ((struct work_stuff *work, const char **, string *, + string *, int, int)); + +static int +arm_pt PARAMS ((struct work_stuff *, const char *, int, const char **, + const char **)); + +static int +demangle_class_name PARAMS ((struct work_stuff *, const char **, string *)); + +static int +demangle_qualified PARAMS ((struct work_stuff *, const char **, string *, + int, int)); + +static int +demangle_class PARAMS ((struct work_stuff *, const char **, string *)); + +static int +demangle_fund_type PARAMS ((struct work_stuff *, const char **, string *)); + +static int +demangle_signature PARAMS ((struct work_stuff *, const char **, string *)); + +static int +demangle_prefix PARAMS ((struct work_stuff *, const char **, string *)); + +static int +gnu_special PARAMS ((struct work_stuff *, const char **, string *)); + +static int +arm_special PARAMS ((const char **, string *)); + +static void +string_need PARAMS ((string *, int)); + +static void +string_delete PARAMS ((string *)); + +static void +string_init PARAMS ((string *)); + +static void +string_clear PARAMS ((string *)); + +#if 0 
+static int +string_empty PARAMS ((string *)); +#endif + +static void +string_append PARAMS ((string *, const char *)); + +static void +string_appends PARAMS ((string *, string *)); + +static void +string_appendn PARAMS ((string *, const char *, int)); + +static void +string_prepend PARAMS ((string *, const char *)); + +static void +string_prependn PARAMS ((string *, const char *, int)); + +static void +string_append_template_idx PARAMS ((string *, int)); + +static int +get_count PARAMS ((const char **, int *)); + +static int +consume_count PARAMS ((const char **)); + +static int +consume_count_with_underscores PARAMS ((const char**)); + +static int +demangle_args PARAMS ((struct work_stuff *, const char **, string *)); + +static int +demangle_nested_args PARAMS ((struct work_stuff*, const char**, string*)); + +static int +do_type PARAMS ((struct work_stuff *, const char **, string *)); + +static int +do_arg PARAMS ((struct work_stuff *, const char **, string *)); + +static void +demangle_function_name PARAMS ((struct work_stuff *, const char **, string *, + const char *)); + +static int +iterate_demangle_function PARAMS ((struct work_stuff *, + const char **, string *, const char *)); + +static void +remember_type PARAMS ((struct work_stuff *, const char *, int)); + +static void +remember_Btype PARAMS ((struct work_stuff *, const char *, int, int)); + +static int +register_Btype PARAMS ((struct work_stuff *)); + +static void +remember_Ktype PARAMS ((struct work_stuff *, const char *, int)); + +static void +forget_types PARAMS ((struct work_stuff *)); + +static void +forget_B_and_K_types PARAMS ((struct work_stuff *)); + +static void +string_prepends PARAMS ((string *, string *)); + +static int +demangle_template_value_parm PARAMS ((struct work_stuff*, const char**, + string*, type_kind_t)); + +static int +do_hpacc_template_const_value PARAMS ((struct work_stuff *, const char **, string *)); + +static int +do_hpacc_template_literal PARAMS ((struct work_stuff *, 
const char **, string *)); + +static int +snarf_numeric_literal PARAMS ((const char **, string *)); + +/* There is a TYPE_QUAL value for each type qualifier. They can be + combined by bitwise-or to form the complete set of qualifiers for a + type. */ + +#define TYPE_UNQUALIFIED 0x0 +#define TYPE_QUAL_CONST 0x1 +#define TYPE_QUAL_VOLATILE 0x2 +#define TYPE_QUAL_RESTRICT 0x4 + +static int +code_for_qualifier PARAMS ((int)); + +static const char* +qualifier_string PARAMS ((int)); + +static const char* +demangle_qualifier PARAMS ((int)); + +static int +demangle_expression PARAMS ((struct work_stuff *, const char **, string *, + type_kind_t)); + +static int +demangle_integral_value PARAMS ((struct work_stuff *, const char **, + string *)); + +static int +demangle_real_value PARAMS ((struct work_stuff *, const char **, string *)); + +static void +demangle_arm_hp_template PARAMS ((struct work_stuff *, const char **, int, + string *)); + +static void +recursively_demangle PARAMS ((struct work_stuff *, const char **, string *, + int)); + +static void +grow_vect PARAMS ((void **, size_t *, size_t, int)); + +/* Translate count to integer, consuming tokens in the process. + Conversion terminates on the first non-digit character. + + Trying to consume something that isn't a count results in no + consumption of input and a return of -1. + + Overflow consumes the rest of the digits, and returns -1. */ + +static int +consume_count (type) + const char **type; +{ + int count = 0; + + if (! ISDIGIT ((unsigned char)**type)) + return -1; + + while (ISDIGIT ((unsigned char)**type)) + { + count *= 10; + + /* Check for overflow. + We assume that count is represented using two's-complement; + no power of two is divisible by ten, so if an overflow occurs + when multiplying by ten, the result will not be a multiple of + ten. 
*/
      if ((count % 10) != 0)
	{
	  /* Overflow: drain the remaining digits so the caller's cursor
	     ends up past the whole (bad) count, then fail.  */
	  while (ISDIGIT ((unsigned char) **type))
	    (*type)++;
	  return -1;
	}

      count += **type - '0';
      (*type)++;
    }

  /* A negative total means the final addition overflowed; normalize
     to the single failure value.  */
  if (count < 0)
    count = -1;

  return (count);
}


/* Like consume_count, but for counts that are preceded and followed
   by '_' if they are greater than 10.  Also, -1 is returned for
   failure, since 0 can be a valid value.  */

static int
consume_count_with_underscores (mangled)
     const char **mangled;
{
  int idx;

  if (**mangled == '_')
    {
      /* Multi-digit form: "_<digits>_".  */
      (*mangled)++;
      if (!ISDIGIT ((unsigned char)**mangled))
	return -1;

      idx = consume_count (mangled);
      if (**mangled != '_')
	/* The trailing underscore was missing. */
	return -1;

      (*mangled)++;
    }
  else
    {
      /* Single-digit form: the count is encoded directly.  */
      if (**mangled < '0' || **mangled > '9')
	return -1;

      idx = **mangled - '0';
      (*mangled)++;
    }

  return idx;
}

/* C is the code for a type-qualifier.  Return the TYPE_QUAL
   corresponding to this qualifier.  */

static int
code_for_qualifier (c)
  int c;
{
  switch (c)
    {
    case 'C':
      return TYPE_QUAL_CONST;

    case 'V':
      return TYPE_QUAL_VOLATILE;

    case 'u':
      return TYPE_QUAL_RESTRICT;

    default:
      break;
    }

  /* C was an invalid qualifier.  */
  abort ();
}

/* Return the string corresponding to the qualifiers given by
   TYPE_QUALS.
 */

static const char*
qualifier_string (type_quals)
     int type_quals;
{
  /* Every legal combination of the three TYPE_QUAL_* bits is spelled
     out explicitly; anything else is a caller bug.  */
  switch (type_quals)
    {
    case TYPE_UNQUALIFIED:
      return "";

    case TYPE_QUAL_CONST:
      return "const";

    case TYPE_QUAL_VOLATILE:
      return "volatile";

    case TYPE_QUAL_RESTRICT:
      return "__restrict";

    case TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE:
      return "const volatile";

    case TYPE_QUAL_CONST | TYPE_QUAL_RESTRICT:
      return "const __restrict";

    case TYPE_QUAL_VOLATILE | TYPE_QUAL_RESTRICT:
      return "volatile __restrict";

    case TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE | TYPE_QUAL_RESTRICT:
      return "const volatile __restrict";

    default:
      break;
    }

  /* TYPE_QUALS was an invalid qualifier set.  */
  abort ();
}

/* C is the code for a type-qualifier.  Return the string
   corresponding to this qualifier.  This function should only be
   called with a valid qualifier code.  */

static const char*
demangle_qualifier (c)
  int c;
{
  return qualifier_string (code_for_qualifier (c));
}

#if 0
/* Demangle an encoded operator name (e.g. "__pl" or "op$plus") into
   RESULT as "operator+".  Compiled out in this import (dead code).  */
int
cplus_demangle_opname (opname, result, options)
     const char *opname;
     char *result;
     int options;
{
  int len, len1, ret;
  string type;
  struct work_stuff work[1];
  const char *tem;

  len = strlen(opname);
  result[0] = '\0';
  ret = 0;
  memset ((char *) work, 0, sizeof (work));
  work->options = options;

  if (opname[0] == '_' && opname[1] == '_'
      && opname[2] == 'o' && opname[3] == 'p')
    {
      /* ANSI.  */
      /* type conversion operator.  */
      tem = opname + 4;
      if (do_type (work, &tem, &type))
	{
	  strcat (result, "operator ");
	  strncat (result, type.b, type.p - type.b);
	  string_delete (&type);
	  ret = 1;
	}
    }
  else if (opname[0] == '_' && opname[1] == '_'
	   && ISLOWER((unsigned char)opname[2])
	   && ISLOWER((unsigned char)opname[3]))
    {
      if (opname[4] == '\0')
	{
	  /* Operator.  */
	  size_t i;
	  for (i = 0; i < ARRAY_SIZE (optable); i++)
	    {
	      if (strlen (optable[i].in) == 2
		  && memcmp (optable[i].in, opname + 2, 2) == 0)
		{
		  strcat (result, "operator");
		  strcat (result, optable[i].out);
		  ret = 1;
		  break;
		}
	    }
	}
      else
	{
	  if (opname[2] == 'a' && opname[5] == '\0')
	    {
	      /* Assignment.  */
	      size_t i;
	      for (i = 0; i < ARRAY_SIZE (optable); i++)
		{
		  if (strlen (optable[i].in) == 3
		      && memcmp (optable[i].in, opname + 2, 3) == 0)
		    {
		      strcat (result, "operator");
		      strcat (result, optable[i].out);
		      ret = 1;
		      break;
		    }
		}
	    }
	}
    }
  else if (len >= 3
	   && opname[0] == 'o'
	   && opname[1] == 'p'
	   && strchr (cplus_markers, opname[2]) != NULL)
    {
      /* see if it's an assignment expression */
      if (len >= 10 /* op$assign_ */
	  && memcmp (opname + 3, "assign_", 7) == 0)
	{
	  size_t i;
	  for (i = 0; i < ARRAY_SIZE (optable); i++)
	    {
	      len1 = len - 10;
	      if ((int) strlen (optable[i].in) == len1
		  && memcmp (optable[i].in, opname + 10, len1) == 0)
		{
		  strcat (result, "operator");
		  strcat (result, optable[i].out);
		  strcat (result, "=");
		  ret = 1;
		  break;
		}
	    }
	}
      else
	{
	  size_t i;
	  for (i = 0; i < ARRAY_SIZE (optable); i++)
	    {
	      len1 = len - 3;
	      if ((int) strlen (optable[i].in) == len1
		  && memcmp (optable[i].in, opname + 3, len1) == 0)
		{
		  strcat (result, "operator");
		  strcat (result, optable[i].out);
		  ret = 1;
		  break;
		}
	    }
	}
    }
  else if (len >= 5 && memcmp (opname, "type", 4) == 0
	   && strchr (cplus_markers, opname[4]) != NULL)
    {
      /* type conversion operator */
      tem = opname + 5;
      if (do_type (work, &tem, &type))
	{
	  strcat (result, "operator ");
	  strncat (result, type.b, type.p - type.b);
	  string_delete (&type);
	  ret = 1;
	}
    }
  squangle_mop_up (work);
  return ret;

}
#endif /* 0 */

/* Takes operator name as e.g. "++" and returns mangled
   operator name (e.g. "postincrement_expr"), or NULL if not found.
+ + If OPTIONS & DMGL_ANSI == 1, return the ANSI name; + if OPTIONS & DMGL_ANSI == 0, return the old GNU name. */ + +/* +const char * +cplus_mangle_opname (opname, options) + const char *opname; + int options; +{ + size_t i; + int len; + + len = strlen (opname); + for (i = 0; i < ARRAY_SIZE (optable); i++) + { + if ((int) strlen (optable[i].out) == len + && (options & DMGL_ANSI) == (optable[i].flags & DMGL_ANSI) + && memcmp (optable[i].out, opname, len) == 0) + return optable[i].in; + } + return (0); +} +*/ + +/* Add a routine to set the demangling style to be sure it is valid and + allow for any demangler initialization that maybe necessary. */ + +/* +enum demangling_styles +cplus_demangle_set_style (style) + enum demangling_styles style; +{ + const struct demangler_engine *demangler = libiberty_demanglers; + + for (; demangler->demangling_style != unknown_demangling; ++demangler) + if (style == demangler->demangling_style) + { + current_demangling_style = style; + return current_demangling_style; + } + + return unknown_demangling; +} +*/ + +/* Do string name to style translation */ + +/* +enum demangling_styles +cplus_demangle_name_to_style (name) + const char *name; +{ + const struct demangler_engine *demangler = libiberty_demanglers; + + for (; demangler->demangling_style != unknown_demangling; ++demangler) + if (strcmp (name, demangler->demangling_style_name) == 0) + return demangler->demangling_style; + + return unknown_demangling; +} +*/ + +/* char *cplus_demangle (const char *mangled, int options) + + If MANGLED is a mangled function name produced by GNU C++, then + a pointer to a @code{malloc}ed string giving a C++ representation + of the name will be returned; otherwise NULL will be returned. + It is the caller's responsibility to free the string which + is returned. + + The OPTIONS arg may contain one or more of the following bits: + + DMGL_ANSI ANSI qualifiers such as `const' and `void' are + included. + DMGL_PARAMS Function parameters are included. 

   For example,

   cplus_demangle ("foo__1Ai", DMGL_PARAMS)             => "A::foo(int)"
   cplus_demangle ("foo__1Ai", DMGL_PARAMS | DMGL_ANSI) => "A::foo(int)"
   cplus_demangle ("foo__1Ai", 0)                       => "A::foo"

   cplus_demangle ("foo__1Afe", DMGL_PARAMS)            => "A::foo(float,...)"
   cplus_demangle ("foo__1Afe", DMGL_PARAMS | DMGL_ANSI)=> "A::foo(float,...)"
   cplus_demangle ("foo__1Afe", 0)                      => "A::foo"

   Note that any leading underscores, or other such characters prepended by
   the compilation system, are presumed to have already been stripped from
   MANGLED.  */

char *
VG_(cplus_demangle) (mangled, options)
     const char *mangled;
     int options;
{
  char *ret;
  struct work_stuff work[1];

  if (current_demangling_style == no_demangling)
    return xstrdup (mangled);

  memset ((char *) work, 0, sizeof (work));
  work->options = options;
  /* If the caller selected no style bits, fall back to the global
     current_demangling_style.  */
  if ((work->options & DMGL_STYLE_MASK) == 0)
    work->options |= (int) current_demangling_style & DMGL_STYLE_MASK;

  /* The V3 ABI demangling is implemented elsewhere.  */
  if (GNU_V3_DEMANGLING || AUTO_DEMANGLING)
    {
      ret = VG_(cplus_demangle_v3) (mangled/*, work->options*/);
      /* In AUTO mode a V3 failure falls through to the old-style
	 demangler below; in strict V3 mode it is final.  */
      if (ret || GNU_V3_DEMANGLING)
	return ret;
    }

  if (JAVA_DEMANGLING)
    {
      ret = VG_(java_demangle_v3) (mangled);
      if (ret)
	return ret;
    }

  if (GNAT_DEMANGLING)
    return ada_demangle(mangled,options);

  ret = internal_cplus_demangle (work, mangled);
  squangle_mop_up (work);
  return (ret);
}


/* Assuming *OLD_VECT points to an array of *SIZE objects of size
   ELEMENT_SIZE, grow it to contain at least MIN_SIZE objects,
   updating *OLD_VECT and *SIZE as necessary.  */

static void
grow_vect (old_vect, size, min_size, element_size)
     void **old_vect;
     size_t *size;
     size_t min_size;
     int element_size;
{
  if (*size < min_size)
    {
      /* At least double the capacity, for amortized linear growth.  */
      *size *= 2;
      if (*size < min_size)
	*size = min_size;
      *old_vect = xrealloc (*old_vect, *size * element_size);
    }
}

/* Demangle ada names:
   1. Discard final __{DIGIT}+ or ${DIGIT}+
   2.
Convert other instances of embedded "__" to `.'. + 3. Discard leading _ada_. + 4. Remove everything after first ___ if it is followed by 'X'. + 5. Put symbols that should be suppressed in <...> brackets. + The resulting string is valid until the next call of ada_demangle. */ + +static char * +ada_demangle (mangled, option) + const char *mangled; + int option ATTRIBUTE_UNUSED; +{ + int i, j; + int len0; + const char* p; + char *demangled = NULL; + int at_start_name; + int changed; + char *demangling_buffer = NULL; + size_t demangling_buffer_size = 0; + + changed = 0; + + if (strncmp (mangled, "_ada_", 5) == 0) + { + mangled += 5; + changed = 1; + } + + if (mangled[0] == '_' || mangled[0] == '<') + goto Suppress; + + p = strstr (mangled, "___"); + if (p == NULL) + len0 = strlen (mangled); + else + { + if (p[3] == 'X') + { + len0 = p - mangled; + changed = 1; + } + else + goto Suppress; + } + + /* Make demangled big enough for possible expansion by operator name. */ + grow_vect ((void **) &(demangling_buffer), + &demangling_buffer_size, 2 * len0 + 1, + sizeof (char)); + demangled = demangling_buffer; + + if (ISDIGIT ((unsigned char) mangled[len0 - 1])) { + for (i = len0 - 2; i >= 0 && ISDIGIT ((unsigned char) mangled[i]); i -= 1) + ; + if (i > 1 && mangled[i] == '_' && mangled[i - 1] == '_') + { + len0 = i - 1; + changed = 1; + } + else if (mangled[i] == '$') + { + len0 = i; + changed = 1; + } + } + + for (i = 0, j = 0; i < len0 && ! ISALPHA ((unsigned char)mangled[i]); + i += 1, j += 1) + demangled[j] = mangled[i]; + + at_start_name = 1; + while (i < len0) + { + at_start_name = 0; + + if (i < len0 - 2 && mangled[i] == '_' && mangled[i + 1] == '_') + { + demangled[j] = '.'; + changed = at_start_name = 1; + i += 2; j += 1; + } + else + { + demangled[j] = mangled[i]; + i += 1; j += 1; + } + } + demangled[j] = '\000'; + + for (i = 0; demangled[i] != '\0'; i += 1) + if (ISUPPER ((unsigned char)demangled[i]) || demangled[i] == ' ') + goto Suppress; + + if (! 
changed) + return NULL; + else + return demangled; + + Suppress: + grow_vect ((void **) &(demangling_buffer), + &demangling_buffer_size, strlen (mangled) + 3, + sizeof (char)); + demangled = demangling_buffer; + if (mangled[0] == '<') + strcpy (demangled, mangled); + else + sprintf (demangled, "<%s>", mangled); + + return demangled; +} + +/* This function performs most of what cplus_demangle use to do, but + to be able to demangle a name with a B, K or n code, we need to + have a longer term memory of what types have been seen. The original + now intializes and cleans up the squangle code info, while internal + calls go directly to this routine to avoid resetting that info. */ + +static char * +internal_cplus_demangle (work, mangled) + struct work_stuff *work; + const char *mangled; +{ + + string decl; + int success = 0; + char *demangled = NULL; + int s1, s2, s3, s4; + s1 = work->constructor; + s2 = work->destructor; + s3 = work->static_type; + s4 = work->type_quals; + work->constructor = work->destructor = 0; + work->type_quals = TYPE_UNQUALIFIED; + work->dllimported = 0; + + if ((mangled != NULL) && (*mangled != '\0')) + { + string_init (&decl); + + /* First check to see if gnu style demangling is active and if the + string to be demangled contains a CPLUS_MARKER. If so, attempt to + recognize one of the gnu special forms rather than looking for a + standard prefix. In particular, don't worry about whether there + is a "__" string in the mangled string. Consider "_$_5__foo" for + example. 
*/

      if ((AUTO_DEMANGLING || GNU_DEMANGLING))
	{
	  success = gnu_special (work, &mangled, &decl);
	}
      if (!success)
	{
	  success = demangle_prefix (work, &mangled, &decl);
	}
      if (success && (*mangled != '\0'))
	{
	  success = demangle_signature (work, &mangled, &decl);
	}
      /* constructor/destructor == 2 and dllimported == 1 are special
	 markers set while demangling the prefix; render them as a
	 human-readable annotation in front of the declaration.  */
      if (work->constructor == 2)
        {
          string_prepend (&decl, "global constructors keyed to ");
          work->constructor = 0;
        }
      else if (work->destructor == 2)
        {
          string_prepend (&decl, "global destructors keyed to ");
          work->destructor = 0;
        }
      else if (work->dllimported == 1)
        {
          string_prepend (&decl, "import stub for ");
          work->dllimported = 0;
        }
      demangled = mop_up (work, &decl, success);
    }
  /* Restore the caller's state flags saved on entry, so recursive
     invocations do not clobber the outer demangling in progress.  */
  work->constructor = s1;
  work->destructor = s2;
  work->static_type = s3;
  work->type_quals = s4;
  return demangled;
}


/* Clear out and squangling related storage */
static void
squangle_mop_up (work)
     struct work_stuff *work;
{
  /* clean up the B and K type mangling types. */
  forget_B_and_K_types (work);
  if (work -> btypevec != NULL)
    {
      free ((char *) work -> btypevec);
    }
  if (work -> ktypevec != NULL)
    {
      free ((char *) work -> ktypevec);
    }
}


/* Copy the work state and storage.  */

static void
work_stuff_copy_to_from (to, from)
     struct work_stuff *to;
     struct work_stuff *from;
{
  int i;

  delete_work_stuff (to);

  /* Shallow-copy scalars.  */
  memcpy (to, from, sizeof (*to));

  /* Deep-copy dynamic storage.
 */
  /* NOTE(review): each vector below is only reallocated when its size
     field is nonzero; this assumes a zero size always implies a zero
     element count (ntypes/numk/numb/ntmpl_args) — verify against the
     producers of work_stuff.  */
  if (from->typevec_size)
    to->typevec
      = (char **) xmalloc (from->typevec_size * sizeof (to->typevec[0]));

  for (i = 0; i < from->ntypes; i++)
    {
      int len = strlen (from->typevec[i]) + 1;

      to->typevec[i] = xmalloc (len);
      memcpy (to->typevec[i], from->typevec[i], len);
    }

  if (from->ksize)
    to->ktypevec
      = (char **) xmalloc (from->ksize * sizeof (to->ktypevec[0]));

  for (i = 0; i < from->numk; i++)
    {
      int len = strlen (from->ktypevec[i]) + 1;

      to->ktypevec[i] = xmalloc (len);
      memcpy (to->ktypevec[i], from->ktypevec[i], len);
    }

  if (from->bsize)
    to->btypevec
      = (char **) xmalloc (from->bsize * sizeof (to->btypevec[0]));

  for (i = 0; i < from->numb; i++)
    {
      int len = strlen (from->btypevec[i]) + 1;

      to->btypevec[i] = xmalloc (len);
      memcpy (to->btypevec[i], from->btypevec[i], len);
    }

  if (from->ntmpl_args)
    to->tmpl_argvec
      = xmalloc (from->ntmpl_args * sizeof (to->tmpl_argvec[0]));

  for (i = 0; i < from->ntmpl_args; i++)
    {
      int len = strlen (from->tmpl_argvec[i]) + 1;

      to->tmpl_argvec[i] = xmalloc (len);
      memcpy (to->tmpl_argvec[i], from->tmpl_argvec[i], len);
    }

  if (from->previous_argument)
    {
      to->previous_argument = (string*) xmalloc (sizeof (string));
      string_init (to->previous_argument);
      string_appends (to->previous_argument, from->previous_argument);
    }
}


/* Delete dynamic stuff in work_stuff that is not to be re-used.  */

static void
delete_non_B_K_work_stuff (work)
     struct work_stuff *work;
{
  /* Discard the remembered types, if any.
 */

  forget_types (work);
  if (work -> typevec != NULL)
    {
      free ((char *) work -> typevec);
      work -> typevec = NULL;
      work -> typevec_size = 0;
    }
  if (work->tmpl_argvec)
    {
      int i;

      for (i = 0; i < work->ntmpl_args; i++)
	if (work->tmpl_argvec[i])
	  free ((char*) work->tmpl_argvec[i]);

      free ((char*) work->tmpl_argvec);
      work->tmpl_argvec = NULL;
    }
  if (work->previous_argument)
    {
      string_delete (work->previous_argument);
      free ((char*) work->previous_argument);
      work->previous_argument = NULL;
    }
}


/* Delete all dynamic storage in work_stuff.  */
static void
delete_work_stuff (work)
     struct work_stuff *work;
{
  delete_non_B_K_work_stuff (work);
  squangle_mop_up (work);
}


/* Clear out any mangled storage */

static char *
mop_up (work, declp, success)
     struct work_stuff *work;
     string *declp;
     int success;
{
  char *demangled = NULL;

  delete_non_B_K_work_stuff (work);

  /* If demangling was successful, ensure that the demangled string is null
     terminated and return it.  Otherwise, free the demangling decl.  */

  if (!success)
    {
      string_delete (declp);
    }
  else
    {
      /* Ownership of the string buffer transfers to the caller here;
	 declp itself is not deleted.  */
      string_appendn (declp, "", 1);
      demangled = declp->b;
    }
  return (demangled);
}

/*

LOCAL FUNCTION

	demangle_signature -- demangle the signature part of a mangled name

SYNOPSIS

	static int
	demangle_signature (struct work_stuff *work, const char **mangled,
			    string *declp);

DESCRIPTION

	Consume and demangle the signature portion of the mangled name.

	DECLP is the string where demangled output is being built.  At
	entry it contains the demangled root name from the mangled name
	prefix.  I.E. either a demangled operator name or the root function
	name.  In some special cases, it may contain nothing.

	*MANGLED points to the current unconsumed location in the mangled
	name.  As tokens are consumed and demangling is performed, the
	pointer is updated to continuously point at the next token to
	be consumed.

   Demangling GNU style mangled names is nasty because there is no
   explicit token that marks the start of the outermost function
   argument list.  */

static int
demangle_signature (work, mangled, declp)
     struct work_stuff *work;
     const char **mangled;
     string *declp;
{
  int success = 1;
  int func_done = 0;
  int expect_func = 0;
  int expect_return_type = 0;
  const char *oldmangled = NULL;
  string trawname;
  string tname;

  /* Token-dispatch loop: each iteration consumes one signature
     element; oldmangled marks the start of spans that must be
     remembered for back references.  */
  while (success && (**mangled != '\0'))
    {
      switch (**mangled)
	{
	case 'Q':
	  oldmangled = *mangled;
	  success = demangle_qualified (work, mangled, declp, 1, 0);
	  if (success)
	    remember_type (work, oldmangled, *mangled - oldmangled);
	  if (AUTO_DEMANGLING || GNU_DEMANGLING)
	    expect_func = 1;
	  oldmangled = NULL;
	  break;

	case 'K':
	  oldmangled = *mangled;
	  success = demangle_qualified (work, mangled, declp, 1, 0);
	  if (AUTO_DEMANGLING || GNU_DEMANGLING)
	    {
	      expect_func = 1;
	    }
	  oldmangled = NULL;
	  break;

	case 'S':
	  /* Static member function */
	  if (oldmangled == NULL)
	    {
	      oldmangled = *mangled;
	    }
	  (*mangled)++;
	  work -> static_type = 1;
	  break;

	case 'C':
	case 'V':
	case 'u':
	  work->type_quals |= code_for_qualifier (**mangled);

	  /* a qualified member function */
	  if (oldmangled == NULL)
	    oldmangled = *mangled;
	  (*mangled)++;
	  break;

	case 'L':
	  /* Local class name follows after "Lnnn_" */
	  if (HP_DEMANGLING)
	    {
	      while (**mangled && (**mangled != '_'))
		(*mangled)++;
	      if (!**mangled)
		success = 0;
	      else
		(*mangled)++;
	    }
	  else
	    success = 0;
	  break;

	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	  if (oldmangled == NULL)
	    {
	      oldmangled = *mangled;
	    }
	  work->temp_start = -1; /* uppermost call to demangle_class */
	  success = demangle_class (work, mangled, declp);
	  if (success)
	    {
	      remember_type (work, oldmangled, *mangled - oldmangled);
	    }
	  if (AUTO_DEMANGLING || GNU_DEMANGLING || EDG_DEMANGLING)
	    {
	      /* EDG and others will have the "F", so we let the loop cycle
		 if we are looking at one. */
	      if (**mangled != 'F')
		expect_func = 1;
	    }
	  oldmangled = NULL;
	  break;

	case 'B':
	  {
	    string s;
	    success = do_type (work, mangled, &s);
	    if (success)
	      {
		string_append (&s, SCOPE_STRING (work));
		string_prepends (declp, &s);
	      }
	    oldmangled = NULL;
	    expect_func = 1;
	  }
	  break;

	case 'F':
	  /* Function */
	  /* ARM/HP style demangling includes a specific 'F' character after
	     the class name.  For GNU style, it is just implied.  So we can
	     safely just consume any 'F' at this point and be compatible
	     with either style.  */

	  oldmangled = NULL;
	  func_done = 1;
	  (*mangled)++;

	  /* For lucid/ARM/HP style we have to forget any types we might
	     have remembered up to this point, since they were not argument
	     types.  GNU style considers all types seen as available for
	     back references.  See comment in demangle_args() */

	  if (LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING)
	    {
	      forget_types (work);
	    }
	  success = demangle_args (work, mangled, declp);
	  /* After picking off the function args, we expect to either
	     find the function return type (preceded by an '_') or the
	     end of the string. */
	  if (success && (AUTO_DEMANGLING || EDG_DEMANGLING) && **mangled == '_')
	    {
	      ++(*mangled);
	      /* At this level, we do not care about the return type. */
	      success = do_type (work, mangled, &tname);
	      string_delete (&tname);
	    }

	  break;

	case 't':
	  /* G++ Template */
	  string_init(&trawname);
	  string_init(&tname);
	  if (oldmangled == NULL)
	    {
	      oldmangled = *mangled;
	    }
	  success = demangle_template (work, mangled, &tname,
				       &trawname, 1, 1);
	  if (success)
	    {
	      remember_type (work, oldmangled, *mangled - oldmangled);
	    }
	  string_append (&tname, SCOPE_STRING (work));

	  string_prepends(declp, &tname);
	  /* The low bit of constructor/destructor flags a pending
	     ctor/dtor name that must be completed with the raw
	     (parameterless) template name.  */
	  if (work -> destructor & 1)
	    {
	      string_prepend (&trawname, "~");
	      string_appends (declp, &trawname);
	      work->destructor -= 1;
	    }
	  if ((work->constructor & 1) || (work->destructor & 1))
	    {
	      string_appends (declp, &trawname);
	      work->constructor -= 1;
	    }
	  string_delete(&trawname);
	  string_delete(&tname);
	  oldmangled = NULL;
	  expect_func = 1;
	  break;

	case '_':
	  if ((AUTO_DEMANGLING || GNU_DEMANGLING) && expect_return_type)
	    {
	      /* Read the return type. */
	      string return_type;
	      string_init (&return_type);

	      (*mangled)++;
	      success = do_type (work, mangled, &return_type);
	      APPEND_BLANK (&return_type);

	      string_prepends (declp, &return_type);
	      string_delete (&return_type);
	      break;
	    }
	  else
	    /* At the outermost level, we cannot have a return type specified,
	       so if we run into another '_' at this point we are dealing with
	       a mangled name that is either bogus, or has been mangled by
	       some algorithm we don't know how to deal with.  So just
	       reject the entire demangling.  */
	    /* However, "_nnn" is an expected suffix for alternate entry point
	       numbered nnn for a function, with HP aCC, so skip over that
	       without reporting failure. pai/1997-09-04 */
	    if (HP_DEMANGLING)
	      {
		(*mangled)++;
		while (**mangled && ISDIGIT ((unsigned char)**mangled))
		  (*mangled)++;
	      }
	    else
	      success = 0;
	  break;

	case 'H':
	  if (AUTO_DEMANGLING || GNU_DEMANGLING)
	    {
	      /* A G++ template function.  Read the template arguments. */
	      success = demangle_template (work, mangled, declp, 0, 0,
					   0);
	      if (!(work->constructor & 1))
		expect_return_type = 1;
	      (*mangled)++;
	      break;
	    }
	  else
	    /* fall through */
	    {;}

	default:
	  if (AUTO_DEMANGLING || GNU_DEMANGLING)
	    {
	      /* Assume we have stumbled onto the first outermost function
		 argument token, and start processing args. */
	      func_done = 1;
	      success = demangle_args (work, mangled, declp);
	    }
	  else
	    {
	      /* Non-GNU demanglers use a specific token to mark the start
		 of the outermost function argument tokens.  Typically 'F',
		 for ARM/HP-demangling, for example.  So if we find something
		 we are not prepared for, it must be an error. */
	      success = 0;
	    }
	  break;
	}
      /*
	if (AUTO_DEMANGLING || GNU_DEMANGLING)
	*/
      {
	if (success && expect_func)
	  {
	    func_done = 1;
	    if (LUCID_DEMANGLING || ARM_DEMANGLING || EDG_DEMANGLING)
	      {
		forget_types (work);
	      }
	    success = demangle_args (work, mangled, declp);
	    /* Since template include the mangling of their return types,
	       we must set expect_func to 0 so that we don't try do
	       demangle more arguments the next time we get here.  */
	    expect_func = 0;
	  }
      }
    }
  if (success && !func_done)
    {
      if (AUTO_DEMANGLING || GNU_DEMANGLING)
	{
	  /* With GNU style demangling, bar__3foo is 'foo::bar(void)', and
	     bar__3fooi is 'foo::bar(int)'.  We get here when we find the
	     first case, and need to ensure that the '(void)' gets added to
	     the current declp.  Note that with ARM/HP, the first case
	     represents the name of a static data member 'foo::bar',
	     which is in the current declp, so we leave it alone.  */
	  success = demangle_args (work, mangled, declp);
	}
    }
  if (success && PRINT_ARG_TYPES)
    {
      if (work->static_type)
	string_append (declp, " static");
      if (work->type_quals != TYPE_UNQUALIFIED)
	{
	  APPEND_BLANK (declp);
	  string_append (declp, qualifier_string (work->type_quals));
	}
    }

  return (success);
}

#if 0

static int
demangle_method_args (work, mangled, declp)
     struct work_stuff *work;
     const char **mangled;
     string *declp;
{
  int success = 0;

  if (work -> static_type)
    {
      string_append (declp, *mangled + 1);
      *mangled += strlen (*mangled);
      success = 1;
    }
  else
    {
      success = demangle_args (work, mangled, declp);
    }
  return (success);
}

#endif

/* Demangle a template-template parameter ("template <...> class")
   into TNAME, recursing for nested template parameters.  */

static int
demangle_template_template_parm (work, mangled, tname)
     struct work_stuff *work;
     const char **mangled;
     string *tname;
{
  int i;
  int r;
  int need_comma = 0;
  int success = 1;
  string temp;

  string_append (tname, "template <");
  /* get size of template parameter list */
  if (get_count (mangled, &r))
    {
      for (i = 0; i < r; i++)
	{
	  if (need_comma)
	    {
	      string_append (tname, ", ");
	    }

	  /* Z for type parameters */
	  if (**mangled == 'Z')
	    {
	      (*mangled)++;
	      string_append (tname, "class");
	    }
	  /* z for template parameters */
	  else if (**mangled == 'z')
	    {
	      (*mangled)++;
	      success =
		demangle_template_template_parm (work, mangled, tname);
	      if (!success)
		{
		  break;
		}
	    }
	  else
	    {
	      /* temp is initialized in do_type */
	      success = do_type (work, mangled, &temp);
	      if (success)
		{
		  string_appends (tname, &temp);
		}
	      string_delete(&temp);
	      if (!success)
		{
		  break;
		}
	    }
	  need_comma = 1;
	}

    }
  /* Avoid emitting ">>" which would parse as a shift operator.  */
  if (tname->p[-1] == '>')
    string_append (tname, " ");
  string_append (tname, "> class");
  return (success);
}

static int
demangle_expression (work, mangled, s, tk)
     struct work_stuff *work;
     const char** mangled;
     string* s;
     type_kind_t tk;
{
  int need_operator = 0;
  int success;

  success = 1;
  string_appendn (s, "(", 1);
  (*mangled)++;
  /* Alternate value tokens with operator names until the closing 'W'
     (or the end of the input, which is a failure).  */
  while (success && **mangled != 'W' && **mangled != '\0')
    {
      if (need_operator)
	{
	  size_t i;
	  size_t len;

	  success = 0;

	  len = strlen (*mangled);

	  for (i = 0; i < ARRAY_SIZE (optable); ++i)
	    {
	      size_t l = strlen (optable[i].in);

	      if (l <= len
		  && memcmp (optable[i].in, *mangled, l) == 0)
		{
		  string_appendn (s, " ", 1);
		  string_append (s, optable[i].out);
		  string_appendn (s, " ", 1);
		  success = 1;
		  (*mangled) += l;
		  break;
		}
	    }

	  if (!success)
	    break;
	}
      else
	need_operator = 1;

      success = demangle_template_value_parm (work, mangled, s, tk);
    }

  if (**mangled != 'W')
    success = 0;
  else
    {
      string_appendn (s, ")", 1);
      (*mangled)++;
    }

  return success;
}

static int
demangle_integral_value (work, mangled, s)
     struct work_stuff *work;
     const char** mangled;
     string* s;
{
  int success;

  if (**mangled == 'E')
    success = demangle_expression (work, mangled, s, tk_integral);
  else if (**mangled == 'Q' || **mangled == 'K')
    success = demangle_qualified (work, mangled, s, 0, 1);
  else
    {
      int value;

      /* By default, we let the number decide whether we shall consume an
	 underscore.  */
      int consume_following_underscore = 0;
      int leave_following_underscore = 0;

      success = 0;

      /* Negative numbers are indicated with a leading `m'.  */
      if (**mangled == 'm')
	{
	  string_appendn (s, "-", 1);
	  (*mangled)++;
	}
      /* mangled[0][1] peeks one character ahead for the "_m" form.  */
      else if (mangled[0][0] == '_' && mangled[0][1] == 'm')
	{
	  /* Since consume_count_with_underscores does not handle the
	     `m'-prefix we must do it here, using consume_count and
	     adjusting underscores: we have to consume the underscore
	     matching the prepended one.  */
	  consume_following_underscore = 1;
	  string_appendn (s, "-", 1);
	  (*mangled) += 2;
	}
      else if (**mangled == '_')
	{
	  /* Do not consume a following underscore;
	     consume_following_underscore will consume what should be
	     consumed.
 */
	  leave_following_underscore = 1;
	}

      /* We must call consume_count if we expect to remove a trailing
	 underscore, since consume_count_with_underscores expects
	 the leading underscore (that we consumed) if it is to handle
	 multi-digit numbers.  */
      if (consume_following_underscore)
	value = consume_count (mangled);
      else
	value = consume_count_with_underscores (mangled);

      if (value != -1)
	{
	  char buf[INTBUF_SIZE];
	  sprintf (buf, "%d", value);
	  string_append (s, buf);

	  /* Numbers not otherwise delimited, might have an underscore
	     appended as a delimeter, which we should skip.

	     ??? This used to always remove a following underscore, which
	     is wrong.  If other (arbitrary) cases are followed by an
	     underscore, we need to do something more radical.  */

	  if ((value > 9 || consume_following_underscore)
	      && ! leave_following_underscore
	      && **mangled == '_')
	    (*mangled)++;

	  /* All is well.  */
	  success = 1;
	}
    }

  return success;
}

/* Demangle the real value in MANGLED.
 */

static int
demangle_real_value (work, mangled, s)
     struct work_stuff *work;
     const char **mangled;
     string* s;
{
  if (**mangled == 'E')
    return demangle_expression (work, mangled, s, tk_real);

  /* Copy out [m]<digits>[.<digits>][e<digits>] one character at a
     time, translating the leading 'm' to a minus sign.  */
  if (**mangled == 'm')
    {
      string_appendn (s, "-", 1);
      (*mangled)++;
    }
  while (ISDIGIT ((unsigned char)**mangled))
    {
      string_appendn (s, *mangled, 1);
      (*mangled)++;
    }
  if (**mangled == '.') /* fraction */
    {
      string_appendn (s, ".", 1);
      (*mangled)++;
      while (ISDIGIT ((unsigned char)**mangled))
	{
	  string_appendn (s, *mangled, 1);
	  (*mangled)++;
	}
    }
  if (**mangled == 'e') /* exponent */
    {
      string_appendn (s, "e", 1);
      (*mangled)++;
      while (ISDIGIT ((unsigned char)**mangled))
	{
	  string_appendn (s, *mangled, 1);
	  (*mangled)++;
	}
    }

  return 1;
}

/* Demangle one template value parameter of kind TK into S.
   NOTE(review): some malformed-input paths return -1 rather than 0;
   since -1 is nonzero this reads as "success" to a boolean caller —
   verify how callers test the result.  */

static int
demangle_template_value_parm (work, mangled, s, tk)
     struct work_stuff *work;
     const char **mangled;
     string* s;
     type_kind_t tk;
{
  int success = 1;

  if (**mangled == 'Y')
    {
      /* The next argument is a template parameter.
 */
      int idx;

      (*mangled)++;
      idx = consume_count_with_underscores (mangled);
      if (idx == -1
	  || (work->tmpl_argvec && idx >= work->ntmpl_args)
	  || consume_count_with_underscores (mangled) == -1)
	return -1;
      if (work->tmpl_argvec)
	string_append (s, work->tmpl_argvec[idx]);
      else
	string_append_template_idx (s, idx);
    }
  else if (tk == tk_integral)
    success = demangle_integral_value (work, mangled, s);
  else if (tk == tk_char)
    {
      char tmp[2];
      int val;
      if (**mangled == 'm')
	{
	  string_appendn (s, "-", 1);
	  (*mangled)++;
	}
      string_appendn (s, "'", 1);
      val = consume_count(mangled);
      if (val <= 0)
	success = 0;
      else
	{
	  tmp[0] = (char)val;
	  tmp[1] = '\0';
	  string_appendn (s, &tmp[0], 1);
	  string_appendn (s, "'", 1);
	}
    }
  else if (tk == tk_bool)
    {
      int val = consume_count (mangled);
      if (val == 0)
	string_appendn (s, "false", 5);
      else if (val == 1)
	string_appendn (s, "true", 4);
      else
	success = 0;
    }
  else if (tk == tk_real)
    success = demangle_real_value (work, mangled, s);
  else if (tk == tk_pointer || tk == tk_reference)
    {
      if (**mangled == 'Q')
	success = demangle_qualified (work, mangled, s,
				      /*isfuncname=*/0,
				      /*append=*/1);
      else
	{
	  int symbol_len = consume_count (mangled);
	  if (symbol_len == -1)
	    return -1;
	  if (symbol_len == 0)
	    string_appendn (s, "0", 1);
	  else
	    {
	      char *p = xmalloc (symbol_len + 1), *q;
	      strncpy (p, *mangled, symbol_len);
	      p [symbol_len] = '\0';
	      /* We use cplus_demangle here, rather than
		 internal_cplus_demangle, because the name of the entity
		 mangled here does not make use of any of the squangling
		 or type-code information we have built up thus far; it is
		 mangled independently.  */
	      q = VG_(cplus_demangle) (p, work->options);
	      if (tk == tk_pointer)
		string_appendn (s, "&", 1);
	      /* FIXME: Pointer-to-member constants should get a
		 qualifying class name here.
 */
	      if (q)
		{
		  string_append (s, q);
		  free (q);
		}
	      else
		string_append (s, p);
	      free (p);
	    }
	  *mangled += symbol_len;
	}
    }

  return success;
}

/* Demangle the template name in MANGLED.  The full name of the
   template (e.g., S) is placed in TNAME.  The name without the
   template parameters (e.g. S) is placed in TRAWNAME if TRAWNAME is
   non-NULL.  If IS_TYPE is nonzero, this template is a type template,
   not a function template.  If both IS_TYPE and REMEMBER are nonzero,
   the template is remembered in the list of back-referenceable
   types.  */

static int
demangle_template (work, mangled, tname, trawname, is_type, remember)
     struct work_stuff *work;
     const char **mangled;
     string *tname;
     string *trawname;
     int is_type;
     int remember;
{
  int i;
  int r;
  int need_comma = 0;
  int success = 0;
  const char *start;
  int is_java_array = 0;
  string temp;
  int bindex = 0;

  (*mangled)++;
  if (is_type)
    {
      if (remember)
	bindex = register_Btype (work);
      start = *mangled;
      /* get template name */
      if (**mangled == 'z')
	{
	  int idx;
	  (*mangled)++;
	  (*mangled)++;

	  idx = consume_count_with_underscores (mangled);
	  if (idx == -1
	      || (work->tmpl_argvec && idx >= work->ntmpl_args)
	      || consume_count_with_underscores (mangled) == -1)
	    return (0);

	  if (work->tmpl_argvec)
	    {
	      string_append (tname, work->tmpl_argvec[idx]);
	      if (trawname)
		string_append (trawname, work->tmpl_argvec[idx]);
	    }
	  else
	    {
	      string_append_template_idx (tname, idx);
	      if (trawname)
		string_append_template_idx (trawname, idx);
	    }
	}
      else
	{
	  if ((r = consume_count (mangled)) <= 0
	      || (int) strlen (*mangled) < r)
	    {
	      return (0);
	    }
	  is_java_array = (work -> options & DMGL_JAVA)
	    && strncmp (*mangled, "JArray1Z", 8) == 0;
	  if (!
is_java_array) + { + string_appendn (tname, *mangled, r); + } + if (trawname) + string_appendn (trawname, *mangled, r); + *mangled += r; + } + } + if (!is_java_array) + string_append (tname, "<"); + /* get size of template parameter list */ + if (!get_count (mangled, &r)) + { + return (0); + } + if (!is_type) + { + /* Create an array for saving the template argument values. */ + work->tmpl_argvec = (char**) xmalloc (r * sizeof (char *)); + work->ntmpl_args = r; + for (i = 0; i < r; i++) + work->tmpl_argvec[i] = 0; + } + for (i = 0; i < r; i++) + { + if (need_comma) + { + string_append (tname, ", "); + } + /* Z for type parameters */ + if (**mangled == 'Z') + { + (*mangled)++; + /* temp is initialized in do_type */ + success = do_type (work, mangled, &temp); + if (success) + { + string_appends (tname, &temp); + + if (!is_type) + { + /* Save the template argument. */ + int len = temp.p - temp.b; + work->tmpl_argvec[i] = xmalloc (len + 1); + memcpy (work->tmpl_argvec[i], temp.b, len); + work->tmpl_argvec[i][len] = '\0'; + } + } + string_delete(&temp); + if (!success) + { + break; + } + } + /* z for template parameters */ + else if (**mangled == 'z') + { + int r2; + (*mangled)++; + success = demangle_template_template_parm (work, mangled, tname); + + if (success + && (r2 = consume_count (mangled)) > 0 + && (int) strlen (*mangled) >= r2) + { + string_append (tname, " "); + string_appendn (tname, *mangled, r2); + if (!is_type) + { + /* Save the template argument. 
*/ + int len = r2; + work->tmpl_argvec[i] = xmalloc (len + 1); + memcpy (work->tmpl_argvec[i], *mangled, len); + work->tmpl_argvec[i][len] = '\0'; + } + *mangled += r2; + } + if (!success) + { + break; + } + } + else + { + string param; + string* s; + + /* otherwise, value parameter */ + + /* temp is initialized in do_type */ + success = do_type (work, mangled, &temp); + string_delete(&temp); + if (!success) + break; + + if (!is_type) + { + s = ¶m; + string_init (s); + } + else + s = tname; + + success = demangle_template_value_parm (work, mangled, s, + (type_kind_t) success); + + if (!success) + { + if (!is_type) + string_delete (s); + success = 0; + break; + } + + if (!is_type) + { + int len = s->p - s->b; + work->tmpl_argvec[i] = xmalloc (len + 1); + memcpy (work->tmpl_argvec[i], s->b, len); + work->tmpl_argvec[i][len] = '\0'; + + string_appends (tname, s); + string_delete (s); + } + } + need_comma = 1; + } + if (is_java_array) + { + string_append (tname, "[]"); + } + else + { + if (tname->p[-1] == '>') + string_append (tname, " "); + string_append (tname, ">"); + } + + if (is_type && remember) + remember_Btype (work, tname->b, LEN_STRING (tname), bindex); + + /* + if (work -> static_type) + { + string_append (declp, *mangled + 1); + *mangled += strlen (*mangled); + success = 1; + } + else + { + success = demangle_args (work, mangled, declp); + } + } + */ + return (success); +} + +static int +arm_pt (work, mangled, n, anchor, args) + struct work_stuff *work; + const char *mangled; + int n; + const char **anchor, **args; +{ + /* Check if ARM template with "__pt__" in it ("parameterized type") */ + /* Allow HP also here, because HP's cfront compiler follows ARM to some extent */ + if ((ARM_DEMANGLING || HP_DEMANGLING) && (*anchor = strstr (mangled, "__pt__"))) + { + int len; + *args = *anchor + 6; + len = consume_count (args); + if (len == -1) + return 0; + if (*args + len == mangled + n && **args == '_') + { + ++*args; + return 1; + } + } + if (AUTO_DEMANGLING || 
EDG_DEMANGLING) + { + if ((*anchor = strstr (mangled, "__tm__")) + || (*anchor = strstr (mangled, "__ps__")) + || (*anchor = strstr (mangled, "__pt__"))) + { + int len; + *args = *anchor + 6; + len = consume_count (args); + if (len == -1) + return 0; + if (*args + len == mangled + n && **args == '_') + { + ++*args; + return 1; + } + } + else if ((*anchor = strstr (mangled, "__S"))) + { + int len; + *args = *anchor + 3; + len = consume_count (args); + if (len == -1) + return 0; + if (*args + len == mangled + n && **args == '_') + { + ++*args; + return 1; + } + } + } + + return 0; +} + +static void +demangle_arm_hp_template (work, mangled, n, declp) + struct work_stuff *work; + const char **mangled; + int n; + string *declp; +{ + const char *p; + const char *args; + const char *e = *mangled + n; + string arg; + + /* Check for HP aCC template spec: classXt1t2 where t1, t2 are + template args */ + if (HP_DEMANGLING && ((*mangled)[n] == 'X')) + { + char *start_spec_args = NULL; + + /* First check for and omit template specialization pseudo-arguments, + such as in "Spec<#1,#1.*>" */ + start_spec_args = strchr (*mangled, '<'); + if (start_spec_args && (start_spec_args - *mangled < n)) + string_appendn (declp, *mangled, start_spec_args - *mangled); + else + string_appendn (declp, *mangled, n); + (*mangled) += n + 1; + string_init (&arg); + if (work->temp_start == -1) /* non-recursive call */ + work->temp_start = declp->p - declp->b; + string_append (declp, "<"); + while (1) + { + string_clear (&arg); + switch (**mangled) + { + case 'T': + /* 'T' signals a type parameter */ + (*mangled)++; + if (!do_type (work, mangled, &arg)) + goto hpacc_template_args_done; + break; + + case 'U': + case 'S': + /* 'U' or 'S' signals an integral value */ + if (!do_hpacc_template_const_value (work, mangled, &arg)) + goto hpacc_template_args_done; + break; + + case 'A': + /* 'A' signals a named constant expression (literal) */ + if (!do_hpacc_template_literal (work, mangled, &arg)) + goto 
hpacc_template_args_done; + break; + + default: + /* Today, 1997-09-03, we have only the above types + of template parameters */ + /* FIXME: maybe this should fail and return null */ + goto hpacc_template_args_done; + } + string_appends (declp, &arg); + /* Check if we're at the end of template args. + 0 if at end of static member of template class, + _ if done with template args for a function */ + if ((**mangled == '\000') || (**mangled == '_')) + break; + else + string_append (declp, ","); + } + hpacc_template_args_done: + string_append (declp, ">"); + string_delete (&arg); + if (**mangled == '_') + (*mangled)++; + return; + } + /* ARM template? (Also handles HP cfront extensions) */ + else if (arm_pt (work, *mangled, n, &p, &args)) + { + string type_str; + + string_init (&arg); + string_appendn (declp, *mangled, p - *mangled); + if (work->temp_start == -1) /* non-recursive call */ + work->temp_start = declp->p - declp->b; + string_append (declp, "<"); + /* should do error checking here */ + while (args < e) { + string_clear (&arg); + + /* Check for type or literal here */ + switch (*args) + { + /* HP cfront extensions to ARM for template args */ + /* spec: Xt1Lv1 where t1 is a type, v1 is a literal value */ + /* FIXME: We handle only numeric literals for HP cfront */ + case 'X': + /* A typed constant value follows */ + args++; + if (!do_type (work, &args, &type_str)) + goto cfront_template_args_done; + string_append (&arg, "("); + string_appends (&arg, &type_str); + string_append (&arg, ")"); + if (*args != 'L') + goto cfront_template_args_done; + args++; + /* Now snarf a literal value following 'L' */ + if (!snarf_numeric_literal (&args, &arg)) + goto cfront_template_args_done; + break; + + case 'L': + /* Snarf a literal following 'L' */ + args++; + if (!snarf_numeric_literal (&args, &arg)) + goto cfront_template_args_done; + break; + default: + /* Not handling other HP cfront stuff */ + if (!do_type (work, &args, &arg)) + goto cfront_template_args_done; + } + 
string_appends (declp, &arg); + string_append (declp, ","); + } + cfront_template_args_done: + string_delete (&arg); + if (args >= e) + --declp->p; /* remove extra comma */ + string_append (declp, ">"); + } + else if (n>10 && strncmp (*mangled, "_GLOBAL_", 8) == 0 + && (*mangled)[9] == 'N' + && (*mangled)[8] == (*mangled)[10] + && strchr (cplus_markers, (*mangled)[8])) + { + /* A member of the anonymous namespace. */ + string_append (declp, "{anonymous}"); + } + else + { + if (work->temp_start == -1) /* non-recursive call only */ + work->temp_start = 0; /* disable in recursive calls */ + string_appendn (declp, *mangled, n); + } + *mangled += n; +} + +/* Extract a class name, possibly a template with arguments, from the + mangled string; qualifiers, local class indicators, etc. have + already been dealt with */ + +static int +demangle_class_name (work, mangled, declp) + struct work_stuff *work; + const char **mangled; + string *declp; +{ + int n; + int success = 0; + + n = consume_count (mangled); + if (n == -1) + return 0; + if ((int) strlen (*mangled) >= n) + { + demangle_arm_hp_template (work, mangled, n, declp); + success = 1; + } + + return (success); +} + +/* + +LOCAL FUNCTION + + demangle_class -- demangle a mangled class sequence + +SYNOPSIS + + static int + demangle_class (struct work_stuff *work, const char **mangled, + strint *declp) + +DESCRIPTION + + DECLP points to the buffer into which demangling is being done. + + *MANGLED points to the current token to be demangled. On input, + it points to a mangled class (I.E. "3foo", "13verylongclass", etc.) + On exit, it points to the next token after the mangled class on + success, or the first unconsumed token on failure. + + If the CONSTRUCTOR or DESTRUCTOR flags are set in WORK, then + we are demangling a constructor or destructor. In this case + we prepend "class::class" or "class::~class" to DECLP. + + Otherwise, we prepend "class::" to the current DECLP. 
+ + Reset the constructor/destructor flags once they have been + "consumed". This allows demangle_class to be called later during + the same demangling, to do normal class demangling. + + Returns 1 if demangling is successful, 0 otherwise. + +*/ + +static int +demangle_class (work, mangled, declp) + struct work_stuff *work; + const char **mangled; + string *declp; +{ + int success = 0; + int btype; + string class_name; + char *save_class_name_end = 0; + + string_init (&class_name); + btype = register_Btype (work); + if (demangle_class_name (work, mangled, &class_name)) + { + save_class_name_end = class_name.p; + if ((work->constructor & 1) || (work->destructor & 1)) + { + /* adjust so we don't include template args */ + if (work->temp_start && (work->temp_start != -1)) + { + class_name.p = class_name.b + work->temp_start; + } + string_prepends (declp, &class_name); + if (work -> destructor & 1) + { + string_prepend (declp, "~"); + work -> destructor -= 1; + } + else + { + work -> constructor -= 1; + } + } + class_name.p = save_class_name_end; + remember_Ktype (work, class_name.b, LEN_STRING(&class_name)); + remember_Btype (work, class_name.b, LEN_STRING(&class_name), btype); + string_prepend (declp, SCOPE_STRING (work)); + string_prepends (declp, &class_name); + success = 1; + } + string_delete (&class_name); + return (success); +} + + +/* Called when there's a "__" in the mangled name, with `scan' pointing to + the rightmost guess. + + Find the correct "__"-sequence where the function name ends and the + signature starts, which is ambiguous with GNU mangling. + Call demangle_signature here, so we can make sure we found the right + one; *mangled will be consumed so caller will not make further calls to + demangle_signature. 
*/ + +static int +iterate_demangle_function (work, mangled, declp, scan) + struct work_stuff *work; + const char **mangled; + string *declp; + const char *scan; +{ + const char *mangle_init = *mangled; + int success = 0; + string decl_init; + struct work_stuff work_init; + + if (*(scan + 2) == '\0') + return 0; + + /* Do not iterate for some demangling modes, or if there's only one + "__"-sequence. This is the normal case. */ + if (ARM_DEMANGLING || LUCID_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING + || strstr (scan + 2, "__") == NULL) + { + demangle_function_name (work, mangled, declp, scan); + return 1; + } + + /* Save state so we can restart if the guess at the correct "__" was + wrong. */ + string_init (&decl_init); + string_appends (&decl_init, declp); + memset (&work_init, 0, sizeof work_init); + work_stuff_copy_to_from (&work_init, work); + + /* Iterate over occurrences of __, allowing names and types to have a + "__" sequence in them. We must start with the first (not the last) + occurrence, since "__" most often occur between independent mangled + parts, hence starting at the last occurence inside a signature + might get us a "successful" demangling of the signature. */ + + while (scan[2]) + { + demangle_function_name (work, mangled, declp, scan); + success = demangle_signature (work, mangled, declp); + if (success) + break; + + /* Reset demangle state for the next round. */ + *mangled = mangle_init; + string_clear (declp); + string_appends (declp, &decl_init); + work_stuff_copy_to_from (work, &work_init); + + /* Leave this underscore-sequence. */ + scan += 2; + + /* Scan for the next "__" sequence. */ + while (*scan && (scan[0] != '_' || scan[1] != '_')) + scan++; + + /* Move to last "__" in this sequence. */ + while (*scan && *scan == '_') + scan++; + scan -= 2; + } + + /* Delete saved state. 
*/ + delete_work_stuff (&work_init); + string_delete (&decl_init); + + return success; +} + +/* + +LOCAL FUNCTION + + demangle_prefix -- consume the mangled name prefix and find signature + +SYNOPSIS + + static int + demangle_prefix (struct work_stuff *work, const char **mangled, + string *declp); + +DESCRIPTION + + Consume and demangle the prefix of the mangled name. + While processing the function name root, arrange to call + demangle_signature if the root is ambiguous. + + DECLP points to the string buffer into which demangled output is + placed. On entry, the buffer is empty. On exit it contains + the root function name, the demangled operator name, or in some + special cases either nothing or the completely demangled result. + + MANGLED points to the current pointer into the mangled name. As each + token of the mangled name is consumed, it is updated. Upon entry + the current mangled name pointer points to the first character of + the mangled name. Upon exit, it should point to the first character + of the signature if demangling was successful, or to the first + unconsumed character if demangling of the prefix was unsuccessful. + + Returns 1 on success, 0 otherwise. + */ + +static int +demangle_prefix (work, mangled, declp) + struct work_stuff *work; + const char **mangled; + string *declp; +{ + int success = 1; + const char *scan; + int i; + + if (strlen(*mangled) > 6 + && (strncmp(*mangled, "_imp__", 6) == 0 + || strncmp(*mangled, "__imp_", 6) == 0)) + { + /* it's a symbol imported from a PE dynamic library. Check for both + new style prefix _imp__ and legacy __imp_ used by older versions + of dlltool. 
*/ + (*mangled) += 6; + work->dllimported = 1; + } + else if (strlen(*mangled) >= 11 && strncmp(*mangled, "_GLOBAL_", 8) == 0) + { + char *marker = strchr (cplus_markers, (*mangled)[8]); + if (marker != NULL && *marker == (*mangled)[10]) + { + if ((*mangled)[9] == 'D') + { + /* it's a GNU global destructor to be executed at program exit */ + (*mangled) += 11; + work->destructor = 2; + if (gnu_special (work, mangled, declp)) + return success; + } + else if ((*mangled)[9] == 'I') + { + /* it's a GNU global constructor to be executed at program init */ + (*mangled) += 11; + work->constructor = 2; + if (gnu_special (work, mangled, declp)) + return success; + } + } + } + else if ((ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING) && strncmp(*mangled, "__std__", 7) == 0) + { + /* it's a ARM global destructor to be executed at program exit */ + (*mangled) += 7; + work->destructor = 2; + } + else if ((ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING) && strncmp(*mangled, "__sti__", 7) == 0) + { + /* it's a ARM global constructor to be executed at program initial */ + (*mangled) += 7; + work->constructor = 2; + } + + /* This block of code is a reduction in strength time optimization + of: + scan = strstr (*mangled, "__"); */ + + { + scan = *mangled; + + do { + scan = strchr (scan, '_'); + } while (scan != NULL && *++scan != '_'); + + if (scan != NULL) --scan; + } + + if (scan != NULL) + { + /* We found a sequence of two or more '_', ensure that we start at + the last pair in the sequence. 
*/ + /* i = strspn (scan, "_"); */ + i = 0; + while (scan[i] == '_') i++; + if (i > 2) + { + scan += (i - 2); + } + } + + if (scan == NULL) + { + success = 0; + } + else if (work -> static_type) + { + if (!ISDIGIT ((unsigned char)scan[0]) && (scan[0] != 't')) + { + success = 0; + } + } + else if ((scan == *mangled) + && (ISDIGIT ((unsigned char)scan[2]) || (scan[2] == 'Q') + || (scan[2] == 't') || (scan[2] == 'K') || (scan[2] == 'H'))) + { + /* The ARM says nothing about the mangling of local variables. + But cfront mangles local variables by prepending __ + to them. As an extension to ARM demangling we handle this case. */ + if ((LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING) + && ISDIGIT ((unsigned char)scan[2])) + { + *mangled = scan + 2; + consume_count (mangled); + string_append (declp, *mangled); + *mangled += strlen (*mangled); + success = 1; + } + else + { + /* A GNU style constructor starts with __[0-9Qt]. But cfront uses + names like __Q2_3foo3bar for nested type names. So don't accept + this style of constructor for cfront demangling. A GNU + style member-template constructor starts with 'H'. */ + if (!(LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING)) + work -> constructor += 1; + *mangled = scan + 2; + } + } + else if (ARM_DEMANGLING && scan[2] == 'p' && scan[3] == 't') + { + /* Cfront-style parameterized type. Handled later as a signature. */ + success = 1; + + /* ARM template? */ + demangle_arm_hp_template (work, mangled, strlen (*mangled), declp); + } + else if (EDG_DEMANGLING && ((scan[2] == 't' && scan[3] == 'm') + || (scan[2] == 'p' && scan[3] == 's') + || (scan[2] == 'p' && scan[3] == 't'))) + { + /* EDG-style parameterized type. Handled later as a signature. */ + success = 1; + + /* EDG template? */ + demangle_arm_hp_template (work, mangled, strlen (*mangled), declp); + } + else if ((scan == *mangled) && !ISDIGIT ((unsigned char)scan[2]) + && (scan[2] != 't')) + { + /* Mangled name starts with "__". 
Skip over any leading '_' characters, + then find the next "__" that separates the prefix from the signature. + */ + if (!(ARM_DEMANGLING || LUCID_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING) + || (arm_special (mangled, declp) == 0)) + { + while (*scan == '_') + { + scan++; + } + if ((scan = strstr (scan, "__")) == NULL || (*(scan + 2) == '\0')) + { + /* No separator (I.E. "__not_mangled"), or empty signature + (I.E. "__not_mangled_either__") */ + success = 0; + } + else + return iterate_demangle_function (work, mangled, declp, scan); + } + } + else if (*(scan + 2) != '\0') + { + /* Mangled name does not start with "__" but does have one somewhere + in there with non empty stuff after it. Looks like a global + function name. Iterate over all "__":s until the right + one is found. */ + return iterate_demangle_function (work, mangled, declp, scan); + } + else + { + /* Doesn't look like a mangled name */ + success = 0; + } + + if (!success && (work->constructor == 2 || work->destructor == 2)) + { + string_append (declp, *mangled); + *mangled += strlen (*mangled); + success = 1; + } + return (success); +} + +/* + +LOCAL FUNCTION + + gnu_special -- special handling of gnu mangled strings + +SYNOPSIS + + static int + gnu_special (struct work_stuff *work, const char **mangled, + string *declp); + + +DESCRIPTION + + Process some special GNU style mangling forms that don't fit + the normal pattern. 
For example: + + _$_3foo (destructor for class foo) + _vt$foo (foo virtual table) + _vt$foo$bar (foo::bar virtual table) + __vt_foo (foo virtual table, new style with thunks) + _3foo$varname (static data member) + _Q22rs2tu$vw (static data member) + __t6vector1Zii (constructor with template) + __thunk_4__$_7ostream (virtual function thunk) + */ + +static int +gnu_special (work, mangled, declp) + struct work_stuff *work; + const char **mangled; + string *declp; +{ + int n; + int success = 1; + const char *p; + + if ((*mangled)[0] == '_' + && strchr (cplus_markers, (*mangled)[1]) != NULL + && (*mangled)[2] == '_') + { + /* Found a GNU style destructor, get past "__" */ + (*mangled) += 3; + work -> destructor += 1; + } + else if ((*mangled)[0] == '_' + && (((*mangled)[1] == '_' + && (*mangled)[2] == 'v' + && (*mangled)[3] == 't' + && (*mangled)[4] == '_') + || ((*mangled)[1] == 'v' + && (*mangled)[2] == 't' + && strchr (cplus_markers, (*mangled)[3]) != NULL))) + { + /* Found a GNU style virtual table, get past "_vt" + and create the decl. Note that we consume the entire mangled + input string, which means that demangle_signature has no work + to do. */ + if ((*mangled)[2] == 'v') + (*mangled) += 5; /* New style, with thunks: "__vt_" */ + else + (*mangled) += 4; /* Old style, no thunks: "_vt" */ + while (**mangled != '\0') + { + switch (**mangled) + { + case 'Q': + case 'K': + success = demangle_qualified (work, mangled, declp, 0, 1); + break; + case 't': + success = demangle_template (work, mangled, declp, 0, 1, + 1); + break; + default: + if (ISDIGIT((unsigned char)*mangled[0])) + { + n = consume_count(mangled); + /* We may be seeing a too-large size, or else a + "." indicating a static local symbol. In + any case, declare victory and move on; *don't* try + to use n to allocate. 
*/ + if (n > (int) strlen (*mangled)) + { + success = 1; + break; + } + } + else + { + /*n = strcspn (*mangled, cplus_markers);*/ + const char *check = *mangled; + n = 0; + while (*check) + if (strchr (cplus_markers, *check++) == NULL) + n++; + else + break; + } + string_appendn (declp, *mangled, n); + (*mangled) += n; + } + + p = strpbrk (*mangled, cplus_markers); + if (success && ((p == NULL) || (p == *mangled))) + { + if (p != NULL) + { + string_append (declp, SCOPE_STRING (work)); + (*mangled)++; + } + } + else + { + success = 0; + break; + } + } + if (success) + string_append (declp, " virtual table"); + } + else if ((*mangled)[0] == '_' + && (strchr("0123456789Qt", (*mangled)[1]) != NULL) + && (p = strpbrk (*mangled, cplus_markers)) != NULL) + { + /* static data member, "_3foo$varname" for example */ + (*mangled)++; + switch (**mangled) + { + case 'Q': + case 'K': + success = demangle_qualified (work, mangled, declp, 0, 1); + break; + case 't': + success = demangle_template (work, mangled, declp, 0, 1, 1); + break; + default: + n = consume_count (mangled); + if (n < 0 || n > (long) strlen (*mangled)) + { + success = 0; + break; + } + + if (n > 10 && strncmp (*mangled, "_GLOBAL_", 8) == 0 + && (*mangled)[9] == 'N' + && (*mangled)[8] == (*mangled)[10] + && strchr (cplus_markers, (*mangled)[8])) + { + /* A member of the anonymous namespace. There's information + about what identifier or filename it was keyed to, but + it's just there to make the mangled name unique; we just + step over it. */ + string_append (declp, "{anonymous}"); + (*mangled) += n; + + /* Now p points to the marker before the N, so we need to + update it to the first marker after what we consumed. */ + p = strpbrk (*mangled, cplus_markers); + break; + } + + string_appendn (declp, *mangled, n); + (*mangled) += n; + } + if (success && (p == *mangled)) + { + /* Consumed everything up to the cplus_marker, append the + variable name. 
*/ + (*mangled)++; + string_append (declp, SCOPE_STRING (work)); + n = strlen (*mangled); + string_appendn (declp, *mangled, n); + (*mangled) += n; + } + else + { + success = 0; + } + } + else if (strncmp (*mangled, "__thunk_", 8) == 0) + { + int delta; + + (*mangled) += 8; + delta = consume_count (mangled); + if (delta == -1) + success = 0; + else + { + char *method = internal_cplus_demangle (work, ++*mangled); + + if (method) + { + char buf[50]; + sprintf (buf, "virtual function thunk (delta:%d) for ", -delta); + string_append (declp, buf); + string_append (declp, method); + free (method); + n = strlen (*mangled); + (*mangled) += n; + } + else + { + success = 0; + } + } + } + else if (strncmp (*mangled, "__t", 3) == 0 + && ((*mangled)[3] == 'i' || (*mangled)[3] == 'f')) + { + p = (*mangled)[3] == 'i' ? " type_info node" : " type_info function"; + (*mangled) += 4; + switch (**mangled) + { + case 'Q': + case 'K': + success = demangle_qualified (work, mangled, declp, 0, 1); + break; + case 't': + success = demangle_template (work, mangled, declp, 0, 1, 1); + break; + default: + success = do_type (work, mangled, declp); + break; + } + if (success && **mangled != '\0') + success = 0; + if (success) + string_append (declp, p); + } + else + { + success = 0; + } + return (success); +} + +static void +recursively_demangle(work, mangled, result, namelength) + struct work_stuff *work; + const char **mangled; + string *result; + int namelength; +{ + char * recurse = (char *)NULL; + char * recurse_dem = (char *)NULL; + + recurse = (char *) xmalloc (namelength + 1); + memcpy (recurse, *mangled, namelength); + recurse[namelength] = '\000'; + + recurse_dem = VG_(cplus_demangle) (recurse, work->options); + + if (recurse_dem) + { + string_append (result, recurse_dem); + free (recurse_dem); + } + else + { + string_appendn (result, *mangled, namelength); + } + free (recurse); + *mangled += namelength; +} + +/* + +LOCAL FUNCTION + + arm_special -- special handling of ARM/lucid 
mangled strings + +SYNOPSIS + + static int + arm_special (const char **mangled, + string *declp); + + +DESCRIPTION + + Process some special ARM style mangling forms that don't fit + the normal pattern. For example: + + __vtbl__3foo (foo virtual table) + __vtbl__3foo__3bar (bar::foo virtual table) + + */ + +static int +arm_special (mangled, declp) + const char **mangled; + string *declp; +{ + int n; + int success = 1; + const char *scan; + + if (strncmp (*mangled, ARM_VTABLE_STRING, ARM_VTABLE_STRLEN) == 0) + { + /* Found a ARM style virtual table, get past ARM_VTABLE_STRING + and create the decl. Note that we consume the entire mangled + input string, which means that demangle_signature has no work + to do. */ + scan = *mangled + ARM_VTABLE_STRLEN; + while (*scan != '\0') /* first check it can be demangled */ + { + n = consume_count (&scan); + if (n == -1) + { + return (0); /* no good */ + } + scan += n; + if (scan[0] == '_' && scan[1] == '_') + { + scan += 2; + } + } + (*mangled) += ARM_VTABLE_STRLEN; + while (**mangled != '\0') + { + n = consume_count (mangled); + if (n == -1 + || n > (long) strlen (*mangled)) + return 0; + string_prependn (declp, *mangled, n); + (*mangled) += n; + if ((*mangled)[0] == '_' && (*mangled)[1] == '_') + { + string_prepend (declp, "::"); + (*mangled) += 2; + } + } + string_append (declp, " virtual table"); + } + else + { + success = 0; + } + return (success); +} + +/* + +LOCAL FUNCTION + + demangle_qualified -- demangle 'Q' qualified name strings + +SYNOPSIS + + static int + demangle_qualified (struct work_stuff *, const char *mangled, + string *result, int isfuncname, int append); + +DESCRIPTION + + Demangle a qualified name, such as "Q25Outer5Inner" which is + the mangled form of "Outer::Inner". The demangled output is + prepended or appended to the result string according to the + state of the append flag. 
+ + If isfuncname is nonzero, then the qualified name we are building + is going to be used as a member function name, so if it is a + constructor or destructor function, append an appropriate + constructor or destructor name. I.E. for the above example, + the result for use as a constructor is "Outer::Inner::Inner" + and the result for use as a destructor is "Outer::Inner::~Inner". + +BUGS + + Numeric conversion is ASCII dependent (FIXME). + + */ + +static int +demangle_qualified (work, mangled, result, isfuncname, append) + struct work_stuff *work; + const char **mangled; + string *result; + int isfuncname; + int append; +{ + int qualifiers = 0; + int success = 1; + string temp; + string last_name; + int bindex = register_Btype (work); + + /* We only make use of ISFUNCNAME if the entity is a constructor or + destructor. */ + isfuncname = (isfuncname + && ((work->constructor & 1) || (work->destructor & 1))); + + string_init (&temp); + string_init (&last_name); + + if ((*mangled)[0] == 'K') + { + /* Squangling qualified name reuse */ + int idx; + (*mangled)++; + idx = consume_count_with_underscores (mangled); + if (idx == -1 || idx >= work -> numk) + success = 0; + else + string_append (&temp, work -> ktypevec[idx]); + } + else + switch ((*mangled)[1]) + { + case '_': + /* GNU mangled name with more than 9 classes. The count is preceded + by an underscore (to distinguish it from the <= 9 case) and followed + by an underscore. */ + (*mangled)++; + qualifiers = consume_count_with_underscores (mangled); + if (qualifiers == -1) + success = 0; + break; + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + /* The count is in a single digit. */ + qualifiers = (*mangled)[1] - '0'; + + /* If there is an underscore after the digit, skip it. This is + said to be for ARM-qualified names, but the ARM makes no + mention of such an underscore. Perhaps cfront uses one. 
*/ + if ((*mangled)[2] == '_') + { + (*mangled)++; + } + (*mangled) += 2; + break; + + case '0': + default: + success = 0; + } + + if (!success) + { + string_delete (&last_name); + string_delete (&temp); + return success; + } + + /* Pick off the names and collect them in the temp buffer in the order + in which they are found, separated by '::'. */ + + while (qualifiers-- > 0) + { + int remember_K = 1; + string_clear (&last_name); + + if (*mangled[0] == '_') + (*mangled)++; + + if (*mangled[0] == 't') + { + /* Here we always append to TEMP since we will want to use + the template name without the template parameters as a + constructor or destructor name. The appropriate + (parameter-less) value is returned by demangle_template + in LAST_NAME. We do not remember the template type here, + in order to match the G++ mangling algorithm. */ + success = demangle_template(work, mangled, &temp, + &last_name, 1, 0); + if (!success) + break; + } + else if (*mangled[0] == 'K') + { + int idx; + (*mangled)++; + idx = consume_count_with_underscores (mangled); + if (idx == -1 || idx >= work->numk) + success = 0; + else + string_append (&temp, work->ktypevec[idx]); + remember_K = 0; + + if (!success) break; + } + else + { + if (EDG_DEMANGLING) + { + int namelength; + /* Now recursively demangle the qualifier + * This is necessary to deal with templates in + * mangling styles like EDG */ + namelength = consume_count (mangled); + if (namelength == -1) + { + success = 0; + break; + } + recursively_demangle(work, mangled, &temp, namelength); + } + else + { + string temp_last_name; + string_init (&temp_last_name); + success = do_type (work, mangled, &temp_last_name); + if (!success) + { + string_delete (&temp_last_name); + break; + } + string_appends (&temp, &temp_last_name); + string_appends (&last_name, &temp_last_name); + string_delete (&temp_last_name); + } + } + + if (remember_K) + remember_Ktype (work, temp.b, LEN_STRING (&temp)); + + if (qualifiers > 0) + string_append (&temp, 
SCOPE_STRING (work)); + } + + remember_Btype (work, temp.b, LEN_STRING (&temp), bindex); + + /* If we are using the result as a function name, we need to append + the appropriate '::' separated constructor or destructor name. + We do this here because this is the most convenient place, where + we already have a pointer to the name and the length of the name. */ + + if (isfuncname) + { + string_append (&temp, SCOPE_STRING (work)); + if (work -> destructor & 1) + string_append (&temp, "~"); + string_appends (&temp, &last_name); + } + + /* Now either prepend the temp buffer to the result, or append it, + depending upon the state of the append flag. */ + + if (append) + string_appends (result, &temp); + else + { + if (!STRING_EMPTY (result)) + string_append (&temp, SCOPE_STRING (work)); + string_prepends (result, &temp); + } + + string_delete (&last_name); + string_delete (&temp); + return (success); +} + +/* + +LOCAL FUNCTION + + get_count -- convert an ascii count to integer, consuming tokens + +SYNOPSIS + + static int + get_count (const char **type, int *count) + +DESCRIPTION + + Assume that *type points at a count in a mangled name; set + *count to its value, and set *type to the next character after + the count. There are some weird rules in effect here. + + If *type does not point at a string of digits, return zero. + + If *type points at a string of digits followed by an + underscore, set *count to their value as an integer, advance + *type to point *after the underscore, and return 1. + + If *type points at a string of digits not followed by an + underscore, consume only the first digit. Set *count to its + value as an integer, leave *type pointing after that digit, + and return 1. 
+ + The excuse for this odd behavior: in the ARM and HP demangling + styles, a type can be followed by a repeat count of the form + `Nxy', where: + + `x' is a single digit specifying how many additional copies + of the type to append to the argument list, and + + `y' is one or more digits, specifying the zero-based index of + the first repeated argument in the list. Yes, as you're + unmangling the name you can figure this out yourself, but + it's there anyway. + + So, for example, in `bar__3fooFPiN51', the first argument is a + pointer to an integer (`Pi'), and then the next five arguments + are the same (`N5'), and the first repeat is the function's + second argument (`1'). +*/ + +static int +get_count (type, count) + const char **type; + int *count; +{ + const char *p; + int n; + + if (!ISDIGIT ((unsigned char)**type)) + return (0); + else + { + *count = **type - '0'; + (*type)++; + if (ISDIGIT ((unsigned char)**type)) + { + p = *type; + n = *count; + do + { + n *= 10; + n += *p - '0'; + p++; + } + while (ISDIGIT ((unsigned char)*p)); + if (*p == '_') + { + *type = p + 1; + *count = n; + } + } + } + return (1); +} + +/* RESULT will be initialised here; it will be freed on failure. The + value returned is really a type_kind_t. */ + +static int +do_type (work, mangled, result) + struct work_stuff *work; + const char **mangled; + string *result; +{ + int n; + int done; + int success; + string decl; + const char *remembered_type; + int type_quals; + string btype; + type_kind_t tk = tk_none; + + string_init (&btype); + string_init (&decl); + string_init (result); + + done = 0; + success = 1; + while (success && !done) + { + int member; + switch (**mangled) + { + + /* A pointer type */ + case 'P': + case 'p': + (*mangled)++; + if (! 
(work -> options & DMGL_JAVA)) + string_prepend (&decl, "*"); + if (tk == tk_none) + tk = tk_pointer; + break; + + /* A reference type */ + case 'R': + (*mangled)++; + string_prepend (&decl, "&"); + if (tk == tk_none) + tk = tk_reference; + break; + + /* An array */ + case 'A': + { + ++(*mangled); + if (!STRING_EMPTY (&decl) + && (decl.b[0] == '*' || decl.b[0] == '&')) + { + string_prepend (&decl, "("); + string_append (&decl, ")"); + } + string_append (&decl, "["); + if (**mangled != '_') + success = demangle_template_value_parm (work, mangled, &decl, + tk_integral); + if (**mangled == '_') + ++(*mangled); + string_append (&decl, "]"); + break; + } + + /* A back reference to a previously seen type */ + case 'T': + (*mangled)++; + if (!get_count (mangled, &n) || n >= work -> ntypes) + { + success = 0; + } + else + { + remembered_type = work -> typevec[n]; + mangled = &remembered_type; + } + break; + + /* A function */ + case 'F': + (*mangled)++; + if (!STRING_EMPTY (&decl) + && (decl.b[0] == '*' || decl.b[0] == '&')) + { + string_prepend (&decl, "("); + string_append (&decl, ")"); + } + /* After picking off the function args, we expect to either find the + function return type (preceded by an '_') or the end of the + string. */ + if (!demangle_nested_args (work, mangled, &decl) + || (**mangled != '_' && **mangled != '\0')) + { + success = 0; + break; + } + if (success && (**mangled == '_')) + (*mangled)++; + break; + + case 'M': + case 'O': + { + type_quals = TYPE_UNQUALIFIED; + + member = **mangled == 'M'; + (*mangled)++; + + string_append (&decl, ")"); + + /* We don't need to prepend `::' for a qualified name; + demangle_qualified will do that for us. 
*/ + if (**mangled != 'Q') + string_prepend (&decl, SCOPE_STRING (work)); + + if (ISDIGIT ((unsigned char)**mangled)) + { + n = consume_count (mangled); + if (n == -1 + || (int) strlen (*mangled) < n) + { + success = 0; + break; + } + string_prependn (&decl, *mangled, n); + *mangled += n; + } + else if (**mangled == 'X' || **mangled == 'Y') + { + string temp; + do_type (work, mangled, &temp); + string_prepends (&decl, &temp); + } + else if (**mangled == 't') + { + string temp; + string_init (&temp); + success = demangle_template (work, mangled, &temp, + NULL, 1, 1); + if (success) + { + string_prependn (&decl, temp.b, temp.p - temp.b); + string_clear (&temp); + } + else + break; + } + else if (**mangled == 'Q') + { + success = demangle_qualified (work, mangled, &decl, + /*isfuncnam=*/0, + /*append=*/0); + if (!success) + break; + } + else + { + success = 0; + break; + } + + string_prepend (&decl, "("); + if (member) + { + switch (**mangled) + { + case 'C': + case 'V': + case 'u': + type_quals |= code_for_qualifier (**mangled); + (*mangled)++; + break; + + default: + break; + } + + if (*(*mangled)++ != 'F') + { + success = 0; + break; + } + } + if ((member && !demangle_nested_args (work, mangled, &decl)) + || **mangled != '_') + { + success = 0; + break; + } + (*mangled)++; + if (! PRINT_ANSI_QUALIFIERS) + { + break; + } + if (type_quals != TYPE_UNQUALIFIED) + { + APPEND_BLANK (&decl); + string_append (&decl, qualifier_string (type_quals)); + } + break; + } + case 'G': + (*mangled)++; + break; + + case 'C': + case 'V': + case 'u': + if (PRINT_ANSI_QUALIFIERS) + { + if (!STRING_EMPTY (&decl)) + string_prepend (&decl, " "); + + string_prepend (&decl, demangle_qualifier (**mangled)); + } + (*mangled)++; + break; + /* + } + */ + + /* fall through */ + default: + done = 1; + break; + } + } + + if (success) switch (**mangled) + { + /* A qualified name, such as "Outer::Inner". 
*/ + case 'Q': + case 'K': + { + success = demangle_qualified (work, mangled, result, 0, 1); + break; + } + + /* A back reference to a previously seen squangled type */ + case 'B': + (*mangled)++; + if (!get_count (mangled, &n) || n >= work -> numb) + success = 0; + else + string_append (result, work->btypevec[n]); + break; + + case 'X': + case 'Y': + /* A template parm. We substitute the corresponding argument. */ + { + int idx; + + (*mangled)++; + idx = consume_count_with_underscores (mangled); + + if (idx == -1 + || (work->tmpl_argvec && idx >= work->ntmpl_args) + || consume_count_with_underscores (mangled) == -1) + { + success = 0; + break; + } + + if (work->tmpl_argvec) + string_append (result, work->tmpl_argvec[idx]); + else + string_append_template_idx (result, idx); + + success = 1; + } + break; + + default: + success = demangle_fund_type (work, mangled, result); + if (tk == tk_none) + tk = (type_kind_t) success; + break; + } + + if (success) + { + if (!STRING_EMPTY (&decl)) + { + string_append (result, " "); + string_appends (result, &decl); + } + } + else + string_delete (result); + string_delete (&decl); + + if (success) + /* Assume an integral type, if we're not sure. */ + return (int) ((tk == tk_none) ? tk_integral : tk); + else + return 0; +} + +/* Given a pointer to a type string that represents a fundamental type + argument (int, long, unsigned int, etc) in TYPE, a pointer to the + string in which the demangled output is being built in RESULT, and + the WORK structure, decode the types and add them to the result. + + For example: + + "Ci" => "const int" + "Sl" => "signed long" + "CUs" => "const unsigned short" + + The value returned is really a type_kind_t. 
*/ + +static int +demangle_fund_type (work, mangled, result) + struct work_stuff *work; + const char **mangled; + string *result; +{ + int done = 0; + int success = 1; + char buf[10]; + unsigned int dec = 0; + string btype; + type_kind_t tk = tk_integral; + + string_init (&btype); + + /* First pick off any type qualifiers. There can be more than one. */ + + while (!done) + { + switch (**mangled) + { + case 'C': + case 'V': + case 'u': + if (PRINT_ANSI_QUALIFIERS) + { + if (!STRING_EMPTY (result)) + string_prepend (result, " "); + string_prepend (result, demangle_qualifier (**mangled)); + } + (*mangled)++; + break; + case 'U': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "unsigned"); + break; + case 'S': /* signed char only */ + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "signed"); + break; + case 'J': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "__complex"); + break; + default: + done = 1; + break; + } + } + + /* Now pick off the fundamental type. There can be only one. 
*/ + + switch (**mangled) + { + case '\0': + case '_': + break; + case 'v': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "void"); + break; + case 'x': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "long long"); + break; + case 'l': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "long"); + break; + case 'i': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "int"); + break; + case 's': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "short"); + break; + case 'b': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "bool"); + tk = tk_bool; + break; + case 'c': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "char"); + tk = tk_char; + break; + case 'w': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "wchar_t"); + tk = tk_char; + break; + case 'r': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "long double"); + tk = tk_real; + break; + case 'd': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "double"); + tk = tk_real; + break; + case 'f': + (*mangled)++; + APPEND_BLANK (result); + string_append (result, "float"); + tk = tk_real; + break; + case 'G': + (*mangled)++; + if (!ISDIGIT ((unsigned char)**mangled)) + { + success = 0; + break; + } + case 'I': + (*mangled)++; + if (**mangled == '_') + { + int i; + (*mangled)++; + for (i = 0; + i < (long) sizeof (buf) - 1 && **mangled && **mangled != '_'; + (*mangled)++, i++) + buf[i] = **mangled; + if (**mangled != '_') + { + success = 0; + break; + } + buf[i] = '\0'; + (*mangled)++; + } + else + { + strncpy (buf, *mangled, 2); + buf[2] = '\0'; + *mangled += min (strlen (*mangled), 2); + } + /*sscanf (buf, "%x", &dec); + sprintf (buf, "int%u_t", dec);*/ + sprintf (buf, "i_xx_t"); + APPEND_BLANK (result); + string_append (result, buf); + break; + + /* fall through */ + /* An explicit type, such as "6mytype" or "7integer" */ + case '0': + case '1': + 
case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + int bindex = register_Btype (work); + string loc_btype; + string_init (&loc_btype); + if (demangle_class_name (work, mangled, &loc_btype)) { + remember_Btype (work, loc_btype.b, LEN_STRING (&loc_btype), bindex); + APPEND_BLANK (result); + string_appends (result, &loc_btype); + } + else + success = 0; + string_delete (&loc_btype); + break; + } + case 't': + { + success = demangle_template (work, mangled, &btype, 0, 1, 1); + string_appends (result, &btype); + break; + } + default: + success = 0; + break; + } + + string_delete (&btype); + + return success ? ((int) tk) : 0; +} + + +/* Handle a template's value parameter for HP aCC (extension from ARM) + **mangled points to 'S' or 'U' */ + +static int +do_hpacc_template_const_value (work, mangled, result) + struct work_stuff *work ATTRIBUTE_UNUSED; + const char **mangled; + string *result; +{ + int unsigned_const; + + if (**mangled != 'U' && **mangled != 'S') + return 0; + + unsigned_const = (**mangled == 'U'); + + (*mangled)++; + + switch (**mangled) + { + case 'N': + string_append (result, "-"); + /* fall through */ + case 'P': + (*mangled)++; + break; + case 'M': + /* special case for -2^31 */ + string_append (result, "-2147483648"); + (*mangled)++; + return 1; + default: + return 0; + } + + /* We have to be looking at an integer now */ + if (!(ISDIGIT ((unsigned char)**mangled))) + return 0; + + /* We only deal with integral values for template + parameters -- so it's OK to look only for digits */ + while (ISDIGIT ((unsigned char)**mangled)) + { + char_str[0] = **mangled; + string_append (result, char_str); + (*mangled)++; + } + + if (unsigned_const) + string_append (result, "U"); + + /* FIXME? Some day we may have 64-bit (or larger :-) ) constants + with L or LL suffixes. 
pai/1997-09-03 */ + + return 1; /* success */ +} + +/* Handle a template's literal parameter for HP aCC (extension from ARM) + **mangled is pointing to the 'A' */ + +static int +do_hpacc_template_literal (work, mangled, result) + struct work_stuff *work; + const char **mangled; + string *result; +{ + int literal_len = 0; + char * recurse; + char * recurse_dem; + + if (**mangled != 'A') + return 0; + + (*mangled)++; + + literal_len = consume_count (mangled); + + if (literal_len <= 0) + return 0; + + /* Literal parameters are names of arrays, functions, etc. and the + canonical representation uses the address operator */ + string_append (result, "&"); + + /* Now recursively demangle the literal name */ + recurse = (char *) xmalloc (literal_len + 1); + memcpy (recurse, *mangled, literal_len); + recurse[literal_len] = '\000'; + + recurse_dem = VG_(cplus_demangle) (recurse, work->options); + + if (recurse_dem) + { + string_append (result, recurse_dem); + free (recurse_dem); + } + else + { + string_appendn (result, *mangled, literal_len); + } + (*mangled) += literal_len; + free (recurse); + + return 1; +} + +static int +snarf_numeric_literal (args, arg) + const char ** args; + string * arg; +{ + if (**args == '-') + { + char_str[0] = '-'; + string_append (arg, char_str); + (*args)++; + } + else if (**args == '+') + (*args)++; + + if (!ISDIGIT ((unsigned char)**args)) + return 0; + + while (ISDIGIT ((unsigned char)**args)) + { + char_str[0] = **args; + string_append (arg, char_str); + (*args)++; + } + + return 1; +} + +/* Demangle the next argument, given by MANGLED into RESULT, which + *should be an uninitialized* string. It will be initialized here, + and free'd should anything go wrong. */ + +static int +do_arg (work, mangled, result) + struct work_stuff *work; + const char **mangled; + string *result; +{ + /* Remember where we started so that we can record the type, for + non-squangling type remembering. 
*/ + const char *start = *mangled; + string temp_result; + + string_init (result); + string_init (&temp_result); + + if (work->nrepeats > 0) + { + --work->nrepeats; + + if (work->previous_argument == 0) + return 0; + + /* We want to reissue the previous type in this argument list. */ + string_appends (result, work->previous_argument); + return 1; + } + + if (**mangled == 'n') + { + /* A squangling-style repeat. */ + (*mangled)++; + work->nrepeats = consume_count(mangled); + + if (work->nrepeats <= 0) + /* This was not a repeat count after all. */ + return 0; + + if (work->nrepeats > 9) + { + if (**mangled != '_') + /* The repeat count should be followed by an '_' in this + case. */ + return 0; + else + (*mangled)++; + } + + /* Now, the repeat is all set up. */ + return do_arg (work, mangled, result); + } + + /* Save the result in WORK->previous_argument so that we can find it + if it's repeated. Note that saving START is not good enough: we + do not want to add additional types to the back-referenceable + type vector when processing a repeated type. 
*/ + if (work->previous_argument) + string_clear (work->previous_argument); + else + { + work->previous_argument = (string*) xmalloc (sizeof (string)); + string_init (work->previous_argument); + } + + if (!do_type (work, mangled, &temp_result)) + { + string_delete (&temp_result); + return 0; + } + string_appends (work->previous_argument, &temp_result); + string_delete (&temp_result); + + string_appends (result, work->previous_argument); + + remember_type (work, start, *mangled - start); + return 1; +} + +static void +remember_type (work, start, len) + struct work_stuff *work; + const char *start; + int len; +{ + char *tem; + + if (work->forgetting_types) + return; + + if (work -> ntypes >= work -> typevec_size) + { + if (work -> typevec_size == 0) + { + work -> typevec_size = 3; + work -> typevec + = (char **) xmalloc (sizeof (char *) * work -> typevec_size); + } + else + { + work -> typevec_size *= 2; + work -> typevec + = (char **) xrealloc ((char *)work -> typevec, + sizeof (char *) * work -> typevec_size); + } + } + tem = xmalloc (len + 1); + memcpy (tem, start, len); + tem[len] = '\0'; + work -> typevec[work -> ntypes++] = tem; +} + + +/* Remember a K type class qualifier. */ +static void +remember_Ktype (work, start, len) + struct work_stuff *work; + const char *start; + int len; +{ + char *tem; + + if (work -> numk >= work -> ksize) + { + if (work -> ksize == 0) + { + work -> ksize = 5; + work -> ktypevec + = (char **) xmalloc (sizeof (char *) * work -> ksize); + } + else + { + work -> ksize *= 2; + work -> ktypevec + = (char **) xrealloc ((char *)work -> ktypevec, + sizeof (char *) * work -> ksize); + } + } + tem = xmalloc (len + 1); + memcpy (tem, start, len); + tem[len] = '\0'; + work -> ktypevec[work -> numk++] = tem; +} + +/* Register a B code, and get an index for it. 
B codes are registered + as they are seen, rather than as they are completed, so map > + registers map > as B0, and temp as B1 */ + +static int +register_Btype (work) + struct work_stuff *work; +{ + int ret; + + if (work -> numb >= work -> bsize) + { + if (work -> bsize == 0) + { + work -> bsize = 5; + work -> btypevec + = (char **) xmalloc (sizeof (char *) * work -> bsize); + } + else + { + work -> bsize *= 2; + work -> btypevec + = (char **) xrealloc ((char *)work -> btypevec, + sizeof (char *) * work -> bsize); + } + } + ret = work -> numb++; + work -> btypevec[ret] = NULL; + return(ret); +} + +/* Store a value into a previously registered B code type. */ + +static void +remember_Btype (work, start, len, ind) + struct work_stuff *work; + const char *start; + int len, ind; +{ + char *tem; + + tem = xmalloc (len + 1); + memcpy (tem, start, len); + tem[len] = '\0'; + work -> btypevec[ind] = tem; +} + +/* Lose all the info related to B and K type codes. */ +static void +forget_B_and_K_types (work) + struct work_stuff *work; +{ + int i; + + while (work -> numk > 0) + { + i = --(work -> numk); + if (work -> ktypevec[i] != NULL) + { + free (work -> ktypevec[i]); + work -> ktypevec[i] = NULL; + } + } + + while (work -> numb > 0) + { + i = --(work -> numb); + if (work -> btypevec[i] != NULL) + { + free (work -> btypevec[i]); + work -> btypevec[i] = NULL; + } + } +} +/* Forget the remembered types, but not the type vector itself. */ + +static void +forget_types (work) + struct work_stuff *work; +{ + int i; + + while (work -> ntypes > 0) + { + i = --(work -> ntypes); + if (work -> typevec[i] != NULL) + { + free (work -> typevec[i]); + work -> typevec[i] = NULL; + } + } +} + +/* Process the argument list part of the signature, after any class spec + has been consumed, as well as the first 'F' character (if any). For + example: + + "__als__3fooRT0" => process "RT0" + "complexfunc5__FPFPc_PFl_i" => process "PFPc_PFl_i" + + DECLP must be already initialised, usually non-empty. 
It won't be freed + on failure. + + Note that g++ differs significantly from ARM and lucid style mangling + with regards to references to previously seen types. For example, given + the source fragment: + + class foo { + public: + foo::foo (int, foo &ia, int, foo &ib, int, foo &ic); + }; + + foo::foo (int, foo &ia, int, foo &ib, int, foo &ic) { ia = ib = ic; } + void foo (int, foo &ia, int, foo &ib, int, foo &ic) { ia = ib = ic; } + + g++ produces the names: + + __3fooiRT0iT2iT2 + foo__FiR3fooiT1iT1 + + while lcc (and presumably other ARM style compilers as well) produces: + + foo__FiR3fooT1T2T1T2 + __ct__3fooFiR3fooT1T2T1T2 + + Note that g++ bases its type numbers starting at zero and counts all + previously seen types, while lucid/ARM bases its type numbers starting + at one and only considers types after it has seen the 'F' character + indicating the start of the function args. For lucid/ARM style, we + account for this difference by discarding any previously seen types when + we see the 'F' character, and subtracting one from the type number + reference. + + */ + +static int +demangle_args (work, mangled, declp) + struct work_stuff *work; + const char **mangled; + string *declp; +{ + string arg; + int need_comma = 0; + int r; + int t; + const char *tem; + char temptype; + + if (PRINT_ARG_TYPES) + { + string_append (declp, "("); + if (**mangled == '\0') + { + string_append (declp, "void"); + } + } + + while ((**mangled != '_' && **mangled != '\0' && **mangled != 'e') + || work->nrepeats > 0) + { + if ((**mangled == 'N') || (**mangled == 'T')) + { + temptype = *(*mangled)++; + + if (temptype == 'N') + { + if (!get_count (mangled, &r)) + { + return (0); + } + } + else + { + r = 1; + } + if ((HP_DEMANGLING || ARM_DEMANGLING || EDG_DEMANGLING) && work -> ntypes >= 10) + { + /* If we have 10 or more types we might have more than a 1 digit + index so we'll have to consume the whole count here. 
This + will lose if the next thing is a type name preceded by a + count but it's impossible to demangle that case properly + anyway. Eg if we already have 12 types is T12Pc "(..., type1, + Pc, ...)" or "(..., type12, char *, ...)" */ + if ((t = consume_count(mangled)) <= 0) + { + return (0); + } + } + else + { + if (!get_count (mangled, &t)) + { + return (0); + } + } + if (LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING) + { + t--; + } + /* Validate the type index. Protect against illegal indices from + malformed type strings. */ + if ((t < 0) || (t >= work -> ntypes)) + { + return (0); + } + while (work->nrepeats > 0 || --r >= 0) + { + tem = work -> typevec[t]; + if (need_comma && PRINT_ARG_TYPES) + { + string_append (declp, ", "); + } + if (!do_arg (work, &tem, &arg)) + { + return (0); + } + if (PRINT_ARG_TYPES) + { + string_appends (declp, &arg); + } + string_delete (&arg); + need_comma = 1; + } + } + else + { + if (need_comma && PRINT_ARG_TYPES) + string_append (declp, ", "); + if (!do_arg (work, mangled, &arg)) + { + string_delete (&arg); + return (0); + } + if (PRINT_ARG_TYPES) + string_appends (declp, &arg); + string_delete (&arg); + need_comma = 1; + } + } + + if (**mangled == 'e') + { + (*mangled)++; + if (PRINT_ARG_TYPES) + { + if (need_comma) + { + string_append (declp, ","); + } + string_append (declp, "..."); + } + } + + if (PRINT_ARG_TYPES) + { + string_append (declp, ")"); + } + return (1); +} + +/* Like demangle_args, but for demangling the argument lists of function + and method pointers or references, not top-level declarations. */ + +static int +demangle_nested_args (work, mangled, declp) + struct work_stuff *work; + const char **mangled; + string *declp; +{ + string* saved_previous_argument; + int result; + int saved_nrepeats; + + /* The G++ name-mangling algorithm does not remember types on nested + argument lists, unless -fsquangling is used, and in that case the + type vector updated by remember_type is not used. 
So, we turn + off remembering of types here. */ + ++work->forgetting_types; + + /* For the repeat codes used with -fsquangling, we must keep track of + the last argument. */ + saved_previous_argument = work->previous_argument; + saved_nrepeats = work->nrepeats; + work->previous_argument = 0; + work->nrepeats = 0; + + /* Actually demangle the arguments. */ + result = demangle_args (work, mangled, declp); + + /* Restore the previous_argument field. */ + if (work->previous_argument) + { + string_delete (work->previous_argument); + free ((char*) work->previous_argument); + } + work->previous_argument = saved_previous_argument; + --work->forgetting_types; + work->nrepeats = saved_nrepeats; + + return result; +} + +static void +demangle_function_name (work, mangled, declp, scan) + struct work_stuff *work; + const char **mangled; + string *declp; + const char *scan; +{ + size_t i; + string type; + const char *tem; + + string_appendn (declp, (*mangled), scan - (*mangled)); + string_need (declp, 1); + *(declp -> p) = '\0'; + + /* Consume the function name, including the "__" separating the name + from the signature. We are guaranteed that SCAN points to the + separator. */ + + (*mangled) = scan + 2; + /* We may be looking at an instantiation of a template function: + foo__Xt1t2_Ft3t4, where t1, t2, ... are template arguments and a + following _F marks the start of the function arguments. Handle + the template arguments first. */ + + if (HP_DEMANGLING && (**mangled == 'X')) + { + demangle_arm_hp_template (work, mangled, 0, declp); + /* This leaves MANGLED pointing to the 'F' marking func args */ + } + + if (LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING) + { + + /* See if we have an ARM style constructor or destructor operator. + If so, then just record it, clear the decl, and return. + We can't build the actual constructor/destructor decl until later, + when we recover the class name from the signature. 
*/ + + if (strcmp (declp -> b, "__ct") == 0) + { + work -> constructor += 1; + string_clear (declp); + return; + } + else if (strcmp (declp -> b, "__dt") == 0) + { + work -> destructor += 1; + string_clear (declp); + return; + } + } + + if (declp->p - declp->b >= 3 + && declp->b[0] == 'o' + && declp->b[1] == 'p' + && strchr (cplus_markers, declp->b[2]) != NULL) + { + /* see if it's an assignment expression */ + if (declp->p - declp->b >= 10 /* op$assign_ */ + && memcmp (declp->b + 3, "assign_", 7) == 0) + { + for (i = 0; i < ARRAY_SIZE (optable); i++) + { + int len = declp->p - declp->b - 10; + if ((int) strlen (optable[i].in) == len + && memcmp (optable[i].in, declp->b + 10, len) == 0) + { + string_clear (declp); + string_append (declp, "operator"); + string_append (declp, optable[i].out); + string_append (declp, "="); + break; + } + } + } + else + { + for (i = 0; i < ARRAY_SIZE (optable); i++) + { + int len = declp->p - declp->b - 3; + if ((int) strlen (optable[i].in) == len + && memcmp (optable[i].in, declp->b + 3, len) == 0) + { + string_clear (declp); + string_append (declp, "operator"); + string_append (declp, optable[i].out); + break; + } + } + } + } + else if (declp->p - declp->b >= 5 && memcmp (declp->b, "type", 4) == 0 + && strchr (cplus_markers, declp->b[4]) != NULL) + { + /* type conversion operator */ + tem = declp->b + 5; + if (do_type (work, &tem, &type)) + { + string_clear (declp); + string_append (declp, "operator "); + string_appends (declp, &type); + string_delete (&type); + } + } + else if (declp->b[0] == '_' && declp->b[1] == '_' + && declp->b[2] == 'o' && declp->b[3] == 'p') + { + /* ANSI. */ + /* type conversion operator. 
*/ + tem = declp->b + 4; + if (do_type (work, &tem, &type)) + { + string_clear (declp); + string_append (declp, "operator "); + string_appends (declp, &type); + string_delete (&type); + } + } + else if (declp->b[0] == '_' && declp->b[1] == '_' + && ISLOWER((unsigned char)declp->b[2]) + && ISLOWER((unsigned char)declp->b[3])) + { + if (declp->b[4] == '\0') + { + /* Operator. */ + for (i = 0; i < ARRAY_SIZE (optable); i++) + { + if (strlen (optable[i].in) == 2 + && memcmp (optable[i].in, declp->b + 2, 2) == 0) + { + string_clear (declp); + string_append (declp, "operator"); + string_append (declp, optable[i].out); + break; + } + } + } + else + { + if (declp->b[2] == 'a' && declp->b[5] == '\0') + { + /* Assignment. */ + for (i = 0; i < ARRAY_SIZE (optable); i++) + { + if (strlen (optable[i].in) == 3 + && memcmp (optable[i].in, declp->b + 2, 3) == 0) + { + string_clear (declp); + string_append (declp, "operator"); + string_append (declp, optable[i].out); + break; + } + } + } + } + } +} + +/* a mini string-handling package */ + +static void +string_need (s, n) + string *s; + int n; +{ + int tem; + + if (s->b == NULL) + { + if (n < 32) + { + n = 32; + } + s->p = s->b = xmalloc (n); + s->e = s->b + n; + } + else if (s->e - s->p < n) + { + tem = s->p - s->b; + n += tem; + n *= 2; + s->b = xrealloc (s->b, n); + s->p = s->b + tem; + s->e = s->b + n; + } +} + +static void +string_delete (s) + string *s; +{ + if (s->b != NULL) + { + free (s->b); + s->b = s->e = s->p = NULL; + } +} + +static void +string_init (s) + string *s; +{ + s->b = s->p = s->e = NULL; +} + +static void +string_clear (s) + string *s; +{ + s->p = s->b; +} + +#if 0 + +static int +string_empty (s) + string *s; +{ + return (s->b == s->p); +} + +#endif + +static void +string_append (p, s) + string *p; + const char *s; +{ + int n; + if (s == NULL || *s == '\0') + return; + n = strlen (s); + string_need (p, n); + memcpy (p->p, s, n); + p->p += n; +} + +static void +string_appends (p, s) + string *p, *s; +{ + int 
n; + + if (s->b != s->p) + { + n = s->p - s->b; + string_need (p, n); + memcpy (p->p, s->b, n); + p->p += n; + } +} + +static void +string_appendn (p, s, n) + string *p; + const char *s; + int n; +{ + if (n != 0) + { + string_need (p, n); + memcpy (p->p, s, n); + p->p += n; + } +} + +static void +string_prepend (p, s) + string *p; + const char *s; +{ + if (s != NULL && *s != '\0') + { + string_prependn (p, s, strlen (s)); + } +} + +static void +string_prepends (p, s) + string *p, *s; +{ + if (s->b != s->p) + { + string_prependn (p, s->b, s->p - s->b); + } +} + +static void +string_prependn (p, s, n) + string *p; + const char *s; + int n; +{ + char *q; + + if (n != 0) + { + string_need (p, n); + for (q = p->p - 1; q >= p->b; q--) + { + q[n] = q[0]; + } + memcpy (p->b, s, n); + p->p += n; + } +} + +static void +string_append_template_idx (s, idx) + string *s; + int idx; +{ + char buf[INTBUF_SIZE + 1 /* 'T' */]; + sprintf(buf, "T%d", idx); + string_append (s, buf); +} + +/* To generate a standalone demangler program for testing purposes, + just compile and link this file with -DMAIN and libiberty.a. When + run, it demangles each command line arg, or each stdin string, and + prints the result on stdout. */ + +#ifdef MAIN + +#include "getopt.h" + +static const char *program_name; +static const char *program_version = VERSION; +static int flags = DMGL_PARAMS | DMGL_ANSI | DMGL_VERBOSE; + +static void demangle_it PARAMS ((char *)); +static void usage PARAMS ((FILE *, int)) ATTRIBUTE_NORETURN; +static void fatal PARAMS ((const char *)) ATTRIBUTE_NORETURN; +static void print_demangler_list PARAMS ((FILE *)); + +static void +demangle_it (mangled_name) + char *mangled_name; +{ + char *result; + + /* For command line args, also try to demangle type encodings. 
*/ + result = cplus_demangle (mangled_name, flags | DMGL_TYPES); + if (result == NULL) + { + printf ("%s\n", mangled_name); + } + else + { + printf ("%s\n", result); + free (result); + } +} + +static void +print_demangler_list (stream) + FILE *stream; +{ + const struct demangler_engine *demangler; + + fprintf (stream, "{%s", libiberty_demanglers->demangling_style_name); + + for (demangler = libiberty_demanglers + 1; + demangler->demangling_style != unknown_demangling; + ++demangler) + fprintf (stream, ",%s", demangler->demangling_style_name); + + fprintf (stream, "}"); +} + +static void +usage (stream, status) + FILE *stream; + int status; +{ + fprintf (stream, "\ +Usage: %s [-_] [-n] [--strip-underscores] [--no-strip-underscores] \n", + program_name); + + fprintf (stream, "\ + [-s "); + print_demangler_list (stream); + fprintf (stream, "]\n"); + + fprintf (stream, "\ + [--format "); + print_demangler_list (stream); + fprintf (stream, "]\n"); + + fprintf (stream, "\ + [--help] [--version] [arg...]\n"); + exit (status); +} + +#define MBUF_SIZE 32767 +char mbuffer[MBUF_SIZE]; + +/* Defined in the automatically-generated underscore.c. */ +extern int prepends_underscore; + +int strip_underscore = 0; + +static const struct option long_options[] = { + {"strip-underscores", no_argument, 0, '_'}, + {"format", required_argument, 0, 's'}, + {"help", no_argument, 0, 'h'}, + {"no-strip-underscores", no_argument, 0, 'n'}, + {"version", no_argument, 0, 'v'}, + {0, no_argument, 0, 0} +}; + +/* More 'friendly' abort that prints the line and file. + config.h can #define abort fancy_abort if you like that sort of thing. 
/* Return the set of non-alphanumeric characters that may legally
   appear in a symbol component under the standard assembler symbol
   syntax.  */

static const char *
standard_symbol_characters ()
{
  static const char extra_chars[] = "_$.";
  return extra_chars;
}
/* Return the set of non-alphanumeric characters that may appear in a
   symbol name produced by HP's aCC compiler.  HP mangles identifiers
   straight from C++ source without an assembler in between, so this
   alphabet is far richer than an assembler would tolerate.  */

static const char *
hp_symbol_characters ()
{
  static const char extra_chars[] = "_$.<>#,*&[]:(){}";
  return extra_chars;
}


/* Return the set of non-alphanumeric characters that may appear in a
   symbol component under the GNU C++ V3 ABI mangling scheme.  */

static const char *
gnu_v3_symbol_characters ()
{
  static const char extra_chars[] = "_$.";
  return extra_chars;
}
*/ + abort (); + } + + for (;;) + { + int i = 0; + c = getchar (); + /* Try to read a label. */ + while (c != EOF && (ISALNUM (c) || strchr (valid_symbols, c))) + { + if (i >= MBUF_SIZE-1) + break; + mbuffer[i++] = c; + c = getchar (); + } + if (i > 0) + { + int skip_first = 0; + + if (mbuffer[0] == '.' || mbuffer[0] == '$') + ++skip_first; + if (strip_underscore && mbuffer[skip_first] == '_') + ++skip_first; + + if (skip_first > i) + skip_first = i; + + mbuffer[i] = 0; + flags |= (int) style; + result = cplus_demangle (mbuffer + skip_first, flags); + if (result) + { + if (mbuffer[0] == '.') + putc ('.', stdout); + fputs (result, stdout); + free (result); + } + else + fputs (mbuffer, stdout); + + fflush (stdout); + } + if (c == EOF) + break; + putchar (c); + fflush (stdout); + } + } + + return (0); +} + +static void +fatal (str) + const char *str; +{ + fprintf (stderr, "%s: %s\n", program_name, str); + exit (1); +} + +PTR +xmalloc (size) + size_t size; +{ + register PTR value = (PTR) malloc (size); + if (value == 0) + fatal ("virtual memory exhausted"); + return value; +} + +PTR +xrealloc (ptr, size) + PTR ptr; + size_t size; +{ + register PTR value = (PTR) realloc (ptr, size); + if (value == 0) + fatal ("virtual memory exhausted"); + return value; +} +#endif /* main */ diff --git a/demangle/demangle.h b/demangle/demangle.h new file mode 100644 index 000000000..238ae3398 --- /dev/null +++ b/demangle/demangle.h @@ -0,0 +1,177 @@ +/* Defs for interface to demanglers. + Copyright 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001 + Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + + +#if !defined (DEMANGLE_H) +#define DEMANGLE_H + +#include + +#define current_demangling_style VG_(current_demangling_style) + +/* Options passed to cplus_demangle (in 2nd parameter). */ + +#define DMGL_NO_OPTS 0 /* For readability... */ +#define DMGL_PARAMS (1 << 0) /* Include function args */ +#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ +#define DMGL_JAVA (1 << 2) /* Demangle as Java rather than C++. */ + +#define DMGL_AUTO (1 << 8) +#define DMGL_GNU (1 << 9) +#define DMGL_LUCID (1 << 10) +#define DMGL_ARM (1 << 11) +#define DMGL_HP (1 << 12) /* For the HP aCC compiler; + same as ARM except for + template arguments, etc. */ +#define DMGL_EDG (1 << 13) +#define DMGL_GNU_V3 (1 << 14) +#define DMGL_GNAT (1 << 15) + +/* If none of these are set, use 'current_demangling_style' as the default. */ +#define DMGL_STYLE_MASK (DMGL_AUTO|DMGL_GNU|DMGL_LUCID|DMGL_ARM|DMGL_HP|DMGL_EDG|DMGL_GNU_V3|DMGL_JAVA|DMGL_GNAT) + +/* Enumeration of possible demangling styles. + + Lucid and ARM styles are still kept logically distinct, even though + they now both behave identically. The resulting style is actual the + union of both. I.E. either style recognizes both "__pt__" and "__rf__" + for operator "->", even though the first is lucid style and the second + is ARM style. (FIXME?) 
*/ + +extern enum demangling_styles +{ + no_demangling = -1, + unknown_demangling = 0, + auto_demangling = DMGL_AUTO, + gnu_demangling = DMGL_GNU, + lucid_demangling = DMGL_LUCID, + arm_demangling = DMGL_ARM, + hp_demangling = DMGL_HP, + edg_demangling = DMGL_EDG, + gnu_v3_demangling = DMGL_GNU_V3, + java_demangling = DMGL_JAVA, + gnat_demangling = DMGL_GNAT +} current_demangling_style; + +/* Define string names for the various demangling styles. */ + +#define NO_DEMANGLING_STYLE_STRING "none" +#define AUTO_DEMANGLING_STYLE_STRING "auto" +#define GNU_DEMANGLING_STYLE_STRING "gnu" +#define LUCID_DEMANGLING_STYLE_STRING "lucid" +#define ARM_DEMANGLING_STYLE_STRING "arm" +#define HP_DEMANGLING_STYLE_STRING "hp" +#define EDG_DEMANGLING_STYLE_STRING "edg" +#define GNU_V3_DEMANGLING_STYLE_STRING "gnu-v3" +#define JAVA_DEMANGLING_STYLE_STRING "java" +#define GNAT_DEMANGLING_STYLE_STRING "gnat" + +/* Some macros to test what demangling style is active. */ + +#define CURRENT_DEMANGLING_STYLE current_demangling_style +#define AUTO_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_AUTO) +#define GNU_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_GNU) +#define LUCID_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_LUCID) +#define ARM_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_ARM) +#define HP_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_HP) +#define EDG_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_EDG) +#define GNU_V3_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_GNU_V3) +#define JAVA_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_JAVA) +#define GNAT_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_GNAT) + +/* Provide information about the available demangle styles. This code is + pulled from gdb into libiberty because it is useful to binutils also. 
*/ + +extern const struct demangler_engine +{ + const char *const demangling_style_name; + const enum demangling_styles demangling_style; + const char *const demangling_style_doc; +} libiberty_demanglers[]; + +extern char * +VG_(cplus_demangle) PARAMS ((const char *mangled, int options)); + +/* +extern int +cplus_demangle_opname PARAMS ((const char *opname, char *result, int options)); +*/ + +/* +extern const char * +cplus_mangle_opname PARAMS ((const char *opname, int options)); +*/ + +/* Note: This sets global state. FIXME if you care about multi-threading. */ + +/* +extern void +set_cplus_marker_for_demangling PARAMS ((int ch)); +*/ + +/* +extern enum demangling_styles +cplus_demangle_set_style PARAMS ((enum demangling_styles style)); +*/ + +/* +extern enum demangling_styles +cplus_demangle_name_to_style PARAMS ((const char *name)); +*/ + +/* V3 ABI demangling entry points, defined in cp-demangle.c. */ +extern char* +VG_(cplus_demangle_v3) PARAMS ((const char* mangled)); + +extern char* +VG_(java_demangle_v3) PARAMS ((const char* mangled)); + + +enum gnu_v3_ctor_kinds { + gnu_v3_complete_object_ctor = 1, + gnu_v3_base_object_ctor, + gnu_v3_complete_object_allocating_ctor +}; + +/* Return non-zero iff NAME is the mangled form of a constructor name + in the G++ V3 ABI demangling style. Specifically, return an `enum + gnu_v3_ctor_kinds' value indicating what kind of constructor + it is. */ +/* +extern enum gnu_v3_ctor_kinds + is_gnu_v3_mangled_ctor PARAMS ((const char *name)); +*/ + + +enum gnu_v3_dtor_kinds { + gnu_v3_deleting_dtor = 1, + gnu_v3_complete_object_dtor, + gnu_v3_base_object_dtor +}; + +/* Return non-zero iff NAME is the mangled form of a destructor name + in the G++ V3 ABI demangling style. Specifically, return an `enum + gnu_v3_dtor_kinds' value, indicating what kind of destructor + it is. 
*/ +/* +extern enum gnu_v3_dtor_kinds + is_gnu_v3_mangled_dtor PARAMS ((const char *name)); +*/ + +#endif /* DEMANGLE_H */ diff --git a/demangle/dyn-string.c b/demangle/dyn-string.c new file mode 100644 index 000000000..aaa7e3631 --- /dev/null +++ b/demangle/dyn-string.c @@ -0,0 +1,439 @@ +/* An abstract string datatype. + Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. + Contributed by Mark Mitchell (mark@markmitchell.com). + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_STRING_H +#include +#endif + +#ifdef HAVE_STDLIB_H +#include +#endif + +#include "vg_include.h" +#include "ansidecl.h" +#include "dyn-string.h" + +#ifndef STANDALONE +#define malloc(s) VG_(malloc)(VG_AR_DEMANGLE, s) +#define free(p) VG_(free)(VG_AR_DEMANGLE, p) +#define realloc(p,s) VG_(realloc)(VG_AR_DEMANGLE, p, s) +#endif + +/* If this file is being compiled for inclusion in the C++ runtime + library, as part of the demangler implementation, we don't want to + abort if an allocation fails. Instead, percolate an error code up + through the call chain. */ + +#ifdef IN_LIBGCC2 +#define RETURN_ON_ALLOCATION_FAILURE +#endif + +/* Performs in-place initialization of a dyn_string struct. 
This + function can be used with a dyn_string struct on the stack or + embedded in another object. The contents of of the string itself + are still dynamically allocated. The string initially is capable + of holding at least SPACE characeters, including the terminating + NUL. If SPACE is 0, it will silently be increated to 1. + + If RETURN_ON_ALLOCATION_FAILURE is defined and memory allocation + fails, returns 0. Otherwise returns 1. */ + +int +dyn_string_init (ds_struct_ptr, space) + struct dyn_string *ds_struct_ptr; + int space; +{ + /* We need at least one byte in which to store the terminating NUL. */ + if (space == 0) + space = 1; + +#ifdef RETURN_ON_ALLOCATION_FAILURE + ds_struct_ptr->s = (char *) malloc (space); + if (ds_struct_ptr->s == NULL) + return 0; +#else + ds_struct_ptr->s = (char *) malloc (space); +#endif + ds_struct_ptr->allocated = space; + ds_struct_ptr->length = 0; + ds_struct_ptr->s[0] = '\0'; + + return 1; +} + +/* Create a new dynamic string capable of holding at least SPACE + characters, including the terminating NUL. If SPACE is 0, it will + be silently increased to 1. If RETURN_ON_ALLOCATION_FAILURE is + defined and memory allocation fails, returns NULL. Otherwise + returns the newly allocated string. */ + +dyn_string_t +dyn_string_new (space) + int space; +{ + dyn_string_t result; +#ifdef RETURN_ON_ALLOCATION_FAILURE + result = (dyn_string_t) malloc (sizeof (struct dyn_string)); + if (result == NULL) + return NULL; + if (!dyn_string_init (result, space)) + { + free (result); + return NULL; + } +#else + result = (dyn_string_t) malloc (sizeof (struct dyn_string)); + dyn_string_init (result, space); +#endif + return result; +} + +/* Free the memory used by DS. */ + +void +dyn_string_delete (ds) + dyn_string_t ds; +{ + free (ds->s); + free (ds); +} + +/* Returns the contents of DS in a buffer allocated with malloc. It + is the caller's responsibility to deallocate the buffer using free. + DS is then set to the empty string. 
Deletes DS itself. */ + +char* +dyn_string_release (ds) + dyn_string_t ds; +{ + /* Store the old buffer. */ + char* result = ds->s; + /* The buffer is no longer owned by DS. */ + ds->s = NULL; + /* Delete DS. */ + free (ds); + /* Return the old buffer. */ + return result; +} + +/* Increase the capacity of DS so it can hold at least SPACE + characters, plus the terminating NUL. This function will not (at + present) reduce the capacity of DS. Returns DS on success. + + If RETURN_ON_ALLOCATION_FAILURE is defined and a memory allocation + operation fails, deletes DS and returns NULL. */ + +dyn_string_t +dyn_string_resize (ds, space) + dyn_string_t ds; + int space; +{ + int new_allocated = ds->allocated; + + /* Increase SPACE to hold the NUL termination. */ + ++space; + + /* Increase allocation by factors of two. */ + while (space > new_allocated) + new_allocated *= 2; + + if (new_allocated != ds->allocated) + { + ds->allocated = new_allocated; + /* We actually need more space. */ +#ifdef RETURN_ON_ALLOCATION_FAILURE + ds->s = (char *) realloc (ds->s, ds->allocated); + if (ds->s == NULL) + { + free (ds); + return NULL; + } +#else + ds->s = (char *) realloc (ds->s, ds->allocated); +#endif + } + + return ds; +} + +/* Sets the contents of DS to the empty string. */ + +void +dyn_string_clear (ds) + dyn_string_t ds; +{ + /* A dyn_string always has room for at least the NUL terminator. */ + ds->s[0] = '\0'; + ds->length = 0; +} + +/* Makes the contents of DEST the same as the contents of SRC. DEST + and SRC must be distinct. Returns 1 on success. On failure, if + RETURN_ON_ALLOCATION_FAILURE, deletes DEST and returns 0. */ + +int +dyn_string_copy (dest, src) + dyn_string_t dest; + dyn_string_t src; +{ + if (dest == src) + VG_(panic) ("dyn_string_copy: src==dest"); + + /* Make room in DEST. */ + if (dyn_string_resize (dest, src->length) == NULL) + return 0; + /* Copy DEST into SRC. */ + VG_(strcpy) (dest->s, src->s); + /* Update the size of DEST. 
*/ + dest->length = src->length; + return 1; +} + +/* Copies SRC, a NUL-terminated string, into DEST. Returns 1 on + success. On failure, if RETURN_ON_ALLOCATION_FAILURE, deletes DEST + and returns 0. */ + +int +dyn_string_copy_cstr (dest, src) + dyn_string_t dest; + const char *src; +{ + int length = VG_(strlen) (src); + /* Make room in DEST. */ + if (dyn_string_resize (dest, length) == NULL) + return 0; + /* Copy DEST into SRC. */ + VG_(strcpy) (dest->s, src); + /* Update the size of DEST. */ + dest->length = length; + return 1; +} + +/* Inserts SRC at the beginning of DEST. DEST is expanded as + necessary. SRC and DEST must be distinct. Returns 1 on success. + On failure, if RETURN_ON_ALLOCATION_FAILURE, deletes DEST and + returns 0. */ + +int +dyn_string_prepend (dest, src) + dyn_string_t dest; + dyn_string_t src; +{ + return dyn_string_insert (dest, 0, src); +} + +/* Inserts SRC, a NUL-terminated string, at the beginning of DEST. + DEST is expanded as necessary. Returns 1 on success. On failure, + if RETURN_ON_ALLOCATION_FAILURE, deletes DEST and returns 0. */ + +int +dyn_string_prepend_cstr (dest, src) + dyn_string_t dest; + const char *src; +{ + return dyn_string_insert_cstr (dest, 0, src); +} + +/* Inserts SRC into DEST starting at position POS. DEST is expanded + as necessary. SRC and DEST must be distinct. Returns 1 on + success. On failure, if RETURN_ON_ALLOCATION_FAILURE, deletes DEST + and returns 0. */ + +int +dyn_string_insert (dest, pos, src) + dyn_string_t dest; + int pos; + dyn_string_t src; +{ + int i; + + if (src == dest) + VG_(panic)( "dyn_string_insert: src==dest" ); + + if (dyn_string_resize (dest, dest->length + src->length) == NULL) + return 0; + /* Make room for the insertion. Be sure to copy the NUL. */ + for (i = dest->length; i >= pos; --i) + dest->s[i + src->length] = dest->s[i]; + /* Splice in the new stuff. */ + VG_(strncpy) (dest->s + pos, src->s, src->length); + /* Compute the new length. 
*/ + dest->length += src->length; + return 1; +} + +/* Inserts SRC, a NUL-terminated string, into DEST starting at + position POS. DEST is expanded as necessary. Returns 1 on + success. On failure, RETURN_ON_ALLOCATION_FAILURE, deletes DEST + and returns 0. */ + +int +dyn_string_insert_cstr (dest, pos, src) + dyn_string_t dest; + int pos; + const char *src; +{ + int i; + int length = VG_(strlen) (src); + + if (dyn_string_resize (dest, dest->length + length) == NULL) + return 0; + /* Make room for the insertion. Be sure to copy the NUL. */ + for (i = dest->length; i >= pos; --i) + dest->s[i + length] = dest->s[i]; + /* Splice in the new stuff. */ + VG_(strncpy) (dest->s + pos, src, length); + /* Compute the new length. */ + dest->length += length; + return 1; +} + +/* Inserts character C into DEST starting at position POS. DEST is + expanded as necessary. Returns 1 on success. On failure, + RETURN_ON_ALLOCATION_FAILURE, deletes DEST and returns 0. */ + +int +dyn_string_insert_char (dest, pos, c) + dyn_string_t dest; + int pos; + int c; +{ + int i; + + if (dyn_string_resize (dest, dest->length + 1) == NULL) + return 0; + /* Make room for the insertion. Be sure to copy the NUL. */ + for (i = dest->length; i >= pos; --i) + dest->s[i + 1] = dest->s[i]; + /* Add the new character. */ + dest->s[pos] = c; + /* Compute the new length. */ + ++dest->length; + return 1; +} + +/* Append S to DS, resizing DS if necessary. Returns 1 on success. + On failure, if RETURN_ON_ALLOCATION_FAILURE, deletes DEST and + returns 0. */ + +int +dyn_string_append (dest, s) + dyn_string_t dest; + dyn_string_t s; +{ + if (dyn_string_resize (dest, dest->length + s->length) == 0) + return 0; + VG_(strcpy) (dest->s + dest->length, s->s); + dest->length += s->length; + return 1; +} + +/* Append the NUL-terminated string S to DS, resizing DS if necessary. + Returns 1 on success. On failure, if RETURN_ON_ALLOCATION_FAILURE, + deletes DEST and returns 0. 
*/ + +int +dyn_string_append_cstr (dest, s) + dyn_string_t dest; + const char *s; +{ + int len = VG_(strlen) (s); + + /* The new length is the old length plus the size of our string, plus + one for the null at the end. */ + if (dyn_string_resize (dest, dest->length + len) == NULL) + return 0; + VG_(strcpy) (dest->s + dest->length, s); + dest->length += len; + return 1; +} + +/* Appends C to the end of DEST. Returns 1 on success. On failiure, + if RETURN_ON_ALLOCATION_FAILURE, deletes DEST and returns 0. */ + +int +dyn_string_append_char (dest, c) + dyn_string_t dest; + int c; +{ + /* Make room for the extra character. */ + if (dyn_string_resize (dest, dest->length + 1) == NULL) + return 0; + /* Append the character; it will overwrite the old NUL. */ + dest->s[dest->length] = c; + /* Add a new NUL at the end. */ + dest->s[dest->length + 1] = '\0'; + /* Update the length. */ + ++(dest->length); + return 1; +} + +/* Sets the contents of DEST to the substring of SRC starting at START + and ending before END. START must be less than or equal to END, + and both must be between zero and the length of SRC, inclusive. + Returns 1 on success. On failure, if RETURN_ON_ALLOCATION_FAILURE, + deletes DEST and returns 0. */ + +int +dyn_string_substring (dest, src, start, end) + dyn_string_t dest; + dyn_string_t src; + int start; + int end; +{ + int i; + int length = end - start; + + /* + vg_assert (start > end || start > src->length || end > src->length); + */ + + /* Make room for the substring. */ + if (dyn_string_resize (dest, length) == NULL) + return 0; + /* Copy the characters in the substring, */ + for (i = length; --i >= 0; ) + dest->s[i] = src->s[start + i]; + /* NUL-terimate the result. */ + dest->s[length] = '\0'; + /* Record the length of the substring. */ + dest->length = length; + + return 1; +} + +/* Returns non-zero if DS1 and DS2 have the same contents. 
*/ + +int +dyn_string_eq (ds1, ds2) + dyn_string_t ds1; + dyn_string_t ds2; +{ + /* If DS1 and DS2 have different lengths, they must not be the same. */ + if (ds1->length != ds2->length) + return 0; + else + return !VG_(strcmp) (ds1->s, ds2->s); +} diff --git a/demangle/dyn-string.h b/demangle/dyn-string.h new file mode 100644 index 000000000..9615cd64e --- /dev/null +++ b/demangle/dyn-string.h @@ -0,0 +1,96 @@ +/* An abstract string datatype. + Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. + Contributed by Mark Mitchell (mark@markmitchell.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ +#ifndef __DYN_STRING_H +#define __DYN_STRING_H + + +typedef struct dyn_string +{ + int allocated; /* The amount of space allocated for the string. */ + int length; /* The actual length of the string. */ + char *s; /* The string itself, NUL-terminated. */ +}* dyn_string_t; + +/* The length STR, in bytes, not including the terminating NUL. */ +#define dyn_string_length(STR) \ + ((STR)->length) + +/* The NTBS in which the contents of STR are stored. */ +#define dyn_string_buf(STR) \ + ((STR)->s) + +/* Compare DS1 to DS2 with strcmp. */ +#define dyn_string_compare(DS1, DS2) \ + (VG_(strcmp) ((DS1)->s, (DS2)->s)) + + +/* dyn_string functions are used in the demangling implementation + included in the G++ runtime library. 
To prevent collisions with + names in user programs, the functions that are used in the + demangler are given implementation-reserved names. */ + +#if 1 /* def IN_LIBGCC2 */ + +#define dyn_string_init VG_(__cxa_dyn_string_init) +#define dyn_string_new VG_(__cxa_dyn_string_new) +#define dyn_string_delete VG_(__cxa_dyn_string_delete) +#define dyn_string_release VG_(__cxa_dyn_string_release) +#define dyn_string_resize VG_(__cxa_dyn_string_resize) +#define dyn_string_clear VG_(__cxa_dyn_string_clear) +#define dyn_string_copy VG_(__cxa_dyn_string_copy) +#define dyn_string_copy_cstr VG_(__cxa_dyn_string_copy_cstr) +#define dyn_string_prepend VG_(__cxa_dyn_string_prepend) +#define dyn_string_prepend_cstr VG_(__cxa_dyn_string_prepend_cstr) +#define dyn_string_insert VG_(__cxa_dyn_string_insert) +#define dyn_string_insert_cstr VG_(__cxa_dyn_string_insert_cstr) +#define dyn_string_insert_char VG_(__cxa_dyn_string_insert_char) +#define dyn_string_append VG_(__cxa_dyn_string_append) +#define dyn_string_append_cstr VG_(__cxa_dyn_string_append_cstr) +#define dyn_string_append_char VG_(__cxa_dyn_string_append_char) +#define dyn_string_substring VG_(__cxa_dyn_string_substring) +#define dyn_string_eq VG_(__cxa_dyn_string_eq) + +#endif /* IN_LIBGCC2 */ + + +extern int dyn_string_init PARAMS ((struct dyn_string *, int)); +extern dyn_string_t dyn_string_new PARAMS ((int)); +extern void dyn_string_delete PARAMS ((dyn_string_t)); +extern char *dyn_string_release PARAMS ((dyn_string_t)); +extern dyn_string_t dyn_string_resize PARAMS ((dyn_string_t, int)); +extern void dyn_string_clear PARAMS ((dyn_string_t)); +extern int dyn_string_copy PARAMS ((dyn_string_t, dyn_string_t)); +extern int dyn_string_copy_cstr PARAMS ((dyn_string_t, const char *)); +extern int dyn_string_prepend PARAMS ((dyn_string_t, dyn_string_t)); +extern int dyn_string_prepend_cstr PARAMS ((dyn_string_t, const char *)); +extern int dyn_string_insert PARAMS ((dyn_string_t, int, + dyn_string_t)); +extern int 
dyn_string_insert_cstr PARAMS ((dyn_string_t, int, + const char *)); +extern int dyn_string_insert_char PARAMS ((dyn_string_t, int, int)); +extern int dyn_string_append PARAMS ((dyn_string_t, dyn_string_t)); +extern int dyn_string_append_cstr PARAMS ((dyn_string_t, const char *)); +extern int dyn_string_append_char PARAMS ((dyn_string_t, int)); +extern int dyn_string_substring PARAMS ((dyn_string_t, + dyn_string_t, int, int)); +extern int dyn_string_eq PARAMS ((dyn_string_t, dyn_string_t)); + +#endif diff --git a/demangle/safe-ctype.c b/demangle/safe-ctype.c new file mode 100644 index 000000000..0c2be3ed7 --- /dev/null +++ b/demangle/safe-ctype.c @@ -0,0 +1,163 @@ +/* replacement macros. + + Copyright (C) 2000 Free Software Foundation, Inc. + Contributed by Zack Weinberg . + +This file is part of the libiberty library. +Libiberty is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public +License as published by the Free Software Foundation; either +version 2 of the License, or (at your option) any later version. + +Libiberty is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with libiberty; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* This is a compatible replacement of the standard C library's + with the following properties: + + - Implements all isxxx() macros required by C99. + - Also implements some character classes useful when + parsing C-like languages. + - Does not change behavior depending on the current locale. + - Behaves properly for all values in the range of a signed or + unsigned char. 
*/ + +#include "ansidecl.h" +#include +#include /* for EOF */ + +/* Shorthand */ +#define bl _sch_isblank +#define cn _sch_iscntrl +#define di _sch_isdigit +#define is _sch_isidst +#define lo _sch_islower +#define nv _sch_isnvsp +#define pn _sch_ispunct +#define pr _sch_isprint +#define sp _sch_isspace +#define up _sch_isupper +#define vs _sch_isvsp +#define xd _sch_isxdigit + +/* Masks. */ +#define L lo|is |pr /* lower case letter */ +#define XL lo|is|xd|pr /* lowercase hex digit */ +#define U up|is |pr /* upper case letter */ +#define XU up|is|xd|pr /* uppercase hex digit */ +#define D di |xd|pr /* decimal digit */ +#define P pn |pr /* punctuation */ +#define _ pn|is |pr /* underscore */ + +#define C cn /* control character */ +#define Z nv |cn /* NUL */ +#define M nv|sp |cn /* cursor movement: \f \v */ +#define V vs|sp |cn /* vertical space: \r \n */ +#define T nv|sp|bl|cn /* tab */ +#define S nv|sp|bl|pr /* space */ + +/* Are we ASCII? */ +#if '\n' == 0x0A && ' ' == 0x20 && '0' == 0x30 \ + && 'A' == 0x41 && 'a' == 0x61 && '!' == 0x21 \ + && EOF == -1 + +const unsigned short _sch_istable[256] = +{ + Z, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */ + C, T, V, M, M, V, C, C, /* BS HT LF VT FF CR SO SI */ + C, C, C, C, C, C, C, C, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */ + C, C, C, C, C, C, C, C, /* CAN EM SUB ESC FS GS RS US */ + S, P, P, P, P, P, P, P, /* SP ! " # $ % & ' */ + P, P, P, P, P, P, P, P, /* ( ) * + , - . / */ + D, D, D, D, D, D, D, D, /* 0 1 2 3 4 5 6 7 */ + D, D, P, P, P, P, P, P, /* 8 9 : ; < = > ? 
*/ + P, XU, XU, XU, XU, XU, XU, U, /* @ A B C D E F G */ + U, U, U, U, U, U, U, U, /* H I J K L M N O */ + U, U, U, U, U, U, U, U, /* P Q R S T U V W */ + U, U, U, P, P, P, P, _, /* X Y Z [ \ ] ^ _ */ + P, XL, XL, XL, XL, XL, XL, L, /* ` a b c d e f g */ + L, L, L, L, L, L, L, L, /* h i j k l m n o */ + L, L, L, L, L, L, L, L, /* p q r s t u v w */ + L, L, L, P, P, P, P, C, /* x y z { | } ~ DEL */ + + /* high half of unsigned char is locale-specific, so all tests are + false in "C" locale */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +const unsigned char _sch_tolower[256] = +{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, + + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + + 91, 92, 93, 94, 95, 96, + + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + + 123,124,125,126,127, + + 128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143, + 144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159, + 160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175, + 176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191, + + 192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207, + 208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223, + 224,225,226,227, 228,229,230,231, 
232,233,234,235, 236,237,238,239, + 240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255, +}; + +const unsigned char _sch_toupper[256] = +{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, + + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + + 91, 92, 93, 94, 95, 96, + + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + + 123,124,125,126,127, + + 128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143, + 144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159, + 160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175, + 176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191, + + 192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207, + 208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223, + 224,225,226,227, 228,229,230,231, 232,233,234,235, 236,237,238,239, + 240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255, +}; + +#else + #error "Unsupported host character set" +#endif /* not ASCII */ diff --git a/demangle/safe-ctype.h b/demangle/safe-ctype.h new file mode 100644 index 000000000..b2ad8490b --- /dev/null +++ b/demangle/safe-ctype.h @@ -0,0 +1,103 @@ +/* replacement macros. + + Copyright (C) 2000, 2001 Free Software Foundation, Inc. + Contributed by Zack Weinberg . + +This file is part of the libiberty library. +Libiberty is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public +License as published by the Free Software Foundation; either +version 2 of the License, or (at your option) any later version. 
+ +Libiberty is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with libiberty; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* This is a compatible replacement of the standard C library's + with the following properties: + + - Implements all isxxx() macros required by C99. + - Also implements some character classes useful when + parsing C-like languages. + - Does not change behavior depending on the current locale. + - Behaves properly for all values in the range of a signed or + unsigned char. + + To avoid conflicts, this header defines the isxxx functions in upper + case, e.g. ISALPHA not isalpha. */ + +#ifndef SAFE_CTYPE_H +#define SAFE_CTYPE_H + +#ifdef isalpha + #error "safe-ctype.h and ctype.h may not be used simultaneously" +#else + +/* Categories. */ + +enum { + /* In C99 */ + _sch_isblank = 0x0001, /* space \t */ + _sch_iscntrl = 0x0002, /* nonprinting characters */ + _sch_isdigit = 0x0004, /* 0-9 */ + _sch_islower = 0x0008, /* a-z */ + _sch_isprint = 0x0010, /* any printing character including ' ' */ + _sch_ispunct = 0x0020, /* all punctuation */ + _sch_isspace = 0x0040, /* space \t \n \r \f \v */ + _sch_isupper = 0x0080, /* A-Z */ + _sch_isxdigit = 0x0100, /* 0-9A-Fa-f */ + + /* Extra categories useful to cpplib. */ + _sch_isidst = 0x0200, /* A-Za-z_ */ + _sch_isvsp = 0x0400, /* \n \r */ + _sch_isnvsp = 0x0800, /* space \t \f \v \0 */ + + /* Combinations of the above. 
*/ + _sch_isalpha = _sch_isupper|_sch_islower, /* A-Za-z */ + _sch_isalnum = _sch_isalpha|_sch_isdigit, /* A-Za-z0-9 */ + _sch_isidnum = _sch_isidst|_sch_isdigit, /* A-Za-z0-9_ */ + _sch_isgraph = _sch_isalnum|_sch_ispunct, /* isprint and not space */ + _sch_iscppsp = _sch_isvsp|_sch_isnvsp, /* isspace + \0 */ + _sch_isbasic = _sch_isprint|_sch_iscppsp /* basic charset of ISO C + (plus ` and @) */ +}; + +/* Character classification. */ +extern const unsigned short _sch_istable[256]; + +#define _sch_test(c, bit) (_sch_istable[(c) & 0xff] & (unsigned short)(bit)) + +#define ISALPHA(c) _sch_test(c, _sch_isalpha) +#define ISALNUM(c) _sch_test(c, _sch_isalnum) +#define ISBLANK(c) _sch_test(c, _sch_isblank) +#define ISCNTRL(c) _sch_test(c, _sch_iscntrl) +#define ISDIGIT(c) _sch_test(c, _sch_isdigit) +#define ISGRAPH(c) _sch_test(c, _sch_isgraph) +#define ISLOWER(c) _sch_test(c, _sch_islower) +#define ISPRINT(c) _sch_test(c, _sch_isprint) +#define ISPUNCT(c) _sch_test(c, _sch_ispunct) +#define ISSPACE(c) _sch_test(c, _sch_isspace) +#define ISUPPER(c) _sch_test(c, _sch_isupper) +#define ISXDIGIT(c) _sch_test(c, _sch_isxdigit) + +#define ISIDNUM(c) _sch_test(c, _sch_isidnum) +#define ISIDST(c) _sch_test(c, _sch_isidst) +#define IS_ISOBASIC(c) _sch_test(c, _sch_isbasic) +#define IS_VSPACE(c) _sch_test(c, _sch_isvsp) +#define IS_NVSPACE(c) _sch_test(c, _sch_isnvsp) +#define IS_SPACE_OR_NUL(c) _sch_test(c, _sch_iscppsp) + +/* Character transformation. 
*/ +extern const unsigned char _sch_toupper[256]; +extern const unsigned char _sch_tolower[256]; +#define TOUPPER(c) _sch_toupper[(c) & 0xff] +#define TOLOWER(c) _sch_tolower[(c) & 0xff] + +#endif /* no ctype.h */ +#endif /* SAFE_CTYPE_H */ diff --git a/docs/Makefile.am b/docs/Makefile.am new file mode 100644 index 000000000..e8a58fa18 --- /dev/null +++ b/docs/Makefile.am @@ -0,0 +1,5 @@ +docdir = $(datadir)/doc/valgrind + +doc_DATA = index.html manual.html nav.html techdocs.html + +EXTRA_DIST = $(doc_DATA) diff --git a/docs/Makefile.in b/docs/Makefile.in new file mode 100644 index 000000000..87022a820 --- /dev/null +++ b/docs/Makefile.in @@ -0,0 +1,200 @@ +# Makefile.in generated automatically by automake 1.4-p4 from Makefile.am + +# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + + +SHELL = @SHELL@ + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ + +bindir = @bindir@ +sbindir = @sbindir@ +libexecdir = @libexecdir@ +datadir = @datadir@ +sysconfdir = @sysconfdir@ +sharedstatedir = @sharedstatedir@ +localstatedir = @localstatedir@ +libdir = @libdir@ +infodir = @infodir@ +mandir = @mandir@ +includedir = @includedir@ +oldincludedir = /usr/include + +DESTDIR = + +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ + +top_builddir = .. 
+ +ACLOCAL = @ACLOCAL@ +AUTOCONF = @AUTOCONF@ +AUTOMAKE = @AUTOMAKE@ +AUTOHEADER = @AUTOHEADER@ + +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS) +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +transform = @program_transform_name@ + +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +host_alias = @host_alias@ +host_triplet = @host@ +CC = @CC@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +DEFAULT_SUPP = @DEFAULT_SUPP@ +LN_S = @LN_S@ +MAKEINFO = @MAKEINFO@ +PACKAGE = @PACKAGE@ +RANLIB = @RANLIB@ +VERSION = @VERSION@ + +docdir = $(datadir)/doc/valgrind + +doc_DATA = index.html manual.html nav.html techdocs.html + +EXTRA_DIST = $(doc_DATA) +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = ../config.h +CONFIG_CLEAN_FILES = +DATA = $(doc_DATA) + +DIST_COMMON = Makefile.am Makefile.in + + +DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) + +TAR = tar +GZIP_ENV = --best +all: all-redirect +.SUFFIXES: +$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) + cd $(top_srcdir) && $(AUTOMAKE) --gnu --include-deps docs/Makefile + +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) \ + && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status + + +install-docDATA: $(doc_DATA) + @$(NORMAL_INSTALL) + $(mkinstalldirs) $(DESTDIR)$(docdir) + @list='$(doc_DATA)'; for p in $$list; do \ + if test -f $(srcdir)/$$p; then \ + echo " $(INSTALL_DATA) $(srcdir)/$$p $(DESTDIR)$(docdir)/$$p"; \ + $(INSTALL_DATA) $(srcdir)/$$p $(DESTDIR)$(docdir)/$$p; \ + else if test -f $$p; then \ + echo " $(INSTALL_DATA) $$p $(DESTDIR)$(docdir)/$$p"; \ + $(INSTALL_DATA) $$p $(DESTDIR)$(docdir)/$$p; \ + fi; fi; \ + done + +uninstall-docDATA: + @$(NORMAL_UNINSTALL) + list='$(doc_DATA)'; for p in $$list; do \ + rm -f $(DESTDIR)$(docdir)/$$p; \ + done +tags: TAGS +TAGS: + + +distdir = 
$(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) + +subdir = docs + +distdir: $(DISTFILES) + @for file in $(DISTFILES); do \ + d=$(srcdir); \ + if test -d $$d/$$file; then \ + cp -pr $$d/$$file $(distdir)/$$file; \ + else \ + test -f $(distdir)/$$file \ + || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ + || cp -p $$d/$$file $(distdir)/$$file || :; \ + fi; \ + done +info-am: +info: info-am +dvi-am: +dvi: dvi-am +check-am: all-am +check: check-am +installcheck-am: +installcheck: installcheck-am +install-exec-am: +install-exec: install-exec-am + +install-data-am: install-docDATA +install-data: install-data-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am +install: install-am +uninstall-am: uninstall-docDATA +uninstall: uninstall-am +all-am: Makefile $(DATA) +all-redirect: all-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install +installdirs: + $(mkinstalldirs) $(DESTDIR)$(docdir) + + +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + -rm -f config.cache config.log stamp-h stamp-h[0-9]* + +maintainer-clean-generic: +mostlyclean-am: mostlyclean-generic + +mostlyclean: mostlyclean-am + +clean-am: clean-generic mostlyclean-am + +clean: clean-am + +distclean-am: distclean-generic clean-am + +distclean: distclean-am + +maintainer-clean-am: maintainer-clean-generic distclean-am + @echo "This command is intended for maintainers to use;" + @echo "it deletes files that may require special tools to rebuild." 
+ +maintainer-clean: maintainer-clean-am + +.PHONY: uninstall-docDATA install-docDATA tags distdir info-am info \ +dvi-am dvi check check-am installcheck-am installcheck install-exec-am \ +install-exec install-data-am install-data install-am install \ +uninstall-am uninstall all-redirect all-am all installdirs \ +mostlyclean-generic distclean-generic clean-generic \ +maintainer-clean-generic clean mostlyclean distclean maintainer-clean + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 000000000..111170256 --- /dev/null +++ b/docs/index.html @@ -0,0 +1,26 @@ + + + + + + + + + + + Valgrind's user manual + + + + + + + <body> + <p>This page uses frames, but your browser doesn't support them.</p> + </body> + + + + diff --git a/docs/manual.html b/docs/manual.html new file mode 100644 index 000000000..1bcd02a81 --- /dev/null +++ b/docs/manual.html @@ -0,0 +1,1753 @@ + + + + + + + +  +

Valgrind, snapshot 20020317

+ +
+jseward@acm.org
+
http://www.muraroa.demon.co.uk
+Copyright © 2000-2002 Julian Seward +

+Valgrind is licensed under the GNU General Public License, +version 2
+An open-source tool for finding memory-management problems in +Linux-x86 executables. +

+ +

+ +


+ +

Contents of this manual

+ +

Introduction

+ 1.1  What Valgrind is for
+ 1.2  What it does with your program + +

How to use it, and how to make sense + of the results

+ 2.1  Getting started
+ 2.2  The commentary
+ 2.3  Reporting of errors
+ 2.4  Suppressing errors
+ 2.5  Command-line flags
+ 2.6  Explanation of error messages
+ 2.7  Writing suppressions files
+ 2.8  Building and installing
+ 2.9  If you have problems
+ +

Details of the checking machinery

+ 3.1  Valid-value (V) bits
+ 3.2  Valid-address (A) bits
+ 3.3  Putting it all together
+ 3.4  Signals
+ 3.5  Memory leak detection
+ +

Limitations

+ +

How it works -- a rough overview

+ 5.1  Getting started
+ 5.2  The translation/instrumentation engine
+ 5.3  Tracking the status of memory
+ 5.4  System calls
+ 5.5  Signals
+ +

An example

+ +

The design and implementation of Valgrind

+ +
+ + +

1  Introduction

+ + +

1.1  What Valgrind is for

+ +Valgrind is a tool to help you find memory-management problems in your +programs. When a program is run under Valgrind's supervision, all +reads and writes of memory are checked, and calls to +malloc/new/free/delete are intercepted. As a result, Valgrind can +detect problems such as: +
    +
  • Use of uninitialised memory
  • +
  • Reading/writing memory after it has been free'd
  • +
  • Reading/writing off the end of malloc'd blocks
  • +
  • Reading/writing inappropriate areas on the stack
  • +
  • Memory leaks -- where pointers to malloc'd blocks are lost forever
  • +
+ +Problems like these can be difficult to find by other means, often +lying undetected for long periods, then causing occasional, +difficult-to-diagnose crashes. + +

+Valgrind is closely tied to details of the CPU, operating system and +to a lesser extent, compiler and basic C libraries. This makes it +difficult to make it portable, so I have chosen at the outset to +concentrate on what I believe to be a widely used platform: Red Hat +Linux 7.2, on x86s. I believe that it will work without significant +difficulty on other x86 GNU/Linux systems which use the 2.4 kernel and +GNU libc 2.2.X, for example SuSE 7.1 and Mandrake 8.0. Red Hat 6.2 is +also supported. It has worked in the past, and probably still does, +on RedHat 7.1 and 6.2. Note that I haven't compiled it on RedHat 7.1 +and 6.2 for a while, so they may no longer work now. +

+(Early Feb 02: after feedback from the KDE people it also works better +on other Linuxes). +

+At some point in the past, Valgrind has also worked on Red Hat 6.2 +(x86), thanks to the efforts of Rob Noble. + +

+Valgrind is licensed under the GNU General Public License, version +2. Read the file LICENSE in the source distribution for details. + + +

1.2  What it does with your program

+ +Valgrind is designed to be as non-intrusive as possible. It works +directly with existing executables. You don't need to recompile, +relink, or otherwise modify, the program to be checked. Simply place +the word valgrind at the start of the command line +normally used to run the program. So, for example, if you want to run +the command ls -l on Valgrind, simply issue the +command: valgrind ls -l. + +

Valgrind takes control of your program before it starts. Debugging +information is read from the executable and associated libraries, so +that error messages can be phrased in terms of source code +locations. Your program is then run on a synthetic x86 CPU which +checks every memory access. All detected errors are written to a +log. When the program finishes, Valgrind searches for and reports on +leaked memory. + +

You can run pretty much any dynamically linked ELF x86 executable using +Valgrind. Programs run 25 to 50 times slower, and take a lot more +memory, than they usually would. It works well enough to run large +programs. For example, the Konqueror web browser from the KDE Desktop +Environment, version 2.1.1, runs slowly but usably on Valgrind. + +

Valgrind simulates every single instruction your program executes. +Because of this, it finds errors not only in your application but also +in all supporting dynamically-linked (.so-format) libraries, including +the GNU C library, the X client libraries, Qt, if you work with KDE, and +so on. That often includes libraries, for example the GNU C library, +which contain memory access violations, but which you cannot or do not +want to fix. + +

Rather than swamping you with errors in which you are not +interested, Valgrind allows you to selectively suppress errors, by +recording them in a suppressions file which is read when Valgrind +starts up. As supplied, Valgrind comes with a suppressions file +designed to give reasonable behaviour on Red Hat 7.2 (also 7.1 and +6.2) when running text-only and simple X applications. + +

Section 6 shows an example of use. +

+


+ + +

2  How to use it, and how to make sense of the results

+ + +

2.1  Getting started

+ +First off, consider whether it might be beneficial to recompile your +application and supporting libraries with optimisation disabled and +debugging info enabled (the -g flag). You don't have to +do this, but doing so helps Valgrind produce more accurate and less +confusing error reports. Chances are you're set up like this already, +if you intended to debug your program with GNU gdb, or some other +debugger. + +

Then just run your application, but place the word +valgrind in front of your usual command-line invocation. +Note that you should run the real (machine-code) executable here. If +your application is started by, for example, a shell or perl script, +you'll need to modify it to invoke Valgrind on the real executables. +Running such scripts directly under Valgrind will result in you +getting error reports pertaining to /bin/sh, +/usr/bin/perl, or whatever interpreter you're using. +This almost certainly isn't what you want and can be hugely confusing. + + 

2.2  The commentary

+ +Valgrind writes a commentary, detailing error reports and other +significant events. The commentary goes to standard output by +default. This may interfere with your program, so you can ask for it +to be directed elsewhere. + +

All lines in the commentary are of the following form:
+

+  ==12345== some-message-from-Valgrind
+
+

The 12345 is the process ID. This scheme makes it easy +to distinguish program output from Valgrind commentary, and also easy +to differentiate commentaries from different processes which have +become merged together, for whatever reason. + +

By default, Valgrind writes only essential messages to the commentary, +so as to avoid flooding you with information of secondary importance. +If you want more information about what is happening, re-run, passing +the -v flag to Valgrind. + + + +

2.3  Reporting of errors

+ +When Valgrind detects something bad happening in the program, an error +message is written to the commentary. For example:
+
+  ==25832== Invalid read of size 4
+  ==25832==    at 0x8048724: BandMatrix::ReSize(int, int, int) (bogon.cpp:45)
+  ==25832==    by 0x80487AF: main (bogon.cpp:66)
+  ==25832==    by 0x40371E5E: __libc_start_main (libc-start.c:129)
+  ==25832==    by 0x80485D1: (within /home/sewardj/newmat10/bogon)
+  ==25832==    Address 0xBFFFF74C is not stack'd, malloc'd or free'd
+
+ +

This message says that the program did an illegal 4-byte read of +address 0xBFFFF74C, which, as far as it can tell, is not a valid stack +address, nor corresponds to any currently malloc'd or free'd blocks. +The read is happening at line 45 of bogon.cpp, called +from line 66 of the same file, etc. For errors associated with an +identified malloc'd/free'd block, for example reading free'd memory, +Valgrind reports not only the location where the error happened, but +also where the associated block was malloc'd/free'd. + +

Valgrind remembers all error reports. When an error is detected, +it is compared against old reports, to see if it is a duplicate. If +so, the error is noted, but no further commentary is emitted. This +avoids you being swamped with bazillions of duplicate error reports. + +

If you want to know how many times each error occurred, run with +the -v option. When execution finishes, all the reports +are printed out, along with, and sorted by, their occurrence counts. +This makes it easy to see which errors have occurred most frequently. + +

Errors are reported before the associated operation actually +happens. For example, if your program decides to read from address +zero, Valgrind will emit a message to this effect, and the program +will then duly die with a segmentation fault. + 

In general, you should try and fix errors in the order that they +are reported. Not doing so can be confusing. For example, a program +which copies uninitialised values to several memory locations, and +later uses them, will generate several error messages. The first such +error message may well give the most direct clue to the root cause of +the problem. + + +

2.4  Suppressing errors

+ +Valgrind detects numerous problems in the base libraries, such as the +GNU C library, and the XFree86 client libraries, which come +pre-installed on your GNU/Linux system. You can't easily fix these, +but you don't want to see these errors (and yes, there are many!) So +Valgrind reads a list of errors to suppress at startup. By default +this file is redhat72.supp, located in the Valgrind +installation directory. + +

You can modify and add to the suppressions file at your leisure, or +write your own. Multiple suppression files are allowed. This is +useful if part of your project contains errors you can't or don't want +to fix, yet you don't want to continuously be reminded of them. + +

Each error to be suppressed is described very specifically, to +minimise the possibility that a suppression-directive inadvertently +suppresses a bunch of similar errors which you did want to see. The +suppression mechanism is designed to allow precise yet flexible +specification of errors to suppress. + 

If you use the -v flag, at the end of execution, Valgrind +prints out one line for each used suppression, giving its name and the +number of times it got used. Here's the suppressions used by a run of +ls -l: +

+  --27579-- supp: 1 socketcall.connect(serv_addr)/__libc_connect/__nscd_getgrgid_r
+  --27579-- supp: 1 socketcall.connect(serv_addr)/__libc_connect/__nscd_getpwuid_r
+  --27579-- supp: 6 strrchr/_dl_map_object_from_fd/_dl_map_object
+
+ + +

2.5  Command-line flags

+ +You invoke Valgrind like this: +
+  valgrind [options-for-Valgrind] your-prog [options for your-prog]
+
+ +

Valgrind's default settings succeed in giving reasonable behaviour +in most cases. Available options, in no particular order, are as +follows: +

    +
  • --help

  • + +
  • --version
    +

    The usual deal.


  • + +

  • -v --verbose
    +

    Be more verbose. Gives extra information on various aspects + of your program, such as: the shared objects loaded, the + suppressions used, the progress of the instrumentation engine, + and warnings about unusual behaviour. +


  • + +

  • -q --quiet
    +

    Run silently, and only print error messages. Useful if you + are running regression tests or have some other automated test + machinery. +


  • + +

  • --demangle=no
    + --demangle=yes [the default] +

    Disable/enable automatic demangling (decoding) of C++ names. + Enabled by default. When enabled, Valgrind will attempt to + translate encoded C++ procedure names back to something + approaching the original. The demangler handles symbols mangled + by g++ versions 2.X and 3.X. + +

    An important fact about demangling is that function + names mentioned in suppressions files should be in their mangled + form. Valgrind does not demangle function names when searching + for applicable suppressions, because to do otherwise would make + suppressions file contents dependent on the state of Valgrind's + demangling machinery, and would also be slow and pointless. +


  • + +

  • --num-callers=<number> [default=4]
    +

    By default, Valgrind shows four levels of function call names + to help you identify program locations. You can change that + number with this option. This can help in determining the + program's location in deeply-nested call chains. Note that errors + are commoned up using only the top three function locations (the + place in the current function, and that of its two immediate + callers). So this doesn't affect the total number of errors + reported. +

    + The maximum value for this is 50. Note that higher settings + will make Valgrind run a bit more slowly and take a bit more + memory, but can be useful when working with programs with + deeply-nested call chains. +


  • + +

  • --gdb-attach=no [the default]
    + --gdb-attach=yes +

    When enabled, Valgrind will pause after every error shown, + and print the line +
    + ---- Attach to GDB ? --- [Return/N/n/Y/y/C/c] ---- +

    + Pressing Ret, or N Ret + or n Ret, causes Valgrind not to + start GDB for this error. +

    + Y Ret + or y Ret causes Valgrind to + start GDB, for the program at this point. When you have + finished with GDB, quit from it, and the program will continue. + Trying to continue from inside GDB doesn't work. +

    + C Ret + or c Ret causes Valgrind not to + start GDB, and not to ask again. +

    + --gdb-attach=yes conflicts with + --trace-children=yes. You can't use them + together. Valgrind refuses to start up in this situation. +


  • + +

  • --partial-loads-ok=yes [the default]
    + --partial-loads-ok=no +

    Controls how Valgrind handles word (4-byte) loads from + addresses for which some bytes are addressable and others + are not. When yes (the default), such loads + do not elicit an address error. Instead, the loaded V bytes + corresponding to the illegal addresses indicate undefined, and + those corresponding to legal addresses are loaded from shadow + memory, as usual. +

    + When no, loads from partially + invalid addresses are treated the same as loads from completely + invalid addresses: an illegal-address error is issued, + and the resulting V bytes indicate valid data. +


  • + +

  • --sloppy-malloc=no [the default]
    + --sloppy-malloc=yes +

    When enabled, all requests for malloc/calloc are rounded up + to a whole number of machine words -- in other words, made + divisible by 4. For example, a request for 17 bytes of space + would result in a 20-byte area being made available. This works + around bugs in sloppy libraries which assume that they can + safely rely on malloc/calloc requests being rounded up in this + fashion. Without the workaround, these libraries tend to + generate large numbers of errors when they access the ends of + these areas. Valgrind snapshots dated 17 Feb 2002 and later are + cleverer about this problem, and you should no longer need to + use this flag. +


  • + +

  • --trace-children=no [the default]
    + --trace-children=yes +

    When enabled, Valgrind will trace into child processes. This + is confusing and usually not what you want, so is disabled by + default.


  • + +

  • --freelist-vol=<number> [default: 1000000] +

    When the client program releases memory using free (in C) or + delete (C++), that memory is not immediately made available for + re-allocation. Instead it is marked inaccessible and placed in + a queue of freed blocks. The purpose is to delay the point at + which freed-up memory comes back into circulation. This + increases the chance that Valgrind will be able to detect + invalid accesses to blocks for some significant period of time + after they have been freed. +

    + This flag specifies the maximum total size, in bytes, of the + blocks in the queue. The default value is one million bytes. + Increasing this increases the total amount of memory used by + Valgrind but may detect invalid uses of freed blocks which would + otherwise go undetected.


  • + +

  • --logfile-fd=<number> [default: 2, stderr] +

    Specifies the file descriptor on which Valgrind communicates + all of its messages. The default, 2, is the standard error + channel. This may interfere with the client's own use of + stderr. To dump Valgrind's commentary in a file without using + stderr, something like the following works well (sh/bash + syntax):
    +    + valgrind --logfile-fd=9 my_prog 9> logfile
    + That is: tell Valgrind to send all output to file descriptor 9, + and ask the shell to route file descriptor 9 to "logfile". +


  • + +

  • --suppressions=<filename> [default: + /installation/directory/redhat72.supp]

    Specifies an extra + file from which to read descriptions of errors to suppress. You + may use as many extra suppressions files as you + like.


  • + +

  • --leak-check=no [default]
    + --leak-check=yes +

    When enabled, search for memory leaks when the client program + finishes. A memory leak means a malloc'd block, which has not + yet been free'd, but to which no pointer can be found. Such a + block can never be free'd by the program, since no pointer to it + exists. Leak checking is disabled by default + because it tends to generate dozens of error messages. +


  • + +

  • --show-reachable=no [default]
    + --show-reachable=yes

    When disabled, the memory + leak detector only shows blocks for which it cannot find a + pointer to at all, or it can only find a pointer to the middle + of. These blocks are prime candidates for memory leaks. When + enabled, the leak detector also reports on blocks which it could + find a pointer to. Your program could, at least in principle, + have freed such blocks before exit. Contrast this to blocks for + which no pointer, or only an interior pointer could be found: + they are more likely to indicate memory leaks, because + you do not actually have a pointer to the start of the block + which you can hand to free(), even if you wanted to. +


  • + +

  • --leak-resolution=low [default]
    + --leak-resolution=med
    + --leak-resolution=high +

    When doing leak checking, determines how willing Valgrind is + to consider different backtraces the same. When set to + low, the default, only the first two entries need + match. When med, four entries have to match. When + high, all entries need to match. +

    + For hardcore leak debugging, you probably want to use + --leak-resolution=high together with + --num-callers=40 or some such large number. Note + however that this can give an overwhelming amount of + information, which is why the defaults are 4 callers and + low-resolution matching. +

    + Note that the --leak-resolution= setting does not + affect Valgrind's ability to find leaks. It only changes how + the results are presented to you. +


  • + +

  • --workaround-gcc296-bugs=no [default]
    + --workaround-gcc296-bugs=yes

    When enabled, + Valgrind assumes that reads and writes some small distance below the stack + pointer %esp are due to bugs in gcc 2.96, and does + not report them. The "small distance" is 256 bytes by default. + Note that gcc 2.96 is the default compiler on some popular Linux + distributions (RedHat 7.X, Mandrake) and so you may well need to + use this flag. Do not use it if you do not have to, as it can + cause real errors to be overlooked. A better option is to use a + gcc/g++ which works properly; 2.95.3 seems to be a good choice. +

    + Unfortunately (27 Feb 02) it looks like g++ 3.0.4 is similarly + buggy, so you may need to issue this flag if you use 3.0.4. +


  • + +

  • --client-perms=no [default]
    + --client-perms=yes

    An experimental feature. +

    + When enabled, and when --instrument=yes (which is + the default), Valgrind honours client directives to set and + query address range permissions. This allows the client program + to tell Valgrind about changes in memory range permissions that + Valgrind would not otherwise know about, and so allows clients + to get Valgrind to do arbitrary custom checks. +

    + Clients need to include the header file valgrind.h + to make this work. The macros therein have the magical property + that they generate code in-line which Valgrind can spot. + However, the code does nothing when not run on Valgrind, so you + are not forced to run your program on Valgrind just because you + use the macros in this file. +

    + A brief description of the available macros: +

      +
    • VALGRIND_MAKE_NOACCESS, + VALGRIND_MAKE_WRITABLE and + VALGRIND_MAKE_READABLE. These mark address + ranges as completely inaccessible, accessible but containing + undefined data, and accessible and containing defined data, + respectively. Subsequent errors may have their faulting + addresses described in terms of these blocks. Returns a + "block handle". +

      +

    • VALGRIND_DISCARD: At some point you may want + Valgrind to stop reporting errors in terms of the blocks + defined by the previous three macros. To do this, the above + macros return a small-integer "block handle". You can pass + this block handle to VALGRIND_DISCARD. After + doing so, Valgrind will no longer be able to relate + addressing errors to the user-defined block associated with + the handle. The permissions settings associated with the + handle remain in place; this just affects how errors are + reported, not whether they are reported. Returns 1 for an + invalid handle and 0 for a valid handle (although passing + invalid handles is harmless). +

      +

    • VALGRIND_CHECK_NOACCESS, + VALGRIND_CHECK_WRITABLE and + VALGRIND_CHECK_READABLE: check immediately + whether or not the given address range has the relevant + property, and if not, print an error message. Also, for the + convenience of the client, returns zero if the relevant + property holds; otherwise, the returned value is the address + of the first byte for which the property is not true. +

      +

    • VALGRIND_CHECK_DEFINED: a quick and easy way + to find out whether Valgrind thinks a particular variable + (lvalue, to be precise) is addressable and defined. Prints + an error message if not. Returns no value. +

      +

    • VALGRIND_MAKE_NOACCESS_STACK: a highly + experimental feature. Similarly to + VALGRIND_MAKE_NOACCESS, this marks an address + range as inaccessible, so that subsequent accesses to an + address in the range give an error. However, this macro + does not return a block handle. Instead, all annotations + created like this are reviewed at each client + ret (subroutine return) instruction, and those + which now define an address range below the client's stack + pointer register (%esp) are automatically + deleted. +

      + In other words, this macro allows the client to tell + Valgrind about red-zones on its own stack. Valgrind + automatically discards this information when the stack + retreats past such blocks. Beware: hacky and flaky. +

    +
  • +

    + As of 17 March 02 (the time of writing this), there is a small + problem with all of these macros, which is that I haven't + figured out how to make them produce sensible (always-succeeds) + return values when the client is run on the real CPU or on + Valgrind without --client-perms=yes. So if you + write client code which depends on the return values, be aware + that it may misbehave when not run with full Valgrindification. + If you always ignore the return values you should always be + safe. I plan to fix this. +

+ +There are also some options for debugging Valgrind itself. You +shouldn't need to use them in the normal run of things. Nevertheless: + +
    + +
  • --single-step=no [default]
    + --single-step=yes +

    When enabled, each x86 insn is translated separately into + instrumented code. When disabled, translation is done on a + per-basic-block basis, giving much better translations.


  • +

    + +

  • --optimise=no
    + --optimise=yes [default] +

    When enabled, various improvements are applied to the + intermediate code, mainly aimed at allowing the simulated CPU's + registers to be cached in the real CPU's registers over several + simulated instructions.


  • +

    + +

  • --instrument=no
    + --instrument=yes [default] +

    When disabled, the translations don't actually contain any + instrumentation.


  • +

    + +

  • --cleanup=no
    + --cleanup=yes [default] +

    When enabled, various improvements are applied to the + post-instrumented intermediate code, aimed at removing redundant + value checks.


  • +

    + +

  • --trace-syscalls=no [default]
    + --trace-syscalls=yes +

    Enable/disable tracing of system call intercepts.


  • +

    + +

  • --trace-signals=no [default]
    + --trace-signals=yes +

    Enable/disable tracing of signal handling.


  • +

    + +

  • --trace-symtab=no [default]
    + --trace-symtab=yes +

    Enable/disable tracing of symbol table reading.


  • +

    + +

  • --trace-malloc=no [default]
    + --trace-malloc=yes +

    Enable/disable tracing of malloc/free (et al) intercepts. +


  • +

    + +

  • --stop-after=<number> + [default: infinity, more or less] +

    After <number> basic blocks have been executed, shut down + Valgrind and switch back to running the client on the real CPU. +


  • +

    + +

  • --dump-error=<number> + [default: inactive] +

    After the program has exited, show gory details of the + translation of the basic block containing the <number>'th + error context. When used with --single-step=yes, + can show the + exact x86 instruction causing an error.


  • +

    + +

  • --smc-check=none
    + --smc-check=some [default]
    + --smc-check=all +

    How carefully should Valgrind check for self-modifying code + writes, so that translations can be discarded?  When + "none", no writes are checked. When "some", only writes + resulting from moves from integer registers to memory are + checked. When "all", all memory writes are checked, even those + with which no sane program would generate code -- for + example, floating-point writes.

  • +
+ + + +

2.6  Explanation of error messages

+ +Despite considerable sophistication under the hood, Valgrind can only +really detect two kinds of errors, use of illegal addresses, and use +of undefined values. Nevertheless, this is enough to help you +discover all sorts of memory-management nasties in your code. This +section presents a quick summary of what error messages mean. The +precise behaviour of the error-checking machinery is described in +
Section 4. + + +

2.6.1  Illegal read / Illegal write errors

+For example: +
+  ==30975== Invalid read of size 4
+  ==30975==    at 0x40F6BBCC: (within /usr/lib/libpng.so.2.1.0.9)
+  ==30975==    by 0x40F6B804: (within /usr/lib/libpng.so.2.1.0.9)
+  ==30975==    by 0x40B07FF4: read_png_image__FP8QImageIO (kernel/qpngio.cpp:326)
+  ==30975==    by 0x40AC751B: QImageIO::read() (kernel/qimage.cpp:3621)
+  ==30975==    Address 0xBFFFF0E0 is not stack'd, malloc'd or free'd
+
+ +

This happens when your program reads or writes memory at a place +which Valgrind reckons it shouldn't. In this example, the program did +a 4-byte read at address 0xBFFFF0E0, somewhere within the +system-supplied library libpng.so.2.1.0.9, which was called from +somewhere else in the same library, called from line 326 of +qpngio.cpp, and so on. + +

Valgrind tries to establish what the illegal address might relate +to, since that's often useful. So, if it points into a block of +memory which has already been freed, you'll be informed of this, and +also where the block was free'd. Likewise, if it should turn out +to be just off the end of a malloc'd block, a common result of +off-by-one errors in array subscripting, you'll be informed of this +fact, and also where the block was malloc'd. + +

In this example, Valgrind can't identify the address. Actually the +address is on the stack, but, for some reason, this is not a valid +stack address -- it is below the stack pointer, %esp, and that isn't +allowed. + +

Note that Valgrind only tells you that your program is about to +access memory at an illegal address. It can't stop the access from +happening. So, if your program makes an access which normally would +result in a segmentation fault, your program will still suffer the same +fate -- but you will get a message from Valgrind immediately prior to +this. In this particular example, reading junk on the stack is +non-fatal, and the program stays alive. + + +

2.6.2  Use of uninitialised values

+For example: +
+  ==19146== Use of uninitialised CPU condition code
+  ==19146==    at 0x402DFA94: _IO_vfprintf (_itoa.h:49)
+  ==19146==    by 0x402E8476: _IO_printf (printf.c:36)
+  ==19146==    by 0x8048472: main (tests/manuel1.c:8)
+  ==19146==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+
+ +

An uninitialised-value use error is reported when your program uses +a value which hasn't been initialised -- in other words, is undefined. +Here, the undefined value is used somewhere inside the printf() +machinery of the C library. This error was reported when running the +following small program: +

+  int main()
+  {
+    int x;
+    printf ("x = %d\n", x);
+  }
+
+ +

It is important to understand that your program can copy around +junk (uninitialised) data to its heart's content. Valgrind observes +this and keeps track of the data, but does not complain. A complaint +is issued only when your program attempts to make use of uninitialised +data. In this example, x is uninitialised. Valgrind observes the +value being passed to _IO_printf and thence to +_IO_vfprintf, but makes no comment. However, +_IO_vfprintf has to examine the value of x +so it can turn it into the corresponding ASCII string, and it is at +this point that Valgrind complains. + +

Sources of uninitialised data tend to be: +

    +
  • Local variables in procedures which have not been initialised, + as in the example above.

  • + +

  • The contents of malloc'd blocks, before you write something + there. In C++, the new operator is a wrapper round malloc, so + if you create an object with new, its fields will be + uninitialised until you fill them in, which is only Right and + Proper.
  • +
+ + + +

2.6.3  Illegal frees

+For example: +
+  ==7593== Invalid free()
+  ==7593==    at 0x4004FFDF: free (ut_clientmalloc.c:577)
+  ==7593==    by 0x80484C7: main (tests/doublefree.c:10)
+  ==7593==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+  ==7593==    by 0x80483B1: (within tests/doublefree)
+  ==7593==    Address 0x3807F7B4 is 0 bytes inside a block of size 177 free'd
+  ==7593==    at 0x4004FFDF: free (ut_clientmalloc.c:577)
+  ==7593==    by 0x80484C7: main (tests/doublefree.c:10)
+  ==7593==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+  ==7593==    by 0x80483B1: (within tests/doublefree)
+
+

Valgrind keeps track of the blocks allocated by your program with +malloc/new, so it can know exactly whether or not the argument to +free/delete is legitimate. Here, this test program has +freed the same block twice. As with the illegal read/write errors, +Valgrind attempts to make sense of the address free'd. If, as +here, the address is one which has previously been freed, you will +be told that -- making duplicate frees of the same block easy to spot. + + +

2.6.4  Passing system call parameters with inadequate +read/write permissions

+ +Valgrind checks all parameters to system calls. If a system call +needs to read from a buffer provided by your program, Valgrind checks +that the entire buffer is addressable and has valid data, ie, it is +readable. And if the system call needs to write to a user-supplied +buffer, Valgrind checks that the buffer is addressable. After the +system call, Valgrind updates its administrative information to +precisely reflect any changes in memory permissions caused by the +system call. + +

Here's an example of a system call with an invalid parameter: +

+  #include <stdlib.h>
+  #include <unistd.h>
+  int main( void )
+  {
+    char* arr = malloc(10);
+    (void) write( 1 /* stdout */, arr, 10 );
+    return 0;
+  }
+
+ +

You get this complaint ... +

+  ==8230== Syscall param write(buf) lacks read permissions
+  ==8230==    at 0x4035E072: __libc_write
+  ==8230==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+  ==8230==    by 0x80483B1: (within tests/badwrite)
+  ==8230==    by <bogus frame pointer> ???
+  ==8230==    Address 0x3807E6D0 is 0 bytes inside a block of size 10 alloc'd
+  ==8230==    at 0x4004FEE6: malloc (ut_clientmalloc.c:539)
+  ==8230==    by 0x80484A0: main (tests/badwrite.c:6)
+  ==8230==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+  ==8230==    by 0x80483B1: (within tests/badwrite)
+
+ +

... because the program has tried to write uninitialised junk from +the malloc'd block to the standard output. + + +

2.6.5  Warning messages you might see

+ +Most of these only appear if you run in verbose mode (enabled by +-v): +
    +
  • More than 50 errors detected. Subsequent errors + will still be recorded, but in less detail than before. +
    + After 50 different errors have been shown, Valgrind becomes + more conservative about collecting them. It then requires only + the program counters in the top two stack frames to match when + deciding whether or not two errors are really the same one. + Prior to this point, the PCs in the top four frames are required + to match. This hack has the effect of slowing down the + appearance of new errors after the first 50. The 50 constant can + be changed by recompiling Valgrind. +

    +

  • More than 500 errors detected. I'm not reporting any more. + Final error counts may be inaccurate. Go fix your + program! +
    + After 500 different errors have been detected, Valgrind ignores + any more. It seems unlikely that collecting even more different + ones would be of practical help to anybody, and it avoids the + danger that Valgrind spends more and more of its time comparing + new errors against an ever-growing collection. As above, the 500 + number is a compile-time constant. +

    +

  • Warning: client exiting by calling exit(<number>). + Bye! +
    + Your program has called the exit system call, which + will immediately terminate the process. You'll get no exit-time + error summaries or leak checks. Note that this is not the same + as your program calling the ANSI C function exit() + -- that causes a normal, controlled shutdown of Valgrind. +

    +

  • Warning: client switching stacks? +
    + Valgrind spotted such a large change in the stack pointer, %esp, + that it guesses the client is switching to a different stack. + At this point it makes a kludgey guess where the base of the new + stack is, and sets memory permissions accordingly. You may get + many bogus error messages following this, if Valgrind guesses + wrong. At the moment "large change" is defined as a change of + more than 2000000 in the value of the %esp (stack pointer) + register. +

    +

  • Warning: client attempted to close Valgrind's logfile fd <number> + +
    + Valgrind doesn't allow the client + to close the logfile, because you'd never see any diagnostic + information after that point. If you see this message, + you may want to use the --logfile-fd=<number> + option to specify a different logfile file-descriptor number. +

    +

  • Warning: noted but unhandled ioctl <number> +
    + Valgrind observed a call to one of the vast family of + ioctl system calls, but did not modify its + memory status info (because I have not yet got round to it). + The call will still have gone through, but you may get spurious + errors after this as a result of the non-update of the memory info. +

    +

  • Warning: unblocking signal <number> due to + sigprocmask +
    + Really just a diagnostic from the signal simulation machinery. + This message will appear if your program handles a signal by + first longjmping out of the signal handler, + and then unblocking the signal with sigprocmask + -- a standard signal-handling idiom. +

    +

  • Warning: bad signal number <number> in __NR_sigaction. +
    + Probably indicates a bug in the signal simulation machinery. +

    +

  • Warning: set address range perms: large range <number> +
    + Diagnostic message, mostly for my benefit, to do with memory + permissions. +
+ + + +

2.7  Writing suppressions files

+ +A suppression file describes a bunch of errors which, for one reason +or another, you don't want Valgrind to tell you about. Usually the +reason is that the system libraries are buggy but unfixable, at least +within the scope of the current debugging session. Multiple +suppressions files are allowed. By default, Valgrind uses +linux24.supp in the directory where it is installed. + +

+You can ask to add suppressions from another file, by specifying +--suppressions=/path/to/file.supp. + +

Each suppression has the following components:
+

    + +
  • Its name. This merely gives a handy name to the suppression, by + which it is referred to in the summary of used suppressions + printed out when a program finishes. It's not important what + the name is; any identifying string will do. +

    + +

  • The nature of the error to suppress. Either: + Value1, + Value2, + Value4, + Value8 or + Value0, + meaning an uninitialised-value error when + using a value of 1, 2, 4 or 8 bytes, + or the CPU's condition codes, respectively. Or: + Addr1, + Addr2, + Addr4 or + Addr8, meaning an invalid address during a + memory access of 1, 2, 4 or 8 bytes respectively. Or + Param, + meaning an invalid system call parameter error. Or + Free, meaning an invalid or mismatching free.

  • +

    + +

  • The "immediate location" specification. For Value and Addr + errors, is either the name of the function in which the error + occurred, or, failing that, the full path to the .so file + containing the error location. For Param errors, is the name of + the offending system call parameter. For Free errors, is the + name of the function doing the freeing (eg, free, + __builtin_vec_delete, etc)

  • +

    + +

  • The caller of the above "immediate location". Again, either a + function or shared-object name.

  • +

    + +

  • Optionally, one or two extra calling-function or object names, + for greater precision.
  • +
+ +

+Locations may be either names of shared objects or wildcards matching +function names. They begin with obj: and fun: +respectively. Function and object names to match against may use the +wildcard characters * and ?. + +A suppression only suppresses an error when the error matches all the +details in the suppression. Here's an example: +

+  {
+    __gconv_transform_ascii_internal/__mbrtowc/mbtowc
+    Value4
+    fun:__gconv_transform_ascii_internal
+    fun:__mbr*toc
+    fun:mbtowc
+  }
+
+ +

What it means is: suppress a use-of-uninitialised-value error, when +the data size is 4, when it occurs in the function +__gconv_transform_ascii_internal, when that is called +from any function of name matching __mbr*toc, +when that is called from +mbtowc. It doesn't apply under any other circumstances. +The string by which this suppression is identified to the user is +__gconv_transform_ascii_internal/__mbrtowc/mbtowc. + +

Another example: +

+  {
+    libX11.so.6.2/libX11.so.6.2/libXaw.so.7.0
+    Value4
+    obj:/usr/X11R6/lib/libX11.so.6.2
+    obj:/usr/X11R6/lib/libX11.so.6.2
+    obj:/usr/X11R6/lib/libXaw.so.7.0
+  }
+
+ +

Suppress any size 4 uninitialised-value error which occurs anywhere +in libX11.so.6.2, when called from anywhere in the same +library, when called from anywhere in libXaw.so.7.0. The +inexact specification of locations is regrettable, but is about all +you can hope for, given that the X11 libraries shipped with Red Hat +7.2 have had their symbol tables removed. + +

Note -- since the above two examples did not make it clear -- that +you can freely mix the obj: and fun: +styles of description within a single suppression record. + + + +

2.8  Building and installing

+At the moment, very rudimentary. + +

The tarball is set up for a standard Red Hat 7.1 (6.2) machine. To +build, just do "make". No configure script, no autoconf, no nothing. + +

The files needed for installation are: valgrind.so, valgrinq.so, +valgrind, VERSION, redhat72.supp (or redhat62.supp). You can copy +these to any directory you like. However, you then need to edit the +shell script "valgrind". On line 4, set the environment variable +VALGRIND to point to the directory you have copied the +installation into. + + + +

2.9  If you have problems

+Mail me (jseward@acm.org). + +

See Section 4 for the known limitations of +Valgrind, and for a list of programs which are known not to work on +it. + +

The translator/instrumentor has a lot of assertions in it. They +are permanently enabled, and I have no plans to disable them. If one +of these breaks, please mail me! + +

If you get an assertion failure on the expression +chunkSane(ch) in vg_free() in +vg_malloc.c, this may have happened because your program +wrote off the end of a malloc'd block, or before its beginning. +Valgrind should have emitted a proper message to that effect before +dying in this way. This is a known problem which I should fix. +

+ +


+ + +

3  Details of the checking machinery

+ +Read this section if you want to know, in detail, exactly what and how +Valgrind is checking. + + +

3.1  Valid-value (V) bits

+ +It is simplest to think of Valgrind implementing a synthetic Intel x86 +CPU which is identical to a real CPU, except for one crucial detail. +Every bit (literally) of data processed, stored and handled by the +real CPU has, in the synthetic CPU, an associated "valid-value" bit, +which says whether or not the accompanying bit has a legitimate value. +In the discussions which follow, this bit is referred to as the V +(valid-value) bit. + +

Each byte in the system therefore has 8 V bits which accompany +it wherever it goes. For example, when the CPU loads a word-size item +(4 bytes) from memory, it also loads the corresponding 32 V bits from +a bitmap which stores the V bits for the process' entire address +space. If the CPU should later write the whole or some part of that +value to memory at a different address, the relevant V bits will be +stored back in the V-bit bitmap. + +

In short, each bit in the system has an associated V bit, which +follows it around everywhere, even inside the CPU. Yes, the CPU's +(integer) registers have their own V bit vectors. + +

Copying values around does not cause Valgrind to check for, or +report on, errors. However, when a value is used in a way which might +conceivably affect the outcome of your program's computation, the +associated V bits are immediately checked. If any of these indicate +that the value is undefined, an error is reported. + +

Here's an (admittedly nonsensical) example: +

+  int i, j;
+  int a[10], b[10];
+  for (i = 0; i < 10; i++) {
+    j = a[i];
+    b[i] = j;
+  }
+
+ +

Valgrind emits no complaints about this, since it merely copies +uninitialised values from a[] into b[], and +doesn't use them in any way. However, if the loop is changed to +

+  for (i = 0; i < 10; i++) {
+    j += a[i];
+  }
+  if (j == 77) 
+     printf("hello there\n");
+
+then Valgrind will complain, at the if, that the +condition depends on uninitialised values. + +

Most low level operations, such as adds, cause Valgrind to +use the V bits for the operands to calculate the V bits for the +result. Even if the result is partially or wholly undefined, +it does not complain. + +

Checks on definedness only occur in two places: when a value is +used to generate a memory address, and where a control-flow decision +needs to be made. Also, when a system call is detected, Valgrind +checks definedness of parameters as required. + +

If a check should detect undefinedness, an error message is +issued. The resulting value is subsequently regarded as well-defined. +To do otherwise would give long chains of error messages. In effect, +we say that undefined values are non-infectious. + +

This sounds overcomplicated. Why not just check all reads from +memory, and complain if an undefined value is loaded into a CPU register? +Well, that doesn't work well, because perfectly legitimate C programs routinely +copy uninitialised values around in memory, and we don't want endless complaints +about that. Here's the canonical example. Consider a struct +like this: +

+  struct S { int x; char c; };
+  struct S s1, s2;
+  s1.x = 42;
+  s1.c = 'z';
+  s2 = s1;
+
+ +

The question to ask is: how large is struct S, in +bytes? An int is 4 bytes and a char one byte, so perhaps a struct S +occupies 5 bytes? Wrong. All (non-toy) compilers I know of will +round the size of struct S up to a whole number of words, +in this case 8 bytes. Not doing this forces compilers to generate +truly appalling code for subscripting arrays of struct +S's. + +

So s1 occupies 8 bytes, yet only 5 of them will be initialised. +For the assignment s2 = s1, gcc generates code to copy +all 8 bytes wholesale into s2 without regard for their +meaning. If Valgrind simply checked values as they came out of +memory, it would yelp every time a structure assignment like this +happened. So the more complicated semantics described above is +necessary. This allows gcc to copy s1 into +s2 any way it likes, and a warning will only be emitted +if the uninitialised values are later used. + +

One final twist to this story. The above scheme allows garbage to +pass through the CPU's integer registers without complaint. It does +this by giving the integer registers V tags, passing these around in +the expected way. This is complicated and computationally expensive to +do, but is necessary. Valgrind is more simplistic about +floating-point loads and stores. In particular, V bits for data read +as a result of floating-point loads are checked at the load +instruction. So if your program uses the floating-point registers to +do memory-to-memory copies, you will get complaints about +uninitialised values. Fortunately, I have not yet encountered a +program which (ab)uses the floating-point registers in this way. + + +

3.2  Valid-address (A) bits

+ +Notice that the previous section describes how the validity of values +is established and maintained without having to say whether the +program does or does not have the right to access any particular +memory location. We now consider the latter issue. + +

As described above, every bit in memory or in the CPU has an +associated valid-value (V) bit. In addition, all bytes in memory, but +not in the CPU, have an associated valid-address (A) bit. This +indicates whether or not the program can legitimately read or write +that location. It does not give any indication of the validity of the +data at that location -- that's the job of the V bits -- only whether +or not the location may be accessed. + +

Every time your program reads or writes memory, Valgrind checks the +A bits associated with the address. If any of them indicate an +invalid address, an error is emitted. Note that the reads and writes +themselves do not change the A bits, only consult them. + +

So how do the A bits get set/cleared? Like this: + +

    +
  • When the program starts, all the global data areas are marked as + accessible.

  • +

    + +

  • When the program does malloc/new, the A bits for exactly the + area allocated, and not a byte more, are marked as accessible. + Upon freeing the area the A bits are changed to indicate + inaccessibility.

  • +

    + +

  • When the stack pointer register (%esp) moves up or down, A bits + are set. The rule is that the area from %esp up to the base of + the stack is marked as accessible, and below %esp is + inaccessible. (If that sounds illogical, bear in mind that the + stack grows down, not up, on almost all Unix systems, including + GNU/Linux.) Tracking %esp like this has the useful side-effect + that the section of stack used by a function for local variables + etc is automatically marked accessible on function entry and + inaccessible on exit.

  • +

    + +

  • When doing system calls, A bits are changed appropriately. For + example, mmap() magically makes files appear in the process's + address space, so the A bits must be updated if mmap() + succeeds.

  • +
+ + + +

3.3  Putting it all together

+Valgrind's checking machinery can be summarised as follows: + +
    +
  • Each byte in memory has 8 associated V (valid-value) bits, + saying whether or not the byte has a defined value, and a single + A (valid-address) bit, saying whether or not the program + currently has the right to read/write that address.

  • +

    + +

  • When memory is read or written, the relevant A bits are + consulted. If they indicate an invalid address, Valgrind emits + an Invalid read or Invalid write error.

  • +

    + +

  • When memory is read into the CPU's integer registers, the + relevant V bits are fetched from memory and stored in the + simulated CPU. They are not consulted.

  • +

    + +

  • When an integer register is written out to memory, the V bits + for that register are written back to memory too.

  • +

    + +

  • When memory is read into the CPU's floating point registers, the + relevant V bits are read from memory and they are immediately + checked. If any are invalid, an uninitialised value error is + emitted. This precludes using the floating-point registers to + copy possibly-uninitialised memory, but simplifies Valgrind in + that it does not have to track the validity status of the + floating-point registers.

  • +

    + +

  • As a result, when a floating-point register is written to + memory, the associated V bits are set to indicate a valid + value.

  • +

    + +

  • When values in integer CPU registers are used to generate a + memory address, or to determine the outcome of a conditional + branch, the V bits for those values are checked, and an error + emitted if any of them are undefined.

  • +

    + +

  • When values in integer CPU registers are used for any other + purpose, Valgrind computes the V bits for the result, but does + not check them.

  • +

    + +

  • Once the V bits for a value in the CPU have been checked, they + are then set to indicate validity. This avoids long chains of + errors.

  • +

    + +

  • When values are loaded from memory, Valgrind checks the A bits + for that location and issues an illegal-address warning if + needed. In that case, the V bits loaded are forced to indicate + Valid, despite the location being invalid. +

    + This apparently strange choice reduces the amount of confusing + information presented to the user. It avoids the + unpleasant phenomenon in which memory is read from a place which + is both unaddressible and contains invalid values, and, as a + result, you get not only an invalid-address (read/write) error, + but also a potentially large set of uninitialised-value errors, + one for every time the value is used. +

    + There is a hazy boundary case to do with multi-byte loads from + addresses which are partially valid and partially invalid. See + the description of the flag --partial-loads-ok for details. +


  • +
+ +Valgrind intercepts calls to malloc, calloc, realloc, valloc, +memalign, free, new and delete. The behaviour you get is: + +
    + +
  • malloc/new: the returned memory is marked as addressible but not + having valid values. This means you have to write on it before + you can read it.

  • +

    + +

  • calloc: returned memory is marked both addressible and valid, + since calloc() clears the area to zero.

  • +

    + +

  • realloc: if the new size is larger than the old, the new section + is addressible but invalid, as with malloc.

  • +

    + +

  • If the new size is smaller, the dropped-off section is marked as + unaddressible. You may only pass to realloc a pointer + previously issued to you by malloc/calloc/new/realloc.

  • +

    + +

  • free/delete: you may only pass to free a pointer previously + issued to you by malloc/calloc/new/realloc, or the value + NULL. Otherwise, Valgrind complains. If the pointer is indeed + valid, Valgrind marks the entire area it points at as + unaddressible, and places the block in the freed-blocks-queue. + The aim is to defer as long as possible reallocation of this + block. Until that happens, all attempts to access it will + elicit an invalid-address error, as you would hope.

  • +
+ + + + +

3.4  Signals

+ +Valgrind provides suitable handling of signals, so, provided you stick +to POSIX stuff, you should be ok. Basic sigaction() and sigprocmask() +are handled. Signal handlers may return in the normal way or do +longjmp(); both should work ok. As specified by POSIX, a signal is +blocked in its own handler. Default actions for signals should work +as before. Etc, etc. + +

Under the hood, dealing with signals is a real pain, and Valgrind's +simulation leaves much to be desired. If your program does +way-strange stuff with signals, bad things may happen. If so, let me +know. I don't promise to fix it, but I'd at least like to be aware of +it. + + + +

3.5  Memory leak detection

+ +Valgrind keeps track of all memory blocks issued in response to calls +to malloc/calloc/realloc/new. So when the program exits, it knows +which blocks are still outstanding -- have not been returned, in other +words. Ideally, you want your program to have no blocks still in use +at exit. But many programs do. + +

For each such block, Valgrind scans the entire address space of the +process, looking for pointers to the block. One of three situations +may result: + +

    +
  • A pointer to the start of the block is found. This usually + indicates programming sloppiness; since the block is still + pointed at, the programmer could, at least in principle, have free'd + it before program exit.

  • +

    + +

  • A pointer to the interior of the block is found. The pointer + might originally have pointed to the start and have been moved + along, or it might be entirely unrelated. Valgrind deems such a + block as "dubious", that is, possibly leaked, + because it's unclear whether or + not a pointer to it still exists.

  • +

    + +

  • The worst outcome is that no pointer to the block can be found. + The block is classified as "leaked", because the + programmer could not possibly have free'd it at program exit, + since no pointer to it exists. This might be a symptom of + having lost the pointer at some earlier point in the + program.
  • +
+ +Valgrind reports summaries about leaked and dubious blocks. +For each such block, it will also tell you where the block was +allocated. This should help you figure out why the pointer to it has +been lost. In general, you should attempt to ensure your programs do +not have any leaked or dubious blocks at exit. + +

The precise area of memory in which Valgrind searches for pointers +is: all naturally-aligned 4-byte words for which all A bits indicate +addressibility and all V bits indicate that the stored value is +actually valid. + +


+ + +
+

4  Limitations

+ +The following list of limitations seems depressingly long. However, +most programs actually work fine. + +

Valgrind will run x86-GNU/Linux ELF dynamically linked binaries, on +a kernel 2.4.X system, subject to the following constraints: + +

    +
  • No MMX, SSE, SSE2, 3DNow instructions. If the translator + encounters these, Valgrind will simply give up. It may be + possible to add support for them at a later time. Intel added a + few instructions such as "cmov" to the integer instruction set + on Pentium and later processors, and these are supported. + Nevertheless it's safest to think of Valgrind as implementing + the 486 instruction set.

  • +

    + +

  • Multithreaded programs are not supported, since I haven't yet + figured out how to do this. To be more specific, it is the + "clone" system call which is not supported. A program calls + "clone" to create threads. Valgrind will abort if this + happens.
  • +

    + +

  • Valgrind assumes that the floating point registers are not used + as intermediaries in memory-to-memory copies, so it immediately + checks V bits in floating-point loads/stores. If you want to + write code which copies around possibly-uninitialised values, + you must ensure these travel through the integer registers, not + the FPU.

  • +

    + +

  • If your program does its own memory management, rather than + using malloc/new/free/delete, it should still work, but + Valgrind's error checking won't be so effective.

  • +

    + +

  • Valgrind's signal simulation is not as robust as it could be. + Basic POSIX-compliant sigaction and sigprocmask functionality is + supplied, but it's conceivable that things could go badly awry + if you do weird things with signals. Workaround: don't. + Programs that do non-POSIX signal tricks are in any case + inherently unportable, so should be avoided if + possible.

  • +

    + +

  • I have no idea what happens if programs try to handle signals on + an alternate stack (sigaltstack). YMMV.

  • +

    + +

  • Programs which switch stacks are not well handled. Valgrind + does have support for this, but I don't have great faith in it. + It's difficult -- there's no cast-iron way to decide whether a + large change in %esp is as a result of the program switching + stacks, or merely allocating a large object temporarily on the + current stack -- yet Valgrind needs to handle the two situations + differently.

  • +

    + +

  • x86 instructions, and system calls, have been implemented on + demand. So it's possible, although unlikely, that a program + will fall over with a message to that effect. If this happens, + please mail me ALL the details printed out, so I can try and + implement the missing feature.

  • +

    + +

  • x86 floating point works correctly, but floating-point code may + run even more slowly than integer code, due to my simplistic + approach to FPU emulation.

  • +

    + +

  • You can't Valgrind-ize statically linked binaries. Valgrind + relies on the dynamic-link mechanism to gain control at + startup.

  • +

    + +

  • Memory consumption of your program is majorly increased whilst + running under Valgrind. This is due to the large amount of + administrative information maintained behind the scenes. Another + cause is that Valgrind dynamically translates the original + executable and never throws any translation away, except in + those rare cases where self-modifying code is detected. + Translated, instrumented code is 8-12 times larger than the + original (!) so you can easily end up with 15+ MB of + translations when running (eg) a web browser. There's not a lot + you can do about this -- use Valgrind on a fast machine with a lot + of memory and swap space. At some point I may implement a LRU + caching scheme for translations, so as to bound the maximum + amount of memory devoted to them, to say 8 or 16 MB.
  • +
+ + +Programs which are known not to work are: + +
    +
  • Netscape 4.76 works pretty well on some platforms -- quite + nicely on my AMD K6-III (400 MHz). I can surf, do mail, etc, no + problem. On other platforms it has been observed to crash + during startup. Despite much investigation I can't figure out + why.

  • +

    + +

  • kpackage (a KDE front end to rpm) dies because the CPUID + instruction is unimplemented. Easy to fix.

  • +

    + +

  • knode (a KDE newsreader) tries to do multithreaded things, and + fails.

  • +

    + +

  • emacs starts up but immediately concludes it is out of memory + and aborts. Emacs has its own memory-management scheme, but I + don't understand why this should interact so badly with + Valgrind.

  • +

    + +

  • Gimp and Gnome and GTK-based apps die early on because + of unimplemented system call wrappers. (I'm a KDE user :) + This wouldn't be hard to fix. +

  • +

    + +

  • As a consequence of me being a KDE user, almost all KDE apps + work ok -- except those which are multithreaded. +

  • +

    +

+ + +


+ + + +

5  How it works -- a rough overview

+Some gory details, for those with a passion for gory details. You +don't need to read this section if all you want to do is use Valgrind. + + +

5.1  Getting started

+ +Valgrind is compiled into a shared object, valgrind.so. The shell +script valgrind sets the LD_PRELOAD environment variable to point to +valgrind.so. This causes the .so to be loaded as an extra library to +any subsequently executed dynamically-linked ELF binary, viz, the +program you want to debug. + +

The dynamic linker allows each .so in the process image to have an +initialisation function which is run before main(). It also allows +each .so to have a finalisation function run after main() exits. + +

When valgrind.so's initialisation function is called by the dynamic +linker, the synthetic CPU starts up. The real CPU remains locked +in valgrind.so for the entire rest of the program, but the synthetic +CPU returns from the initialisation function. Startup of the program +now continues as usual -- the dynamic linker calls all the other .so's +initialisation routines, and eventually runs main(). This all runs on +the synthetic CPU, not the real one, but the client program cannot +tell the difference. + +

Eventually main() exits, so the synthetic CPU calls valgrind.so's +finalisation function. Valgrind detects this, and uses it as its cue +to exit. It prints summaries of all errors detected, possibly checks +for memory leaks, and then exits the finalisation routine, but now on +the real CPU. The synthetic CPU has now lost control -- permanently +-- so the program exits back to the OS on the real CPU, just as it +would have done anyway. + +

On entry, Valgrind switches stacks, so it runs on its own stack. +On exit, it switches back. This means that the client program +continues to run on its own stack, so we can switch back and forth +between running it on the simulated and real CPUs without difficulty. +This was an important design decision, because it makes it easy (well, +significantly less difficult) to debug the synthetic CPU. + + + +

5.2  The translation/instrumentation engine

+ +Valgrind does not directly run any of the original program's code. Only +instrumented translations are run. Valgrind maintains a translation +table, which allows it to find the translation quickly for any branch +target (code address). If no translation has yet been made, the +translator - a just-in-time translator - is summoned. This makes an +instrumented translation, which is added to the collection of +translations. Subsequent jumps to that address will use this +translation. + +

Valgrind can optionally check writes made by the application, to +see if they are writing an address contained within code which has +been translated. Such a write invalidates translations of code +bracketing the written address. Valgrind will discard the relevant +translations, which causes them to be re-made, if they are needed +again, reflecting the new updated data stored there. In this way, +self modifying code is supported. In practice I have not found any +Linux applications which use self-modifying-code. + +

The JITter translates basic blocks -- blocks of straight-line-code +-- as single entities. To minimise the considerable difficulties of +dealing with the x86 instruction set, x86 instructions are first +translated to a RISC-like intermediate code, similar to sparc code, +but with an infinite number of virtual integer registers. Initially +each insn is translated separately, and there is no attempt at +instrumentation. + +

The intermediate code is improved, mostly so as to try and cache +the simulated machine's registers in the real machine's registers over +several simulated instructions. This is often very effective. Also, +we try to remove redundant updates of the simulated machine's +condition-code register. + +

The intermediate code is then instrumented, giving more +intermediate code. There are a few extra intermediate-code operations +to support instrumentation; it is all refreshingly simple. After +instrumentation there is a cleanup pass to remove redundant value +checks. + +

This gives instrumented intermediate code which mentions arbitrary +numbers of virtual registers. A linear-scan register allocator is +used to assign real registers and possibly generate spill code. All +of this is still phrased in terms of the intermediate code. This +machinery is inspired by the work of Reuben Thomas (MITE). + +

Then, and only then, is the final x86 code emitted. The +intermediate code is carefully designed so that x86 code can be +generated from it without need for spare registers or other +inconveniences. + +

The translations are managed using a traditional LRU-based caching +scheme. The translation cache has a default size of about 14MB. + + + +

5.3  Tracking the status of memory

Each byte in the +process' address space has nine bits associated with it: one A bit and +eight V bits. The A and V bits for each byte are stored using a +sparse array, which flexibly and efficiently covers arbitrary parts of +the 32-bit address space without imposing significant space or +performance overheads for the parts of the address space never +visited. The scheme used, and speedup hacks, are described in detail +at the top of the source file vg_memory.c, so you should read that for +the gory details. + + + +

5.4 System calls

+All system calls are intercepted. The memory status map is consulted +before and updated after each call. It's all rather tiresome. See +vg_syscall_mem.c for details. + + + +

5.5  Signals

+All system calls to sigaction() and sigprocmask() are intercepted. If +the client program is trying to set a signal handler, Valgrind makes a +note of the handler address and which signal it is for. Valgrind then +arranges for the same signal to be delivered to its own handler. + +

When such a signal arrives, Valgrind's own handler catches it, and +notes the fact. At a convenient safe point in execution, Valgrind +builds a signal delivery frame on the client's stack and runs its +handler. If the handler longjmp()s, there is nothing more to be said. +If the handler returns, Valgrind notices this, zaps the delivery +frame, and carries on where it left off before delivering the signal. + +

The purpose of this nonsense is that setting signal handlers +essentially amounts to giving callback addresses to the Linux kernel. +We can't allow this to happen, because if it did, signal handlers +would run on the real CPU, not the simulated one. This means the +checking machinery would not operate during the handler run, and, +worse, memory permissions maps would not be updated, which could cause +spurious error reports once the handler had returned. + +

An even worse thing would happen if the signal handler longjmp'd +rather than returned: Valgrind would completely lose control of the +client program. + +

Upshot: we can't allow the client to install signal handlers +directly. Instead, Valgrind must catch, on behalf of the client, any +signal the client asks to catch, and must deliver it to the client on +the simulated CPU, not the real one. This involves considerable +gruesome fakery; see vg_signals.c for details. +

+ +


+ + +

6  Example

+This is the log for a run of a small program. The program is in fact +correct, and the reported error is as the result of a potentially serious +code generation bug in GNU g++ (snapshot 20010527). +
+sewardj@phoenix:~/newmat10$
+~/Valgrind-6/valgrind -v ./bogon 
+==25832== Valgrind 0.10, a memory error detector for x86 RedHat 7.1.
+==25832== Copyright (C) 2000-2001, and GNU GPL'd, by Julian Seward.
+==25832== Startup, with flags:
+==25832== --suppressions=/home/sewardj/Valgrind/redhat71.supp
+==25832== reading syms from /lib/ld-linux.so.2
+==25832== reading syms from /lib/libc.so.6
+==25832== reading syms from /mnt/pima/jrs/Inst/lib/libgcc_s.so.0
+==25832== reading syms from /lib/libm.so.6
+==25832== reading syms from /mnt/pima/jrs/Inst/lib/libstdc++.so.3
+==25832== reading syms from /home/sewardj/Valgrind/valgrind.so
+==25832== reading syms from /proc/self/exe
+==25832== loaded 5950 symbols, 142333 line number locations
+==25832== 
+==25832== Invalid read of size 4
+==25832==    at 0x8048724: _ZN10BandMatrix6ReSizeEiii (bogon.cpp:45)
+==25832==    by 0x80487AF: main (bogon.cpp:66)
+==25832==    by 0x40371E5E: __libc_start_main (libc-start.c:129)
+==25832==    by 0x80485D1: (within /home/sewardj/newmat10/bogon)
+==25832==    Address 0xBFFFF74C is not stack'd, malloc'd or free'd
+==25832==
+==25832== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+==25832== malloc/free: in use at exit: 0 bytes in 0 blocks.
+==25832== malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+==25832== For a detailed leak analysis, rerun with: --leak-check=yes
+==25832==
+==25832== exiting, did 1881 basic blocks, 0 misses.
+==25832== 223 translations, 3626 bytes in, 56801 bytes out.
+
+

The GCC folks fixed this about a week before gcc-3.0 shipped. +


+

+ + diff --git a/docs/nav.html b/docs/nav.html new file mode 100644 index 000000000..686ac2bde --- /dev/null +++ b/docs/nav.html @@ -0,0 +1,68 @@ + + + Valgrind + + + + + +
+ Contents of this manual
+ 1 Introduction
+ 1.1 What Valgrind is for
+ 1.2 What it does with + your program +

+ 2 How to use it, and how to + make sense of the results
+ 2.1 Getting started
+ 2.2 The commentary
+ 2.3 Reporting of errors
+ 2.4 Suppressing errors
+ 2.5 Command-line flags
+ 2.6 Explanation of error messages
+ 2.7 Writing suppressions files
+ 2.8 Building and installing
+ 2.9 If you have problems +

+ 3 Details of the checking machinery
+ 3.1 Valid-value (V) bits
+ 3.2 Valid-address (A) bits
+ 3.3 Putting it all together
+ 3.4 Signals
+ 3.5 Memory leak detection +

+ 4 Limitations
+

+ 5 How it works -- a rough overview
+ 5.1 Getting started
+ 5.2 The translation/instrumentation engine
+ 5.3 Tracking the status of memory
+ 5.4 System calls
+ 5.5 Signals +

+ 6 An example
+

+ 7 The design and implementation of Valgrind
+ + + diff --git a/docs/techdocs.html b/docs/techdocs.html new file mode 100644 index 000000000..4044d4957 --- /dev/null +++ b/docs/techdocs.html @@ -0,0 +1,2116 @@ + + + + The design and implementation of Valgrind + + + + +  +

The design and implementation of Valgrind

+ +
+Detailed technical notes for hackers, maintainers and the +overly-curious
+These notes pertain to snapshot 20020306
+

+jseward@acm.org
+
http://developer.kde.org/~sewardj
+http://www.muraroa.demon.co.uk
+Copyright © 2000-2002 Julian Seward +

+Valgrind is licensed under the GNU General Public License, +version 2
+An open-source tool for finding memory-management problems in +x86 GNU/Linux executables. +

+ +

+ + + + +


+ +

Introduction

+ +This document contains a detailed, highly-technical description of the +internals of Valgrind. This is not the user manual; if you are an +end-user of Valgrind, you do not want to read this. Conversely, if +you really are a hacker-type and want to know how it works, I assume +that you have read the user manual thoroughly. +

+You may need to read this document several times, and carefully. Some +important things, I only say once. + + +

History

+ +Valgrind came into public view in late Feb 2002. However, it has been +under contemplation for a very long time, perhaps seriously for about +five years. Somewhat over two years ago, I started working on the x86 +code generator for the Glasgow Haskell Compiler +(http://www.haskell.org/ghc), gaining familiarity with x86 internals +on the way. I then did Cacheprof (http://www.cacheprof.org), gaining +further x86 experience. Some time around Feb 2000 I started +experimenting with a user-space x86 interpreter for x86-Linux. This +worked, but it was clear that a JIT-based scheme would be necessary to +give reasonable performance for Valgrind. Design work for the JITter +started in earnest in Oct 2000, and by early 2001 I had an x86-to-x86 +dynamic translator which could run quite large programs. This +translator was in a sense pointless, since it did not do any +instrumentation or checking. + +

+Most of the rest of 2001 was taken up designing and implementing the +instrumentation scheme. The main difficulty, which consumed a lot +of effort, was to design a scheme which did not generate large numbers +of false uninitialised-value warnings. By late 2001 a satisfactory +scheme had been arrived at, and I started to test it on ever-larger +programs, with an eventual eye to making it work well enough so that +it was helpful to folks debugging the upcoming version 3 of KDE. I've +used KDE since before version 1.0, and wanted Valgrind to be an +indirect contribution to the KDE 3 development effort. At the start of +Feb 02 the kde-core-devel crew started using it, and gave a huge +amount of helpful feedback and patches in the space of three weeks. +Snapshot 20020306 is the result. + +

+In the best Unix tradition, or perhaps in the spirit of Fred Brooks' +depressing-but-completely-accurate epitaph "build one to throw away; +you will anyway", much of Valgrind is a second or third rendition of +the initial idea. The instrumentation machinery +(vg_translate.c, vg_memory.c) and core CPU +simulation (vg_to_ucode.c, vg_from_ucode.c) +have had three redesigns and rewrites; the register allocator, +low-level memory manager (vg_malloc2.c) and symbol table +reader (vg_symtab2.c) are on the second rewrite. In a +sense, this document serves to record some of the knowledge gained as +a result. + + +

Design overview

+ +Valgrind is compiled into a Linux shared object, +valgrind.so, and also a dummy one, +valgrinq.so, of which more later. The +valgrind shell script adds valgrind.so to +the LD_PRELOAD list of extra libraries to be +loaded with any dynamically linked library. This is a standard trick, +one which I assume the LD_PRELOAD mechanism was developed +to support. + +

+valgrind.so +is linked with the -z initfirst flag, which requests that +its initialisation code is run before that of any other object in the +executable image. When this happens, valgrind gains control. The +real CPU becomes "trapped" in valgrind.so and the +translations it generates. The synthetic CPU provided by Valgrind +does, however, return from this initialisation function. So the +normal startup actions, orchestrated by the dynamic linker +ld.so, continue as usual, except on the synthetic CPU, +not the real one. Eventually main is run and returns, +and then the finalisation code of the shared objects is run, +presumably in inverse order to which they were initialised. Remember, +this is still all happening on the simulated CPU. Eventually +valgrind.so's own finalisation code is called. It spots +this event, shuts down the simulated CPU, prints any error summaries +and/or does leak detection, and returns from the initialisation code +on the real CPU. At this point, in effect the real and synthetic CPUs +have merged back into one, Valgrind has lost control of the program, +and the program finally exit()s back to the kernel in the +usual way. + +

+The normal course of activity, once Valgrind has started up, is as +follows. Valgrind never runs any part of your program (usually +referred to as the "client"), not a single byte of it, directly. +Instead it uses function VG_(translate) to translate +basic blocks (BBs, straight-line sequences of code) into instrumented +translations, and those are run instead. The translations are stored +in the translation cache (TC), vg_tc, with the +translation table (TT), vg_tt supplying the +original-to-translation code address mapping. Auxiliary array +VG_(tt_fast) is used as a direct-map cache for fast +lookups in TT; it usually achieves a hit rate of around 98% and +facilitates an orig-to-trans lookup in 4 x86 insns, which is not bad. + +

+Function VG_(dispatch) in vg_dispatch.S is +the heart of the JIT dispatcher. Once a translated code address has +been found, it is executed simply by an x86 call +to the translation. At the end of the translation, the next +original code addr is loaded into %eax, and the +translation then does a ret, taking it back to the +dispatch loop, with, interestingly, zero branch mispredictions. +The address requested in %eax is looked up first in +VG_(tt_fast), and, if not found, by calling C helper +VG_(search_transtab). If there is still no translation +available, VG_(dispatch) exits back to the top-level +C dispatcher VG_(toploop), which arranges for +VG_(translate) to make a new translation. All fairly +unsurprising, really. There are various complexities described below. + +

+The translator, orchestrated by VG_(translate), is +complicated but entirely self-contained. It is described in great +detail in subsequent sections. Translations are stored in TC, with TT +tracking administrative information. The translations are subject to +an approximate LRU-based management scheme. With the current +settings, the TC can hold at most about 15MB of translations, and LRU +passes prune it to about 13.5MB. Given that the +orig-to-translation expansion ratio is about 13:1 to 14:1, this means +TC holds translations for more or less a megabyte of original code, +which generally comes to about 70000 basic blocks for C++ compiled +with optimisation on. Generating new translations is expensive, so it +is worth having a large TC to minimise the (capacity) miss rate. + +

+The dispatcher, VG_(dispatch), receives hints from +the translations which allow it to cheaply spot all control +transfers corresponding to x86 call and ret +instructions. It has to do this in order to spot some special events: +

    +
  • Calls to VG_(shutdown). This is Valgrind's cue to + exit. NOTE: actually this is done a different way; it should be + cleaned up. +

    +

  • Returns of system call handlers, to the return address + VG_(signalreturn_bogusRA). The signal simulator + needs to know when a signal handler is returning, so we spot + jumps (returns) to this address. +

    +

  • Calls to vg_trap_here. All malloc, + free, etc calls that the client program makes are + eventually routed to a call to vg_trap_here, + and Valgrind does its own special thing with these calls. + In effect this provides a trapdoor, by which Valgrind can + intercept certain calls on the simulated CPU, run the call as it + sees fit itself (on the real CPU), and return the result to + the simulated CPU, quite transparently to the client program. +
+Valgrind intercepts the client's malloc, +free, etc, +calls, so that it can store additional information. Each block +malloc'd by the client gives rise to a shadow block +in which Valgrind stores the call stack at the time of the +malloc +call. When the client calls free, Valgrind tries to +find the shadow block corresponding to the address passed to +free, and emits an error message if none can be found. +If it is found, the block is placed on the freed blocks queue +vg_freed_list, it is marked as inaccessible, and +its shadow block now records the call stack at the time of the +free call. Keeping free'd blocks in +this queue allows Valgrind to spot all (presumably invalid) accesses +to them. However, once the volume of blocks in the free queue +exceeds VG_(clo_freelist_vol), blocks are finally +removed from the queue. + +

+Keeping track of A and V bits (note: if you don't know what these are, +you haven't read the user guide carefully enough) for memory is done +in vg_memory.c. This implements a sparse array structure +which covers the entire 4G address space in a way which is reasonably +fast and reasonably space efficient. The 4G address space is divided +up into 64K sections, each covering 64Kb of address space. Given a +32-bit address, the top 16 bits are used to select one of the 65536 +entries in VG_(primary_map). The resulting "secondary" +(SecMap) holds A and V bits for the 64k of address space +chunk corresponding to the lower 16 bits of the address. + + +

Design decisions

+ +Some design decisions were motivated by the need to make Valgrind +debuggable. Imagine you are writing a CPU simulator. It works fairly +well. However, you run some large program, like Netscape, and after +tens of millions of instructions, it crashes. How can you figure out +where in your simulator the bug is? + +

+Valgrind's answer is: cheat. Valgrind is designed so that it is +possible to switch back to running the client program on the real +CPU at any point. Using the --stop-after= flag, you can +ask Valgrind to run just some number of basic blocks, and then +run the rest of the way on the real CPU. If you are searching for +a bug in the simulated CPU, you can use this to do a binary search, +which quickly leads you to the specific basic block which is +causing the problem. + +

+This is all very handy. It does constrain the design in certain +unimportant ways. Firstly, the layout of memory, when viewed from the +client's point of view, must be identical regardless of whether it is +running on the real or simulated CPU. This means that Valgrind can't +do pointer swizzling -- well, no great loss -- and it can't run on +the same stack as the client -- again, no great loss. +Valgrind operates on its own stack, VG_(stack), which +it switches to at startup, temporarily switching back to the client's +stack when doing system calls for the client. + +

+Valgrind also receives signals on its own stack, +VG_(sigstack), but for different gruesome reasons +discussed below. + +

+This nice clean switch-back-to-the-real-CPU-whenever-you-like story +is muddied by signals. Problem is that signals arrive at arbitrary +times and tend to slightly perturb the basic block count, with the +result that you can get close to the basic block causing a problem but +can't home in on it exactly. My kludgey hack is to define +SIGNAL_SIMULATION to 1 towards the bottom of +vg_syscall_mem.c, so that signal handlers are run on the +real CPU and don't change the BB counts. + +

+A second hole in the switch-back-to-real-CPU story is that Valgrind's +way of delivering signals to the client is different from that of the +kernel. Specifically, the layout of the signal delivery frame, and +the mechanism used to detect a sighandler returning, are different. +So you can't expect to make the transition inside a sighandler and +still have things working, but in practice that's not much of a +restriction. + +

+Valgrind's implementation of malloc, free, +etc, (in vg_clientmalloc.c, not the low-level stuff in +vg_malloc2.c) is somewhat complicated by the need to +handle switching back at arbitrary points. It does work tho. + + + +

Correctness

+ +There's only one of me, and I have a Real Life (tm) as well as hacking +Valgrind [allegedly :-]. That means I don't have time to waste +chasing endless bugs in Valgrind. My emphasis is therefore on doing +everything as simply as possible, with correctness, stability and +robustness being the number one priority, more important than +performance or functionality. As a result: +
    +
  • The code is absolutely loaded with assertions, and these are + permanently enabled. I have no plan to remove or disable + them later. Over the past couple of months, as valgrind has + become more widely used, they have shown their worth, pulling + up various bugs which would otherwise have appeared as + hard-to-find segmentation faults. +

    + I am of the view that it's acceptable to spend 5% of the total + running time of your valgrindified program doing assertion checks + and other internal sanity checks. +

    +

  • Aside from the assertions, valgrind contains various sets of + internal sanity checks, which get run at varying frequencies + during normal operation. VG_(do_sanity_checks) + runs every 1000 basic blocks, which means 500 to 2000 times/second + for typical machines at present. It checks that Valgrind hasn't + overrun its private stack, and does some simple checks on the + memory permissions maps. Once every 25 calls it does some more + extensive checks on those maps. Etc, etc. +

    + The following components also have sanity check code, which can + be enabled to aid debugging: +

      +
    • The low-level memory-manager + (VG_(mallocSanityCheckArena)). This does a + complete check of all blocks and chains in an arena, which + is very slow. Is not engaged by default. +

      +

    • The symbol table reader(s): various checks to ensure + uniqueness of mappings; see VG_(read_symbols) + for a start. Is permanently engaged. +

      +

    • The A and V bit tracking stuff in vg_memory.c. + This can be compiled with cpp symbol + VG_DEBUG_MEMORY defined, which removes all the + fast, optimised cases, and uses simple-but-slow fallbacks + instead. Not engaged by default. +

      +

    • Ditto VG_DEBUG_LEAKCHECK. +

      +

    • The JITter parses x86 basic blocks into sequences of + UCode instructions. It then sanity checks each one with + VG_(saneUInstr) and sanity checks the sequence + as a whole with VG_(saneUCodeBlock). This stuff + is engaged by default, and has caught some way-obscure bugs + in the simulated CPU machinery in its time. +

      +

    • The system call wrapper does + VG_(first_and_last_secondaries_look_plausible) after + every syscall; this is known to pick up bugs in the syscall + wrappers. Engaged by default. +

      +

    • The main dispatch loop, in VG_(dispatch), checks + that translations do not set %ebp to any value + different from VG_EBP_DISPATCH_CHECKED or + & VG_(baseBlock). In effect this test is free, + and is permanently engaged. +

      +

    • There are a couple of ifdefed-out consistency checks I + inserted whilst debugging the new register allocator, + vg_do_register_allocation. +
    +

    +

  • I try to avoid techniques, algorithms, mechanisms, etc, for which + I can supply neither a convincing argument that they are correct, + nor sanity-check code which might pick up bugs in my + implementation. I don't always succeed in this, but I try. + Basically the idea is: avoid techniques which are, in practice, + unverifiable, in some sense. When doing anything, always have in + mind: "how can I verify that this is correct?" +
+ +

+Some more specific things are: + +

    +
  • Valgrind runs in the same namespace as the client, at least from + ld.so's point of view, and it therefore absolutely + had better not export any symbol with a name which could clash + with that of the client or any of its libraries. Therefore, all + globally visible symbols exported from valgrind.so + are defined using the VG_ CPP macro. As you'll see + from vg_constants.h, this appends some arbitrary + prefix to the symbol, in order that it be, we hope, globally + unique. Currently the prefix is vgPlain_. For + convenience there are also VGM_, VGP_ + and VGOFF_. All locally defined symbols are declared + static and do not appear in the final shared object. +

    + To check this, I periodically do + nm valgrind.so | grep " T ", + which shows you all the globally exported text symbols. + They should all have an approved prefix, except for those like + malloc, free, etc, which we deliberately + want to shadow and take precedence over the same names exported + from glibc.so, so that valgrind can intercept those + calls easily. Similarly, nm valgrind.so | grep " D " + allows you to find any rogue data-segment symbol names. +

    +

  • Valgrind tries, and almost succeeds, in being completely + independent of all other shared objects, in particular of + glibc.so. For example, we have our own low-level + memory manager in vg_malloc2.c, which is a fairly + standard malloc/free scheme augmented with arenas, and + vg_mylibc.c exports reimplementations of various bits + and pieces you'd normally get from the C library. +

    + Why all the hassle? Because imagine the potential chaos of both + the simulated and real CPUs executing in glibc.so. + It just seems simpler and cleaner to be completely self-contained, + so that only the simulated CPU visits glibc.so. In + practice it's not much hassle anyway. Also, valgrind starts up + before glibc has a chance to initialise itself, and who knows what + difficulties that could lead to. Finally, glibc has definitions + for some types, specifically sigset_t, which conflict + (are different from) the Linux kernel's idea of same. When + Valgrind wants to fiddle around with signal stuff, it wants to + use the kernel's definitions, not glibc's definitions. So it's + simplest just to keep glibc out of the picture entirely. +

    + To find out which glibc symbols are used by Valgrind, reinstate + the link flags -nostdlib -Wl,-no-undefined. This + causes linking to fail, but will tell you what you depend on. + I have mostly, but not entirely, got rid of the glibc + dependencies; what remains is, IMO, fairly harmless. AFAIK the + current dependencies are: memset, + memcmp, stat, system, + sbrk, setjmp and longjmp. + +

    +

  • Similarly, valgrind should not really import any headers other + than the Linux kernel headers, since it knows of no API other than + the kernel interface to talk to. At the moment this is really not + in a good state, and vg_syscall_mem imports, via + vg_unsafe.h, a significant number of C-library + headers so as to know the sizes of various structs passed across + the kernel boundary. This is of course completely bogus, since + there is no guarantee that the C library's definitions of these + structs matches those of the kernel. I have started to sort this + out using vg_kerneliface.h, into which I had intended + to copy all kernel definitions which valgrind could need, but this + has not gotten very far. At the moment it mostly contains + definitions for sigset_t and struct + sigaction, since the kernel's definition for these really + does clash with glibc's. I plan to use a vki_ prefix + on all these types and constants, to denote the fact that they + pertain to Valgrind's Kernel Interface. +

    + Another advantage of having a vg_kerneliface.h file + is that it makes it simpler to interface to a different kernel. + One can, for example, easily imagine writing a new + vg_kerneliface.h for FreeBSD, or x86 NetBSD. + 

+ +

Current limitations

+ +No threads. I think fixing this is close to a research-grade problem. +

+No MMX. Fixing this should be relatively easy, using the same giant +trick used for x86 FPU instructions. See below. +

+Support for weird (non-POSIX) signal stuff is patchy. Does anybody +care? +

+ + + + +


+ +

The instrumenting JITter

+ +This really is the heart of the matter. We begin with various side +issues. + +

Run-time storage, and the use of host registers

+ +Valgrind translates client (original) basic blocks into instrumented +basic blocks, which live in the translation cache TC, until either the +client finishes or the translations are ejected from TC to make room +for newer ones. +

+Since it generates x86 code in memory, Valgrind has complete control +of the use of registers in the translations. Now pay attention. I +shall say this only once, and it is important you understand this. In +what follows I will refer to registers in the host (real) cpu using +their standard names, %eax, %edi, etc. I +refer to registers in the simulated CPU by capitalising them: +%EAX, %EDI, etc. These two sets of +registers usually bear no direct relationship to each other; there is +no fixed mapping between them. This naming scheme is used fairly +consistently in the comments in the sources. +

+Host registers, once things are up and running, are used as follows: +

    +
  • %esp, the real stack pointer, points + somewhere in Valgrind's private stack area, + VG_(stack) or, transiently, into its signal delivery + stack, VG_(sigstack). +

    +

  • %edi is used as a temporary in code generation; it + is almost always dead, except when used for the Left + value-tag operations. +

    +

  • %eax, %ebx, %ecx, + %edx and %esi are available to + Valgrind's register allocator. They are dead (carry unimportant + values) in between translations, and are live only in + translations. The one exception to this is %eax, + which, as mentioned far above, has a special significance to the + dispatch loop VG_(dispatch): when a translation + returns to the dispatch loop, %eax is expected to + contain the original-code-address of the next translation to run. + The register allocator is so good at minimising spill code that + using five regs and not having to save/restore %edi + actually gives better code than allocating to %edi + as well, but then having to push/pop it around special uses. +

    +

  • %ebp points permanently at + VG_(baseBlock). Valgrind's translations are + position-independent, partly because this is convenient, but also + because translations get moved around in TC as part of the LRUing + activity. All static entities which need to be referred to + from generated code, whether data or helper functions, are stored + starting at VG_(baseBlock) and are therefore reached + by indexing from %ebp. There is but one exception, + which is that by placing the value + VG_EBP_DISPATCH_CHECKED + in %ebp just before a return to the dispatcher, + the dispatcher is informed that the next address to run, + in %eax, requires special treatment. +

    +

  • The real machine's FPU state is pretty much unimportant, for + reasons which will become obvious. Ditto its %eflags + register. +
+ +

+The state of the simulated CPU is stored in memory, in +VG_(baseBlock), which is a block of 200 words IIRC. +Recall that %ebp points permanently at the start of this +block. Function vg_init_baseBlock decides what the +offsets of various entities in VG_(baseBlock) are to be, +and allocates word offsets for them. The code generator then emits +%ebp relative addresses to get at those things. The +sequence in which entities are allocated has been carefully chosen so +that the 32 most popular entities come first, because this means 8-bit +offsets can be used in the generated code. + +

+If I was clever, I could make %ebp point 32 words along +VG_(baseBlock), so that I'd have another 32 words of +short-form offsets available, but that's just complicated, and it's +not important -- the first 32 words take 99% (or whatever) of the +traffic. + +

+Currently, the sequence of stuff in VG_(baseBlock) is as +follows: +

    +
  • 9 words, holding the simulated integer registers, + %EAX .. %EDI, and the simulated flags, + %EFLAGS. +

    +

  • Another 9 words, holding the V bit "shadows" for the above 9 regs. +

    +

  • The addresses of various helper routines called from + generated code: + VG_(helper_value_check4_fail), + VG_(helper_value_check0_fail), + which register V-check failures, + VG_(helperc_STOREV4), + VG_(helperc_STOREV1), + VG_(helperc_LOADV4), + VG_(helperc_LOADV1), + which do stores and loads of V bits to/from the + sparse array which keeps track of V bits in memory, + and + VGM_(handle_esp_assignment), which messes with + memory addressibility resulting from changes in %ESP. +

    +

  • The simulated %EIP. +

    +

  • 24 spill words, for when the register allocator can't make it work + with 5 measly registers. +

    +

  • Addresses of helpers VG_(helperc_STOREV2), + VG_(helperc_LOADV2). These are here because 2-byte + loads and stores are relatively rare, so are placed above the + magic 32-word offset boundary. +

    +

  • For similar reasons, addresses of helper functions + VGM_(fpu_write_check) and + VGM_(fpu_read_check), which handle the A/V maps + testing and changes required by FPU writes/reads. +

    +

  • Some other boring helper addresses: + VG_(helper_value_check2_fail) and + VG_(helper_value_check1_fail). These are probably + never emitted now, and should be removed. +

    +

  • The entire state of the simulated FPU, which I believe to be + 108 bytes long. +

    +

  • Finally, the addresses of various other helper functions in + vg_helpers.S, which deal with rare situations which + are tedious or difficult to generate code in-line for. +
+ +

+As a general rule, the simulated machine's state lives permanently in +memory at VG_(baseBlock). However, the JITter does some +optimisations which allow the simulated integer registers to be +cached in real registers over multiple simulated instructions within +the same basic block. These are always flushed back into memory at +the end of every basic block, so that the in-memory state is +up-to-date between basic blocks. (This flushing is implied by the +statement above that the real machine's allocatable registers are +dead in between simulated blocks). + + +

Startup, shutdown, and system calls

+ +Getting into Valgrind (VG_(startup), called from +valgrind.so's initialisation section), really means +copying the real CPU's state into VG_(baseBlock), and +then installing our own stack pointer, etc, into the real CPU, and +then starting up the JITter. Exiting valgrind involves copying the +simulated state back to the real state. + +

+Unfortunately, there's a complication at startup time. Problem is +that at the point where we need to take a snapshot of the real CPU's +state, the offsets in VG_(baseBlock) are not set up yet, +because to do so would involve disrupting the real machine's state +significantly. The way round this is to dump the real machine's state +into a temporary, static block of memory, +VG_(m_state_static). We can then set up the +VG_(baseBlock) offsets at our leisure, and copy into it +from VG_(m_state_static) at some convenient later time. +This copying is done by +VG_(copy_m_state_static_to_baseBlock). + +

+On exit, the inverse transformation is (rather unnecessarily) used: +stuff in VG_(baseBlock) is copied to +VG_(m_state_static), and the assembly stub then copies +from VG_(m_state_static) into the real machine registers. + +

+Doing system calls on behalf of the client (vg_syscall.S) +is something of a half-way house. We have to make the world look +sufficiently like that which the client would normally have to make +the syscall actually work properly, but we can't afford to lose +control. So the trick is to copy all of the client's state, except +its program counter, into the real CPU, do the system call, and +copy the state back out. Note that the client's state includes its +stack pointer register, so one effect of this partial restoration is +to cause the system call to be run on the client's stack, as it should +be. + +

+As ever there are complications. We have to save some of our own state +somewhere when restoring the client's state into the CPU, so that we +can keep going sensibly afterwards. In fact the only thing which is +important is our own stack pointer, but for paranoia reasons I save +and restore our own FPU state as well, even though that's probably +pointless. + +

+The complication on the above complication is, that for horrible +reasons to do with signals, we may have to handle a second client +system call whilst the client is blocked inside some other system +call (unbelievable!). That means there's two sets of places to +dump Valgrind's stack pointer and FPU state across the syscall, +and we decide which to use by consulting +VG_(syscall_depth), which is in turn maintained by +VG_(wrap_syscall). + + + +

Introduction to UCode

+ +UCode lies at the heart of the x86-to-x86 JITter. The basic premise +is that dealing with the x86 instruction set head-on is just too darn +complicated, so we do the traditional compiler-writer's trick and +translate it into a simpler, easier-to-deal-with form. + +

+In normal operation, translation proceeds through six stages, +coordinated by VG_(translate): +

    +
  1. Parsing of an x86 basic block into a sequence of UCode + instructions (VG_(disBB)). +

    +

  2. UCode optimisation (vg_improve), with the aim of + caching simulated registers in real registers over multiple + simulated instructions, and removing redundant simulated + %EFLAGS saving/restoring. +

    +

  3. UCode instrumentation (vg_instrument), which adds + value and address checking code. +

    +

  4. Post-instrumentation cleanup (vg_cleanup), removing + redundant value-check computations. +

    +

  5. Register allocation (vg_do_register_allocation), + which, note, is done on UCode. +

    +

  6. Emission of final instrumented x86 code + (VG_(emit_code)). +
+ +

+Notice how steps 2, 3, 4 and 5 are simple UCode-to-UCode +transformation passes, all on straight-line blocks of UCode (type +UCodeBlock). Steps 2 and 4 are optimisation passes and +can be disabled for debugging purposes, with +--optimise=no and --cleanup=no respectively. + +

+Valgrind can also run in a no-instrumentation mode, given +--instrument=no. This is useful for debugging the JITter +quickly without having to deal with the complexity of the +instrumentation mechanism too. In this mode, steps 3 and 4 are +omitted. + +

+These flags combine, so that --instrument=no together with +--optimise=no means only steps 1, 5 and 6 are used. +--single-step=yes causes each x86 instruction to be +treated as a single basic block. The translations are terrible but +this is sometimes instructive. + +

+The --stop-after=N flag switches back to the real CPU +after N basic blocks. It also re-JITs the final basic +block executed and prints the debugging info resulting, so this +gives you a way to get a quick snapshot of how a basic block looks as +it passes through the six stages mentioned above. If you want to +see full information for every block translated (probably not, but +still ...) find, in VG_(translate), the lines +
dis = True; +
dis = debugging_translation; +
+and comment out the second line. This will spew out debugging +junk faster than you can possibly imagine. + + + +

UCode operand tags: type Tag

+ +UCode is, more or less, a simple two-address RISC-like code. In +keeping with the x86 AT&T assembly syntax, generally speaking the +first operand is the source operand, and the second is the destination +operand, which is modified when the uinstr is notionally executed. + +

+UCode instructions have up to three operand fields, each of which has +a corresponding Tag describing it. Possible values for +the tag are: + +

    +
  • NoValue: indicates that the field is not in use. +

    +

  • Lit16: the field contains a 16-bit literal. +

    +

  • Literal: the field denotes a 32-bit literal, whose + value is stored in the lit32 field of the uinstr + itself. Since there is only one lit32 for the whole + uinstr, only one operand field may contain this tag. +

    +

  • SpillNo: the field contains a spill slot number, in + the range 0 to 23 inclusive, denoting one of the spill slots + contained inside VG_(baseBlock). Such tags only + exist after register allocation. +

    +

  • RealReg: the field contains a number in the range 0 + to 7 denoting an integer x86 ("real") register on the host. The + number is the Intel encoding for integer registers. Such tags + only exist after register allocation. +

    +

  • ArchReg: the field contains a number in the range 0 + to 7 denoting an integer x86 register on the simulated CPU. In + reality this means a reference to one of the first 8 words of + VG_(baseBlock). Such tags can exist at any point in + the translation process. +

    +

  • Last, but not least, TempReg. The field contains the + number of one of an infinite set of virtual (integer) + registers. TempRegs are used everywhere throughout + the translation process; you can have as many as you want. The + register allocator maps as many as it can into + RealRegs and turns the rest into + SpillNos, so TempRegs should not exist + after the register allocation phase. +

    + TempRegs are always 32 bits long, even if the data + they hold is logically shorter. In that case the upper unused + bits are required, and, I think, generally assumed, to be zero. + TempRegs holding V bits for quantities shorter than + 32 bits are expected to have ones in the unused places, since a + one denotes "undefined". +

+ + +

UCode instructions: type UInstr

+ +

+UCode was carefully designed to make it possible to do register +allocation on UCode and then translate the result into x86 code +without needing any extra registers ... well, that was the original +plan, anyway. Things have gotten a little more complicated since +then. In what follows, UCode instructions are referred to as uinstrs, +to distinguish them from x86 instructions. Uinstrs of course have +uopcodes which are (naturally) different from x86 opcodes. + +

+A uinstr (type UInstr) contains +various fields, not all of which are used by any one uopcode: +

    +
  • Three 16-bit operand fields, val1, val2 + and val3. +

    +

  • Three tag fields, tag1, tag2 + and tag3. Each of these has a value of type + Tag, + and they describe what the val1, val2 + and val3 fields contain. +

    +

  • A 32-bit literal field. +

    +

  • Two FlagSets, specifying which x86 condition codes are + read and written by the uinstr. +

    +

  • An opcode byte, containing a value of type Opcode. +

    +

  • A size field, indicating the data transfer size (1/2/4/8/10) in + cases where this makes sense, or zero otherwise. +

    +

  • A condition-code field, which, for jumps, holds a + value of type Condcode, indicating the condition + which applies. The encoding is as it is in the x86 insn stream, + except we add a 17th value CondAlways to indicate + an unconditional transfer. +

    +

  • Various 1-bit flags, indicating whether this insn pertains to an + x86 CALL or RET instruction, whether a widening is signed or not, + etc. +
+ +

+UOpcodes (type Opcode) are divided into two groups: those +necessary merely to express the functionality of the x86 code, and +extra uopcodes needed to express the instrumentation. The former +group contains: +

    +
  • GET and PUT, which move values from the + simulated CPU's integer registers (ArchRegs) into + TempRegs, and back. GETF and + PUTF do the corresponding thing for the simulated + %EFLAGS. There are no corresponding insns for the + FPU register stack, since we don't explicitly simulate its + registers. +

    +

  • LOAD and STORE, which, in RISC-like + fashion, are the only uinstrs able to interact with memory. +

    +

  • MOV and CMOV allow unconditional and + conditional moves of values between TempRegs. +

    +

  • ALU operations. Again in RISC-like fashion, these only operate on + TempRegs (before reg-alloc) or RealRegs + (after reg-alloc). These are: ADD, ADC, + AND, OR, XOR, + SUB, SBB, SHL, + SHR, SAR, ROL, + ROR, RCL, RCR, + NOT, NEG, INC, + DEC, BSWAP, CC2VAL and + WIDEN. WIDEN does signed or unsigned + value widening. CC2VAL is used to convert condition + codes into a value, zero or one. The rest are obvious. +

    + To allow for more efficient code generation, we bend slightly the + restriction at the start of the previous para: for + ADD, ADC, XOR, + SUB and SBB, we allow the first (source) + operand to also be an ArchReg, that is, one of the + simulated machine's registers. Also, many of these ALU ops allow + the source operand to be a literal. See + VG_(saneUInstr) for the final word on the allowable + forms of uinstrs. +

    +

  • LEA1 and LEA2 are not strictly + necessary, but facilitate better translations. They + record the fancy x86 addressing modes in a direct way, which + allows those amodes to be emitted back into the final + instruction stream more or less verbatim. +

    +

  • CALLM calls a machine-code helper, one of the methods + whose address is stored at some VG_(baseBlock) + offset. PUSH and POP move values + to/from TempReg to the real (Valgrind's) stack, and + CLEAR removes values from the stack. + CALLM_S and CALLM_E delimit the + boundaries of call setups and clearings, for the benefit of the + instrumentation passes. Getting this right is critical, and so + VG_(saneUCodeBlock) makes various checks on the use + of these uopcodes. +

    + It is important to understand that these uopcodes have nothing to + do with the x86 call, return, + push or pop instructions, and are not + used to implement them. Those guys turn into combinations of + GET, PUT, LOAD, + STORE, ADD, SUB, and + JMP. What these uopcodes support is calling of + helper functions such as VG_(helper_imul_32_64), + which do stuff which is too difficult or tedious to emit inline. +

    +

  • FPU, FPU_R and FPU_W. + Valgrind doesn't attempt to simulate the internal state of the + FPU at all. Consequently it only needs to be able to distinguish + FPU ops which read and write memory from those that don't, and + for those which do, it needs to know the effective address and + data transfer size. This is made easier because the x86 FP + instruction encoding is very regular, basically consisting of + 16 bits for a non-memory FPU insn and 11 (IIRC) bits + an address mode + for a memory FPU insn. So our FPU uinstr carries + the 16 bits in its val1 field. And + FPU_R and FPU_W carry 11 bits in that + field, together with the identity of a TempReg or + (later) RealReg which contains the address. +

    +

  • JIFZ is unique, in that it allows a control-flow + transfer which is not deemed to end a basic block. It causes a + jump to a literal (original) address if the specified argument + is zero. +

    +

  • Finally, INCEIP advances the simulated + %EIP by the specified literal amount. This supports + lazy %EIP updating, as described below. +
+ +

+Stages 1 and 2 of the 6-stage translation process mentioned above +deal purely with these uopcodes, and no others. They are +sufficient to express pretty much all the x86 32-bit protected-mode +instruction set, at +least everything understood by a pre-MMX original Pentium (P54C). + +

+Stages 3, 4, 5 and 6 also deal with the following extra +"instrumentation" uopcodes. They are used to express all the +definedness-tracking and -checking machinery which valgrind does. In +later sections we show how to create checking code for each of the +uopcodes above. Note that these instrumentation uopcodes, although +some appearing complicated, have been carefully chosen so that +efficient x86 code can be generated for them. GNU superopt v2.5 did a +great job helping out here. Anyways, the uopcodes are as follows: + +

    +
  • GETV and PUTV are analogues to + GET and PUT above. They are identical + except that they move the V bits for the specified values back and + forth to TempRegs, rather than moving the values + themselves. +

    +

  • Similarly, LOADV and STOREV read and + write V bits from the synthesised shadow memory that Valgrind + maintains. In fact they do more than that, since they also do + address-validity checks, and emit complaints if the read/written + addresses are unaddressible. +

    +

  • TESTV, whose parameters are a TempReg + and a size, tests the V bits in the TempReg, at the + specified operation size (0/1/2/4 byte) and emits an error if any + of them indicate undefinedness. This is the only uopcode capable + of doing such tests. +

    +

  • SETV, whose parameters are also TempReg + and a size, makes the V bits in the TempReg indicate + definedness, at the specified operation size. This is usually + used to generate the correct V bits for a literal value, which is + of course fully defined. +

    +

  • GETVF and PUTVF are analogues to + GETF and PUTF. They move the single V + bit used to model definedness of %EFLAGS between its + home in VG_(baseBlock) and the specified + TempReg. +

    +

  • TAG1 denotes one of a family of unary operations on + TempRegs containing V bits. Similarly, + TAG2 denotes one in a family of binary operations on + V bits. +
+ +

+These 10 uopcodes are sufficient to express Valgrind's entire +definedness-checking semantics. In fact most of the interesting magic +is done by the TAG1 and TAG2 +suboperations. + +

+First, however, I need to explain about V-vector operation sizes. +There are 4 sizes: 1, 2 and 4, which operate on groups of 8, 16 and 32 +V bits at a time, supporting the usual 1, 2 and 4 byte x86 operations. +However there is also the mysterious size 0, which really means a +single V bit. Single V bits are used in various circumstances; in +particular, the definedness of %EFLAGS is modelled with a +single V bit. Now might be a good time to also point out that for +V bits, 1 means "undefined" and 0 means "defined". Similarly, for A +bits, 1 means "invalid address" and 0 means "valid address". This +seems counterintuitive (and so it is), but testing against zero on +x86s saves instructions compared to testing against all 1s, because +many ALU operations set the Z flag for free, so to speak. + +

+With that in mind, the tag ops are: + +

    +
  • (UNARY) Pessimising casts: VgT_PCast40, + VgT_PCast20, VgT_PCast10, + VgT_PCast01, VgT_PCast02 and + VgT_PCast04. A "pessimising cast" takes a V-bit + vector at one size, and creates a new one at another size, + pessimised in the sense that if any of the bits in the source + vector indicate undefinedness, then all the bits in the result + indicate undefinedness. In this case the casts are all to or from + a single V bit, so for example VgT_PCast40 is a + pessimising cast from 32 bits to 1, whereas + VgT_PCast04 simply copies the single source V bit + into all 32 bit positions in the result. Surprisingly, these ops + can all be implemented very efficiently. +

    + There are also the pessimising casts VgT_PCast14, + from 8 bits to 32, VgT_PCast12, from 8 bits to 16, + and VgT_PCast11, from 8 bits to 8. This last one + seems nonsensical, but in fact it isn't a no-op because, as + mentioned above, any undefined (1) bits in the source infect the + entire result. +

    +

  • (UNARY) Propagating undefinedness upwards in a word: + VgT_Left4, VgT_Left2 and + VgT_Left1. These are used to simulate the worst-case + effects of carry propagation in adds and subtracts. They return a + V vector identical to the original, except that if the original + contained any undefined bits, then it and all bits above it are + marked as undefined too. Hence the Left bit in the names. +

    +

  • (UNARY) Signed and unsigned value widening: + VgT_SWiden14, VgT_SWiden24, + VgT_SWiden12, VgT_ZWiden14, + VgT_ZWiden24 and VgT_ZWiden12. These + mimic the definedness effects of standard signed and unsigned + integer widening. Unsigned widening creates zero bits in the new + positions, so VgT_ZWiden* accordingly mark + those parts of their argument as defined. Signed widening copies + the sign bit into the new positions, so VgT_SWiden* + copies the definedness of the sign bit into the new positions. + Because 1 means undefined and 0 means defined, these operations + can (fascinatingly) be done by the same operations which they + mimic. Go figure. +

    +

  • (BINARY) Undefined-if-either-Undefined, + Defined-if-either-Defined: VgT_UifU4, + VgT_UifU2, VgT_UifU1, + VgT_UifU0, VgT_DifD4, + VgT_DifD2, VgT_DifD1. These do simple + bitwise operations on pairs of V-bit vectors, with + UifU giving undefined if either arg bit is + undefined, and DifD giving defined if either arg bit + is defined. Abstract interpretation junkies, if any make it this + far, may like to think of them as meets and joins (or is it joins + and meets) in the definedness lattices. +

    +

  • (BINARY; one value, one V bits) Generate argument improvement + terms for AND and OR: VgT_ImproveAND4_TQ, + VgT_ImproveAND2_TQ, VgT_ImproveAND1_TQ, + VgT_ImproveOR4_TQ, VgT_ImproveOR2_TQ, + VgT_ImproveOR1_TQ. These help out with AND and OR + operations. AND and OR have the inconvenient property that the + definedness of the result depends on the actual values of the + arguments as well as their definedness. At the bit level: +
    1 AND undefined = undefined, but +
    0 AND undefined = 0, and similarly +
    0 OR undefined = undefined, but +
    1 OR undefined = 1. +
    +

    + It turns out that gcc (quite legitimately) generates code which + relies on this fact, so we have to model it properly in order to + avoid flooding users with spurious value errors. The ultimate + definedness result of AND and OR is calculated using + UifU on the definedness of the arguments, but we + also DifD in some "improvement" terms which + take into account the above phenomena. +

    + ImproveAND takes as its first argument the actual + value of an argument to AND (the T) and the definedness of that + argument (the Q), and returns a V-bit vector which is defined (0) + for bits which have value 0 and are defined; this, when + DifD into the final result causes those bits to be + defined even if the corresponding bit in the other argument is undefined. +

    + The ImproveOR ops do the dual thing for OR + arguments. Note that XOR does not have this property that one + argument can make the other irrelevant, so there is no need for + such complexity for XOR. +

+ +

+That's all the tag ops. If you stare at this long enough, and then +run Valgrind and stare at the pre- and post-instrumented ucode, it +should be fairly obvious how the instrumentation machinery hangs +together. + +

+One point, if you do this: in order to make it easy to differentiate +TempRegs carrying values from TempRegs +carrying V bit vectors, Valgrind prints the former as (for example) +t28 and the latter as q28; the fact that +they carry the same number serves to indicate their relationship. +This is purely for the convenience of the human reader; the register +allocator and code generator don't regard them as different. + + +

Translation into UCode

+ +VG_(disBB) allocates a new UCodeBlock and +then uses disInstr to translate x86 instructions one at a +time into UCode, dumping the result in the UCodeBlock. +This goes on until a control-flow transfer instruction is encountered. + +

+Despite the large size of vg_to_ucode.c, this translation +is really very simple. Each x86 instruction is translated entirely +independently of its neighbours, merrily allocating new +TempRegs as it goes. The idea is to have a simple +translator -- in reality, no more than a macro-expander -- and the +resulting bad UCode translation is cleaned up by the UCode +optimisation phase which follows. To give you an idea of some x86 +instructions and their translations (this is a complete basic block, +as Valgrind sees it): +

+        0x40435A50:  incl %edx
+
+           0: GETL      %EDX, t0
+           1: INCL      t0  (-wOSZAP)
+           2: PUTL      t0, %EDX
+
+        0x40435A51:  movsbl (%edx),%eax
+
+           3: GETL      %EDX, t2
+           4: LDB       (t2), t2
+           5: WIDENL_Bs t2
+           6: PUTL      t2, %EAX
+
+        0x40435A54:  testb $0x20, 1(%ecx,%eax,2)
+
+           7: GETL      %EAX, t6
+           8: GETL      %ECX, t8
+           9: LEA2L     1(t8,t6,2), t4
+          10: LDB       (t4), t10
+          11: MOVB      $0x20, t12
+          12: ANDB      t12, t10  (-wOSZACP)
+          13: INCEIPo   $9
+
+        0x40435A59:  jnz-8 0x40435A50
+
+          14: Jnzo      $0x40435A50  (-rOSZACP)
+          15: JMPo      $0x40435A5B
+
+ +

+Notice how the block always ends with an unconditional jump to the +next block. This is a bit unnecessary, but makes many things simpler. + +

+Most x86 instructions turn into sequences of GET, +PUT, LEA1, LEA2, +LOAD and STORE. Some complicated ones +however rely on calling helper bits of code in +vg_helpers.S. The ucode instructions PUSH, +POP, CALL, CALLM_S and +CALLM_E support this. The calling convention is somewhat +ad-hoc and is not the C calling convention. The helper routines must +save all integer registers, and the flags, that they use. Args are +passed on the stack underneath the return address, as usual, and if +result(s) are to be returned, it (they) are either placed in dummy arg +slots created by the ucode PUSH sequence, or just +overwrite the incoming args. + +

+In order that the instrumentation mechanism can handle calls to these +helpers, VG_(saneUCodeBlock) enforces the following +restrictions on calls to helpers: + +

    +
  • Each CALL uinstr must be bracketed by a preceding + CALLM_S marker (dummy uinstr) and a trailing + CALLM_E marker. These markers are used by the + instrumentation mechanism later to establish the boundaries of the + PUSH, POP and CLEAR + sequences for the call. +

    +

  • PUSH, POP and CLEAR + may only appear inside sections bracketed by CALLM_S + and CALLM_E, and nowhere else. +

    +

  • In any such bracketed section, no two PUSH insns may + push the same TempReg. Dually, no two + POPs may pop the same TempReg. +

    +

  • Finally, although this is not checked, args should be removed from + the stack with CLEAR, rather than POPs + into a TempReg which is not subsequently used. This + is because the instrumentation mechanism assumes that all values + POPped from the stack are actually used. +
+ +Some of the translations may appear to have redundant +TempReg-to-TempReg moves. This helps the +next phase, UCode optimisation, to generate better code. + + + +

UCode optimisation

+ +UCode is then subjected to an improvement pass +(vg_improve()), which blurs the boundaries between the +translations of the original x86 instructions. It's pretty +straightforward. Three transformations are done: + +
    +
  • Redundant GET elimination. Actually, more general + than that -- eliminates redundant fetches of ArchRegs. In our + running example, uinstr 3 GETs %EDX into + t2 despite the fact that, by looking at the previous + uinstr, it is already in t0. The GET is + therefore removed, and t2 renamed to t0. + Assuming t0 is allocated to a host register, it means + the simulated %EDX will exist in a host CPU register + for more than one simulated x86 instruction, which seems to me to + be a highly desirable property. +

    + There is some mucking around to do with subregisters; + %AL vs %AH %AX vs + %EAX etc. I can't remember how it works, but in + general we are very conservative, and these tend to invalidate the + caching. +

    +

  • Redundant PUT elimination. This annuls + PUTs of values back to simulated CPU registers if a + later PUT would overwrite the earlier + PUT value, and there are no intervening reads of the + simulated register (ArchReg). +

    + As before, we are paranoid when faced with subregister references. + Also, PUTs of %ESP are never annulled, + because it is vital the instrumenter always has an up-to-date + %ESP value available, since %ESP changes + affect addressability of the memory around the simulated stack + pointer. +

    + The implication of the above paragraph is that the simulated + machine's registers are only lazily updated once the above two + optimisation phases have run, with the exception of + %ESP. TempRegs go dead at the end of + every basic block, from which it is inferable that any + TempReg caching a simulated CPU reg is flushed (back + into the relevant VG_(baseBlock) slot) at the end of + every basic block. The further implication is that the simulated + registers are only up-to-date in between basic blocks, and not + at arbitrary points inside basic blocks. And the consequence of + that is that we can only deliver signals to the client in between + basic blocks. None of this seems any problem in practice. +

    +

  • Finally there is a simple def-use thing for condition codes. If + an earlier uinstr writes the condition codes, and the next uinsn + along which actually cares about the condition codes writes the + same or larger set of them, but does not read any, the earlier + uinsn is marked as not writing any condition codes. This saves + a lot of redundant cond-code saving and restoring. +
+ +The effect of these transformations on our short block is rather +unexciting, and shown below. On longer basic blocks they can +dramatically improve code quality. + +
+at 3: delete GET, rename t2 to t0 in (4 .. 6)
+at 7: delete GET, rename t6 to t0 in (8 .. 9)
+at 1: annul flag write OSZAP due to later OSZACP
+
+Improved code:
+           0: GETL      %EDX, t0
+           1: INCL      t0
+           2: PUTL      t0, %EDX
+           4: LDB       (t0), t0
+           5: WIDENL_Bs t0
+           6: PUTL      t0, %EAX
+           8: GETL      %ECX, t8
+           9: LEA2L     1(t8,t0,2), t4
+          10: LDB       (t4), t10
+          11: MOVB      $0x20, t12
+          12: ANDB      t12, t10  (-wOSZACP)
+          13: INCEIPo   $9
+          14: Jnzo      $0x40435A50  (-rOSZACP)
+          15: JMPo      $0x40435A5B
+
+ +

UCode instrumentation

+ +Once you understand the meaning of the instrumentation uinstrs, +discussed in detail above, the instrumentation scheme is fairly +straightforward. Each uinstr is instrumented in isolation, and the +instrumentation uinstrs are placed before the original uinstr. +Our running example continues below. I have placed a blank line +after every original ucode, to make it easier to see which +instrumentation uinstrs correspond to which originals. +

+As mentioned somewhere above, TempRegs carrying values +have names like t28, and each one has a shadow carrying +its V bits, with names like q28. This pairing aids in +reading instrumented ucode. + +

+One decision about all this is where to have "observation points", +that is, where to check that V bits are valid. I use a minimalistic +scheme, only checking where a failure of validity could cause the +original program to (seg)fault. So the use of values as memory +addresses causes a check, as do conditional jumps (these cause a check +on the definedness of the condition codes). And arguments +PUSHed for helper calls are checked, hence the weird +restrictions on helper call preambles described above. +

+Another decision is that once a value is tested, it is thereafter +regarded as defined, so that we do not emit multiple undefined-value +errors for the same undefined value. That means that +TESTV uinstrs are always followed by SETV +on the same (shadow) TempRegs. Most of these +SETVs are redundant and are removed by the +post-instrumentation cleanup phase. + +

+The instrumentation for calling helper functions deserves further +comment. The definedness of results from a helper is modelled using +just one V bit. So, in short, we do pessimising casts of the +definedness of all the args, down to a single bit, and then +UifU these bits together. So this single V bit will say +"undefined" if any part of any arg is undefined. This V bit is then +pessimally cast back up to the result(s) sizes, as needed. If, by +seeing that all the args are got rid of with CLEAR and +none with POP, Valgrind sees that the result of the call +is not actually used, it immediately examines the result V bit with a +TESTV -- SETV pair. If it did not do this, +there would be no observation point to detect that some of the +args to the helper were undefined. Of course, if the helper's results +are indeed used, we don't do this, since the result usage will +presumably cause the result definedness to be checked at some suitable +future point. +

+In general Valgrind tries to track definedness on a bit-for-bit basis, +but as the above para shows, for calls to helpers we throw in the +towel and approximate down to a single bit. This is because it's too +complex and difficult to track bit-level definedness through complex +ops such as integer multiply and divide, and in any case there are no +reasonable code fragments which attempt to (eg) multiply two +partially-defined values and end up with something meaningful, so +there seems little point in modelling multiplies, divides, etc, in +that level of detail. +

+Integer loads and stores are instrumented with firstly a test of the +definedness of the address, followed by a LOADV or +STOREV respectively. These turn into calls to +(for example) VG_(helperc_LOADV4). These helpers do two +things: they perform an address-valid check, and they load or store V +bits from/to the relevant address in the (simulated V-bit) memory. + +

+FPU loads and stores are different. As above the definedness of the +address is first tested. However, the helper routine for FPU loads +(VGM_(fpu_read_check)) emits an error if either the +address is invalid or the referenced area contains undefined values. +It has to do this because we do not simulate the FPU at all, and so +cannot track definedness of values loaded into it from memory, so we +have to check them as soon as they are loaded into the FPU, ie, at +this point. We notionally assume that everything in the FPU is +defined. + +

+It follows therefore that FPU writes first check the definedness of +the address, then the validity of the address, and finally mark the +written bytes as well-defined. + +

+If anyone is inspired to extend Valgrind to MMX/SSE insns, I suggest +you use the same trick. It works provided that the FPU/MMX unit is +not used merely as a conduit to copy partially undefined data from +one place in memory to another. Unfortunately the integer CPU is used +like that (when copying C structs with holes, for example) and this is +the cause of much of the elaborateness of the instrumentation here +described. +

+vg_instrument() in vg_translate.c actually +does the instrumentation. There are comments explaining how each +uinstr is handled, so we do not repeat that here. As explained +already, it is bit-accurate, except for calls to helper functions. +Unfortunately the x86 insns bt/bts/btc/btr are done by +helper fns, so bit-level accuracy is lost there. This should be fixed +by doing them inline; it will probably require adding a couple new +uinstrs. Also, left and right rotates through the carry flag (x86 +rcl and rcr) are approximated via a single +V bit; so far this has not caused anyone to complain. The +non-carry rotates, rol and ror, are much +more common and are done exactly. Re-visiting the instrumentation for +AND and OR, they seem rather verbose, and I wonder if it could be done +more concisely now. + +

+The lowercase o on many of the uopcodes in the running +example indicates that the size field is zero, usually meaning a +single-bit operation. + +

+Anyroads, the post-instrumented version of our running example looks +like this: + +

+Instrumented code:
+           0: GETVL     %EDX, q0
+           1: GETL      %EDX, t0
+
+           2: TAG1o     q0 = Left4 ( q0 )
+           3: INCL      t0
+
+           4: PUTVL     q0, %EDX
+           5: PUTL      t0, %EDX
+
+           6: TESTVL    q0
+           7: SETVL     q0
+           8: LOADVB    (t0), q0
+           9: LDB       (t0), t0
+
+          10: TAG1o     q0 = SWiden14 ( q0 )
+          11: WIDENL_Bs t0
+
+          12: PUTVL     q0, %EAX
+          13: PUTL      t0, %EAX
+
+          14: GETVL     %ECX, q8
+          15: GETL      %ECX, t8
+
+          16: MOVL      q0, q4
+          17: SHLL      $0x1, q4
+          18: TAG2o     q4 = UifU4 ( q8, q4 )
+          19: TAG1o     q4 = Left4 ( q4 )
+          20: LEA2L     1(t8,t0,2), t4
+
+          21: TESTVL    q4
+          22: SETVL     q4
+          23: LOADVB    (t4), q10
+          24: LDB       (t4), t10
+
+          25: SETVB     q12
+          26: MOVB      $0x20, t12
+
+          27: MOVL      q10, q14
+          28: TAG2o     q14 = ImproveAND1_TQ ( t10, q14 )
+          29: TAG2o     q10 = UifU1 ( q12, q10 )
+          30: TAG2o     q10 = DifD1 ( q14, q10 )
+          31: MOVL      q12, q14
+          32: TAG2o     q14 = ImproveAND1_TQ ( t12, q14 )
+          33: TAG2o     q10 = DifD1 ( q14, q10 )
+          34: MOVL      q10, q16
+          35: TAG1o     q16 = PCast10 ( q16 )
+          36: PUTVFo    q16
+          37: ANDB      t12, t10  (-wOSZACP)
+
+          38: INCEIPo   $9
+
+          39: GETVFo    q18
+          40: TESTVo    q18
+          41: SETVo     q18
+          42: Jnzo      $0x40435A50  (-rOSZACP)
+
+          43: JMPo      $0x40435A5B
+
+ + +

UCode post-instrumentation cleanup

+ +

+This pass, coordinated by vg_cleanup(), removes redundant +definedness computation created by the simplistic instrumentation +pass. It consists of two passes, +vg_propagate_definedness() followed by +vg_delete_redundant_SETVs. + +

+vg_propagate_definedness() is a simple +constant-propagation and constant-folding pass. It tries to determine +which TempRegs containing V bits will always indicate +"fully defined", and it propagates this information as far as it can, +and folds out as many operations as possible. For example, the +instrumentation for an ADD of a literal to a variable quantity will be +reduced down so that the definedness of the result is simply the +definedness of the variable quantity, since the literal is by +definition fully defined. + +

+vg_delete_redundant_SETVs removes SETVs on +shadow TempRegs for which the next action is a write. +I don't think there's anything else worth saying about this; it is +simple. Read the sources for details. + +

+So the cleaned-up running example looks like this. As above, I have +inserted line breaks after every original (non-instrumentation) uinstr +to aid readability. As with straightforward ucode optimisation, the +results in this block are undramatic because it is so short; longer +blocks benefit more because they have more redundancy which gets +eliminated. + + +

+at 29: delete UifU1 due to defd arg1
+at 32: change ImproveAND1_TQ to MOV due to defd arg2
+at 41: delete SETV
+at 31: delete MOV
+at 25: delete SETV
+at 22: delete SETV
+at 7: delete SETV
+
+           0: GETVL     %EDX, q0
+           1: GETL      %EDX, t0
+
+           2: TAG1o     q0 = Left4 ( q0 )
+           3: INCL      t0
+
+           4: PUTVL     q0, %EDX
+           5: PUTL      t0, %EDX
+
+           6: TESTVL    q0
+           8: LOADVB    (t0), q0
+           9: LDB       (t0), t0
+
+          10: TAG1o     q0 = SWiden14 ( q0 )
+          11: WIDENL_Bs t0
+
+          12: PUTVL     q0, %EAX
+          13: PUTL      t0, %EAX
+
+          14: GETVL     %ECX, q8
+          15: GETL      %ECX, t8
+
+          16: MOVL      q0, q4
+          17: SHLL      $0x1, q4
+          18: TAG2o     q4 = UifU4 ( q8, q4 )
+          19: TAG1o     q4 = Left4 ( q4 )
+          20: LEA2L     1(t8,t0,2), t4
+
+          21: TESTVL    q4
+          23: LOADVB    (t4), q10
+          24: LDB       (t4), t10
+
+          26: MOVB      $0x20, t12
+
+          27: MOVL      q10, q14
+          28: TAG2o     q14 = ImproveAND1_TQ ( t10, q14 )
+          30: TAG2o     q10 = DifD1 ( q14, q10 )
+          32: MOVL      t12, q14
+          33: TAG2o     q10 = DifD1 ( q14, q10 )
+          34: MOVL      q10, q16
+          35: TAG1o     q16 = PCast10 ( q16 )
+          36: PUTVFo    q16
+          37: ANDB      t12, t10  (-wOSZACP)
+
+          38: INCEIPo   $9
+          39: GETVFo    q18
+          40: TESTVo    q18
+          42: Jnzo      $0x40435A50  (-rOSZACP)
+
+          43: JMPo      $0x40435A5B
+
+ + +

Translation from UCode

+ +This is all very simple, even though vg_from_ucode.c +is a big file. Position-independent x86 code is generated into +a dynamically allocated array emitted_code; this is +doubled in size when it overflows. Eventually the array is handed +back to the caller of VG_(translate), who must copy +the result into TC and TT, and free the array. + +

+This file is structured into four layers of abstraction, which, +thankfully, are glued back together with extensive +__inline__ directives. From the bottom upwards: + +

    +
  • Address-mode emitters, emit_amode_regmem_reg et al. +

    +

  • Emitters for specific x86 instructions. There are quite a lot of + these, with names such as emit_movv_offregmem_reg. + The v suffix is Intel parlance for a 16/32 bit insn; + there are also b suffixes for 8 bit insns. +

    +

  • The next level up are the synth_* functions, which + synthesise possibly a sequence of raw x86 instructions to do some + simple task. Some of these are quite complex because they have to + work around Intel's silly restrictions on subregister naming. See + synth_nonshiftop_reg_reg for example. +

    +

  • Finally, at the top of the heap, we have + emitUInstr(), + which emits code for a single uinstr. +
+ +

+Some comments: +

    +
  • The hack for FPU instructions becomes apparent here. To do a + FPU ucode instruction, we load the simulated FPU's + state from its VG_(baseBlock) into the real FPU + using an x86 frstor insn, do the ucode + FPU insn on the real CPU, and write the updated FPU + state back into VG_(baseBlock) using an + fnsave instruction. This is pretty brutal, but is + simple and it works, and even seems tolerably efficient. There is + no attempt to cache the simulated FPU state in the real FPU over + multiple back-to-back ucode FPU instructions. +

    + FPU_R and FPU_W are also done this way, + with the minor complication that we need to patch in some + addressing mode bits so the resulting insn knows the effective + address to use. This is easy because of the regularity of the x86 + FPU instruction encodings. +

    +

  • An analogous trick is done with ucode insns which claim, in their + flags_r and flags_w fields, that they + read or write the simulated %EFLAGS. For such cases + we first copy the simulated %EFLAGS into the real + %eflags, then do the insn, then, if the insn says it + writes the flags, copy back to %EFLAGS. This is a + bit expensive, which is why the ucode optimisation pass goes to + some effort to remove redundant flag-update annotations. +
+ +

+And so ... that's the end of the documentation for the instrumenting +translator! It's really not that complex, because it's composed as a +sequence of simple(ish) self-contained transformations on +straight-line blocks of code. + + +

Top-level dispatch loop

+ +Urk. In VG_(toploop). This is basically boring and +unsurprising, not to mention fiddly and fragile. It needs to be +cleaned up. + +

+Perhaps the only surprise is that the whole thing is run +on top of a setjmp-installed exception handler, because, +supposing a translation got a segfault, we have to bail out of the +Valgrind-supplied exception handler VG_(oursignalhandler) +and immediately start running the client's segfault handler, if it has +one. In particular we can't finish the current basic block and then +deliver the signal at some convenient future point, because signals +like SIGILL, SIGSEGV and SIGBUS mean that the faulting insn should not +simply be re-tried. (I'm sure there is a clearer way to explain this). + + +

Exceptions, creating new translations

+

Self-modifying code

+ +

Lazy updates of the simulated program counter

+ +Simulated %EIP is not updated after every simulated x86 +insn as this was regarded as too expensive. Instead ucode +INCEIP insns move it along as and when necessary. +Currently we don't allow it to fall more than 4 bytes behind reality +(see VG_(disBB) for the way this works). +

+Note that %EIP is always brought up to date by the inner +dispatch loop in VG_(dispatch), so that if the client +takes a fault we know at least which basic block this happened in. + + +

The translation cache and translation table

+ +

Signals

+ +Horrible, horrible. vg_signals.c. +Basically, since we have to intercept all system +calls anyway, we can see when the client tries to install a signal +handler. If it does so, we make a note of what the client asked to +happen, and ask the kernel to route the signal to our own signal +handler, VG_(oursignalhandler). This simply notes the +delivery of signals, and returns. + +

+Every 1000 basic blocks, we see if more signals have arrived. If so, +VG_(deliver_signals) builds signal delivery frames on the +client's stack, and allows their handlers to be run. Valgrind places +in these signal delivery frames a bogus return address, +VG_(signalreturn_bogusRA), and checks all jumps to see +if any jump to it. If so, this is a sign that a signal handler is +returning, and if so Valgrind removes the relevant signal frame from +the client's stack, restores from the signal frame the simulated +state before the signal was delivered, and allows the client to run +onwards. We have to do it this way because some signal handlers never +return, they just longjmp(), which nukes the signal +delivery frame. +

+The Linux kernel has a different but equally horrible hack for +detecting signal handler returns. Discovering it is left as an +exercise for the reader. + + + +

Errors, error contexts, error reporting, suppressions

+

Client malloc/free

+

Low-level memory management

+

A and V bitmaps

+

Symbol table management

+

Dealing with system calls

+

Namespace management

+

GDB attaching

+

Non-dependence on glibc or anything else

+

The leak detector

+

Performance problems

+

Continuous sanity checking

+

Tracing, or not tracing, child processes

+

Assembly glue for syscalls

+ + +
+ +

Extensions

+ +Some comments about Stuff To Do. + +

Bugs

+ +Stephan Kulow and Marc Mutz report problems with kmail in KDE 3 CVS +(RC2 ish) when run on Valgrind. Stephan has it deadlocking; Marc has +it looping at startup. I can't repro either behaviour. Needs +repro-ing and fixing. + + +

Threads

+ +Doing a good job of thread support strikes me as almost a +research-level problem. The central issues are how to do fast cheap +locking of the VG_(primary_map) structure, whether or not +accesses to the individual secondary maps need locking, what +race-condition issues result, and whether the already-nasty mess that +is the signal simulator needs further hackery. + +

+I realise that threads are the most-frequently-requested feature, and +I am thinking about it all. If you have guru-level understanding of +fast mutual exclusion mechanisms and race conditions, I would be +interested in hearing from you. + + +

Verification suite

+ +Directory tests/ contains various ad-hoc tests for +Valgrind. However, there is no systematic verification or regression +suite, that, for example, exercises all the stuff in +vg_memory.c, to ensure that illegal memory accesses and +undefined value uses are detected as they should be. It would be good +to have such a suite. + + +

Porting to other platforms

+ +It would be great if Valgrind was ported to FreeBSD and x86 NetBSD, +and to x86 OpenBSD, if it's possible (doesn't OpenBSD use a.out-style +executables, not ELF ?) + +

+The main difficulties, for an x86-ELF platform, seem to be: + +

    +
  • You'd need to rewrite the /proc/self/maps parser + (vg_procselfmaps.c). + Easy. +

    +

  • You'd need to rewrite vg_syscall_mem.c, or, more + specifically, provide one for your OS. This is tedious, but you + can implement syscalls on demand, and the Linux kernel interface + is, for the most part, going to look very similar to the *BSD + interfaces, so it's really a copy-paste-and-modify-on-demand job. + As part of this, you'd need to supply a new + vg_kerneliface.h file. +

    +

  • You'd also need to change the syscall wrappers for Valgrind's + internal use, in vg_mylibc.c. +
+ +All in all, I think a port to x86-ELF *BSDs is not really very +difficult, and in some ways I would like to see it happen, because +that would force a more clear factoring of Valgrind into platform +dependent and independent pieces. Not to mention, *BSD folks also +deserve to use Valgrind just as much as the Linux crew do. + + +

+


+ +

Easy stuff which ought to be done

+ +

MMX instructions

+ +MMX insns should be supported, using the same trick as for FPU insns. +If the MMX registers are not used to copy uninitialised junk from one +place to another in memory, this means we don't have to actually +simulate the internal MMX unit state, so the FPU hack applies. This +should be fairly easy. + + + +

Fix stabs-info reader

+ +The machinery in vg_symtab2.c which reads "stabs" style +debugging info is pretty weak. It usually correctly translates +simulated program counter values into line numbers and procedure +names, but the file name is often completely wrong. I think the +logic used to parse "stabs" entries is weak. It should be fixed. +The simplest solution, IMO, is to copy either the logic or simply the +code out of GNU binutils which does this; since GDB can clearly get it +right, binutils (or GDB?) must have code to do this somewhere. + + + + + +

BT/BTC/BTS/BTR

+ +These are x86 instructions which test, complement, set, or reset, a +single bit in a word. At the moment they are both incorrectly +implemented and incorrectly instrumented. + +

+The incorrect instrumentation is due to use of helper functions. This +means we lose bit-level definedness tracking, which could wind up +giving spurious uninitialised-value use errors. The Right Thing to do +is to invent a couple of new UOpcodes, I think GET_BIT +and SET_BIT, which can be used to implement all 4 x86 +insns, get rid of the helpers, and give bit-accurate instrumentation +rules for the two new UOpcodes. + +

+I realised the other day that they are mis-implemented too. The x86 +insns take a bit-index and a register or memory location to access. +For registers the bit index clearly can only be in the range zero to +register-width minus 1, and I assumed the same applied to memory +locations too. But evidently not; for memory locations the index can +be arbitrary, and the processor will index arbitrarily into memory as +a result. This too should be fixed. Sigh. Presumably indexing +outside the immediate word is not actually used by any programs yet +tested on Valgrind, for otherwise they (presumably) would simply not +work at all. If you plan to hack on this, first check the Intel docs +to make sure my understanding is really correct. + + + +

Using PREFETCH instructions

+ +Here's a small but potentially interesting project for performance +junkies. Experiments with valgrind's code generator and optimiser(s) +suggest that reducing the number of instructions executed in the +translations and mem-check helpers gives disappointingly small +performance improvements. Perhaps this is because performance of +Valgrindified code is limited by cache misses. After all, each read +in the original program now gives rise to at least three reads, one +for the VG_(primary_map), one of the resulting +secondary, and the original. Not to mention, the instrumented +translations are 13 to 14 times larger than the originals. All in all +one would expect the memory system to be hammered to hell and then +some. + +

+So here's an idea. An x86 insn involving a read from memory, after +instrumentation, will turn into ucode of the following form: +

+    ... calculate effective addr, into ta and qa ...
+    TESTVL qa             -- is the addr defined?
+    LOADV (ta), qloaded   -- fetch V bits for the addr
+    LOAD  (ta), tloaded   -- do the original load
+
+At the point where the LOADV is done, we know the actual +address (ta) from which the real LOAD will +be done. We also know that the LOADV will take around +20 x86 insns to do. So it seems plausible that doing a prefetch of +ta just before the LOADV might just avoid a +miss at the LOAD point, and that might be a significant +performance win. + +

+Prefetch insns are notoriously temperamental, more often than not +making things worse rather than better, so this would require +considerable fiddling around. It's complicated because Intels and +AMDs have different prefetch insns with different semantics, so that +too needs to be taken into account. As a general rule, even placing +the prefetches before the LOADV insn is too near the +LOAD; the ideal distance is apparently circa 200 CPU +cycles. So it might be worth having another analysis/transformation +pass which pushes prefetches as far back as possible, hopefully +immediately after the effective address becomes available. + +

+Doing too many prefetches is also bad because they soak up bus +bandwidth / cpu resources, so some cleverness in deciding which loads +to prefetch and which to not might be helpful. One can imagine not +prefetching client-stack-relative (%EBP or +%ESP) accesses, since the stack in general tends to show +good locality anyway. + +

+There's quite a lot of experimentation to do here, but I think it +might make an interesting week's work for someone. + +

+As of 15-ish March 2002, I've started to experiment with this, using +the AMD prefetch/prefetchw insns. + + + +

User-defined permission ranges

+ +This is quite a large project -- perhaps a month's hacking for a +capable hacker to do a good job -- but it's potentially very +interesting. The outcome would be that Valgrind could detect a +whole class of bugs which it currently cannot. + +

+The presentation falls into two pieces. + +

+Part 1: user-defined address-range permission setting +

+ +Valgrind intercepts the client's malloc, +free, etc calls, watches system calls, and watches the +stack pointer move. This is currently the only way it knows about +which addresses are valid and which not. Sometimes the client program +knows extra information about its memory areas. For example, the +client could at some point know that all elements of an array are +out-of-date. We would like to be able to convey to Valgrind this +information that the array is now addressable-but-uninitialised, so +that Valgrind can then warn if elements are used before they get new +values. + +

+What I would like are some macros like this: +

+   VALGRIND_MAKE_NOACCESS(addr, len)
+   VALGRIND_MAKE_WRITABLE(addr, len)
+   VALGRIND_MAKE_READABLE(addr, len)
+
+and also, to check that memory is addressible/initialised, +
+   VALGRIND_CHECK_ADDRESSIBLE(addr, len)
+   VALGRIND_CHECK_INITIALISED(addr, len)
+
+ +

+I then include in my sources a header defining these macros, rebuild +my app, run under Valgrind, and get user-defined checks. + +

+Now here's a neat trick. It's a nuisance to have to re-link the app +with some new library which implements the above macros. So the idea +is to define the macros so that the resulting executable is still +completely stand-alone, and can be run without Valgrind, in which case +the macros do nothing, but when run on Valgrind, the Right Thing +happens. How to do this? The idea is for these macros to turn into a +piece of inline assembly code, which (1) has no effect when run on the +real CPU, (2) is easily spotted by Valgrind's JITter, and (3) no sane +person would ever write, which is important for avoiding false matches +in (2). So here's a suggestion: +

+   VALGRIND_MAKE_NOACCESS(addr, len)
+
+becomes (roughly speaking) +
+   movl addr, %eax
+   movl len,  %ebx
+   movl $1,   %ecx   -- 1 describes the action; MAKE_WRITABLE might be
+                     -- 2, etc
+   rorl $13, %ecx
+   rorl $19, %ecx
+   rorl $11, %eax
+   rorl $21, %eax
+
+The rotate sequences have no effect, and it's unlikely they would +appear for any other reason, but they define a unique byte-sequence +which the JITter can easily spot. Using the operand constraints +section at the end of a gcc inline-assembly statement, we can tell gcc +that the assembly fragment kills %eax, %ebx, +%ecx and the condition codes, so this fragment is made +harmless when not running on Valgrind, runs quickly when not on +Valgrind, and does not require any other library support. + + +

+Part 2: using it to detect interference between stack variables +

+ +Currently Valgrind cannot detect errors of the following form: +

+void fooble ( void )
+{
+   int a[10];
+   int b[10];
+   a[10] = 99;
+}
+
+Now imagine rewriting this as +
+void fooble ( void )
+{
+   int spacer0;
+   int a[10];
+   int spacer1;
+   int b[10];
+   int spacer2;
+   VALGRIND_MAKE_NOACCESS(&spacer0, sizeof(int));
+   VALGRIND_MAKE_NOACCESS(&spacer1, sizeof(int));
+   VALGRIND_MAKE_NOACCESS(&spacer2, sizeof(int));
+   a[10] = 99;
+}
+
+Now the invalid write is certain to hit spacer0 or +spacer1, so Valgrind will spot the error. + +

+There are two complications. + +

+The first is that we don't want to annotate sources by hand, so the +Right Thing to do is to write a C/C++ parser, annotator, prettyprinter +which does this automatically, and run it on post-CPP'd C/C++ source. +See http://www.cacheprof.org for an example of a system which +transparently inserts another phase into the gcc/g++ compilation +route. The parser/prettyprinter is probably not as hard as it sounds; +I would write it in Haskell, a powerful functional language well +suited to doing symbolic computation, with which I am intimately +familiar. There is already a C parser written in Haskell by someone in +the Haskell community, and that would probably be a good starting +point. + +

+The second complication is how to get rid of these +NOACCESS records inside Valgrind when the instrumented +function exits; after all, these refer to stack addresses and will +make no sense whatever when some other function happens to re-use the +same stack address range, probably shortly afterwards. I think I +would be inclined to define a special stack-specific macro +

+   VALGRIND_MAKE_NOACCESS_STACK(addr, len)
+
+which causes Valgrind to record the client's %ESP at the +time it is executed. Valgrind will then watch for changes in +%ESP and discard such records as soon as the protected +area is uncovered by an increase in %ESP. I hesitate +with this scheme only because it is potentially expensive, if there +are hundreds of such records, and considering that changes in +%ESP already require expensive messing with stack access +permissions. + +

+This is probably easier and more robust than for the instrumenter +program to try and spot all exit points for the procedure and place +suitable deallocation annotations there. Plus C++ procedures can +bomb out at any point if they get an exception, so spotting return +points at the source level just won't work at all. + +

+Although some work, it's all eminently doable, and it would make +Valgrind into an even-more-useful tool. + +

+Update: as of 17 March 2002, this (these hooks) are done. + + +

+ + diff --git a/helgrind/Makefile.am b/helgrind/Makefile.am new file mode 100644 index 000000000..00387927b --- /dev/null +++ b/helgrind/Makefile.am @@ -0,0 +1,80 @@ +SUBDIRS = demangle . docs tests + +valdir = $(libdir)/valgrind + +LDFLAGS = -Wl,-z -Wl,initfirst + +INCLUDES += -I$(srcdir)/demangle + +bin_SCRIPTS = valgrind + +val_DATA = linux22.supp linux24.supp + +EXTRA_DIST = $(val_DATA) \ + PATCHES_APPLIED ACKNOWLEDGEMENTS \ + README_KDE3_FOLKS \ + README_MISSING_SYSCALL_OR_IOCTL TODO + +val_PROGRAMS = valgrind.so valgrinq.so + +valgrinq_so_SOURCES = vg_valgrinq_dummy.c + +valgrind_so_SOURCES = \ + vg_clientmalloc.c \ + vg_clientperms.c \ + vg_demangle.c \ + vg_dispatch.S \ + vg_errcontext.c \ + vg_execontext.c \ + vg_from_ucode.c \ + vg_helpers.S \ + vg_main.c \ + vg_malloc2.c \ + vg_memory.c \ + vg_messages.c \ + vg_mylibc.c \ + vg_procselfmaps.c \ + vg_profile.c \ + vg_signals.c \ + vg_startup.S \ + vg_symtab2.c \ + vg_syscall_mem.c \ + vg_syscall.S \ + vg_to_ucode.c \ + vg_translate.c \ + vg_transtab.c \ + vg_valgrinq_dummy.c \ + vg_vtagops.c + +valgrind_so_LDADD = \ + demangle/cp-demangle.o \ + demangle/cplus-dem.o \ + demangle/dyn-string.o \ + demangle/safe-ctype.o + +include_HEADERS = valgrind.h + +noinst_HEADERS = \ + vg_kerneliface.h \ + vg_include.h \ + vg_version.h \ + vg_constants.h \ + vg_unsafe.h + + +install-data-hook: + cd ${valdir} && rm -f default.supp && $(LN_S) $(DEFAULT_SUPP) default.supp + +vg_memory.o: + $(COMPILE) -O2 -mpreferred-stack-boundary=2 -c $< + +vg_clientmalloc.o: + $(COMPILE) -fno-omit-frame-pointer -c $< + + +valgrind.so: $(valgrind_so_OBJECTS) + $(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \ + $(valgrind_so_OBJECTS) $(valgrind_so_LDADD) + +valgrinq.so: $(valgrinq_so_OBJECTS) + $(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS) diff --git a/include/valgrind.h b/include/valgrind.h new file mode 100644 index 000000000..5a32ab565 --- /dev/null +++ b/include/valgrind.h @@ -0,0 +1,156 @@ + +/* + This file is part 
of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + + +#ifndef __VALGRIND_H +#define __VALGRIND_H + + +/* This file is for inclusion into client (your!) code. + + You can use these macros to manipulate and query memory permissions + inside your own programs. + + The resulting executables will still run without Valgrind, just a + little bit more slowly than they otherwise would, but otherwise + unchanged. + + When run on Valgrind with --client-perms=yes, Valgrind observes + these macro calls and takes appropriate action. When run on + Valgrind with --client-perms=no (the default), Valgrind observes + these macro calls but does not take any action as a result. */ + + + +/* This defines the magic code sequence which the JITter spots and + handles magically. Don't look too closely at this; it will rot + your brain. 
+*/ +#define VALGRIND_MAGIC_SEQUENCE(_zzq_res,_zzq_code,_zzq_addr,_zzq_len) \ + asm volatile("movl %1, %%eax\n\t" \ + "movl %2, %%ebx\n\t" \ + "movl %3, %%ecx\n\t" \ + "roll $29, %%eax ; roll $3, %%eax\n\t" \ + "roll $27, %%eax ; roll $5, %%eax\n\t" \ + "movl %%eax, %0\t" \ + : "=r" (_zzq_res) \ + : "r" (_zzq_code), "r" (_zzq_addr), "r" (_zzq_len) \ + : "eax", "ebx", "ecx", "cc", "memory" \ + ); + + + +/* Client-code macros to manipulate the state of memory. */ + +/* Mark memory at _qzz_addr as unaddressible and undefined for + _qzz_len bytes. Returns an int handle pertaining to the block + descriptions Valgrind will use in subsequent error messages. */ +#define VALGRIND_MAKE_NOACCESS(_qzz_addr,_qzz_len) \ + ({unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res,1001,_qzz_addr,_qzz_len); \ + _qzz_res; \ + }) + +/* Similarly, mark memory at _qzz_addr as addressible but undefined + for _qzz_len bytes. */ +#define VALGRIND_MAKE_WRITABLE(_qzz_addr,_qzz_len) \ + ({unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res,1002,_qzz_addr,_qzz_len); \ + _qzz_res; \ + }) + +/* Similarly, mark memory at _qzz_addr as addressible and defined + for _qzz_len bytes. */ +#define VALGRIND_MAKE_READABLE(_qzz_addr,_qzz_len) \ + ({unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res,1003,_qzz_addr,_qzz_len); \ + _qzz_res; \ + }) + +/* Discard a block-description-handle obtained from the above three + macros. After this, Valgrind will no longer be able to relate + addressing errors to the user-defined block associated with the + handle. The permissions settings associated with the handle remain + in place. Returns 1 for an invalid handle, 0 for a valid + handle. */ +#define VALGRIND_DISCARD(_qzz_blkindex) \ + ({unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res,2004,0,_qzz_blkindex); \ + _qzz_res; \ + }) + + + +/* Client-code macros to check the state of memory. */ + +/* Check that memory at _qzz_addr is addressible for _qzz_len bytes. 
+ If suitable addressibility is not established, Valgrind prints an + error message and returns the address of the first offending byte. + Otherwise it returns zero. */ +#define VALGRIND_CHECK_WRITABLE(_qzz_addr,_qzz_len) \ + ({unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res,2002,_qzz_addr,_qzz_len); \ + _qzz_res; \ + }) + +/* Check that memory at _qzz_addr is addressible and defined for + _qzz_len bytes. If suitable addressibility and definedness are not + established, Valgrind prints an error message and returns the + address of the first offending byte. Otherwise it returns zero. */ +#define VALGRIND_CHECK_READABLE(_qzz_addr,_qzz_len) \ + ({unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res,2003,_qzz_addr,_qzz_len); \ + _qzz_res; \ + }) + + +/* Use this macro to force the definedness and addressibility of a + value to be checked. If suitable addressibility and definedness + are not established, Valgrind prints an error message and returns + the address of the first offending byte. Otherwise it returns + zero. */ +#define VALGRIND_CHECK_DEFINED(__lvalue) \ + (void) \ + VALGRIND_CHECK_READABLE( \ + (volatile unsigned char *)&(__lvalue), \ + (unsigned int)(sizeof (__lvalue))) + + + +/* Mark memory, intended to be on the client's stack, at _qzz_addr as + unaddressible and undefined for _qzz_len bytes. Does not return a + value. The record associated with this setting will be + automatically removed by Valgrind when the containing routine + exits. */ +#define VALGRIND_MAKE_NOACCESS_STACK(_qzz_addr,_qzz_len) \ + ({unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res,3001,_qzz_addr,_qzz_len); \ + _qzz_res; \ + }) + + +#endif diff --git a/include/vg_profile.c b/include/vg_profile.c new file mode 100644 index 000000000..ed10eded2 --- /dev/null +++ b/include/vg_profile.c @@ -0,0 +1,112 @@ + +/*--------------------------------------------------------------------*/ +/*--- Profiling machinery -- not for release builds! 
---*/ +/*--- vg_profile.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. 
+*/ + +#include "vg_include.h" + +#ifdef VG_PROFILE + +/* get rid of these, if possible */ +#include +#include + +#define VGP_PAIR(enumname,str) str +static const Char* vgp_names[VGP_M_CCS] = { VGP_LIST }; +#undef VGP_PAIR + +static Int vgp_nticks; +static Int vgp_counts[VGP_M_CCS]; +static Int vgp_entries[VGP_M_CCS]; + +static Int vgp_sp; +static VgpCC vgp_stack[VGP_M_STACK]; + +void VGP_(tick) ( int sigNo ) +{ + Int cc; + vgp_nticks++; + cc = vgp_stack[vgp_sp]; + vg_assert(cc >= 0 && cc < VGP_M_CCS); + vgp_counts[ cc ]++; +} + +void VGP_(init_profiling) ( void ) +{ + struct itimerval value; + Int i, ret; + + for (i = 0; i < VGP_M_CCS; i++) + vgp_counts[i] = vgp_entries[i] = 0; + + vgp_nticks = 0; + vgp_sp = -1; + VGP_(pushcc) ( VgpRun ); + + value.it_interval.tv_sec = 0; + value.it_interval.tv_usec = 10 * 1000; + value.it_value = value.it_interval; + + signal(SIGPROF, VGP_(tick) ); + ret = setitimer(ITIMER_PROF, &value, NULL); + if (ret != 0) VG_(panic)("vgp_init_profiling"); +} + +void VGP_(done_profiling) ( void ) +{ + Int i; + VG_(printf)("Profiling done, %d ticks\n", vgp_nticks); + for (i = 0; i < VGP_M_CCS; i++) + VG_(printf)("%2d: %4d (%3d %%%%) ticks, %8d entries for %s\n", + i, vgp_counts[i], + (Int)(1000.0 * (double)vgp_counts[i] / (double)vgp_nticks), + vgp_entries[i], + vgp_names[i] ); +} + +void VGP_(pushcc) ( VgpCC cc ) +{ + if (vgp_sp >= VGP_M_STACK-1) VG_(panic)("vgp_pushcc"); + vgp_sp++; + vgp_stack[vgp_sp] = cc; + vgp_entries[ cc ] ++; +} + +void VGP_(popcc) ( void ) +{ + if (vgp_sp <= 0) VG_(panic)("vgp_popcc"); + vgp_sp--; +} + +#endif /* VG_PROFILE */ + +/*--------------------------------------------------------------------*/ +/*--- end vg_profile.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/install-sh b/install-sh new file mode 100755 index 000000000..e9de23842 --- /dev/null +++ b/install-sh @@ -0,0 +1,251 @@ +#!/bin/sh +# +# install - install a program, script, or datafile +# This comes from 
X11R5 (mit/util/scripts/install.sh). +# +# Copyright 1991 by the Massachusetts Institute of Technology +# +# Permission to use, copy, modify, distribute, and sell this software and its +# documentation for any purpose is hereby granted without fee, provided that +# the above copyright notice appear in all copies and that both that +# copyright notice and this permission notice appear in supporting +# documentation, and that the name of M.I.T. not be used in advertising or +# publicity pertaining to distribution of the software without specific, +# written prior permission. M.I.T. makes no representations about the +# suitability of this software for any purpose. It is provided "as is" +# without express or implied warranty. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# `make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. It can only install one file at a time, a restriction +# shared with many OS's install programs. + + +# set DOITPROG to echo to test this script + +# Don't use :- since 4.3BSD and earlier shells don't like it. +doit="${DOITPROG-}" + + +# put in absolute paths if you don't have them in your path; or use env. vars. 
+ +mvprog="${MVPROG-mv}" +cpprog="${CPPROG-cp}" +chmodprog="${CHMODPROG-chmod}" +chownprog="${CHOWNPROG-chown}" +chgrpprog="${CHGRPPROG-chgrp}" +stripprog="${STRIPPROG-strip}" +rmprog="${RMPROG-rm}" +mkdirprog="${MKDIRPROG-mkdir}" + +transformbasename="" +transform_arg="" +instcmd="$mvprog" +chmodcmd="$chmodprog 0755" +chowncmd="" +chgrpcmd="" +stripcmd="" +rmcmd="$rmprog -f" +mvcmd="$mvprog" +src="" +dst="" +dir_arg="" + +while [ x"$1" != x ]; do + case $1 in + -c) instcmd="$cpprog" + shift + continue;; + + -d) dir_arg=true + shift + continue;; + + -m) chmodcmd="$chmodprog $2" + shift + shift + continue;; + + -o) chowncmd="$chownprog $2" + shift + shift + continue;; + + -g) chgrpcmd="$chgrpprog $2" + shift + shift + continue;; + + -s) stripcmd="$stripprog" + shift + continue;; + + -t=*) transformarg=`echo $1 | sed 's/-t=//'` + shift + continue;; + + -b=*) transformbasename=`echo $1 | sed 's/-b=//'` + shift + continue;; + + *) if [ x"$src" = x ] + then + src=$1 + else + # this colon is to work around a 386BSD /bin/sh bug + : + dst=$1 + fi + shift + continue;; + esac +done + +if [ x"$src" = x ] +then + echo "install: no input file specified" + exit 1 +else + true +fi + +if [ x"$dir_arg" != x ]; then + dst=$src + src="" + + if [ -d $dst ]; then + instcmd=: + chmodcmd="" + else + instcmd=mkdir + fi +else + +# Waiting for this to be detected by the "$instcmd $src $dsttmp" command +# might cause directories to be created, which would be especially bad +# if $src (and thus $dsttmp) contains '*'. 
+ + if [ -f $src -o -d $src ] + then + true + else + echo "install: $src does not exist" + exit 1 + fi + + if [ x"$dst" = x ] + then + echo "install: no destination specified" + exit 1 + else + true + fi + +# If destination is a directory, append the input filename; if your system +# does not like double slashes in filenames, you may need to add some logic + + if [ -d $dst ] + then + dst="$dst"/`basename $src` + else + true + fi +fi + +## this sed command emulates the dirname command +dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` + +# Make sure that the destination directory exists. +# this part is taken from Noah Friedman's mkinstalldirs script + +# Skip lots of stat calls in the usual case. +if [ ! -d "$dstdir" ]; then +defaultIFS=' +' +IFS="${IFS-${defaultIFS}}" + +oIFS="${IFS}" +# Some sh's can't handle IFS=/ for some reason. +IFS='%' +set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'` +IFS="${oIFS}" + +pathcomp='' + +while [ $# -ne 0 ] ; do + pathcomp="${pathcomp}${1}" + shift + + if [ ! -d "${pathcomp}" ] ; + then + $mkdirprog "${pathcomp}" + else + true + fi + + pathcomp="${pathcomp}/" +done +fi + +if [ x"$dir_arg" != x ] +then + $doit $instcmd $dst && + + if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi && + if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi && + if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi && + if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi +else + +# If we're going to rename the final executable, determine the name now. + + if [ x"$transformarg" = x ] + then + dstfile=`basename $dst` + else + dstfile=`basename $dst $transformbasename | + sed $transformarg`$transformbasename + fi + +# don't allow the sed command to completely eliminate the filename + + if [ x"$dstfile" = x ] + then + dstfile=`basename $dst` + else + true + fi + +# Make a temp file name in the proper directory. 
+ + dsttmp=$dstdir/#inst.$$# + +# Move or copy the file name to the temp name + + $doit $instcmd $src $dsttmp && + + trap "rm -f ${dsttmp}" 0 && + +# and set any options; do chmod last to preserve setuid bits + +# If any of these fail, we abort the whole thing. If we want to +# ignore errors from any of these, just make sure not to ignore +# errors from the above "$doit $instcmd $src $dsttmp" command. + + if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi && + if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi && + if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi && + if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi && + +# Now rename the file to the real destination. + + $doit $rmcmd -f $dstdir/$dstfile && + $doit $mvcmd $dsttmp $dstdir/$dstfile + +fi && + + +exit 0 diff --git a/lackey/Makefile.am b/lackey/Makefile.am new file mode 100644 index 000000000..00387927b --- /dev/null +++ b/lackey/Makefile.am @@ -0,0 +1,80 @@ +SUBDIRS = demangle . 
docs tests + +valdir = $(libdir)/valgrind + +LDFLAGS = -Wl,-z -Wl,initfirst + +INCLUDES += -I$(srcdir)/demangle + +bin_SCRIPTS = valgrind + +val_DATA = linux22.supp linux24.supp + +EXTRA_DIST = $(val_DATA) \ + PATCHES_APPLIED ACKNOWLEDGEMENTS \ + README_KDE3_FOLKS \ + README_MISSING_SYSCALL_OR_IOCTL TODO + +val_PROGRAMS = valgrind.so valgrinq.so + +valgrinq_so_SOURCES = vg_valgrinq_dummy.c + +valgrind_so_SOURCES = \ + vg_clientmalloc.c \ + vg_clientperms.c \ + vg_demangle.c \ + vg_dispatch.S \ + vg_errcontext.c \ + vg_execontext.c \ + vg_from_ucode.c \ + vg_helpers.S \ + vg_main.c \ + vg_malloc2.c \ + vg_memory.c \ + vg_messages.c \ + vg_mylibc.c \ + vg_procselfmaps.c \ + vg_profile.c \ + vg_signals.c \ + vg_startup.S \ + vg_symtab2.c \ + vg_syscall_mem.c \ + vg_syscall.S \ + vg_to_ucode.c \ + vg_translate.c \ + vg_transtab.c \ + vg_valgrinq_dummy.c \ + vg_vtagops.c + +valgrind_so_LDADD = \ + demangle/cp-demangle.o \ + demangle/cplus-dem.o \ + demangle/dyn-string.o \ + demangle/safe-ctype.o + +include_HEADERS = valgrind.h + +noinst_HEADERS = \ + vg_kerneliface.h \ + vg_include.h \ + vg_version.h \ + vg_constants.h \ + vg_unsafe.h + + +install-data-hook: + cd ${valdir} && rm -f default.supp && $(LN_S) $(DEFAULT_SUPP) default.supp + +vg_memory.o: + $(COMPILE) -O2 -mpreferred-stack-boundary=2 -c $< + +vg_clientmalloc.o: + $(COMPILE) -fno-omit-frame-pointer -c $< + + +valgrind.so: $(valgrind_so_OBJECTS) + $(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \ + $(valgrind_so_OBJECTS) $(valgrind_so_LDADD) + +valgrinq.so: $(valgrinq_so_OBJECTS) + $(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS) diff --git a/linux22.supp b/linux22.supp new file mode 100644 index 000000000..d50994c8c --- /dev/null +++ b/linux22.supp @@ -0,0 +1,270 @@ + +##----------------------------------------------------------------------## + +# Errors to suppress by default on a RedHat 6.2 system +# (glibc 2.1.3, XFree86 3.3.6) + +# Format of this file is: +# { +# name_of_suppression +# kind: 
one of Param Value1 Value2 Value4 Value8 +# Free Addr1 Addr2 Addr4 Addr8 +# (if Param: name of system call param, if Free: name of free-ing fn) +# caller0 name, or /name/of/so/file.so +# caller1 name, or ditto +# (optionally: caller2 name) +# (optionally: caller3 name) +# } + +##----------------------------------------------------------------------## + +{ + socketcall.connect(serv_addr)/__libc_connect/*(Param) + Param + socketcall.connect(serv_addr) + fun:__libc_connect + fun:* +} + +{ + strrchr/_dl_map_object_from_fd/_dl_map_object(Addr4) + Addr4 + fun:strrchr + fun:_dl_map_object_from_fd + fun:_dl_map_object +} + +{ + strrchr/_dl_map_object_from_fd/_dl_map_object(Value1) + Value1 + fun:strrchr + fun:_dl_map_object_from_fd + fun:_dl_map_object +} + +{ + llseek(result)/__libc_lseek64/_IO_file_seek(Param) + Param + llseek(result) + fun:__libc_lseek64 + fun:_IO_file_seek +} + +{ + __rawmemchr/_nl_*/*locale(Addr4) + Addr4 + fun:__rawmemchr + fun:_nl_* + fun:*locale +} + +# new ones for RH62 ls -l +{ + __strchrnul/__nss_database_lookup(Value0) + Value0 + fun:__strchrnul + fun:__nss_database_lookup +} +{ + __strchrnul/__gethostbyname_r(Value0) + Value0 + fun:__strchrnul + fun:__gethostbyname_r +} + +{ + strrchr/_dl_map*/_dl_map*(Value0) + Value0 + fun:strrchr + fun:_dl_map* + fun:_dl_map* +} + +{ + strchr/dl_open_worker/_dl_catch_error(Value0) + Value0 + fun:strchr + fun:dl_open_worker + fun:_dl_catch_error +} + +{ + __rawmemchr/???/__getgrgid_r(Value0) + Value0 + fun:__rawmemchr + fun:* + fun:__getgrgid_r +} + +{ + __rawmemchr/_nl_*/*locale*(Value0) + Value0 + fun:__rawmemchr + fun:_nl_* + fun:*locale* +} + +##----------------------------------------------------------------------## +## from a Debian machine running kernel 2.2.19 I believe +## I guess most of these are the same as above really, but +## Debian stripped their libc-2.1.3 + +{ + libc-2.1.3.so/libc-2.1.3.so/libc-2.1.3.so(Value0) + Value0 + obj:*libc-2.1.3.so + obj:*libc-2.1.3.so + obj:*libc-2.1.3.so +} + +{ 
+ strchr/libc-2.1.3.so(Value0) + Value0 + fun:*strchr* + obj:*libc-2.1.3.so +} + +{ + libc-2.1.3.so/libXt.so(Value0) + Value0 + obj:*libc-2.1.3.so + obj:*libXt.so* +} + +{ + socketcall.connect(serv_addr)/__libc_connect/*(Param) + Param + socketcall.connect(serv_addr) + obj:*libc-2.1.3.so + obj:*libX11.so* +} + + +##----------------------------------------------------------------------## + +{ + X11-Value0-0 + Value0 + obj:*libXt.so.6.0 + obj:*libXt.so.6.0 + obj:*libXt.so.6.0 +} +{ + X11-Value0-1 + Value0 + fun:__rawmemchr + obj:*libXt.so.6.0 + obj:*libXt.so.6.0 +} + + +# Suppressions for XFree86-3.3.X + +{ + X11-Addr4-1 + Addr4 + obj:/usr/X11R6/lib/libX11.so.6.1 + obj:/usr/X11R6/lib/libX11.so.6.1 + obj:/usr/X11R6/lib/libX11.so.6.1 +} + +{ + X11-Addr4-2 + Addr4 + obj:/usr/X11R6/lib/libX11.so.6.1 + obj:/usr/X11R6/lib/libX11.so.6.1 + obj:/usr/X11R6/lib/libXt.so.6.0 +} + +{ + X11-Addr4-3 + Addr4 + obj:/usr/X11R6/lib/libXt.so.6.0 + obj:/usr/X11R6/lib/libXt.so.6.0 + obj:/usr/X11R6/lib/libXt.so.6.0 +} + +{ + X11-Addr4-4 + Addr4 + obj:/usr/X11R6/lib/libX11.so.6.1 + obj:/usr/X11R6/lib/libXt.so.6.0 + obj:/usr/X11R6/lib/libXt.so.6.0 +} + +{ + X11-Addr4-5 + Addr4 + fun:__rawmemchr + obj:/usr/X11R6/lib/libXt.so.6.0 + obj:/usr/X11R6/lib/libXt.so.6.0 +} + +{ + X11-Addr4-6 + Addr4 + obj:/usr/X11R6/lib/libXmu.so.6.0 + obj:/usr/X11R6/lib/libXmu.so.6.0 + obj:/usr/X11R6/lib/libXt.so.6.0 +} + +{ + X11-Addr4-7 + Addr4 + obj:/usr/X11R6/lib/libXt.so.6.0 + obj:/usr/X11R6/lib/libXt.so.6.0 + obj:/usr/X11R6/lib/libXawXpm_posing_as_Xaw.so.6.1 +} + +{ + X11-Param-1 + Param + write(buf) + fun:__libc_write + obj:/usr/X11R6/lib/libX11.so.6.1 + obj:/usr/X11R6/lib/libX11.so.6.1 +} + +{ + X11-Addr4-8 + Addr4 + obj:/usr/X11R6/lib/libX11.so.6.1 + obj:/usr/X11R6/lib/libXpm.so.4.11 + obj:/usr/X11R6/lib/libXpm.so.4.11 +} + +{ + X11-Addr4-8 + Addr4 + obj:/usr/X11R6/lib/libXt.so.6.0 + obj:/usr/X11R6/lib/libXawXpm_posing_as_Xaw.so.6.1 + obj:/usr/X11R6/lib/libXt.so.6.0 +} + +{ + X11-Addr4-9 + Addr4 + 
obj:/usr/X11R6/lib/libXaw.so.6.1 + obj:/usr/X11R6/lib/libXt.so.6.0 + obj:/usr/X11R6/lib/libXt.so.6.0 +} + +{ + X11-Addr4-10 + Addr4 + obj:/usr/X11R6/lib/libXaw.so.6.1 + obj:/usr/X11R6/lib/libXaw.so.6.1 + obj:/usr/X11R6/lib/libXt.so.6.0 +} + +{ + X11-Addr4-11 + Addr4 + obj:/usr/X11R6/lib/libXt.so.6.0 + obj:/usr/X11R6/lib/libXt.so.6.0 + obj:/usr/X11R6/lib/libXaw.so.6.1 +} + + + +##----------------------------------------------------------------------## diff --git a/linux24.supp b/linux24.supp new file mode 100644 index 000000000..f5943a68b --- /dev/null +++ b/linux24.supp @@ -0,0 +1,296 @@ + +##----------------------------------------------------------------------## + +# Errors to suppress by default on a Linux kernel 2.4 system +# (glibc 2.2.4, XFree86 4.1.0) + +# Format of this file is: +# { +# name_of_suppression +# kind: one of Param Value1 Value2 Value4 Value8 +# Free Addr1 Addr2 Addr4 Addr8 +# (if Param: name of system call param, if Free: name of free-ing fn) +# caller0 name, or /name/of/so/file.so +# caller1 name, or ditto +# (optionally: caller2 name) +# (optionally: caller3 name) +# } + + +# even more glibc suppressions ? +{ + libc-2.2.4.so/libc-2.2.4.so/libc-2.2.4.so(Value0) + Value0 + obj:*libc-2.2.4.so + obj:*libc-2.2.4.so + obj:*libc-2.2.4.so +} +{ + libc-2.2.4.so/libc-2.2.4.so/libc-2.2.4.so(Value4) + Value4 + obj:*libc-2.2.4.so + obj:*libc-2.2.4.so + obj:*libc-2.2.4.so +} + +##### glibc 2.2.5 stuff perhaps? 
+##### suppressions for coolo +{ + strchr/dl_open_worker(Value0) + Value0 + fun:strchr + fun:dl_open_worker +} +{ + __rawmemchr/internal_getgrgid_r(Value0) + Value0 + fun:__rawmemchr + fun:internal_getgrgid_r +} +{ + _IO_vfprintf/__strnlen(Value0) + Value0 + fun:__strnlen + fun:_IO_vfprintf +} +{ + __strchrnul/gethostbyname*(Value0) + Value0 + fun:__strchrnul + fun:gethostbyname* +} + + +##---- +{ + strlen/*dl_map_object*(Value0) + Value0 + fun:strlen + fun:*dl_map_object* +} + +{ + strlen/*dl_open_worker*(Value0) + Value0 + fun:strlen + fun:*dl_open_worker* +} + +{ + *rawmemchr*/*nss*(Value0) + Value0 + fun:*rawmemchr* + fun:*nss* +} + +{ + *strchrnul*/*nss*(Value0) + Value0 + fun:*strchrnul* + fun:*nss* +} + + + +# gcc version 2.96 20000731 (Red Hat Linux 7.1 2.96-98) +# on Red Hat 7.2 (x86) miscompiles __mpn_construct_double in +# __mpn_construct_double (../sysdeps/ieee754/dbl-64/mpn2dbl.c:45) +# (glibc-2.2.4) to read and write below %esp. Hence the following +# two: +{ + __mpn_construct_double/*(Addr4) + Addr4 + fun:__mpn_construct_double + fun:* +} +{ + __mpn_construct_double/*(Addr8) + Addr8 + fun:__mpn_construct_double + fun:* +} + +# More of the same (gcc bug, I'm pretty sure) +{ + __fabs/*(Addr4) + Addr4 + fun:__fabs + fun:* +} +{ + __fabs/*(Addr8) + Addr8 + fun:__fabs + fun:* +} + + +# Not sure what this is about ... 
but anyway +{ + pthread_sighandler/*(Addr4) + Addr4 + fun:pthread_sighandler + fun:* +} + + +# More glibc stuff, AFAICS + +{ + __strnlen/__argz_stringify/_nl_make_l10nflist(Value0) + Value0 + fun:__strnlen + fun:__argz_stringify + fun:_nl_make_l10nflist +} + +#-------------- +{ + _dl_relocate_object/dl_open_worker/_dl_catch_error(Value0) + Value0 + fun:_dl_relocate_object + fun:dl_open_worker + fun:_dl_catch_error +} +{ + _dl_relocate_object/libc-2.2.4.so/_dl_catch_error(Value0) + Value0 + fun:_dl_relocate_object + obj:*libc-2.2.4.so + fun:_dl_catch_error +} + +{ + strrchr/_dl_map_object_from_fd/_dl_map_object(Value0) + Value0 + fun:strrchr + fun:_dl_map_object_from_fd + fun:_dl_map_object +} + +#------------------- +{ + socketcall.connect(serv_addr)/__libc_connect/* + Param + socketcall.connect(serv_addr) + fun:__libc_connect + fun:* +} +{ + socketcall.connect(serv_addr)/libc-2.2.4.so/libc-2.2.4.so + Param + socketcall.connect(serv_addr) + obj:*libc-2.2.4.so + obj:*libc-2.2.4.so +} + +{ + libX11.so.6.2/libX11.so.6.2/libX11.so.6.2(Value0) + Value0 + obj:/usr/X11R6/lib/libX11.so.6.2 + obj:/usr/X11R6/lib/libX11.so.6.2 + obj:/usr/X11R6/lib/libX11.so.6.2 +} + +{ + libXt.so.6.2/libXt.so.6.2/libXt.so.6.2(Value0) + Value0 + obj:/usr/X11R6/lib/libXt.so.6.0 + obj:/usr/X11R6/lib/libXt.so.6.0 + obj:/usr/X11R6/lib/libXt.so.6.0 +} + + +{ + libXaw.so.7.0/libXaw.so.7.0/libXaw.so.7.0(Value0) + Value0 + obj:/usr/X11R6/lib/libXaw.so.7.0 + obj:/usr/X11R6/lib/libXaw.so.7.0 + obj:/usr/X11R6/lib/libXaw.so.7.0 +} + +{ + libXmu.so.6.2/libXmu.so.6.2/libXmu.so.6.2(Value0) + Value0 + obj:/usr/X11R6/lib/libXmu.so.6.2 + obj:/usr/X11R6/lib/libXmu.so.6.2 + obj:/usr/X11R6/lib/libXmu.so.6.2 +} + +{ + libXt.so.6.0/libXt.so.6.0/libXaw.so.7.0(Value0) + Value0 + obj:/usr/X11R6/lib/libXt.so.6.0 + obj:/usr/X11R6/lib/libXt.so.6.0 + obj:/usr/X11R6/lib/libXaw.so.7.0 +} + +{ + libXaw.so.7.0/libXaw.so.7.0/libXt.so.6.0(Value4) + Value4 + obj:/usr/X11R6/lib/libXaw.so.7.0 + obj:/usr/X11R6/lib/libXaw.so.7.0 + 
obj:/usr/X11R6/lib/libXt.so.6.0 +} + +{ + libX11.so.6.2/libX11.so.6.2/libXaw.so.7.0(Value0) + Value0 + obj:/usr/X11R6/lib/libX11.so.6.2 + obj:/usr/X11R6/lib/libX11.so.6.2 + obj:/usr/X11R6/lib/libXaw.so.7.0 +} + +#---------------------- +{ + write(buf)/__libc_write/libX11.so.6.2/libX11.so.6.2(Param) + Param + write(buf) + fun:__libc_write + obj:/usr/X11R6/lib/libX11.so.6.2 + obj:/usr/X11R6/lib/libX11.so.6.2 +} +{ + write(buf)/libc-2.2.4.so/libX11.so.6.2/libX11.so.6.2(Param) + Param + write(buf) + obj:*libc-2.2.4.so + obj:/usr/X11R6/lib/libX11.so.6.2 + obj:/usr/X11R6/lib/libX11.so.6.2 +} + +#{ +# llseek(result)/__libc_lseek64/_IO_file_seek(Param) +# Param +# llseek(result) +# fun:__libc_lseek64 +# fun:_IO_file_seek +#} + +{ + writev(vector[...])/__writev/libX11.so.6.2/libX11.so.6.2 + Param + writev(vector[...]) + fun:__writev + obj:/usr/X11R6/lib/libX11.so.6.2 + obj:/usr/X11R6/lib/libX11.so.6.2 +} + +#---------------- +{ + __rawmemchr/libXt.so.6.0/libXt.so.6.0 + Value0 + fun:__rawmemchr + obj:/usr/X11R6/lib/libXt.so.6.0 + obj:/usr/X11R6/lib/libXt.so.6.0 +} +{ + libc-2.2.4.so/libXt.so.6.0/libXt.so.6.0 + Value0 + obj:*libc-2.2.4.so + obj:/usr/X11R6/lib/libXt.so.6.0 + obj:/usr/X11R6/lib/libXt.so.6.0 +} + +##----------------------------------------------------------------------## + diff --git a/ltmain.sh b/ltmain.sh new file mode 100644 index 000000000..a92647710 --- /dev/null +++ b/ltmain.sh @@ -0,0 +1,5029 @@ +# ltmain.sh - Provide generalized library-building support services. +# NOTE: Changing this file will not affect anything until you rerun configure. +# +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001 +# Free Software Foundation, Inc. +# Originally by Gordon Matzigkeit , 1996 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Check that we have a working $echo. +if test "X$1" = X--no-reexec; then + # Discard the --no-reexec flag, and continue. + shift +elif test "X$1" = X--fallback-echo; then + # Avoid inline document here, it may be left over + : +elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then + # Yippee, $echo works! + : +else + # Restart under the correct shell, and then maybe $echo will work. + exec $SHELL "$0" --no-reexec ${1+"$@"} +fi + +if test "X$1" = X--fallback-echo; then + # used as fallback echo + shift + cat <&2 + echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2 + exit 1 +fi + +# Global variables. +mode=$default_mode +nonopt= +prev= +prevopt= +run= +show="$echo" +show_help= +execute_dlfiles= +lo2o="s/\\.lo\$/.${objext}/" +o2lo="s/\\.${objext}\$/.lo/" + +# Parse our command line options once, thoroughly. +while test $# -gt 0 +do + arg="$1" + shift + + case $arg in + -*=*) optarg=`$echo "X$arg" | $Xsed -e 's/[-_a-zA-Z0-9]*=//'` ;; + *) optarg= ;; + esac + + # If the previous option needs an argument, assign it. 
+ if test -n "$prev"; then + case $prev in + execute_dlfiles) + execute_dlfiles="$execute_dlfiles $arg" + ;; + *) + eval "$prev=\$arg" + ;; + esac + + prev= + prevopt= + continue + fi + + # Have we seen a non-optional argument yet? + case $arg in + --help) + show_help=yes + ;; + + --version) + echo "$PROGRAM (GNU $PACKAGE) $VERSION$TIMESTAMP" + exit 0 + ;; + + --config) + sed -e '1,/^# ### BEGIN LIBTOOL CONFIG/d' -e '/^# ### END LIBTOOL CONFIG/,$d' $0 + exit 0 + ;; + + --debug) + echo "$progname: enabling shell trace mode" + set -x + ;; + + --dry-run | -n) + run=: + ;; + + --features) + echo "host: $host" + if test "$build_libtool_libs" = yes; then + echo "enable shared libraries" + else + echo "disable shared libraries" + fi + if test "$build_old_libs" = yes; then + echo "enable static libraries" + else + echo "disable static libraries" + fi + exit 0 + ;; + + --finish) mode="finish" ;; + + --mode) prevopt="--mode" prev=mode ;; + --mode=*) mode="$optarg" ;; + + --preserve-dup-deps) duplicate_deps="yes" ;; + + --quiet | --silent) + show=: + ;; + + -dlopen) + prevopt="-dlopen" + prev=execute_dlfiles + ;; + + -*) + $echo "$modename: unrecognized option \`$arg'" 1>&2 + $echo "$help" 1>&2 + exit 1 + ;; + + *) + nonopt="$arg" + break + ;; + esac +done + +if test -n "$prevopt"; then + $echo "$modename: option \`$prevopt' requires an argument" 1>&2 + $echo "$help" 1>&2 + exit 1 +fi + +# If this variable is set in any of the actions, the command in it +# will be execed at the end. This prevents here-documents from being +# left over by shells. +exec_cmd= + +if test -z "$show_help"; then + + # Infer the operation mode. 
+ if test -z "$mode"; then + case $nonopt in + *cc | *++ | gcc* | *-gcc*) + mode=link + for arg + do + case $arg in + -c) + mode=compile + break + ;; + esac + done + ;; + *db | *dbx | *strace | *truss) + mode=execute + ;; + *install*|cp|mv) + mode=install + ;; + *rm) + mode=uninstall + ;; + *) + # If we have no mode, but dlfiles were specified, then do execute mode. + test -n "$execute_dlfiles" && mode=execute + + # Just use the default operation mode. + if test -z "$mode"; then + if test -n "$nonopt"; then + $echo "$modename: warning: cannot infer operation mode from \`$nonopt'" 1>&2 + else + $echo "$modename: warning: cannot infer operation mode without MODE-ARGS" 1>&2 + fi + fi + ;; + esac + fi + + # Only execute mode is allowed to have -dlopen flags. + if test -n "$execute_dlfiles" && test "$mode" != execute; then + $echo "$modename: unrecognized option \`-dlopen'" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + # Change the help message to a mode-specific one. + generic_help="$help" + help="Try \`$modename --help --mode=$mode' for more information." + + # These modes are in order of execution frequency so that they run quickly. + case $mode in + # libtool compile mode + compile) + modename="$modename: compile" + # Get the compilation command and the source file. + base_compile= + prev= + lastarg= + srcfile="$nonopt" + suppress_output= + + user_target=no + for arg + do + case $prev in + "") ;; + xcompiler) + # Aesthetically quote the previous argument. + prev= + lastarg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` + + case $arg in + # Double-quote args containing other shell metacharacters. + # Many Bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + + # Add the previous argument to base_compile. 
+ if test -z "$base_compile"; then + base_compile="$lastarg" + else + base_compile="$base_compile $lastarg" + fi + continue + ;; + esac + + # Accept any command-line options. + case $arg in + -o) + if test "$user_target" != "no"; then + $echo "$modename: you cannot specify \`-o' more than once" 1>&2 + exit 1 + fi + user_target=next + ;; + + -static) + build_old_libs=yes + continue + ;; + + -prefer-pic) + pic_mode=yes + continue + ;; + + -prefer-non-pic) + pic_mode=no + continue + ;; + + -Xcompiler) + prev=xcompiler + continue + ;; + + -Wc,*) + args=`$echo "X$arg" | $Xsed -e "s/^-Wc,//"` + lastarg= + save_ifs="$IFS"; IFS=',' + for arg in $args; do + IFS="$save_ifs" + + # Double-quote args containing other shell metacharacters. + # Many Bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + lastarg="$lastarg $arg" + done + IFS="$save_ifs" + lastarg=`$echo "X$lastarg" | $Xsed -e "s/^ //"` + + # Add the arguments to base_compile. + if test -z "$base_compile"; then + base_compile="$lastarg" + else + base_compile="$base_compile $lastarg" + fi + continue + ;; + esac + + case $user_target in + next) + # The next one is the -o target name + user_target=yes + continue + ;; + yes) + # We got the output file + user_target=set + libobj="$arg" + continue + ;; + esac + + # Accept the current argument as the source file. + lastarg="$srcfile" + srcfile="$arg" + + # Aesthetically quote the previous argument. + + # Backslashify any backslashes, double quotes, and dollar signs. + # These are the only characters that are still specially + # interpreted inside of double-quoted scrings. + lastarg=`$echo "X$lastarg" | $Xsed -e "$sed_quote_subst"` + + # Double-quote args containing other shell metacharacters. + # Many Bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. 
+ case $lastarg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + lastarg="\"$lastarg\"" + ;; + esac + + # Add the previous argument to base_compile. + if test -z "$base_compile"; then + base_compile="$lastarg" + else + base_compile="$base_compile $lastarg" + fi + done + + case $user_target in + set) + ;; + no) + # Get the name of the library object. + libobj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%'` + ;; + *) + $echo "$modename: you must specify a target with \`-o'" 1>&2 + exit 1 + ;; + esac + + # Recognize several different file suffixes. + # If the user specifies -o file.o, it is replaced with file.lo + xform='[cCFSfmso]' + case $libobj in + *.ada) xform=ada ;; + *.adb) xform=adb ;; + *.ads) xform=ads ;; + *.asm) xform=asm ;; + *.c++) xform=c++ ;; + *.cc) xform=cc ;; + *.cpp) xform=cpp ;; + *.cxx) xform=cxx ;; + *.f90) xform=f90 ;; + *.for) xform=for ;; + esac + + libobj=`$echo "X$libobj" | $Xsed -e "s/\.$xform$/.lo/"` + + case $libobj in + *.lo) obj=`$echo "X$libobj" | $Xsed -e "$lo2o"` ;; + *) + $echo "$modename: cannot determine name of library object from \`$libobj'" 1>&2 + exit 1 + ;; + esac + + if test -z "$base_compile"; then + $echo "$modename: you must specify a compilation command" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + # Delete any leftover library objects. 
+ if test "$build_old_libs" = yes; then + removelist="$obj $libobj" + else + removelist="$libobj" + fi + + $run $rm $removelist + trap "$run $rm $removelist; exit 1" 1 2 15 + + # On Cygwin there's no "real" PIC flag so we must build both object types + case $host_os in + cygwin* | mingw* | pw32* | os2*) + pic_mode=default + ;; + esac + if test $pic_mode = no && test "$deplibs_check_method" != pass_all; then + # non-PIC code in shared libraries is not supported + pic_mode=default + fi + + # Calculate the filename of the output object if compiler does + # not support -o with -c + if test "$compiler_c_o" = no; then + output_obj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%' -e 's%\.[^.]*$%%'`.${objext} + lockfile="$output_obj.lock" + removelist="$removelist $output_obj $lockfile" + trap "$run $rm $removelist; exit 1" 1 2 15 + else + need_locks=no + lockfile= + fi + + # Lock this critical section if it is needed + # We use this script file to make the link, it avoids creating a new file + if test "$need_locks" = yes; then + until $run ln "$0" "$lockfile" 2>/dev/null; do + $show "Waiting for $lockfile to be removed" + sleep 2 + done + elif test "$need_locks" = warn; then + if test -f "$lockfile"; then + echo "\ +*** ERROR, $lockfile exists and contains: +`cat $lockfile 2>/dev/null` + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $run $rm $removelist + exit 1 + fi + echo $srcfile > "$lockfile" + fi + + if test -n "$fix_srcfile_path"; then + eval srcfile=\"$fix_srcfile_path\" + fi + + # Only build a PIC object if we are building libtool libraries. + if test "$build_libtool_libs" = yes; then + # Without this assignment, base_compile gets emptied. 
+ fbsd_hideous_sh_bug=$base_compile + + if test "$pic_mode" != no; then + # All platforms use -DPIC, to notify preprocessed assembler code. + command="$base_compile $srcfile $pic_flag -DPIC" + else + # Don't build PIC code + command="$base_compile $srcfile" + fi + if test "$build_old_libs" = yes; then + lo_libobj="$libobj" + dir=`$echo "X$libobj" | $Xsed -e 's%/[^/]*$%%'` + if test "X$dir" = "X$libobj"; then + dir="$objdir" + else + dir="$dir/$objdir" + fi + libobj="$dir/"`$echo "X$libobj" | $Xsed -e 's%^.*/%%'` + + if test -d "$dir"; then + $show "$rm $libobj" + $run $rm $libobj + else + $show "$mkdir $dir" + $run $mkdir $dir + status=$? + if test $status -ne 0 && test ! -d $dir; then + exit $status + fi + fi + fi + if test "$compiler_o_lo" = yes; then + output_obj="$libobj" + command="$command -o $output_obj" + elif test "$compiler_c_o" = yes; then + output_obj="$obj" + command="$command -o $output_obj" + fi + + $run $rm "$output_obj" + $show "$command" + if $run eval "$command"; then : + else + test -n "$output_obj" && $run $rm $removelist + exit 1 + fi + + if test "$need_locks" = warn && + test x"`cat $lockfile 2>/dev/null`" != x"$srcfile"; then + echo "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $run $rm $removelist + exit 1 + fi + + # Just move the object if needed, then go on to compile the next one + if test x"$output_obj" != x"$libobj"; then + $show "$mv $output_obj $libobj" + if $run $mv $output_obj $libobj; then : + else + error=$? + $run $rm $removelist + exit $error + fi + fi + + # If we have no pic_flag, then copy the object into place and finish. 
+ if (test -z "$pic_flag" || test "$pic_mode" != default) && + test "$build_old_libs" = yes; then + # Rename the .lo from within objdir to obj + if test -f $obj; then + $show $rm $obj + $run $rm $obj + fi + + $show "$mv $libobj $obj" + if $run $mv $libobj $obj; then : + else + error=$? + $run $rm $removelist + exit $error + fi + + xdir=`$echo "X$obj" | $Xsed -e 's%/[^/]*$%%'` + if test "X$xdir" = "X$obj"; then + xdir="." + else + xdir="$xdir" + fi + baseobj=`$echo "X$obj" | $Xsed -e "s%.*/%%"` + libobj=`$echo "X$baseobj" | $Xsed -e "$o2lo"` + # Now arrange that obj and lo_libobj become the same file + $show "(cd $xdir && $LN_S $baseobj $libobj)" + if $run eval '(cd $xdir && $LN_S $baseobj $libobj)'; then + # Unlock the critical section if it was locked + if test "$need_locks" != no; then + $run $rm "$lockfile" + fi + exit 0 + else + error=$? + $run $rm $removelist + exit $error + fi + fi + + # Allow error messages only from the first compilation. + suppress_output=' >/dev/null 2>&1' + fi + + # Only build a position-dependent object if we build old libraries. + if test "$build_old_libs" = yes; then + if test "$pic_mode" != yes; then + # Don't build PIC code + command="$base_compile $srcfile" + else + # All platforms use -DPIC, to notify preprocessed assembler code. + command="$base_compile $srcfile $pic_flag -DPIC" + fi + if test "$compiler_c_o" = yes; then + command="$command -o $obj" + output_obj="$obj" + fi + + # Suppress compiler output if we already did a PIC compilation. 
+ command="$command$suppress_output" + $run $rm "$output_obj" + $show "$command" + if $run eval "$command"; then : + else + $run $rm $removelist + exit 1 + fi + + if test "$need_locks" = warn && + test x"`cat $lockfile 2>/dev/null`" != x"$srcfile"; then + echo "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $run $rm $removelist + exit 1 + fi + + # Just move the object if needed + if test x"$output_obj" != x"$obj"; then + $show "$mv $output_obj $obj" + if $run $mv $output_obj $obj; then : + else + error=$? + $run $rm $removelist + exit $error + fi + fi + + # Create an invalid libtool object if no PIC, so that we do not + # accidentally link it into a program. + if test "$build_libtool_libs" != yes; then + $show "echo timestamp > $libobj" + $run eval "echo timestamp > \$libobj" || exit $? + else + # Move the .lo from within objdir + $show "$mv $libobj $lo_libobj" + if $run $mv $libobj $lo_libobj; then : + else + error=$? + $run $rm $removelist + exit $error + fi + fi + fi + + # Unlock the critical section if it was locked + if test "$need_locks" != no; then + $run $rm "$lockfile" + fi + + exit 0 + ;; + + # libtool link mode + link | relink) + modename="$modename: link" + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*) + # It is impossible to link a dll without this setting, and + # we shouldn't force the makefile maintainer to figure out + # which system we are compiling for in order to pass an extra + # flag for every libtool invokation. 
+ # allow_undefined=no + + # FIXME: Unfortunately, there are problems with the above when trying + # to make a dll which has undefined symbols, in which case not + # even a static library is built. For now, we need to specify + # -no-undefined on the libtool link line when we can be certain + # that all symbols are satisfied, otherwise we get a static library. + allow_undefined=yes + ;; + *) + allow_undefined=yes + ;; + esac + libtool_args="$nonopt" + compile_command="$nonopt" + finalize_command="$nonopt" + + compile_rpath= + finalize_rpath= + compile_shlibpath= + finalize_shlibpath= + convenience= + old_convenience= + deplibs= + old_deplibs= + compiler_flags= + linker_flags= + dllsearchpath= + lib_search_path=`pwd` + + avoid_version=no + dlfiles= + dlprefiles= + dlself=no + export_dynamic=no + export_symbols= + export_symbols_regex= + generated= + libobjs= + ltlibs= + module=no + no_install=no + objs= + prefer_static_libs=no + preload=no + prev= + prevarg= + release= + rpath= + xrpath= + perm_rpath= + temp_rpath= + thread_safe=no + vinfo= + + # We need to know -static, to get the right output filenames. + for arg + do + case $arg in + -all-static | -static) + if test "X$arg" = "X-all-static"; then + if test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then + $echo "$modename: warning: complete static linking is impossible in this configuration" 1>&2 + fi + if test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + else + if test -z "$pic_flag" && test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + fi + build_libtool_libs=no + build_old_libs=yes + prefer_static_libs=yes + break + ;; + esac + done + + # See if our shared archives depend on static archives. + test -n "$old_archive_from_new_cmds" && build_old_libs=yes + + # Go through the arguments, transforming them on the way. 
+ while test $# -gt 0; do + arg="$1" + shift + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + qarg=\"`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`\" ### testsuite: skip nested quoting test + ;; + *) qarg=$arg ;; + esac + libtool_args="$libtool_args $qarg" + + # If the previous option needs an argument, assign it. + if test -n "$prev"; then + case $prev in + output) + compile_command="$compile_command @OUTPUT@" + finalize_command="$finalize_command @OUTPUT@" + ;; + esac + + case $prev in + dlfiles|dlprefiles) + if test "$preload" = no; then + # Add the symbol object into the linking commands. + compile_command="$compile_command @SYMFILE@" + finalize_command="$finalize_command @SYMFILE@" + preload=yes + fi + case $arg in + *.la | *.lo) ;; # We handle these cases below. + force) + if test "$dlself" = no; then + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + self) + if test "$prev" = dlprefiles; then + dlself=yes + elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then + dlself=yes + else + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + *) + if test "$prev" = dlfiles; then + dlfiles="$dlfiles $arg" + else + dlprefiles="$dlprefiles $arg" + fi + prev= + continue + ;; + esac + ;; + expsyms) + export_symbols="$arg" + if test ! -f "$arg"; then + $echo "$modename: symbol file \`$arg' does not exist" + exit 1 + fi + prev= + continue + ;; + expsyms_regex) + export_symbols_regex="$arg" + prev= + continue + ;; + release) + release="-$arg" + prev= + continue + ;; + rpath | xrpath) + # We need an absolute path. 
+ case $arg in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + $echo "$modename: only absolute run-paths are allowed" 1>&2 + exit 1 + ;; + esac + if test "$prev" = rpath; then + case "$rpath " in + *" $arg "*) ;; + *) rpath="$rpath $arg" ;; + esac + else + case "$xrpath " in + *" $arg "*) ;; + *) xrpath="$xrpath $arg" ;; + esac + fi + prev= + continue + ;; + xcompiler) + compiler_flags="$compiler_flags $qarg" + prev= + compile_command="$compile_command $qarg" + finalize_command="$finalize_command $qarg" + continue + ;; + xlinker) + linker_flags="$linker_flags $qarg" + compiler_flags="$compiler_flags $wl$qarg" + prev= + compile_command="$compile_command $wl$qarg" + finalize_command="$finalize_command $wl$qarg" + continue + ;; + *) + eval "$prev=\"\$arg\"" + prev= + continue + ;; + esac + fi # test -n $prev + + prevarg="$arg" + + case $arg in + -all-static) + if test -n "$link_static_flag"; then + compile_command="$compile_command $link_static_flag" + finalize_command="$finalize_command $link_static_flag" + fi + continue + ;; + + -allow-undefined) + # FIXME: remove this flag sometime in the future. 
+ $echo "$modename: \`-allow-undefined' is deprecated because it is the default" 1>&2 + continue + ;; + + -avoid-version) + avoid_version=yes + continue + ;; + + -dlopen) + prev=dlfiles + continue + ;; + + -dlpreopen) + prev=dlprefiles + continue + ;; + + -export-dynamic) + export_dynamic=yes + continue + ;; + + -export-symbols | -export-symbols-regex) + if test -n "$export_symbols" || test -n "$export_symbols_regex"; then + $echo "$modename: more than one -exported-symbols argument is not allowed" + exit 1 + fi + if test "X$arg" = "X-export-symbols"; then + prev=expsyms + else + prev=expsyms_regex + fi + continue + ;; + + # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:* + # so, if we see these flags be careful not to treat them like -L + -L[A-Z][A-Z]*:*) + case $with_gcc/$host in + no/*-*-irix* | no/*-*-nonstopux*) + compile_command="$compile_command $arg" + finalize_command="$finalize_command $arg" + ;; + esac + continue + ;; + + -L*) + dir=`$echo "X$arg" | $Xsed -e 's/^-L//'` + # We need an absolute path. 
+ case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + absdir=`cd "$dir" && pwd` + if test -z "$absdir"; then + $echo "$modename: cannot determine absolute directory name of \`$dir'" 1>&2 + exit 1 + fi + dir="$absdir" + ;; + esac + case "$deplibs " in + *" -L$dir "*) ;; + *) + deplibs="$deplibs -L$dir" + lib_search_path="$lib_search_path $dir" + ;; + esac + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*) + case :$dllsearchpath: in + *":$dir:"*) ;; + *) dllsearchpath="$dllsearchpath:$dir";; + esac + ;; + esac + continue + ;; + + -l*) + if test "X$arg" = "X-lc" || test "X$arg" = "X-lm"; then + case $host in + *-*-cygwin* | *-*-pw32* | *-*-beos*) + # These systems don't actually have a C or math library (as such) + continue + ;; + *-*-mingw* | *-*-os2*) + # These systems don't actually have a C library (as such) + test "X$arg" = "X-lc" && continue + ;; + *-*-openbsd* | *-*-freebsd*) + # Do not include libc due to us having libc/libc_r. + test "X$arg" = "X-lc" && continue + ;; + esac + elif test "X$arg" = "X-lc_r"; then + case $host in + *-*-openbsd* | *-*-freebsd*) + # Do not include libc_r directly, use -pthread flag. + continue + ;; + esac + fi + deplibs="$deplibs $arg" + continue + ;; + + -module) + module=yes + continue + ;; + + -no-fast-install) + fast_install=no + continue + ;; + + -no-install) + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*) + # The PATH hackery in wrapper scripts is required on Windows + # in order for the loader to find any dlls it needs. 
+ $echo "$modename: warning: \`-no-install' is ignored for $host" 1>&2 + $echo "$modename: warning: assuming \`-no-fast-install' instead" 1>&2 + fast_install=no + ;; + *) no_install=yes ;; + esac + continue + ;; + + -no-undefined) + allow_undefined=no + continue + ;; + + -o) prev=output ;; + + -release) + prev=release + continue + ;; + + -rpath) + prev=rpath + continue + ;; + + -R) + prev=xrpath + continue + ;; + + -R*) + dir=`$echo "X$arg" | $Xsed -e 's/^-R//'` + # We need an absolute path. + case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + $echo "$modename: only absolute run-paths are allowed" 1>&2 + exit 1 + ;; + esac + case "$xrpath " in + *" $dir "*) ;; + *) xrpath="$xrpath $dir" ;; + esac + continue + ;; + + -static) + # The effects of -static are defined in a previous loop. + # We used to do the same as -all-static on platforms that + # didn't have a PIC flag, but the assumption that the effects + # would be equivalent was wrong. It would break on at least + # Digital Unix and AIX. 
+ continue + ;; + + -thread-safe) + thread_safe=yes + continue + ;; + + -version-info) + prev=vinfo + continue + ;; + + -Wc,*) + args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wc,//'` + arg= + save_ifs="$IFS"; IFS=',' + for flag in $args; do + IFS="$save_ifs" + case $flag in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + flag="\"$flag\"" + ;; + esac + arg="$arg $wl$flag" + compiler_flags="$compiler_flags $flag" + done + IFS="$save_ifs" + arg=`$echo "X$arg" | $Xsed -e "s/^ //"` + ;; + + -Wl,*) + args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wl,//'` + arg= + save_ifs="$IFS"; IFS=',' + for flag in $args; do + IFS="$save_ifs" + case $flag in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + flag="\"$flag\"" + ;; + esac + arg="$arg $wl$flag" + compiler_flags="$compiler_flags $wl$flag" + linker_flags="$linker_flags $flag" + done + IFS="$save_ifs" + arg=`$echo "X$arg" | $Xsed -e "s/^ //"` + ;; + + -Xcompiler) + prev=xcompiler + continue + ;; + + -Xlinker) + prev=xlinker + continue + ;; + + # Some other compiler flag. + -* | +*) + # Unknown arguments in both finalize_command and compile_command need + # to be aesthetically quoted because they are evaled later. + arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + ;; + + *.lo | *.$objext) + # A library or standard object. + if test "$prev" = dlfiles; then + # This file was specified with -dlopen. + if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then + dlfiles="$dlfiles $arg" + prev= + continue + else + # If libtool objects are unsupported, then we need to preload. + prev=dlprefiles + fi + fi + + if test "$prev" = dlprefiles; then + # Preload the old-style object. + dlprefiles="$dlprefiles "`$echo "X$arg" | $Xsed -e "$lo2o"` + prev= + else + case $arg in + *.lo) libobjs="$libobjs $arg" ;; + *) objs="$objs $arg" ;; + esac + fi + ;; + + *.$libext) + # An archive. 
+ deplibs="$deplibs $arg" + old_deplibs="$old_deplibs $arg" + continue + ;; + + *.la) + # A libtool-controlled library. + + if test "$prev" = dlfiles; then + # This library was specified with -dlopen. + dlfiles="$dlfiles $arg" + prev= + elif test "$prev" = dlprefiles; then + # The library was specified with -dlpreopen. + dlprefiles="$dlprefiles $arg" + prev= + else + deplibs="$deplibs $arg" + fi + continue + ;; + + # Some other compiler argument. + *) + # Unknown arguments in both finalize_command and compile_command need + # to be aesthetically quoted because they are evaled later. + arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + ;; + esac # arg + + # Now actually substitute the argument into the commands. + if test -n "$arg"; then + compile_command="$compile_command $arg" + finalize_command="$finalize_command $arg" + fi + done # argument parsing loop + + if test -n "$prev"; then + $echo "$modename: the \`$prevarg' option requires an argument" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then + eval arg=\"$export_dynamic_flag_spec\" + compile_command="$compile_command $arg" + finalize_command="$finalize_command $arg" + fi + + # calculate the name of the file, without its directory + outputname=`$echo "X$output" | $Xsed -e 's%^.*/%%'` + libobjs_save="$libobjs" + + if test -n "$shlibpath_var"; then + # get the directories listed in $shlibpath_var + eval shlib_search_path=\`\$echo \"X\${$shlibpath_var}\" \| \$Xsed -e \'s/:/ /g\'\` + else + shlib_search_path= + fi + eval sys_lib_search_path=\"$sys_lib_search_path_spec\" + eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\" + + output_objdir=`$echo "X$output" | $Xsed -e 's%/[^/]*$%%'` + if test "X$output_objdir" = "X$output"; then + output_objdir="$objdir" + else + output_objdir="$output_objdir/$objdir" + fi + # Create the object directory. 
+ if test ! -d $output_objdir; then + $show "$mkdir $output_objdir" + $run $mkdir $output_objdir + status=$? + if test $status -ne 0 && test ! -d $output_objdir; then + exit $status + fi + fi + + # Determine the type of output + case $output in + "") + $echo "$modename: you must specify an output file" 1>&2 + $echo "$help" 1>&2 + exit 1 + ;; + *.$libext) linkmode=oldlib ;; + *.lo | *.$objext) linkmode=obj ;; + *.la) linkmode=lib ;; + *) linkmode=prog ;; # Anything else should be a program. + esac + + specialdeplibs= + libs= + # Find all interdependent deplibs by searching for libraries + # that are linked more than once (e.g. -la -lb -la) + for deplib in $deplibs; do + if test "X$duplicate_deps" = "Xyes" ; then + case "$libs " in + *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; + esac + fi + libs="$libs $deplib" + done + deplibs= + newdependency_libs= + newlib_search_path= + need_relink=no # whether we're linking any uninstalled libtool libraries + notinst_deplibs= # not-installed libtool libraries + notinst_path= # paths that contain not-installed libtool libraries + case $linkmode in + lib) + passes="conv link" + for file in $dlfiles $dlprefiles; do + case $file in + *.la) ;; + *) + $echo "$modename: libraries can \`-dlopen' only libtool libraries: $file" 1>&2 + exit 1 + ;; + esac + done + ;; + prog) + compile_deplibs= + finalize_deplibs= + alldeplibs=no + newdlfiles= + newdlprefiles= + passes="conv scan dlopen dlpreopen link" + ;; + *) passes="conv" + ;; + esac + for pass in $passes; do + if test $linkmode = prog; then + # Determine which files to process + case $pass in + dlopen) + libs="$dlfiles" + save_deplibs="$deplibs" # Collect dlpreopened libraries + deplibs= + ;; + dlpreopen) libs="$dlprefiles" ;; + link) libs="$deplibs %DEPLIBS% $dependency_libs" ;; + esac + fi + for deplib in $libs; do + lib= + found=no + case $deplib in + -l*) + if test $linkmode = oldlib && test $linkmode = obj; then + $echo "$modename: warning: \`-l' is ignored for 
archives/objects: $deplib" 1>&2 + continue + fi + if test $pass = conv; then + deplibs="$deplib $deplibs" + continue + fi + name=`$echo "X$deplib" | $Xsed -e 's/^-l//'` + for searchdir in $newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path; do + # Search the libtool library + lib="$searchdir/lib${name}.la" + if test -f "$lib"; then + found=yes + break + fi + done + if test "$found" != yes; then + # deplib doesn't seem to be a libtool library + if test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + test $linkmode = lib && newdependency_libs="$deplib $newdependency_libs" + fi + continue + fi + ;; # -l + -L*) + case $linkmode in + lib) + deplibs="$deplib $deplibs" + test $pass = conv && continue + newdependency_libs="$deplib $newdependency_libs" + newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'` + ;; + prog) + if test $pass = conv; then + deplibs="$deplib $deplibs" + continue + fi + if test $pass = scan; then + deplibs="$deplib $deplibs" + newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'` + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + ;; + *) + $echo "$modename: warning: \`-L' is ignored for archives/objects: $deplib" 1>&2 + ;; + esac # linkmode + continue + ;; # -L + -R*) + if test $pass = link; then + dir=`$echo "X$deplib" | $Xsed -e 's/^-R//'` + # Make sure the xrpath contains only unique directories. + case "$xrpath " in + *" $dir "*) ;; + *) xrpath="$xrpath $dir" ;; + esac + fi + deplibs="$deplib $deplibs" + continue + ;; + *.la) lib="$deplib" ;; + *.$libext) + if test $pass = conv; then + deplibs="$deplib $deplibs" + continue + fi + case $linkmode in + lib) + if test "$deplibs_check_method" != pass_all; then + echo + echo "*** Warning: Trying to link with static lib archive $deplib." 
+ echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because the file extensions .$libext of this argument makes me believe" + echo "*** that it is just a static archive that I should not used here." + else + echo + echo "*** Warning: Linking the shared library $output against the" + echo "*** static library $deplib is not portable!" + deplibs="$deplib $deplibs" + fi + continue + ;; + prog) + if test $pass != link; then + deplibs="$deplib $deplibs" + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + continue + ;; + esac # linkmode + ;; # *.$libext + *.lo | *.$objext) + if test $pass = dlpreopen || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then + # If there is no dlopen support or we're linking statically, + # we need to preload. + newdlprefiles="$newdlprefiles $deplib" + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + newdlfiles="$newdlfiles $deplib" + fi + continue + ;; + %DEPLIBS%) + alldeplibs=yes + continue + ;; + esac # case $deplib + if test $found = yes || test -f "$lib"; then : + else + $echo "$modename: cannot find the library \`$lib'" 1>&2 + exit 1 + fi + + # Check to see that this really is a libtool archive. + if (sed -e '2q' $lib | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : + else + $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 + exit 1 + fi + + ladir=`$echo "X$lib" | $Xsed -e 's%/[^/]*$%%'` + test "X$ladir" = "X$lib" && ladir="." + + dlname= + dlopen= + dlpreopen= + libdir= + library_names= + old_library= + # If the library was installed with an old release of libtool, + # it will not redefine variable installed. + installed=yes + + # Read the .la file + case $lib in + */* | *\\*) . $lib ;; + *) . 
./$lib ;; + esac + + if test "$linkmode,$pass" = "lib,link" || + test "$linkmode,$pass" = "prog,scan" || + { test $linkmode = oldlib && test $linkmode = obj; }; then + # Add dl[pre]opened files of deplib + test -n "$dlopen" && dlfiles="$dlfiles $dlopen" + test -n "$dlpreopen" && dlprefiles="$dlprefiles $dlpreopen" + fi + + if test $pass = conv; then + # Only check for convenience libraries + deplibs="$lib $deplibs" + if test -z "$libdir"; then + if test -z "$old_library"; then + $echo "$modename: cannot find name of link library for \`$lib'" 1>&2 + exit 1 + fi + # It is a libtool convenience library, so add in its objects. + convenience="$convenience $ladir/$objdir/$old_library" + old_convenience="$old_convenience $ladir/$objdir/$old_library" + tmp_libs= + for deplib in $dependency_libs; do + deplibs="$deplib $deplibs" + if test "X$duplicate_deps" = "Xyes" ; then + case "$tmp_libs " in + *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; + esac + fi + tmp_libs="$tmp_libs $deplib" + done + elif test $linkmode != prog && test $linkmode != lib; then + $echo "$modename: \`$lib' is not a convenience library" 1>&2 + exit 1 + fi + continue + fi # $pass = conv + + # Get the name of the library we link against. + linklib= + for l in $old_library $library_names; do + linklib="$l" + done + if test -z "$linklib"; then + $echo "$modename: cannot find name of link library for \`$lib'" 1>&2 + exit 1 + fi + + # This library was specified with -dlopen. + if test $pass = dlopen; then + if test -z "$libdir"; then + $echo "$modename: cannot -dlopen a convenience library: \`$lib'" 1>&2 + exit 1 + fi + if test -z "$dlname" || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then + # If there is no dlname, no dlopen support or we're linking + # statically, we need to preload. + dlprefiles="$dlprefiles $lib" + else + newdlfiles="$newdlfiles $lib" + fi + continue + fi # $pass = dlopen + + # We need an absolute path. 
+ case $ladir in + [\\/]* | [A-Za-z]:[\\/]*) abs_ladir="$ladir" ;; + *) + abs_ladir=`cd "$ladir" && pwd` + if test -z "$abs_ladir"; then + $echo "$modename: warning: cannot determine absolute directory name of \`$ladir'" 1>&2 + $echo "$modename: passing it literally to the linker, although it might fail" 1>&2 + abs_ladir="$ladir" + fi + ;; + esac + laname=`$echo "X$lib" | $Xsed -e 's%^.*/%%'` + + # Find the relevant object directory and library name. + if test "X$installed" = Xyes; then + if test ! -f "$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then + $echo "$modename: warning: library \`$lib' was moved." 1>&2 + dir="$ladir" + absdir="$abs_ladir" + libdir="$abs_ladir" + else + dir="$libdir" + absdir="$libdir" + fi + else + dir="$ladir/$objdir" + absdir="$abs_ladir/$objdir" + # Remove this search path later + notinst_path="$notinst_path $abs_ladir" + fi # $installed = yes + name=`$echo "X$laname" | $Xsed -e 's/\.la$//' -e 's/^lib//'` + + # This library was specified with -dlpreopen. + if test $pass = dlpreopen; then + if test -z "$libdir"; then + $echo "$modename: cannot -dlpreopen a convenience library: \`$lib'" 1>&2 + exit 1 + fi + # Prefer using a static library (so that no silly _DYNAMIC symbols + # are required to link). + if test -n "$old_library"; then + newdlprefiles="$newdlprefiles $dir/$old_library" + # Otherwise, use the dlname, so that lt_dlopen finds it. 
+ elif test -n "$dlname"; then + newdlprefiles="$newdlprefiles $dir/$dlname" + else + newdlprefiles="$newdlprefiles $dir/$linklib" + fi + fi # $pass = dlpreopen + + if test -z "$libdir"; then + # Link the convenience library + if test $linkmode = lib; then + deplibs="$dir/$old_library $deplibs" + elif test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$dir/$old_library $compile_deplibs" + finalize_deplibs="$dir/$old_library $finalize_deplibs" + else + deplibs="$lib $deplibs" + fi + continue + fi + + if test $linkmode = prog && test $pass != link; then + newlib_search_path="$newlib_search_path $ladir" + deplibs="$lib $deplibs" + + linkalldeplibs=no + if test "$link_all_deplibs" != no || test -z "$library_names" || + test "$build_libtool_libs" = no; then + linkalldeplibs=yes + fi + + tmp_libs= + for deplib in $dependency_libs; do + case $deplib in + -L*) newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'`;; ### testsuite: skip nested quoting test + esac + # Need to link against all dependency_libs? + if test $linkalldeplibs = yes; then + deplibs="$deplib $deplibs" + else + # Need to hardcode shared library paths + # or/and link against static libraries + newdependency_libs="$deplib $newdependency_libs" + fi + if test "X$duplicate_deps" = "Xyes" ; then + case "$tmp_libs " in + *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; + esac + fi + tmp_libs="$tmp_libs $deplib" + done # for deplib + continue + fi # $linkmode = prog... + + link_static=no # Whether the deplib will be linked statically + if test -n "$library_names" && + { test "$prefer_static_libs" = no || test -z "$old_library"; }; then + # Link against this shared library + + if test "$linkmode,$pass" = "prog,link" || + { test $linkmode = lib && test $hardcode_into_libs = yes; }; then + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. 
+ case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) compile_rpath="$compile_rpath $absdir" + esac + ;; + esac + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) finalize_rpath="$finalize_rpath $libdir" + esac + ;; + esac + if test $linkmode = prog; then + # We need to hardcode the library path + if test -n "$shlibpath_var"; then + # Make sure the rpath contains only unique directories. + case "$temp_rpath " in + *" $dir "*) ;; + *" $absdir "*) ;; + *) temp_rpath="$temp_rpath $dir" ;; + esac + fi + fi + fi # $linkmode,$pass = prog,link... + + if test "$alldeplibs" = yes && + { test "$deplibs_check_method" = pass_all || + { test "$build_libtool_libs" = yes && + test -n "$library_names"; }; }; then + # We only need to search for static libraries + continue + fi + + if test "$installed" = no; then + notinst_deplibs="$notinst_deplibs $lib" + need_relink=yes + fi + + if test -n "$old_archive_from_expsyms_cmds"; then + # figure out the soname + set dummy $library_names + realname="$2" + shift; shift + libname=`eval \\$echo \"$libname_spec\"` + # use dlname if we got it. it's perfectly good, no? 
+ if test -n "$dlname"; then + soname="$dlname" + elif test -n "$soname_spec"; then + # bleh windows + case $host in + *cygwin*) + major=`expr $current - $age` + versuffix="-$major" + ;; + esac + eval soname=\"$soname_spec\" + else + soname="$realname" + fi + + # Make a new name for the extract_expsyms_cmds to use + soroot="$soname" + soname=`echo $soroot | sed -e 's/^.*\///'` + newlib="libimp-`echo $soname | sed 's/^lib//;s/\.dll$//'`.a" + + # If the library has no export list, then create one now + if test -f "$output_objdir/$soname-def"; then : + else + $show "extracting exported symbol list from \`$soname'" + save_ifs="$IFS"; IFS='~' + eval cmds=\"$extract_expsyms_cmds\" + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + fi + + # Create $newlib + if test -f "$output_objdir/$newlib"; then :; else + $show "generating import library for \`$soname'" + save_ifs="$IFS"; IFS='~' + eval cmds=\"$old_archive_from_expsyms_cmds\" + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? 
+ done + IFS="$save_ifs" + fi + # make sure the library variables are pointing to the new library + dir=$output_objdir + linklib=$newlib + fi # test -n $old_archive_from_expsyms_cmds + + if test $linkmode = prog || test "$mode" != relink; then + add_shlibpath= + add_dir= + add= + lib_linked=yes + case $hardcode_action in + immediate | unsupported) + if test "$hardcode_direct" = no; then + add="$dir/$linklib" + elif test "$hardcode_minus_L" = no; then + case $host in + *-*-sunos*) add_shlibpath="$dir" ;; + esac + add_dir="-L$dir" + add="-l$name" + elif test "$hardcode_shlibpath_var" = no; then + add_shlibpath="$dir" + add="-l$name" + else + lib_linked=no + fi + ;; + relink) + if test "$hardcode_direct" = yes; then + add="$dir/$linklib" + elif test "$hardcode_minus_L" = yes; then + add_dir="-L$dir" + add="-l$name" + elif test "$hardcode_shlibpath_var" = yes; then + add_shlibpath="$dir" + add="-l$name" + else + lib_linked=no + fi + ;; + *) lib_linked=no ;; + esac + + if test "$lib_linked" != yes; then + $echo "$modename: configuration error: unsupported hardcode properties" + exit 1 + fi + + if test -n "$add_shlibpath"; then + case :$compile_shlibpath: in + *":$add_shlibpath:"*) ;; + *) compile_shlibpath="$compile_shlibpath$add_shlibpath:" ;; + esac + fi + if test $linkmode = prog; then + test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs" + test -n "$add" && compile_deplibs="$add $compile_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + if test "$hardcode_direct" != yes && \ + test "$hardcode_minus_L" != yes && \ + test "$hardcode_shlibpath_var" = yes; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;; + esac + fi + fi + fi + + if test $linkmode = prog || test "$mode" = relink; then + add_shlibpath= + add_dir= + add= + # Finalize command for both is simple: just hardcode it. 
+ if test "$hardcode_direct" = yes; then + add="$libdir/$linklib" + elif test "$hardcode_minus_L" = yes; then + add_dir="-L$libdir" + add="-l$name" + elif test "$hardcode_shlibpath_var" = yes; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;; + esac + add="-l$name" + else + # We cannot seem to hardcode it, guess we'll fake it. + add_dir="-L$libdir" + add="-l$name" + fi + + if test $linkmode = prog; then + test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs" + test -n "$add" && finalize_deplibs="$add $finalize_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + fi + fi + elif test $linkmode = prog; then + if test "$alldeplibs" = yes && + { test "$deplibs_check_method" = pass_all || + { test "$build_libtool_libs" = yes && + test -n "$library_names"; }; }; then + # We only need to search for static libraries + continue + fi + + # Try to link the static library + # Here we assume that one of hardcode_direct or hardcode_minus_L + # is not unsupported. This is valid on all known static and + # shared platforms. + if test "$hardcode_direct" != unsupported; then + test -n "$old_library" && linklib="$old_library" + compile_deplibs="$dir/$linklib $compile_deplibs" + finalize_deplibs="$dir/$linklib $finalize_deplibs" + else + compile_deplibs="-l$name -L$dir $compile_deplibs" + finalize_deplibs="-l$name -L$dir $finalize_deplibs" + fi + elif test "$build_libtool_libs" = yes; then + # Not a shared library + if test "$deplibs_check_method" != pass_all; then + # We're trying link a shared library against a static one + # but the system doesn't support it. + + # Just print a warning and add the library to dependency_libs so + # that the program can be linked against the static library. + echo + echo "*** Warning: This system can not link to static lib archive $lib." 
+ echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have." + if test "$module" = yes; then + echo "*** But as you try to build a module library, libtool will still create " + echo "*** a static module, that should work as long as the dlopening application" + echo "*** is linked with the -dlopen flag to resolve symbols at runtime." + if test -z "$global_symbol_pipe"; then + echo + echo "*** However, this would only work if libtool was able to extract symbol" + echo "*** lists from a program, using \`nm' or equivalent, but libtool could" + echo "*** not find such a program. So, this module is probably useless." + echo "*** \`nm' from GNU binutils and a full rebuild may help." + fi + if test "$build_old_libs" = no; then + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + fi + else + convenience="$convenience $dir/$old_library" + old_convenience="$old_convenience $dir/$old_library" + deplibs="$dir/$old_library $deplibs" + link_static=yes + fi + fi # link shared/static library? + + if test $linkmode = lib; then + if test -n "$dependency_libs" && + { test $hardcode_into_libs != yes || test $build_old_libs = yes || + test $link_static = yes; }; then + # Extract -R from dependency_libs + temp_deplibs= + for libdir in $dependency_libs; do + case $libdir in + -R*) temp_xrpath=`$echo "X$libdir" | $Xsed -e 's/^-R//'` + case " $xrpath " in + *" $temp_xrpath "*) ;; + *) xrpath="$xrpath $temp_xrpath";; + esac;; + *) temp_deplibs="$temp_deplibs $libdir";; + esac + done + dependency_libs="$temp_deplibs" + fi + + newlib_search_path="$newlib_search_path $absdir" + # Link against this library + test "$link_static" = no && newdependency_libs="$abs_ladir/$laname $newdependency_libs" + # ... 
and its dependency_libs + tmp_libs= + for deplib in $dependency_libs; do + newdependency_libs="$deplib $newdependency_libs" + if test "X$duplicate_deps" = "Xyes" ; then + case "$tmp_libs " in + *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; + esac + fi + tmp_libs="$tmp_libs $deplib" + done + + if test $link_all_deplibs != no; then + # Add the search paths of all dependency libraries + for deplib in $dependency_libs; do + case $deplib in + -L*) path="$deplib" ;; + *.la) + dir=`$echo "X$deplib" | $Xsed -e 's%/[^/]*$%%'` + test "X$dir" = "X$deplib" && dir="." + # We need an absolute path. + case $dir in + [\\/]* | [A-Za-z]:[\\/]*) absdir="$dir" ;; + *) + absdir=`cd "$dir" && pwd` + if test -z "$absdir"; then + $echo "$modename: warning: cannot determine absolute directory name of \`$dir'" 1>&2 + absdir="$dir" + fi + ;; + esac + if grep "^installed=no" $deplib > /dev/null; then + path="-L$absdir/$objdir" + else + eval libdir=`sed -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` + if test -z "$libdir"; then + $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2 + exit 1 + fi + if test "$absdir" != "$libdir"; then + $echo "$modename: warning: \`$deplib' seems to be moved" 1>&2 + fi + path="-L$absdir" + fi + ;; + *) continue ;; + esac + case " $deplibs " in + *" $path "*) ;; + *) deplibs="$deplibs $path" ;; + esac + done + fi # link_all_deplibs != no + fi # linkmode = lib + done # for deplib in $libs + if test $pass = dlpreopen; then + # Link the dlpreopened libraries before other libraries + for deplib in $save_deplibs; do + deplibs="$deplib $deplibs" + done + fi + if test $pass != dlopen; then + test $pass != scan && dependency_libs="$newdependency_libs" + if test $pass != conv; then + # Make sure lib_search_path contains only unique directories. 
+ lib_search_path= + for dir in $newlib_search_path; do + case "$lib_search_path " in + *" $dir "*) ;; + *) lib_search_path="$lib_search_path $dir" ;; + esac + done + newlib_search_path= + fi + + if test "$linkmode,$pass" != "prog,link"; then + vars="deplibs" + else + vars="compile_deplibs finalize_deplibs" + fi + for var in $vars dependency_libs; do + # Add libraries to $var in reverse order + eval tmp_libs=\"\$$var\" + new_libs= + for deplib in $tmp_libs; do + case $deplib in + -L*) new_libs="$deplib $new_libs" ;; + *) + case " $specialdeplibs " in + *" $deplib "*) new_libs="$deplib $new_libs" ;; + *) + case " $new_libs " in + *" $deplib "*) ;; + *) new_libs="$deplib $new_libs" ;; + esac + ;; + esac + ;; + esac + done + tmp_libs= + for deplib in $new_libs; do + case $deplib in + -L*) + case " $tmp_libs " in + *" $deplib "*) ;; + *) tmp_libs="$tmp_libs $deplib" ;; + esac + ;; + *) tmp_libs="$tmp_libs $deplib" ;; + esac + done + eval $var=\"$tmp_libs\" + done # for var + fi + if test "$pass" = "conv" && + { test "$linkmode" = "lib" || test "$linkmode" = "prog"; }; then + libs="$deplibs" # reset libs + deplibs= + fi + done # for pass + if test $linkmode = prog; then + dlfiles="$newdlfiles" + dlprefiles="$newdlprefiles" + fi + + case $linkmode in + oldlib) + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + $echo "$modename: warning: \`-dlopen' is ignored for archives" 1>&2 + fi + + if test -n "$rpath"; then + $echo "$modename: warning: \`-rpath' is ignored for archives" 1>&2 + fi + + if test -n "$xrpath"; then + $echo "$modename: warning: \`-R' is ignored for archives" 1>&2 + fi + + if test -n "$vinfo"; then + $echo "$modename: warning: \`-version-info' is ignored for archives" 1>&2 + fi + + if test -n "$release"; then + $echo "$modename: warning: \`-release' is ignored for archives" 1>&2 + fi + + if test -n "$export_symbols" || test -n "$export_symbols_regex"; then + $echo "$modename: warning: \`-export-symbols' is ignored for archives" 1>&2 + fi + + 
# Now set the variables for building old libraries. + build_libtool_libs=no + oldlibs="$output" + objs="$objs$old_deplibs" + ;; + + lib) + # Make sure we only generate libraries of the form `libNAME.la'. + case $outputname in + lib*) + name=`$echo "X$outputname" | $Xsed -e 's/\.la$//' -e 's/^lib//'` + eval libname=\"$libname_spec\" + ;; + *) + if test "$module" = no; then + $echo "$modename: libtool library \`$output' must begin with \`lib'" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + if test "$need_lib_prefix" != no; then + # Add the "lib" prefix for modules if required + name=`$echo "X$outputname" | $Xsed -e 's/\.la$//'` + eval libname=\"$libname_spec\" + else + libname=`$echo "X$outputname" | $Xsed -e 's/\.la$//'` + fi + ;; + esac + + if test -n "$objs"; then + if test "$deplibs_check_method" != pass_all; then + $echo "$modename: cannot build libtool library \`$output' from non-libtool objects on this host:$objs" 2>&1 + exit 1 + else + echo + echo "*** Warning: Linking the shared library $output against the non-libtool" + echo "*** objects $objs is not portable!" + libobjs="$libobjs $objs" + fi + fi + + if test "$dlself" != no; then + $echo "$modename: warning: \`-dlopen self' is ignored for libtool libraries" 1>&2 + fi + + set dummy $rpath + if test $# -gt 2; then + $echo "$modename: warning: ignoring multiple \`-rpath's for a libtool library" 1>&2 + fi + install_libdir="$2" + + oldlibs= + if test -z "$rpath"; then + if test "$build_libtool_libs" = yes; then + # Building a libtool convenience library. + libext=al + oldlibs="$output_objdir/$libname.$libext $oldlibs" + build_libtool_libs=convenience + build_old_libs=yes + fi + + if test -n "$vinfo"; then + $echo "$modename: warning: \`-version-info' is ignored for convenience libraries" 1>&2 + fi + + if test -n "$release"; then + $echo "$modename: warning: \`-release' is ignored for convenience libraries" 1>&2 + fi + else + + # Parse the version information argument. 
+ save_ifs="$IFS"; IFS=':' + set dummy $vinfo 0 0 0 + IFS="$save_ifs" + + if test -n "$8"; then + $echo "$modename: too many parameters to \`-version-info'" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + current="$2" + revision="$3" + age="$4" + + # Check that each of the things are valid numbers. + case $current in + 0 | [1-9] | [1-9][0-9] | [1-9][0-9][0-9]) ;; + *) + $echo "$modename: CURRENT \`$current' is not a nonnegative integer" 1>&2 + $echo "$modename: \`$vinfo' is not valid version information" 1>&2 + exit 1 + ;; + esac + + case $revision in + 0 | [1-9] | [1-9][0-9] | [1-9][0-9][0-9]) ;; + *) + $echo "$modename: REVISION \`$revision' is not a nonnegative integer" 1>&2 + $echo "$modename: \`$vinfo' is not valid version information" 1>&2 + exit 1 + ;; + esac + + case $age in + 0 | [1-9] | [1-9][0-9] | [1-9][0-9][0-9]) ;; + *) + $echo "$modename: AGE \`$age' is not a nonnegative integer" 1>&2 + $echo "$modename: \`$vinfo' is not valid version information" 1>&2 + exit 1 + ;; + esac + + if test $age -gt $current; then + $echo "$modename: AGE \`$age' is greater than the current interface number \`$current'" 1>&2 + $echo "$modename: \`$vinfo' is not valid version information" 1>&2 + exit 1 + fi + + # Calculate the version variables. + major= + versuffix= + verstring= + case $version_type in + none) ;; + + darwin) + # Like Linux, but with the current version available in + # verstring for coding it into the library header + major=.`expr $current - $age` + versuffix="$major.$age.$revision" + # Darwin ld doesn't like 0 for these options... 
+ minor_current=`expr $current + 1` + verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" + ;; + + freebsd-aout) + major=".$current" + versuffix=".$current.$revision"; + ;; + + freebsd-elf) + major=".$current" + versuffix=".$current"; + ;; + + irix | nonstopux) + case $version_type in + nonstopux) verstring_prefix=nonstopux ;; + *) verstring_prefix=sgi ;; + esac + verstring="$verstring_prefix$major.$revision" + + major=`expr $current - $age + 1` + + # Add in all the interfaces that we are compatible with. + loop=$revision + while test $loop != 0; do + iface=`expr $revision - $loop` + loop=`expr $loop - 1` + verstring="$verstring_prefix$major.$iface:$verstring" + done + + # Before this point, $major must not contain `.'. + major=.$major + versuffix="$major.$revision" + ;; + + linux) + major=.`expr $current - $age` + versuffix="$major.$age.$revision" + ;; + + osf) + major=`expr $current - $age` + versuffix=".$current.$age.$revision" + verstring="$current.$age.$revision" + + # Add in all the interfaces that we are compatible with. + loop=$age + while test $loop != 0; do + iface=`expr $current - $loop` + loop=`expr $loop - 1` + verstring="$verstring:${iface}.0" + done + + # Make executables depend on our current version. + verstring="$verstring:${current}.0" + ;; + + sunos) + major=".$current" + versuffix=".$current.$revision" + ;; + + windows) + # Use '-' rather than '.', since we only want one + # extension on DOS 8.3 filesystems. + major=`expr $current - $age` + versuffix="-$major" + ;; + + *) + $echo "$modename: unknown library version type \`$version_type'" 1>&2 + echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2 + exit 1 + ;; + esac + + # Clear the version info if we defaulted, and they specified a release. 
+ if test -z "$vinfo" && test -n "$release"; then + major= + verstring="0.0" + case $version_type in + darwin) + # we can't check for "0.0" in archive_cmds due to quoting + # problems, so we reset it completely + verstring="" + ;; + *) + verstring="0.0" + ;; + esac + if test "$need_version" = no; then + versuffix= + else + versuffix=".0.0" + fi + fi + + # Remove version info from name if versioning should be avoided + if test "$avoid_version" = yes && test "$need_version" = no; then + major= + versuffix= + verstring="" + fi + + # Check to see if the archive will have undefined symbols. + if test "$allow_undefined" = yes; then + if test "$allow_undefined_flag" = unsupported; then + $echo "$modename: warning: undefined symbols not allowed in $host shared libraries" 1>&2 + build_libtool_libs=no + build_old_libs=yes + fi + else + # Don't allow undefined symbols. + allow_undefined_flag="$no_undefined_flag" + fi + fi + + if test "$mode" != relink; then + # Remove our outputs. + $show "${rm}r $output_objdir/$outputname $output_objdir/$libname.* $output_objdir/${libname}${release}.*" + $run ${rm}r $output_objdir/$outputname $output_objdir/$libname.* $output_objdir/${libname}${release}.* + fi + + # Now set the variables for building old libraries. + if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then + oldlibs="$oldlibs $output_objdir/$libname.$libext" + + # Transform .lo files to .o files. + oldobjs="$objs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e "$lo2o" | $NL2SP` + fi + + # Eliminate all temporary directories. + for path in $notinst_path; do + lib_search_path=`echo "$lib_search_path " | sed -e 's% $path % %g'` + deplibs=`echo "$deplibs " | sed -e 's% -L$path % %g'` + dependency_libs=`echo "$dependency_libs " | sed -e 's% -L$path % %g'` + done + + if test -n "$xrpath"; then + # If the user specified any rpath flags, then add them. 
+ temp_xrpath= + for libdir in $xrpath; do + temp_xrpath="$temp_xrpath -R$libdir" + case "$finalize_rpath " in + *" $libdir "*) ;; + *) finalize_rpath="$finalize_rpath $libdir" ;; + esac + done + if test $hardcode_into_libs != yes || test $build_old_libs = yes; then + dependency_libs="$temp_xrpath $dependency_libs" + fi + fi + + # Make sure dlfiles contains only unique files that won't be dlpreopened + old_dlfiles="$dlfiles" + dlfiles= + for lib in $old_dlfiles; do + case " $dlprefiles $dlfiles " in + *" $lib "*) ;; + *) dlfiles="$dlfiles $lib" ;; + esac + done + + # Make sure dlprefiles contains only unique files + old_dlprefiles="$dlprefiles" + dlprefiles= + for lib in $old_dlprefiles; do + case "$dlprefiles " in + *" $lib "*) ;; + *) dlprefiles="$dlprefiles $lib" ;; + esac + done + + if test "$build_libtool_libs" = yes; then + if test -n "$rpath"; then + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos*) + # these systems don't actually have a c library (as such)! + ;; + *-*-rhapsody* | *-*-darwin1.[012]) + # Rhapsody C library is in the System framework + deplibs="$deplibs -framework System" + ;; + *-*-netbsd*) + # Don't link with libc until the a.out ld.so is fixed. + ;; + *-*-openbsd* | *-*-freebsd*) + # Do not include libc due to us having libc/libc_r. + ;; + *) + # Add libc to deplibs on all other systems if necessary. + if test $build_libtool_need_lc = "yes"; then + deplibs="$deplibs -lc" + fi + ;; + esac + fi + + # Transform deplibs into only deplibs that can be linked in shared. + name_save=$name + libname_save=$libname + release_save=$release + versuffix_save=$versuffix + major_save=$major + # I'm not sure if I'm treating the release correctly. I think + # release should show up in the -l (ie -lgmp5) so we don't want to + # add it in twice. Is that correct? + release="" + versuffix="" + major="" + newdeplibs= + droppeddeps=no + case $deplibs_check_method in + pass_all) + # Don't check for shared/static. Everything works. 
+ # This might be a little naive. We might want to check + # whether the library exists or not. But this is on + # osf3 & osf4 and I'm not really sure... Just + # implementing what was already the behaviour. + newdeplibs=$deplibs + ;; + test_compile) + # This code stresses the "libraries are programs" paradigm to its + # limits. Maybe even breaks it. We compile a program, linking it + # against the deplibs as a proxy for the library. Then we can check + # whether they linked in statically or dynamically with ldd. + $rm conftest.c + cat > conftest.c </dev/null` + for potent_lib in $potential_libs; do + # Follow soft links. + if ls -lLd "$potent_lib" 2>/dev/null \ + | grep " -> " >/dev/null; then + continue + fi + # The statement above tries to avoid entering an + # endless loop below, in case of cyclic links. + # We might still enter an endless loop, since a link + # loop can be closed while we follow links, + # but so what? + potlib="$potent_lib" + while test -h "$potlib" 2>/dev/null; do + potliblink=`ls -ld $potlib | sed 's/.* -> //'` + case $potliblink in + [\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";; + *) potlib=`$echo "X$potlib" | $Xsed -e 's,[^/]*$,,'`"$potliblink";; + esac + done + if eval $file_magic_cmd \"\$potlib\" 2>/dev/null \ + | sed 10q \ + | egrep "$file_magic_regex" > /dev/null; then + newdeplibs="$newdeplibs $a_deplib" + a_deplib="" + break 2 + fi + done + done + if test -n "$a_deplib" ; then + droppeddeps=yes + echo + echo "*** Warning: linker path does not have real file for library $a_deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because I did check the linker path looking for a file starting" + if test -z "$potlib" ; then + echo "*** with $libname but no candidates were found. 
(...for file magic test)" + else + echo "*** with $libname and none of the candidates passed a file format test" + echo "*** using a file magic. Last file checked: $potlib" + fi + fi + else + # Add a -L argument. + newdeplibs="$newdeplibs $a_deplib" + fi + done # Gone through all deplibs. + ;; + match_pattern*) + set dummy $deplibs_check_method + match_pattern_regex=`expr "$deplibs_check_method" : "$2 \(.*\)"` + for a_deplib in $deplibs; do + name="`expr $a_deplib : '-l\(.*\)'`" + # If $name is empty we are operating on a -L argument. + if test -n "$name" && test "$name" != "0"; then + libname=`eval \\$echo \"$libname_spec\"` + for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do + potential_libs=`ls $i/$libname[.-]* 2>/dev/null` + for potent_lib in $potential_libs; do + potlib="$potent_lib" # see symlink-check below in file_magic test + if eval echo \"$potent_lib\" 2>/dev/null \ + | sed 10q \ + | egrep "$match_pattern_regex" > /dev/null; then + newdeplibs="$newdeplibs $a_deplib" + a_deplib="" + break 2 + fi + done + done + if test -n "$a_deplib" ; then + droppeddeps=yes + echo + echo "*** Warning: linker path does not have real file for library $a_deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because I did check the linker path looking for a file starting" + if test -z "$potlib" ; then + echo "*** with $libname but no candidates were found. (...for regex pattern test)" + else + echo "*** with $libname and none of the candidates passed a file format test" + echo "*** using a regex pattern. Last file checked: $potlib" + fi + fi + else + # Add a -L argument. + newdeplibs="$newdeplibs $a_deplib" + fi + done # Gone through all deplibs. 
+ ;; + none | unknown | *) + newdeplibs="" + if $echo "X $deplibs" | $Xsed -e 's/ -lc$//' \ + -e 's/ -[LR][^ ]*//g' -e 's/[ ]//g' | + grep . >/dev/null; then + echo + if test "X$deplibs_check_method" = "Xnone"; then + echo "*** Warning: inter-library dependencies are not supported in this platform." + else + echo "*** Warning: inter-library dependencies are not known to be supported." + fi + echo "*** All declared inter-library dependencies are being dropped." + droppeddeps=yes + fi + ;; + esac + versuffix=$versuffix_save + major=$major_save + release=$release_save + libname=$libname_save + name=$name_save + + case $host in + *-*-rhapsody* | *-*-darwin1.[012]) + # On Rhapsody replace the C library is the System framework + newdeplibs=`$echo "X $newdeplibs" | $Xsed -e 's/ -lc / -framework System /'` + ;; + esac + + if test "$droppeddeps" = yes; then + if test "$module" = yes; then + echo + echo "*** Warning: libtool could not satisfy all declared inter-library" + echo "*** dependencies of module $libname. Therefore, libtool will create" + echo "*** a static module, that should work as long as the dlopening" + echo "*** application is linked with the -dlopen flag." + if test -z "$global_symbol_pipe"; then + echo + echo "*** However, this would only work if libtool was able to extract symbol" + echo "*** lists from a program, using \`nm' or equivalent, but libtool could" + echo "*** not find such a program. So, this module is probably useless." + echo "*** \`nm' from GNU binutils and a full rebuild may help." + fi + if test "$build_old_libs" = no; then + oldlibs="$output_objdir/$libname.$libext" + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + else + echo "*** The inter-library dependencies that have been dropped here will be" + echo "*** automatically added whenever a program is linked with this library" + echo "*** or is declared to -dlopen it." 
+ + if test $allow_undefined = no; then + echo + echo "*** Since this library must not contain undefined symbols," + echo "*** because either the platform does not support them or" + echo "*** it was explicitly requested with -no-undefined," + echo "*** libtool will only create a static version of it." + if test "$build_old_libs" = no; then + oldlibs="$output_objdir/$libname.$libext" + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + fi + fi + fi + # Done checking deplibs! + deplibs=$newdeplibs + fi + + # All the library-specific variables (install_libdir is set above). + library_names= + old_library= + dlname= + + # Test again, we may have decided not to build it any more + if test "$build_libtool_libs" = yes; then + if test $hardcode_into_libs = yes; then + # Hardcode the library paths + hardcode_libdirs= + dep_rpath= + rpath="$finalize_rpath" + test "$mode" != relink && rpath="$compile_rpath$rpath" + for libdir in $rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs. + case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + dep_rpath="$dep_rpath $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) perm_rpath="$perm_rpath $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + eval dep_rpath=\"$hardcode_libdir_flag_spec\" + fi + if test -n "$runpath_var" && test -n "$perm_rpath"; then + # We should set the runpath_var. 
+ rpath= + for dir in $perm_rpath; do + rpath="$rpath$dir:" + done + eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var" + fi + test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs" + fi + + shlibpath="$finalize_shlibpath" + test "$mode" != relink && shlibpath="$compile_shlibpath$shlibpath" + if test -n "$shlibpath"; then + eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var" + fi + + # Get the real and link names of the library. + eval library_names=\"$library_names_spec\" + set dummy $library_names + realname="$2" + shift; shift + + if test -n "$soname_spec"; then + eval soname=\"$soname_spec\" + else + soname="$realname" + fi + test -z "$dlname" && dlname=$soname + + lib="$output_objdir/$realname" + for link + do + linknames="$linknames $link" + done + + # Ensure that we have .o objects for linkers which dislike .lo + # (e.g. aix) in case we are running --disable-static + for obj in $libobjs; do + xdir=`$echo "X$obj" | $Xsed -e 's%/[^/]*$%%'` + if test "X$xdir" = "X$obj"; then + xdir="." + else + xdir="$xdir" + fi + baseobj=`$echo "X$obj" | $Xsed -e 's%^.*/%%'` + oldobj=`$echo "X$baseobj" | $Xsed -e "$lo2o"` + if test ! -f $xdir/$oldobj; then + $show "(cd $xdir && ${LN_S} $baseobj $oldobj)" + $run eval '(cd $xdir && ${LN_S} $baseobj $oldobj)' || exit $? + fi + done + + # Use standard objects if they are pic + test -z "$pic_flag" && libobjs=`$echo "X$libobjs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then + $show "generating symbol list for \`$libname.la'" + export_symbols="$output_objdir/$libname.exp" + $run $rm $export_symbols + eval cmds=\"$export_symbols_cmds\" + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? 
+ done + IFS="$save_ifs" + if test -n "$export_symbols_regex"; then + $show "egrep -e \"$export_symbols_regex\" \"$export_symbols\" > \"${export_symbols}T\"" + $run eval 'egrep -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' + $show "$mv \"${export_symbols}T\" \"$export_symbols\"" + $run eval '$mv "${export_symbols}T" "$export_symbols"' + fi + fi + fi + + if test -n "$export_symbols" && test -n "$include_expsyms"; then + $run eval '$echo "X$include_expsyms" | $SP2NL >> "$export_symbols"' + fi + + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec"; then + eval libobjs=\"\$libobjs $whole_archive_flag_spec\" + else + gentop="$output_objdir/${outputname}x" + $show "${rm}r $gentop" + $run ${rm}r "$gentop" + $show "mkdir $gentop" + $run mkdir "$gentop" + status=$? + if test $status -ne 0 && test ! -d "$gentop"; then + exit $status + fi + generated="$generated $gentop" + + for xlib in $convenience; do + # Extract the objects. + case $xlib in + [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;; + *) xabs=`pwd`"/$xlib" ;; + esac + xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'` + xdir="$gentop/$xlib" + + $show "${rm}r $xdir" + $run ${rm}r "$xdir" + $show "mkdir $xdir" + $run mkdir "$xdir" + status=$? + if test $status -ne 0 && test ! -d "$xdir"; then + exit $status + fi + $show "(cd $xdir && $AR x $xabs)" + $run eval "(cd \$xdir && $AR x \$xabs)" || exit $? + + libobjs="$libobjs "`find $xdir -name \*.o -print -o -name \*.lo -print | $NL2SP` + done + fi + fi + + if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then + eval flag=\"$thread_safe_flag_spec\" + linker_flags="$linker_flags $flag" + fi + + # Make a backup of the uninstalled library when relinking + if test "$mode" = relink; then + $run eval '(cd $output_objdir && $rm ${realname}U && $mv $realname ${realname}U)' || exit $? + fi + + # Do each of the archive commands. 
+ if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then + eval cmds=\"$archive_expsym_cmds\" + else + eval cmds=\"$archive_cmds\" + fi + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + + # Restore the uninstalled library and exit + if test "$mode" = relink; then + $run eval '(cd $output_objdir && $rm ${realname}T && $mv $realname ${realname}T && $mv "$realname"U $realname)' || exit $? + exit 0 + fi + + # Create links to the real library. + for linkname in $linknames; do + if test "$realname" != "$linkname"; then + $show "(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)" + $run eval '(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)' || exit $? + fi + done + + # If -module or -export-dynamic was specified, set the dlname. + if test "$module" = yes || test "$export_dynamic" = yes; then + # On all known operating systems, these are identical. + dlname="$soname" + fi + fi + ;; + + obj) + if test -n "$deplibs"; then + $echo "$modename: warning: \`-l' and \`-L' are ignored for objects" 1>&2 + fi + + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + $echo "$modename: warning: \`-dlopen' is ignored for objects" 1>&2 + fi + + if test -n "$rpath"; then + $echo "$modename: warning: \`-rpath' is ignored for objects" 1>&2 + fi + + if test -n "$xrpath"; then + $echo "$modename: warning: \`-R' is ignored for objects" 1>&2 + fi + + if test -n "$vinfo"; then + $echo "$modename: warning: \`-version-info' is ignored for objects" 1>&2 + fi + + if test -n "$release"; then + $echo "$modename: warning: \`-release' is ignored for objects" 1>&2 + fi + + case $output in + *.lo) + if test -n "$objs$old_deplibs"; then + $echo "$modename: cannot build library object \`$output' from non-libtool objects" 1>&2 + exit 1 + fi + libobj="$output" + obj=`$echo "X$output" | $Xsed -e "$lo2o"` + ;; + *) + libobj= + obj="$output" + ;; + esac + + # Delete 
the old objects. + $run $rm $obj $libobj + + # Objects from convenience libraries. This assumes + # single-version convenience libraries. Whenever we create + # different ones for PIC/non-PIC, this we'll have to duplicate + # the extraction. + reload_conv_objs= + gentop= + # reload_cmds runs $LD directly, so let us get rid of + # -Wl from whole_archive_flag_spec + wl= + + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec"; then + eval reload_conv_objs=\"\$reload_objs $whole_archive_flag_spec\" + else + gentop="$output_objdir/${obj}x" + $show "${rm}r $gentop" + $run ${rm}r "$gentop" + $show "mkdir $gentop" + $run mkdir "$gentop" + status=$? + if test $status -ne 0 && test ! -d "$gentop"; then + exit $status + fi + generated="$generated $gentop" + + for xlib in $convenience; do + # Extract the objects. + case $xlib in + [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;; + *) xabs=`pwd`"/$xlib" ;; + esac + xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'` + xdir="$gentop/$xlib" + + $show "${rm}r $xdir" + $run ${rm}r "$xdir" + $show "mkdir $xdir" + $run mkdir "$xdir" + status=$? + if test $status -ne 0 && test ! -d "$xdir"; then + exit $status + fi + $show "(cd $xdir && $AR x $xabs)" + $run eval "(cd \$xdir && $AR x \$xabs)" || exit $? + + reload_conv_objs="$reload_objs "`find $xdir -name \*.o -print -o -name \*.lo -print | $NL2SP` + done + fi + fi + + # Create the old-style object. + reload_objs="$objs$old_deplibs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}$'/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP`" $reload_conv_objs" ### testsuite: skip nested quoting test + + output="$obj" + eval cmds=\"$reload_cmds\" + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + + # Exit if we aren't doing a library object file. 
+ if test -z "$libobj"; then + if test -n "$gentop"; then + $show "${rm}r $gentop" + $run ${rm}r $gentop + fi + + exit 0 + fi + + if test "$build_libtool_libs" != yes; then + if test -n "$gentop"; then + $show "${rm}r $gentop" + $run ${rm}r $gentop + fi + + # Create an invalid libtool object if no PIC, so that we don't + # accidentally link it into a program. + $show "echo timestamp > $libobj" + $run eval "echo timestamp > $libobj" || exit $? + exit 0 + fi + + if test -n "$pic_flag" || test "$pic_mode" != default; then + # Only do commands if we really have different PIC objects. + reload_objs="$libobjs $reload_conv_objs" + output="$libobj" + eval cmds=\"$reload_cmds\" + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + else + # Just create a symlink. + $show $rm $libobj + $run $rm $libobj + xdir=`$echo "X$libobj" | $Xsed -e 's%/[^/]*$%%'` + if test "X$xdir" = "X$libobj"; then + xdir="." + else + xdir="$xdir" + fi + baseobj=`$echo "X$libobj" | $Xsed -e 's%^.*/%%'` + oldobj=`$echo "X$baseobj" | $Xsed -e "$lo2o"` + $show "(cd $xdir && $LN_S $oldobj $baseobj)" + $run eval '(cd $xdir && $LN_S $oldobj $baseobj)' || exit $? + fi + + if test -n "$gentop"; then + $show "${rm}r $gentop" + $run ${rm}r $gentop + fi + + exit 0 + ;; + + prog) + case $host in + *cygwin*) output=`echo $output | sed -e 's,.exe$,,;s,$,.exe,'` ;; + esac + if test -n "$vinfo"; then + $echo "$modename: warning: \`-version-info' is ignored for programs" 1>&2 + fi + + if test -n "$release"; then + $echo "$modename: warning: \`-release' is ignored for programs" 1>&2 + fi + + if test "$preload" = yes; then + if test "$dlopen_support" = unknown && test "$dlopen_self" = unknown && + test "$dlopen_self_static" = unknown; then + $echo "$modename: warning: \`AC_LIBTOOL_DLOPEN' not used. Assuming no dlopen support." 
+ fi + fi + + case $host in + *-*-rhapsody* | *-*-darwin1.[012]) + # On Rhapsody replace the C library is the System framework + compile_deplibs=`$echo "X $compile_deplibs" | $Xsed -e 's/ -lc / -framework System /'` + finalize_deplibs=`$echo "X $finalize_deplibs" | $Xsed -e 's/ -lc / -framework System /'` + ;; + esac + + compile_command="$compile_command $compile_deplibs" + finalize_command="$finalize_command $finalize_deplibs" + + if test -n "$rpath$xrpath"; then + # If the user specified any rpath flags, then add them. + for libdir in $rpath $xrpath; do + # This is the magic to use -rpath. + case "$finalize_rpath " in + *" $libdir "*) ;; + *) finalize_rpath="$finalize_rpath $libdir" ;; + esac + done + fi + + # Now hardcode the library paths + rpath= + hardcode_libdirs= + for libdir in $compile_rpath $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs. + case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + rpath="$rpath $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) perm_rpath="$perm_rpath $libdir" ;; + esac + fi + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*) + case :$dllsearchpath: in + *":$libdir:"*) ;; + *) dllsearchpath="$dllsearchpath:$libdir";; + esac + ;; + esac + done + # Substitute the hardcoded libdirs into the rpath. 
+ if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + compile_rpath="$rpath" + + rpath= + hardcode_libdirs= + for libdir in $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs. + case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + rpath="$rpath $flag" + fi + elif test -n "$runpath_var"; then + case "$finalize_perm_rpath " in + *" $libdir "*) ;; + *) finalize_perm_rpath="$finalize_perm_rpath $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + finalize_rpath="$rpath" + + if test -n "$libobjs" && test "$build_old_libs" = yes; then + # Transform all the library objects into standard objects. + compile_command=`$echo "X$compile_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` + finalize_command=`$echo "X$finalize_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` + fi + + dlsyms= + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + if test -n "$NM" && test -n "$global_symbol_pipe"; then + dlsyms="${outputname}S.c" + else + $echo "$modename: not configured to extract global symbols from dlpreopened files" 1>&2 + fi + fi + + if test -n "$dlsyms"; then + case $dlsyms in + "") ;; + *.c) + # Discover the nlist of each of the dlfiles. 
+ nlist="$output_objdir/${outputname}.nm" + + $show "$rm $nlist ${nlist}S ${nlist}T" + $run $rm "$nlist" "${nlist}S" "${nlist}T" + + # Parse the name list into a source file. + $show "creating $output_objdir/$dlsyms" + + test -z "$run" && $echo > "$output_objdir/$dlsyms" "\ +/* $dlsyms - symbol resolution table for \`$outputname' dlsym emulation. */ +/* Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP */ + +#ifdef __cplusplus +extern \"C\" { +#endif + +/* Prevent the only kind of declaration conflicts we can make. */ +#define lt_preloaded_symbols some_other_symbol + +/* External symbol declarations for the compiler. */\ +" + + if test "$dlself" = yes; then + $show "generating symbol list for \`$output'" + + test -z "$run" && $echo ': @PROGRAM@ ' > "$nlist" + + # Add our own program objects to the symbol list. + progfiles=`$echo "X$objs$old_deplibs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` + for arg in $progfiles; do + $show "extracting global C symbols from \`$arg'" + $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'" + done + + if test -n "$exclude_expsyms"; then + $run eval 'egrep -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T' + $run eval '$mv "$nlist"T "$nlist"' + fi + + if test -n "$export_symbols_regex"; then + $run eval 'egrep -e "$export_symbols_regex" "$nlist" > "$nlist"T' + $run eval '$mv "$nlist"T "$nlist"' + fi + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + export_symbols="$output_objdir/$output.exp" + $run $rm $export_symbols + $run eval "sed -n -e '/^: @PROGRAM@$/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"' + else + $run eval "sed -e 's/\([][.*^$]\)/\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$output.exp"' + $run eval 'grep -f "$output_objdir/$output.exp" < "$nlist" > "$nlist"T' + $run eval 'mv "$nlist"T "$nlist"' + fi + fi + + for arg in $dlprefiles; do + $show "extracting global C symbols from \`$arg'" + name=`echo "$arg" | sed -e 's%^.*/%%'` + $run eval 'echo ": 
$name " >> "$nlist"' + $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'" + done + + if test -z "$run"; then + # Make sure we have at least an empty file. + test -f "$nlist" || : > "$nlist" + + if test -n "$exclude_expsyms"; then + egrep -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T + $mv "$nlist"T "$nlist" + fi + + # Try sorting and uniquifying the output. + if grep -v "^: " < "$nlist" | sort +2 | uniq > "$nlist"S; then + : + else + grep -v "^: " < "$nlist" > "$nlist"S + fi + + if test -f "$nlist"S; then + eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$dlsyms"' + else + echo '/* NONE */' >> "$output_objdir/$dlsyms" + fi + + $echo >> "$output_objdir/$dlsyms" "\ + +#undef lt_preloaded_symbols + +#if defined (__STDC__) && __STDC__ +# define lt_ptr void * +#else +# define lt_ptr char * +# define const +#endif + +/* The mapping between symbol names and symbols. */ +const struct { + const char *name; + lt_ptr address; +} +lt_preloaded_symbols[] = +{\ +" + + eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$dlsyms" + + $echo >> "$output_objdir/$dlsyms" "\ + {0, (lt_ptr) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif\ +" + fi + + pic_flag_for_symtable= + case $host in + # compiling the symbol table file with pic_flag works around + # a FreeBSD bug that causes programs to crash when -lm is + # linked before any other PIC object. But we must not use + # pic_flag when linking with -static. The problem exists in + # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1. 
+ *-*-freebsd2*|*-*-freebsd3.0*|*-*-freebsdelf3.0*) + case "$compile_command " in + *" -static "*) ;; + *) pic_flag_for_symtable=" $pic_flag -DPIC -DFREEBSD_WORKAROUND";; + esac;; + *-*-hpux*) + case "$compile_command " in + *" -static "*) ;; + *) pic_flag_for_symtable=" $pic_flag -DPIC";; + esac + esac + + # Now compile the dynamic symbol file. + $show "(cd $output_objdir && $CC -c$no_builtin_flag$pic_flag_for_symtable \"$dlsyms\")" + $run eval '(cd $output_objdir && $CC -c$no_builtin_flag$pic_flag_for_symtable "$dlsyms")' || exit $? + + # Clean up the generated files. + $show "$rm $output_objdir/$dlsyms $nlist ${nlist}S ${nlist}T" + $run $rm "$output_objdir/$dlsyms" "$nlist" "${nlist}S" "${nlist}T" + + # Transform the symbol file into the correct name. + compile_command=`$echo "X$compile_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"` + finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"` + ;; + *) + $echo "$modename: unknown suffix for \`$dlsyms'" 1>&2 + exit 1 + ;; + esac + else + # We keep going just in case the user didn't refer to + # lt_preloaded_symbols. The linker will fail if global_symbol_pipe + # really was required. + + # Nullify the symbol file. + compile_command=`$echo "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"` + finalize_command=`$echo "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"` + fi + + if test $need_relink = no || test "$build_libtool_libs" != yes; then + # Replace the output file specification. + compile_command=`$echo "X$compile_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'` + link_command="$compile_command$compile_rpath" + + # We have no uninstalled library dependencies, so finalize right now. + $show "$link_command" + $run eval "$link_command" + status=$? + + # Delete the generated files. 
+ if test -n "$dlsyms"; then + $show "$rm $output_objdir/${outputname}S.${objext}" + $run $rm "$output_objdir/${outputname}S.${objext}" + fi + + exit $status + fi + + if test -n "$shlibpath_var"; then + # We should set the shlibpath_var + rpath= + for dir in $temp_rpath; do + case $dir in + [\\/]* | [A-Za-z]:[\\/]*) + # Absolute path. + rpath="$rpath$dir:" + ;; + *) + # Relative path: add a thisdir entry. + rpath="$rpath\$thisdir/$dir:" + ;; + esac + done + temp_rpath="$rpath" + fi + + if test -n "$compile_shlibpath$finalize_shlibpath"; then + compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command" + fi + if test -n "$finalize_shlibpath"; then + finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command" + fi + + compile_var= + finalize_var= + if test -n "$runpath_var"; then + if test -n "$perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $perm_rpath; do + rpath="$rpath$dir:" + done + compile_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + if test -n "$finalize_perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $finalize_perm_rpath; do + rpath="$rpath$dir:" + done + finalize_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + fi + + if test "$no_install" = yes; then + # We don't need to create a wrapper script. + link_command="$compile_var$compile_command$compile_rpath" + # Replace the output file specification. + link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'` + # Delete the old output file. + $run $rm $output + # Link the executable and exit + $show "$link_command" + $run eval "$link_command" || exit $? 
+ exit 0 + fi + + if test "$hardcode_action" = relink; then + # Fast installation is not supported + link_command="$compile_var$compile_command$compile_rpath" + relink_command="$finalize_var$finalize_command$finalize_rpath" + + $echo "$modename: warning: this platform does not like uninstalled shared libraries" 1>&2 + $echo "$modename: \`$output' will be relinked during installation" 1>&2 + else + if test "$fast_install" != no; then + link_command="$finalize_var$compile_command$finalize_rpath" + if test "$fast_install" = yes; then + relink_command=`$echo "X$compile_var$compile_command$compile_rpath" | $Xsed -e 's%@OUTPUT@%\$progdir/\$file%g'` + else + # fast_install is set to needless + relink_command= + fi + else + link_command="$compile_var$compile_command$compile_rpath" + relink_command="$finalize_var$finalize_command$finalize_rpath" + fi + fi + + # Replace the output file specification. + link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'` + + # Delete the old output files. + $run $rm $output $output_objdir/$outputname $output_objdir/lt-$outputname + + $show "$link_command" + $run eval "$link_command" || exit $? + + # Now create the wrapper script. + $show "creating $output" + + # Quote the relink command for shipping. + if test -n "$relink_command"; then + # Preserve any variables that may affect compiler behavior + for var in $variables_saved_for_relink; do + if eval test -z \"\${$var+set}\"; then + relink_command="{ test -z \"\${$var+set}\" || unset $var || { $var=; export $var; }; }; $relink_command" + elif eval var_value=\$$var; test -z "$var_value"; then + relink_command="$var=; export $var; $relink_command" + else + var_value=`$echo "X$var_value" | $Xsed -e "$sed_quote_subst"` + relink_command="$var=\"$var_value\"; export $var; $relink_command" + fi + done + relink_command="(cd `pwd`; $relink_command)" + relink_command=`$echo "X$relink_command" | $Xsed -e "$sed_quote_subst"` + fi + + # Quote $echo for shipping. 
+ if test "X$echo" = "X$SHELL $0 --fallback-echo"; then + case $0 in + [\\/]* | [A-Za-z]:[\\/]*) qecho="$SHELL $0 --fallback-echo";; + *) qecho="$SHELL `pwd`/$0 --fallback-echo";; + esac + qecho=`$echo "X$qecho" | $Xsed -e "$sed_quote_subst"` + else + qecho=`$echo "X$echo" | $Xsed -e "$sed_quote_subst"` + fi + + # Only actually do things if our run command is non-null. + if test -z "$run"; then + # win32 will think the script is a binary if it has + # a .exe suffix, so we strip it off here. + case $output in + *.exe) output=`echo $output|sed 's,.exe$,,'` ;; + esac + # test for cygwin because mv fails w/o .exe extensions + case $host in + *cygwin*) exeext=.exe ;; + *) exeext= ;; + esac + $rm $output + trap "$rm $output; exit 1" 1 2 15 + + $echo > $output "\ +#! $SHELL + +# $output - temporary wrapper script for $objdir/$outputname +# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP +# +# The $output program cannot be directly executed until all the libtool +# libraries that it depends on are installed. +# +# This wrapper script should never be moved out of the build directory. +# If it is, it will not operate correctly. + +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. +Xsed='sed -e 1s/^X//' +sed_quote_subst='$sed_quote_subst' + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +if test \"\${CDPATH+set}\" = set; then CDPATH=:; export CDPATH; fi + +relink_command=\"$relink_command\" + +# This environment variable determines our operation mode. +if test \"\$libtool_install_magic\" = \"$magic\"; then + # install mode needs the following variable: + notinst_deplibs='$notinst_deplibs' +else + # When we are sourced in execute mode, \$file and \$echo are already set. + if test \"\$libtool_execute_magic\" != \"$magic\"; then + echo=\"$qecho\" + file=\"\$0\" + # Make sure echo works. 
+ if test \"X\$1\" = X--no-reexec; then + # Discard the --no-reexec flag, and continue. + shift + elif test \"X\`(\$echo '\t') 2>/dev/null\`\" = 'X\t'; then + # Yippee, \$echo works! + : + else + # Restart under the correct shell, and then maybe \$echo will work. + exec $SHELL \"\$0\" --no-reexec \${1+\"\$@\"} + fi + fi\ +" + $echo >> $output "\ + + # Find the directory that this script lives in. + thisdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*$%%'\` + test \"x\$thisdir\" = \"x\$file\" && thisdir=. + + # Follow symbolic links until we get to the real thisdir. + file=\`ls -ld \"\$file\" | sed -n 's/.*-> //p'\` + while test -n \"\$file\"; do + destdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*\$%%'\` + + # If there was a directory component, then change thisdir. + if test \"x\$destdir\" != \"x\$file\"; then + case \"\$destdir\" in + [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;; + *) thisdir=\"\$thisdir/\$destdir\" ;; + esac + fi + + file=\`\$echo \"X\$file\" | \$Xsed -e 's%^.*/%%'\` + file=\`ls -ld \"\$thisdir/\$file\" | sed -n 's/.*-> //p'\` + done + + # Try to get the absolute directory name. + absdir=\`cd \"\$thisdir\" && pwd\` + test -n \"\$absdir\" && thisdir=\"\$absdir\" +" + + if test "$fast_install" = yes; then + echo >> $output "\ + program=lt-'$outputname'$exeext + progdir=\"\$thisdir/$objdir\" + + if test ! -f \"\$progdir/\$program\" || \\ + { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | sed 1q\`; \\ + test \"X\$file\" != \"X\$progdir/\$program\"; }; then + + file=\"\$\$-\$program\" + + if test ! 
-d \"\$progdir\"; then + $mkdir \"\$progdir\" + else + $rm \"\$progdir/\$file\" + fi" + + echo >> $output "\ + + # relink executable if necessary + if test -n \"\$relink_command\"; then + if relink_command_output=\`eval \$relink_command 2>&1\`; then : + else + $echo \"\$relink_command_output\" >&2 + $rm \"\$progdir/\$file\" + exit 1 + fi + fi + + $mv \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null || + { $rm \"\$progdir/\$program\"; + $mv \"\$progdir/\$file\" \"\$progdir/\$program\"; } + $rm \"\$progdir/\$file\" + fi" + else + echo >> $output "\ + program='$outputname' + progdir=\"\$thisdir/$objdir\" +" + fi + + echo >> $output "\ + + if test -f \"\$progdir/\$program\"; then" + + # Export our shlibpath_var if we have one. + if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then + $echo >> $output "\ + # Add our own library path to $shlibpath_var + $shlibpath_var=\"$temp_rpath\$$shlibpath_var\" + + # Some systems cannot cope with colon-terminated $shlibpath_var + # The second colon is a workaround for a bug in BeOS R4 sed + $shlibpath_var=\`\$echo \"X\$$shlibpath_var\" | \$Xsed -e 's/::*\$//'\` + + export $shlibpath_var +" + fi + + # fixup the dll searchpath if we need to. + if test -n "$dllsearchpath"; then + $echo >> $output "\ + # Add the dll search path components to the executable PATH + PATH=$dllsearchpath:\$PATH +" + fi + + $echo >> $output "\ + if test \"\$libtool_execute_magic\" != \"$magic\"; then + # Run the actual program with our arguments. +" + case $host in + # win32 systems need to use the prog path for dll + # lookup to work + *-*-cygwin* | *-*-pw32*) + $echo >> $output "\ + exec \$progdir/\$program \${1+\"\$@\"} +" + ;; + + # Backslashes separate directories on plain windows + *-*-mingw | *-*-os2*) + $echo >> $output "\ + exec \$progdir\\\\\$program \${1+\"\$@\"} +" + ;; + + *) + $echo >> $output "\ + # Export the path to the program. 
+ PATH=\"\$progdir:\$PATH\" + export PATH + + exec \$program \${1+\"\$@\"} +" + ;; + esac + $echo >> $output "\ + \$echo \"\$0: cannot exec \$program \${1+\"\$@\"}\" + exit 1 + fi + else + # The program doesn't exist. + \$echo \"\$0: error: \$progdir/\$program does not exist\" 1>&2 + \$echo \"This script is just a wrapper for \$program.\" 1>&2 + echo \"See the $PACKAGE documentation for more information.\" 1>&2 + exit 1 + fi +fi\ +" + chmod +x $output + fi + exit 0 + ;; + esac + + # See if we need to build an old-fashioned archive. + for oldlib in $oldlibs; do + + if test "$build_libtool_libs" = convenience; then + oldobjs="$libobjs_save" + addlibs="$convenience" + build_libtool_libs=no + else + if test "$build_libtool_libs" = module; then + oldobjs="$libobjs_save" + build_libtool_libs=no + else + oldobjs="$objs$old_deplibs "`$echo "X$libobjs_save" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP` + fi + addlibs="$old_convenience" + fi + + if test -n "$addlibs"; then + gentop="$output_objdir/${outputname}x" + $show "${rm}r $gentop" + $run ${rm}r "$gentop" + $show "mkdir $gentop" + $run mkdir "$gentop" + status=$? + if test $status -ne 0 && test ! -d "$gentop"; then + exit $status + fi + generated="$generated $gentop" + + # Add in members from convenience archives. + for xlib in $addlibs; do + # Extract the objects. + case $xlib in + [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;; + *) xabs=`pwd`"/$xlib" ;; + esac + xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'` + xdir="$gentop/$xlib" + + $show "${rm}r $xdir" + $run ${rm}r "$xdir" + $show "mkdir $xdir" + $run mkdir "$xdir" + status=$? + if test $status -ne 0 && test ! -d "$xdir"; then + exit $status + fi + $show "(cd $xdir && $AR x $xabs)" + $run eval "(cd \$xdir && $AR x \$xabs)" || exit $? + + oldobjs="$oldobjs "`find $xdir -name \*.${objext} -print -o -name \*.lo -print | $NL2SP` + done + fi + + # Do each command in the archive commands. 
+ if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then + eval cmds=\"$old_archive_from_new_cmds\" + else + # Ensure that we have .o objects in place in case we decided + # not to build a shared library, and have fallen back to building + # static libs even though --disable-static was passed! + for oldobj in $oldobjs; do + if test ! -f $oldobj; then + xdir=`$echo "X$oldobj" | $Xsed -e 's%/[^/]*$%%'` + if test "X$xdir" = "X$oldobj"; then + xdir="." + else + xdir="$xdir" + fi + baseobj=`$echo "X$oldobj" | $Xsed -e 's%^.*/%%'` + obj=`$echo "X$baseobj" | $Xsed -e "$o2lo"` + $show "(cd $xdir && ${LN_S} $obj $baseobj)" + $run eval '(cd $xdir && ${LN_S} $obj $baseobj)' || exit $? + fi + done + + eval cmds=\"$old_archive_cmds\" + fi + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + done + + if test -n "$generated"; then + $show "${rm}r$generated" + $run ${rm}r$generated + fi + + # Now create the libtool archive. + case $output in + *.la) + old_library= + test "$build_old_libs" = yes && old_library="$libname.$libext" + $show "creating $output" + + # Preserve any variables that may affect compiler behavior + for var in $variables_saved_for_relink; do + if eval test -z \"\${$var+set}\"; then + relink_command="{ test -z \"\${$var+set}\" || unset $var || { $var=; export $var; }; }; $relink_command" + elif eval var_value=\$$var; test -z "$var_value"; then + relink_command="$var=; export $var; $relink_command" + else + var_value=`$echo "X$var_value" | $Xsed -e "$sed_quote_subst"` + relink_command="$var=\"$var_value\"; export $var; $relink_command" + fi + done + # Quote the link command for shipping. + relink_command="(cd `pwd`; $SHELL $0 --mode=relink $libtool_args)" + relink_command=`$echo "X$relink_command" | $Xsed -e "$sed_quote_subst"` + + # Only create the output if not a dry run. 
+ if test -z "$run"; then + for installed in no yes; do + if test "$installed" = yes; then + if test -z "$install_libdir"; then + break + fi + output="$output_objdir/$outputname"i + # Replace all uninstalled libtool libraries with the installed ones + newdependency_libs= + for deplib in $dependency_libs; do + case $deplib in + *.la) + name=`$echo "X$deplib" | $Xsed -e 's%^.*/%%'` + eval libdir=`sed -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` + if test -z "$libdir"; then + $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2 + exit 1 + fi + newdependency_libs="$newdependency_libs $libdir/$name" + ;; + *) newdependency_libs="$newdependency_libs $deplib" ;; + esac + done + dependency_libs="$newdependency_libs" + newdlfiles= + for lib in $dlfiles; do + name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'` + eval libdir=`sed -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + if test -z "$libdir"; then + $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 + exit 1 + fi + newdlfiles="$newdlfiles $libdir/$name" + done + dlfiles="$newdlfiles" + newdlprefiles= + for lib in $dlprefiles; do + name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'` + eval libdir=`sed -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + if test -z "$libdir"; then + $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 + exit 1 + fi + newdlprefiles="$newdlprefiles $libdir/$name" + done + dlprefiles="$newdlprefiles" + fi + $rm $output + # place dlname in correct position for cygwin + tdlname=$dlname + case $host,$output,$installed,$module,$dlname in + *cygwin*,*lai,yes,no,*.dll) tdlname=../bin/$dlname ;; + esac + $echo > $output "\ +# $outputname - a libtool library file +# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP +# +# Please DO NOT delete this file! +# It is necessary for linking the library. + +# The name that we can dlopen(3). +dlname='$tdlname' + +# Names of this library. +library_names='$library_names' + +# The name of the static archive. 
+old_library='$old_library' + +# Libraries that this one depends upon. +dependency_libs='$dependency_libs' + +# Version information for $libname. +current=$current +age=$age +revision=$revision + +# Is this an already installed library? +installed=$installed + +# Files to dlopen/dlpreopen +dlopen='$dlfiles' +dlpreopen='$dlprefiles' + +# Directory that this library needs to be installed in: +libdir='$install_libdir'" + if test "$installed" = no && test $need_relink = yes; then + $echo >> $output "\ +relink_command=\"$relink_command\"" + fi + done + fi + + # Do a symbolic link so that the libtool archive can be found in + # LD_LIBRARY_PATH before the program is installed. + $show "(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)" + $run eval '(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)' || exit $? + ;; + esac + exit 0 + ;; + + # libtool install mode + install) + modename="$modename: install" + + # There may be an optional sh(1) argument at the beginning of + # install_prog (especially on Windows NT). + if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh || + # Allow the use of GNU shtool's install command. + $echo "X$nonopt" | $Xsed | grep shtool > /dev/null; then + # Aesthetically quote it. + arg=`$echo "X$nonopt" | $Xsed -e "$sed_quote_subst"` + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) + arg="\"$arg\"" + ;; + esac + install_prog="$arg " + arg="$1" + shift + else + install_prog= + arg="$nonopt" + fi + + # The real first argument should be the name of the installation program. + # Aesthetically quote it. + arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) + arg="\"$arg\"" + ;; + esac + install_prog="$install_prog$arg" + + # We need to accept at least all the BSD install flags. 
+ dest= + files= + opts= + prev= + install_type= + isdir=no + stripme= + for arg + do + if test -n "$dest"; then + files="$files $dest" + dest="$arg" + continue + fi + + case $arg in + -d) isdir=yes ;; + -f) prev="-f" ;; + -g) prev="-g" ;; + -m) prev="-m" ;; + -o) prev="-o" ;; + -s) + stripme=" -s" + continue + ;; + -*) ;; + + *) + # If the previous option needed an argument, then skip it. + if test -n "$prev"; then + prev= + else + dest="$arg" + continue + fi + ;; + esac + + # Aesthetically quote the argument. + arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) + arg="\"$arg\"" + ;; + esac + install_prog="$install_prog $arg" + done + + if test -z "$install_prog"; then + $echo "$modename: you must specify an install program" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + if test -n "$prev"; then + $echo "$modename: the \`$prev' option requires an argument" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + if test -z "$files"; then + if test -z "$dest"; then + $echo "$modename: no file or destination specified" 1>&2 + else + $echo "$modename: you must specify a destination" 1>&2 + fi + $echo "$help" 1>&2 + exit 1 + fi + + # Strip any trailing slash from the destination. + dest=`$echo "X$dest" | $Xsed -e 's%/$%%'` + + # Check to see that the destination is a directory. + test -d "$dest" && isdir=yes + if test "$isdir" = yes; then + destdir="$dest" + destname= + else + destdir=`$echo "X$dest" | $Xsed -e 's%/[^/]*$%%'` + test "X$destdir" = "X$dest" && destdir=. + destname=`$echo "X$dest" | $Xsed -e 's%^.*/%%'` + + # Not a directory, so check to see that there is only one file specified. 
+ set dummy $files + if test $# -gt 2; then + $echo "$modename: \`$dest' is not a directory" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + fi + case $destdir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + for file in $files; do + case $file in + *.lo) ;; + *) + $echo "$modename: \`$destdir' must be an absolute directory name" 1>&2 + $echo "$help" 1>&2 + exit 1 + ;; + esac + done + ;; + esac + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic="$magic" + + staticlibs= + future_libdirs= + current_libdirs= + for file in $files; do + + # Do each installation. + case $file in + *.$libext) + # Do the static libraries later. + staticlibs="$staticlibs $file" + ;; + + *.la) + # Check to see that this really is a libtool archive. + if (sed -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : + else + $echo "$modename: \`$file' is not a valid libtool archive" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + library_names= + old_library= + relink_command= + # If there is no directory component, then add one. + case $file in + */* | *\\*) . $file ;; + *) . ./$file ;; + esac + + # Add the libdir to current_libdirs if it is the destination. + if test "X$destdir" = "X$libdir"; then + case "$current_libdirs " in + *" $libdir "*) ;; + *) current_libdirs="$current_libdirs $libdir" ;; + esac + else + # Note the libdir as a future libdir. + case "$future_libdirs " in + *" $libdir "*) ;; + *) future_libdirs="$future_libdirs $libdir" ;; + esac + fi + + dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`/ + test "X$dir" = "X$file/" && dir= + dir="$dir$objdir" + + if test -n "$relink_command"; then + $echo "$modename: warning: relinking \`$file'" 1>&2 + $show "$relink_command" + if $run eval "$relink_command"; then : + else + $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2 + continue + fi + fi + + # See the names of the shared library. 
+ set dummy $library_names + if test -n "$2"; then + realname="$2" + shift + shift + + srcname="$realname" + test -n "$relink_command" && srcname="$realname"T + + # Install the shared library and build the symlinks. + $show "$install_prog $dir/$srcname $destdir/$realname" + $run eval "$install_prog $dir/$srcname $destdir/$realname" || exit $? + if test -n "$stripme" && test -n "$striplib"; then + $show "$striplib $destdir/$realname" + $run eval "$striplib $destdir/$realname" || exit $? + fi + + if test $# -gt 0; then + # Delete the old symlinks, and create new ones. + for linkname + do + if test "$linkname" != "$realname"; then + $show "(cd $destdir && $rm $linkname && $LN_S $realname $linkname)" + $run eval "(cd $destdir && $rm $linkname && $LN_S $realname $linkname)" + fi + done + fi + + # Do each command in the postinstall commands. + lib="$destdir/$realname" + eval cmds=\"$postinstall_cmds\" + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + fi + + # Install the pseudo-library for information purposes. + name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + instname="$dir/$name"i + $show "$install_prog $instname $destdir/$name" + $run eval "$install_prog $instname $destdir/$name" || exit $? + + # Maybe install the static library, too. + test -n "$old_library" && staticlibs="$staticlibs $dir/$old_library" + ;; + + *.lo) + # Install (i.e. copy) a libtool object. + + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile="$destdir/$destname" + else + destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + destfile="$destdir/$destfile" + fi + + # Deduce the name of the destination old-style object file. 
+ case $destfile in + *.lo) + staticdest=`$echo "X$destfile" | $Xsed -e "$lo2o"` + ;; + *.$objext) + staticdest="$destfile" + destfile= + ;; + *) + $echo "$modename: cannot copy a libtool object to \`$destfile'" 1>&2 + $echo "$help" 1>&2 + exit 1 + ;; + esac + + # Install the libtool object if requested. + if test -n "$destfile"; then + $show "$install_prog $file $destfile" + $run eval "$install_prog $file $destfile" || exit $? + fi + + # Install the old object if enabled. + if test "$build_old_libs" = yes; then + # Deduce the name of the old-style object file. + staticobj=`$echo "X$file" | $Xsed -e "$lo2o"` + + $show "$install_prog $staticobj $staticdest" + $run eval "$install_prog \$staticobj \$staticdest" || exit $? + fi + exit 0 + ;; + + *) + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile="$destdir/$destname" + else + destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + destfile="$destdir/$destfile" + fi + + # Do a test to see if this is really a libtool program. + case $host in + *cygwin*|*mingw*) + wrapper=`echo $file | sed -e 's,.exe$,,'` + ;; + *) + wrapper=$file + ;; + esac + if (sed -e '4q' $wrapper | egrep "^# Generated by .*$PACKAGE")>/dev/null 2>&1; then + notinst_deplibs= + relink_command= + + # If there is no directory component, then add one. + case $file in + */* | *\\*) . $wrapper ;; + *) . ./$wrapper ;; + esac + + # Check the variables that should have been set. + if test -z "$notinst_deplibs"; then + $echo "$modename: invalid libtool wrapper script \`$wrapper'" 1>&2 + exit 1 + fi + + finalize=yes + for lib in $notinst_deplibs; do + # Check to see that each library is installed. + libdir= + if test -f "$lib"; then + # If there is no directory component, then add one. + case $lib in + */* | *\\*) . $lib ;; + *) . ./$lib ;; + esac + fi + libfile="$libdir/"`$echo "X$lib" | $Xsed -e 's%^.*/%%g'` ### testsuite: skip nested quoting test + if test -n "$libdir" && test ! 
-f "$libfile"; then + $echo "$modename: warning: \`$lib' has not been installed in \`$libdir'" 1>&2 + finalize=no + fi + done + + relink_command= + # If there is no directory component, then add one. + case $file in + */* | *\\*) . $wrapper ;; + *) . ./$wrapper ;; + esac + + outputname= + if test "$fast_install" = no && test -n "$relink_command"; then + if test "$finalize" = yes && test -z "$run"; then + tmpdir="/tmp" + test -n "$TMPDIR" && tmpdir="$TMPDIR" + tmpdir="$tmpdir/libtool-$$" + if $mkdir -p "$tmpdir" && chmod 700 "$tmpdir"; then : + else + $echo "$modename: error: cannot create temporary directory \`$tmpdir'" 1>&2 + continue + fi + file=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + outputname="$tmpdir/$file" + # Replace the output file specification. + relink_command=`$echo "X$relink_command" | $Xsed -e 's%@OUTPUT@%'"$outputname"'%g'` + + $show "$relink_command" + if $run eval "$relink_command"; then : + else + $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2 + ${rm}r "$tmpdir" + continue + fi + file="$outputname" + else + $echo "$modename: warning: cannot relink \`$file'" 1>&2 + fi + else + # Install the binary that we compiled earlier. + file=`$echo "X$file" | $Xsed -e "s%\([^/]*\)$%$objdir/\1%"` + fi + fi + + # remove .exe since cygwin /usr/bin/install will append another + # one anyways + case $install_prog,$host in + /usr/bin/install*,*cygwin*) + case $file:$destfile in + *.exe:*.exe) + # this is ok + ;; + *.exe:*) + destfile=$destfile.exe + ;; + *:*.exe) + destfile=`echo $destfile | sed -e 's,.exe$,,'` + ;; + esac + ;; + esac + $show "$install_prog$stripme $file $destfile" + $run eval "$install_prog\$stripme \$file \$destfile" || exit $? + test -n "$outputname" && ${rm}r "$tmpdir" + ;; + esac + done + + for file in $staticlibs; do + name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + + # Set up the ranlib parameters. 
+ oldlib="$destdir/$name" + + $show "$install_prog $file $oldlib" + $run eval "$install_prog \$file \$oldlib" || exit $? + + if test -n "$stripme" && test -n "$striplib"; then + $show "$old_striplib $oldlib" + $run eval "$old_striplib $oldlib" || exit $? + fi + + # Do each command in the postinstall commands. + eval cmds=\"$old_postinstall_cmds\" + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + done + + if test -n "$future_libdirs"; then + $echo "$modename: warning: remember to run \`$progname --finish$future_libdirs'" 1>&2 + fi + + if test -n "$current_libdirs"; then + # Maybe just do a dry run. + test -n "$run" && current_libdirs=" -n$current_libdirs" + exec_cmd='$SHELL $0 --finish$current_libdirs' + else + exit 0 + fi + ;; + + # libtool finish mode + finish) + modename="$modename: finish" + libdirs="$nonopt" + admincmds= + + if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then + for dir + do + libdirs="$libdirs $dir" + done + + for libdir in $libdirs; do + if test -n "$finish_cmds"; then + # Do each command in the finish commands. + eval cmds=\"$finish_cmds\" + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || admincmds="$admincmds + $cmd" + done + IFS="$save_ifs" + fi + if test -n "$finish_eval"; then + # Do the single finish_eval. + eval cmds=\"$finish_eval\" + $run eval "$cmds" || admincmds="$admincmds + $cmds" + fi + done + fi + + # Exit here if they wanted silent mode. 
+ test "$show" = ":" && exit 0 + + echo "----------------------------------------------------------------------" + echo "Libraries have been installed in:" + for libdir in $libdirs; do + echo " $libdir" + done + echo + echo "If you ever happen to want to link against installed libraries" + echo "in a given directory, LIBDIR, you must either use libtool, and" + echo "specify the full pathname of the library, or use the \`-LLIBDIR'" + echo "flag during linking and do at least one of the following:" + if test -n "$shlibpath_var"; then + echo " - add LIBDIR to the \`$shlibpath_var' environment variable" + echo " during execution" + fi + if test -n "$runpath_var"; then + echo " - add LIBDIR to the \`$runpath_var' environment variable" + echo " during linking" + fi + if test -n "$hardcode_libdir_flag_spec"; then + libdir=LIBDIR + eval flag=\"$hardcode_libdir_flag_spec\" + + echo " - use the \`$flag' linker flag" + fi + if test -n "$admincmds"; then + echo " - have your system administrator run these commands:$admincmds" + fi + if test -f /etc/ld.so.conf; then + echo " - have your system administrator add LIBDIR to \`/etc/ld.so.conf'" + fi + echo + echo "See any operating system documentation about shared libraries for" + echo "more information, such as the ld(1) and ld.so(8) manual pages." + echo "----------------------------------------------------------------------" + exit 0 + ;; + + # libtool execute mode + execute) + modename="$modename: execute" + + # The first argument is the command name. + cmd="$nonopt" + if test -z "$cmd"; then + $echo "$modename: you must specify a COMMAND" 1>&2 + $echo "$help" + exit 1 + fi + + # Handle -dlopen flags immediately. + for file in $execute_dlfiles; do + if test ! -f "$file"; then + $echo "$modename: \`$file' is not a file" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + dir= + case $file in + *.la) + # Check to see that this really is a libtool archive. 
+ if (sed -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : + else + $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + # Read the libtool library. + dlname= + library_names= + + # If there is no directory component, then add one. + case $file in + */* | *\\*) . $file ;; + *) . ./$file ;; + esac + + # Skip this library if it cannot be dlopened. + if test -z "$dlname"; then + # Warn if it was a shared library. + test -n "$library_names" && $echo "$modename: warning: \`$file' was not linked with \`-export-dynamic'" + continue + fi + + dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'` + test "X$dir" = "X$file" && dir=. + + if test -f "$dir/$objdir/$dlname"; then + dir="$dir/$objdir" + else + $echo "$modename: cannot find \`$dlname' in \`$dir' or \`$dir/$objdir'" 1>&2 + exit 1 + fi + ;; + + *.lo) + # Just add the directory containing the .lo file. + dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'` + test "X$dir" = "X$file" && dir=. + ;; + + *) + $echo "$modename: warning \`-dlopen' is ignored for non-libtool libraries and objects" 1>&2 + continue + ;; + esac + + # Get the absolute pathname. + absdir=`cd "$dir" && pwd` + test -n "$absdir" && dir="$absdir" + + # Now add the directory to shlibpath_var. + if eval "test -z \"\$$shlibpath_var\""; then + eval "$shlibpath_var=\"\$dir\"" + else + eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\"" + fi + done + + # This variable tells wrapper scripts just to set shlibpath_var + # rather than running their programs. + libtool_execute_magic="$magic" + + # Check if any of the arguments is a wrapper script. + args= + for file + do + case $file in + -*) ;; + *) + # Do a test to see if this is really a libtool program. + if (sed -e '4q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then + # If there is no directory component, then add one. + case $file in + */* | *\\*) . $file ;; + *) . ./$file ;; + esac + + # Transform arg to wrapped name. 
+ file="$progdir/$program" + fi + ;; + esac + # Quote arguments (to preserve shell metacharacters). + file=`$echo "X$file" | $Xsed -e "$sed_quote_subst"` + args="$args \"$file\"" + done + + if test -z "$run"; then + if test -n "$shlibpath_var"; then + # Export the shlibpath_var. + eval "export $shlibpath_var" + fi + + # Restore saved enviroment variables + if test "${save_LC_ALL+set}" = set; then + LC_ALL="$save_LC_ALL"; export LC_ALL + fi + if test "${save_LANG+set}" = set; then + LANG="$save_LANG"; export LANG + fi + + # Now prepare to actually exec the command. + exec_cmd="\$cmd$args" + else + # Display what would be done. + if test -n "$shlibpath_var"; then + eval "\$echo \"\$shlibpath_var=\$$shlibpath_var\"" + $echo "export $shlibpath_var" + fi + $echo "$cmd$args" + exit 0 + fi + ;; + + # libtool clean and uninstall mode + clean | uninstall) + modename="$modename: $mode" + rm="$nonopt" + files= + rmforce= + exit_status=0 + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic="$magic" + + for arg + do + case $arg in + -f) rm="$rm $arg"; rmforce=yes ;; + -*) rm="$rm $arg" ;; + *) files="$files $arg" ;; + esac + done + + if test -z "$rm"; then + $echo "$modename: you must specify an RM program" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + rmdirs= + + for file in $files; do + dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'` + if test "X$dir" = "X$file"; then + dir=. + objdir="$objdir" + else + objdir="$dir/$objdir" + fi + name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + test $mode = uninstall && objdir="$dir" + + # Remember objdir for removal later, being careful to avoid duplicates + if test $mode = clean; then + case " $rmdirs " in + *" $objdir "*) ;; + *) rmdirs="$rmdirs $objdir" ;; + esac + fi + + # Don't error if the file doesn't exist and rm -f was used. 
+ if (test -L "$file") >/dev/null 2>&1 \ + || (test -h "$file") >/dev/null 2>&1 \ + || test -f "$file"; then + : + elif test -d "$file"; then + exit_status=1 + continue + elif test "$rmforce" = yes; then + continue + fi + + rmfiles="$file" + + case $name in + *.la) + # Possibly a libtool archive, so verify it. + if (sed -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then + . $dir/$name + + # Delete the libtool libraries and symlinks. + for n in $library_names; do + rmfiles="$rmfiles $objdir/$n" + done + test -n "$old_library" && rmfiles="$rmfiles $objdir/$old_library" + test $mode = clean && rmfiles="$rmfiles $objdir/$name $objdir/${name}i" + + if test $mode = uninstall; then + if test -n "$library_names"; then + # Do each command in the postuninstall commands. + eval cmds=\"$postuninstall_cmds\" + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" + if test $? != 0 && test "$rmforce" != yes; then + exit_status=1 + fi + done + IFS="$save_ifs" + fi + + if test -n "$old_library"; then + # Do each command in the old_postuninstall commands. + eval cmds=\"$old_postuninstall_cmds\" + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" + if test $? != 0 && test "$rmforce" != yes; then + exit_status=1 + fi + done + IFS="$save_ifs" + fi + # FIXME: should reinstall the best remaining shared library. + fi + fi + ;; + + *.lo) + if test "$build_old_libs" = yes; then + oldobj=`$echo "X$name" | $Xsed -e "$lo2o"` + rmfiles="$rmfiles $dir/$oldobj" + fi + ;; + + *) + # Do a test to see if this is a libtool program. + if test $mode = clean && + (sed -e '4q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then + relink_command= + . 
$dir/$file + + rmfiles="$rmfiles $objdir/$name $objdir/${name}S.${objext}" + if test "$fast_install" = yes && test -n "$relink_command"; then + rmfiles="$rmfiles $objdir/lt-$name" + fi + fi + ;; + esac + $show "$rm $rmfiles" + $run $rm $rmfiles || exit_status=1 + done + + # Try to remove the ${objdir}s in the directories where we deleted files + for dir in $rmdirs; do + if test -d "$dir"; then + $show "rmdir $dir" + $run rmdir $dir >/dev/null 2>&1 + fi + done + + exit $exit_status + ;; + + "") + $echo "$modename: you must specify a MODE" 1>&2 + $echo "$generic_help" 1>&2 + exit 1 + ;; + esac + + if test -z "$exec_cmd"; then + $echo "$modename: invalid operation mode \`$mode'" 1>&2 + $echo "$generic_help" 1>&2 + exit 1 + fi +fi # test -z "$show_help" + +if test -n "$exec_cmd"; then + eval exec $exec_cmd + exit 1 +fi + +# We need to display help for each of the modes. +case $mode in +"") $echo \ +"Usage: $modename [OPTION]... [MODE-ARG]... + +Provide generalized library-building support services. + + --config show all configuration variables + --debug enable verbose shell tracing +-n, --dry-run display commands without modifying any files + --features display basic configuration information and exit + --finish same as \`--mode=finish' + --help display this help message and exit + --mode=MODE use operation mode MODE [default=inferred from MODE-ARGS] + --quiet same as \`--silent' + --silent don't print informational messages + --version print version information + +MODE must be one of the following: + + clean remove files from the build directory + compile compile a source file into a libtool object + execute automatically set library path, then run a program + finish complete the installation of libtool libraries + install install libraries or executables + link create a library or an executable + uninstall remove libraries from an installed directory + +MODE-ARGS vary depending on the MODE. 
Try \`$modename --help --mode=MODE' for +a more detailed description of MODE." + exit 0 + ;; + +clean) + $echo \ +"Usage: $modename [OPTION]... --mode=clean RM [RM-OPTION]... FILE... + +Remove files from the build directory. + +RM is the name of the program to use to delete files associated with each FILE +(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed +to RM. + +If FILE is a libtool library, object or program, all the files associated +with it are deleted. Otherwise, only FILE itself is deleted using RM." + ;; + +compile) + $echo \ +"Usage: $modename [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE + +Compile a source file into a libtool library object. + +This mode accepts the following additional options: + + -o OUTPUT-FILE set the output file name to OUTPUT-FILE + -prefer-pic try to building PIC objects only + -prefer-non-pic try to building non-PIC objects only + -static always build a \`.o' file suitable for static linking + +COMPILE-COMMAND is a command to be used in creating a \`standard' object file +from the given SOURCEFILE. + +The output file name is determined by removing the directory component from +SOURCEFILE, then substituting the C source code suffix \`.c' with the +library object suffix, \`.lo'." + ;; + +execute) + $echo \ +"Usage: $modename [OPTION]... --mode=execute COMMAND [ARGS]... + +Automatically set library path, then run a program. + +This mode accepts the following additional options: + + -dlopen FILE add the directory containing FILE to the library path + +This mode sets the library path environment variable according to \`-dlopen' +flags. + +If any of the ARGS are libtool executable wrappers, then they are translated +into their corresponding uninstalled binary, and any of their required library +directories are added to the library path. + +Then, COMMAND is executed, with ARGS as arguments." + ;; + +finish) + $echo \ +"Usage: $modename [OPTION]... --mode=finish [LIBDIR]... 
+ +Complete the installation of libtool libraries. + +Each LIBDIR is a directory that contains libtool libraries. + +The commands that this mode executes may require superuser privileges. Use +the \`--dry-run' option if you just want to see what would be executed." + ;; + +install) + $echo \ +"Usage: $modename [OPTION]... --mode=install INSTALL-COMMAND... + +Install executables or libraries. + +INSTALL-COMMAND is the installation command. The first component should be +either the \`install' or \`cp' program. + +The rest of the components are interpreted as arguments to that command (only +BSD-compatible install options are recognized)." + ;; + +link) + $echo \ +"Usage: $modename [OPTION]... --mode=link LINK-COMMAND... + +Link object files or libraries together to form another library, or to +create an executable program. + +LINK-COMMAND is a command using the C compiler that you would use to create +a program from several object files. + +The following components of LINK-COMMAND are treated specially: + + -all-static do not do any dynamic linking at all + -avoid-version do not add a version suffix if possible + -dlopen FILE \`-dlpreopen' FILE if it cannot be dlopened at runtime + -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols + -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3) + -export-symbols SYMFILE + try to export only the symbols listed in SYMFILE + -export-symbols-regex REGEX + try to export only the symbols matching REGEX + -LLIBDIR search LIBDIR for required installed libraries + -lNAME OUTPUT-FILE requires the installed library libNAME + -module build a library that can dlopened + -no-fast-install disable the fast-install mode + -no-install link a not-installable executable + -no-undefined declare that a library does not refer to external symbols + -o OUTPUT-FILE create OUTPUT-FILE from the specified objects + -release RELEASE specify package release information + -rpath LIBDIR the created library will 
eventually be installed in LIBDIR + -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries + -static do not do any dynamic linking of libtool libraries + -version-info CURRENT[:REVISION[:AGE]] + specify library version info [each variable defaults to 0] + +All other options (arguments beginning with \`-') are ignored. + +Every other argument is treated as a filename. Files ending in \`.la' are +treated as uninstalled libtool libraries, other files are standard or library +object files. + +If the OUTPUT-FILE ends in \`.la', then a libtool library is created, +only library objects (\`.lo' files) may be specified, and \`-rpath' is +required, except when creating a convenience library. + +If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created +using \`ar' and \`ranlib', or on Windows using \`lib'. + +If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file +is created, otherwise an executable program is created." + ;; + +uninstall) + $echo \ +"Usage: $modename [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE... + +Remove libraries from an installation directory. + +RM is the name of the program to use to delete files associated with each FILE +(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed +to RM. + +If FILE is a libtool library, all the files associated with it are deleted. +Otherwise, only FILE itself is deleted using RM." + ;; + +*) + $echo "$modename: invalid operation mode \`$mode'" 1>&2 + $echo "$help" 1>&2 + exit 1 + ;; +esac + +echo +$echo "Try \`$modename --help' for more information about other modes." + +exit 0 + +# Local Variables: +# mode:shell-script +# sh-indentation:2 +# End: diff --git a/memcheck/Makefile.am b/memcheck/Makefile.am new file mode 100644 index 000000000..00387927b --- /dev/null +++ b/memcheck/Makefile.am @@ -0,0 +1,80 @@ +SUBDIRS = demangle . 
docs tests + +valdir = $(libdir)/valgrind + +LDFLAGS = -Wl,-z -Wl,initfirst + +INCLUDES += -I$(srcdir)/demangle + +bin_SCRIPTS = valgrind + +val_DATA = linux22.supp linux24.supp + +EXTRA_DIST = $(val_DATA) \ + PATCHES_APPLIED ACKNOWLEDGEMENTS \ + README_KDE3_FOLKS \ + README_MISSING_SYSCALL_OR_IOCTL TODO + +val_PROGRAMS = valgrind.so valgrinq.so + +valgrinq_so_SOURCES = vg_valgrinq_dummy.c + +valgrind_so_SOURCES = \ + vg_clientmalloc.c \ + vg_clientperms.c \ + vg_demangle.c \ + vg_dispatch.S \ + vg_errcontext.c \ + vg_execontext.c \ + vg_from_ucode.c \ + vg_helpers.S \ + vg_main.c \ + vg_malloc2.c \ + vg_memory.c \ + vg_messages.c \ + vg_mylibc.c \ + vg_procselfmaps.c \ + vg_profile.c \ + vg_signals.c \ + vg_startup.S \ + vg_symtab2.c \ + vg_syscall_mem.c \ + vg_syscall.S \ + vg_to_ucode.c \ + vg_translate.c \ + vg_transtab.c \ + vg_valgrinq_dummy.c \ + vg_vtagops.c + +valgrind_so_LDADD = \ + demangle/cp-demangle.o \ + demangle/cplus-dem.o \ + demangle/dyn-string.o \ + demangle/safe-ctype.o + +include_HEADERS = valgrind.h + +noinst_HEADERS = \ + vg_kerneliface.h \ + vg_include.h \ + vg_version.h \ + vg_constants.h \ + vg_unsafe.h + + +install-data-hook: + cd ${valdir} && rm -f default.supp && $(LN_S) $(DEFAULT_SUPP) default.supp + +vg_memory.o: + $(COMPILE) -O2 -mpreferred-stack-boundary=2 -c $< + +vg_clientmalloc.o: + $(COMPILE) -fno-omit-frame-pointer -c $< + + +valgrind.so: $(valgrind_so_OBJECTS) + $(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \ + $(valgrind_so_OBJECTS) $(valgrind_so_LDADD) + +valgrinq.so: $(valgrinq_so_OBJECTS) + $(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS) diff --git a/memcheck/docs/Makefile.am b/memcheck/docs/Makefile.am new file mode 100644 index 000000000..e8a58fa18 --- /dev/null +++ b/memcheck/docs/Makefile.am @@ -0,0 +1,5 @@ +docdir = $(datadir)/doc/valgrind + +doc_DATA = index.html manual.html nav.html techdocs.html + +EXTRA_DIST = $(doc_DATA) diff --git a/memcheck/docs/index.html b/memcheck/docs/index.html new file 
mode 100644 index 000000000..111170256 --- /dev/null +++ b/memcheck/docs/index.html @@ -0,0 +1,26 @@ + + + + + + + + + + + Valgrind's user manual + + + + + + + <body> + <p>This page uses frames, but your browser doesn't support them.</p> + </body> + + + + diff --git a/memcheck/docs/manual.html b/memcheck/docs/manual.html new file mode 100644 index 000000000..1bcd02a81 --- /dev/null +++ b/memcheck/docs/manual.html @@ -0,0 +1,1753 @@ + + + + + + + +  +

Valgrind, snapshot 20020317

+ +
+jseward@acm.org
+
http://www.muraroa.demon.co.uk
+Copyright © 2000-2002 Julian Seward +

+Valgrind is licensed under the GNU General Public License, +version 2
+An open-source tool for finding memory-management problems in +Linux-x86 executables. +

+ +

+ +


+ +

Contents of this manual

+ +

Introduction

+ 1.1  What Valgrind is for
+ 1.2  What it does with your program + +

How to use it, and how to make sense + of the results

+ 2.1  Getting started
+ 2.2  The commentary
+ 2.3  Reporting of errors
+ 2.4  Suppressing errors
+ 2.5  Command-line flags
+ 2.6&nbsp;&nbsp;Explanation of error messages<br>
+ 2.7  Writing suppressions files
+ 2.8  Building and installing
+ 2.9  If you have problems
+ +

Details of the checking machinery

+ 3.1  Valid-value (V) bits
+ 3.2  Valid-address (A) bits
+ 3.3  Putting it all together
+ 3.4  Signals
+ 3.5  Memory leak detection
+ +

Limitations

+ +

How it works -- a rough overview

+ 5.1  Getting started
+ 5.2  The translation/instrumentation engine
+ 5.3  Tracking the status of memory
+ 5.4  System calls
+ 5.5  Signals
+ +

An example

+ +

The design and implementation of Valgrind

+ +
+ + +

1  Introduction

+ + +

1.1  What Valgrind is for

+ +Valgrind is a tool to help you find memory-management problems in your +programs. When a program is run under Valgrind's supervision, all +reads and writes of memory are checked, and calls to +malloc/new/free/delete are intercepted. As a result, Valgrind can +detect problems such as: +
    +
  • Use of uninitialised memory
  • +
  • Reading/writing memory after it has been free'd
  • +
  • Reading/writing off the end of malloc'd blocks
  • +
  • Reading/writing inappropriate areas on the stack
  • +
  • Memory leaks -- where pointers to malloc'd blocks are lost forever
  • +
+ +Problems like these can be difficult to find by other means, often +lying undetected for long periods, then causing occasional, +difficult-to-diagnose crashes. + +

+Valgrind is closely tied to details of the CPU, operating system and +to a lesser extent, compiler and basic C libraries. This makes it +difficult to make it portable, so I have chosen at the outset to +concentrate on what I believe to be a widely used platform: Red Hat +Linux 7.2, on x86s. I believe that it will work without significant +difficulty on other x86 GNU/Linux systems which use the 2.4 kernel and +GNU libc 2.2.X, for example SuSE 7.1 and Mandrake 8.0. Red Hat 6.2 is +also supported. It has worked in the past, and probably still does, +on RedHat 7.1 and 6.2. Note that I haven't compiled it on RedHat 7.1 +and 6.2 for a while, so they may no longer work now. +<p>

+(Early Feb 02: after feedback from the KDE people it also works better +on other Linuxes). +

+At some point in the past, Valgrind has also worked on Red Hat 6.2 +(x86), thanks to the efforts of Rob Noble. + +

+Valgrind is licensed under the GNU General Public License, version +2. Read the file LICENSE in the source distribution for details. + + +

1.2  What it does with your program

+ +Valgrind is designed to be as non-intrusive as possible. It works +directly with existing executables. You don't need to recompile, +relink, or otherwise modify, the program to be checked. Simply place +the word valgrind at the start of the command line +normally used to run the program. So, for example, if you want to run +the command ls -l on Valgrind, simply issue the +command: valgrind ls -l. + +

Valgrind takes control of your program before it starts. Debugging +information is read from the executable and associated libraries, so +that error messages can be phrased in terms of source code +locations. Your program is then run on a synthetic x86 CPU which +checks every memory access. All detected errors are written to a +log. When the program finishes, Valgrind searches for and reports on +leaked memory. + +

You can run pretty much any dynamically linked ELF x86 executable using +Valgrind. Programs run 25 to 50 times slower, and take a lot more +memory, than they usually would. It works well enough to run large +programs. For example, the Konqueror web browser from the KDE Desktop +Environment, version 2.1.1, runs slowly but usably on Valgrind. + +

Valgrind simulates every single instruction your program executes. +Because of this, it finds errors not only in your application but also +in all supporting dynamically-linked (.so-format) libraries, including +the GNU C library, the X client libraries, Qt, if you work with KDE, and +so on. That often includes libraries, for example the GNU C library, +which contain memory access violations, but which you cannot or do not +want to fix. + +

Rather than swamping you with errors in which you are not +interested, Valgrind allows you to selectively suppress errors, by +recording them in a suppressions file which is read when Valgrind +starts up. As supplied, Valgrind comes with a suppressions file +designed to give reasonable behaviour on Red Hat 7.2 (also 7.1 and +6.2) when running text-only and simple X applications. + +

Section 6 shows an example of use. +

+


+ + +

2  How to use it, and how to make sense of the results

+ + +

2.1  Getting started

+ +First off, consider whether it might be beneficial to recompile your +application and supporting libraries with optimisation disabled and +debugging info enabled (the -g flag). You don't have to +do this, but doing so helps Valgrind produce more accurate and less +confusing error reports. Chances are you're set up like this already, +if you intended to debug your program with GNU gdb, or some other +debugger. + +

Then just run your application, but place the word +valgrind in front of your usual command-line invocation. +Note that you should run the real (machine-code) executable here. If +your application is started by, for example, a shell or perl script, +you'll need to modify it to invoke Valgrind on the real executables. +Running such scripts directly under Valgrind will result in you +getting error reports pertaining to /bin/sh, +/usr/bin/perl, or whatever interpreter you're using. +This almost certainly isn't what you want and can be hugely confusing. + + +<p>

2.2  The commentary

+ +Valgrind writes a commentary, detailing error reports and other +significant events. The commentary goes to standard output by +default. This may interfere with your program, so you can ask for it +to be directed elsewhere. + +

All lines in the commentary are of the following form:
+

+  ==12345== some-message-from-Valgrind
+
+

The 12345 is the process ID. This scheme makes it easy +to distinguish program output from Valgrind commentary, and also easy +to differentiate commentaries from different processes which have +become merged together, for whatever reason. + +

By default, Valgrind writes only essential messages to the commentary, +so as to avoid flooding you with information of secondary importance. +If you want more information about what is happening, re-run, passing +the -v flag to Valgrind. + + + +

2.3  Reporting of errors

+ +When Valgrind detects something bad happening in the program, an error +message is written to the commentary. For example:
+
+  ==25832== Invalid read of size 4
+  ==25832==    at 0x8048724: BandMatrix::ReSize(int, int, int) (bogon.cpp:45)
+  ==25832==    by 0x80487AF: main (bogon.cpp:66)
+  ==25832==    by 0x40371E5E: __libc_start_main (libc-start.c:129)
+  ==25832==    by 0x80485D1: (within /home/sewardj/newmat10/bogon)
+  ==25832==    Address 0xBFFFF74C is not stack'd, malloc'd or free'd
+
+ +

This message says that the program did an illegal 4-byte read of +address 0xBFFFF74C, which, as far as it can tell, is not a valid stack +address, nor corresponds to any currently malloc'd or free'd blocks. +The read is happening at line 45 of bogon.cpp, called +from line 66 of the same file, etc. For errors associated with an +identified malloc'd/free'd block, for example reading free'd memory, +Valgrind reports not only the location where the error happened, but +also where the associated block was malloc'd/free'd. + +

Valgrind remembers all error reports. When an error is detected, +it is compared against old reports, to see if it is a duplicate. If +so, the error is noted, but no further commentary is emitted. This +avoids you being swamped with bazillions of duplicate error reports. + +

If you want to know how many times each error occurred, run with +the -v option. When execution finishes, all the reports +are printed out, along with, and sorted by, their occurrence counts. +This makes it easy to see which errors have occurred most frequently. + +

Errors are reported before the associated operation actually +happens. For example, if your program decides to read from address +zero, Valgrind will emit a message to this effect, and the program +will then duly die with a segmentation fault. + +<p>

In general, you should try and fix errors in the order that they +are reported. Not doing so can be confusing. For example, a program +which copies uninitialised values to several memory locations, and +later uses them, will generate several error messages. The first such +error message may well give the most direct clue to the root cause of +the problem. + + +

2.4  Suppressing errors

+ +Valgrind detects numerous problems in the base libraries, such as the +GNU C library, and the XFree86 client libraries, which come +pre-installed on your GNU/Linux system. You can't easily fix these, +but you don't want to see these errors (and yes, there are many!) So +Valgrind reads a list of errors to suppress at startup. By default +this file is redhat72.supp, located in the Valgrind +installation directory. + +

You can modify and add to the suppressions file at your leisure, or +write your own. Multiple suppression files are allowed. This is +useful if part of your project contains errors you can't or don't want +to fix, yet you don't want to continuously be reminded of them. + +

Each error to be suppressed is described very specifically, to +minimise the possibility that a suppression-directive inadvertently +suppresses a bunch of similar errors which you did want to see. The +suppression mechanism is designed to allow precise yet flexible +specification of errors to suppress. + +<p>

If you use the -v flag, at the end of execution, Valgrind +prints out one line for each used suppression, giving its name and the +number of times it got used. Here's the suppressions used by a run of +ls -l: +

+  --27579-- supp: 1 socketcall.connect(serv_addr)/__libc_connect/__nscd_getgrgid_r
+  --27579-- supp: 1 socketcall.connect(serv_addr)/__libc_connect/__nscd_getpwuid_r
+  --27579-- supp: 6 strrchr/_dl_map_object_from_fd/_dl_map_object
+
+ + +

2.5  Command-line flags

+ +You invoke Valgrind like this: +
+  valgrind [options-for-Valgrind] your-prog [options for your-prog]
+
+ +

Valgrind's default settings succeed in giving reasonable behaviour +in most cases. Available options, in no particular order, are as +follows: +

    +
  • --help

  • + +
  • --version
    +

    The usual deal.


  • + +

  • -v --verbose
    +

    Be more verbose. Gives extra information on various aspects + of your program, such as: the shared objects loaded, the + suppressions used, the progress of the instrumentation engine, + and warnings about unusual behaviour. +


  • + +

  • -q --quiet
    +

    Run silently, and only print error messages. Useful if you + are running regression tests or have some other automated test + machinery. +


  • + +

  • --demangle=no
    + --demangle=yes [the default] +

    Disable/enable automatic demangling (decoding) of C++ names. + Enabled by default. When enabled, Valgrind will attempt to + translate encoded C++ procedure names back to something + approaching the original. The demangler handles symbols mangled + by g++ versions 2.X and 3.X. + +

    An important fact about demangling is that function + names mentioned in suppressions files should be in their mangled + form. Valgrind does not demangle function names when searching + for applicable suppressions, because to do otherwise would make + suppressions file contents dependent on the state of Valgrind's + demangling machinery, and would also be slow and pointless. +


  • + +

  • --num-callers=<number> [default=4]
    +

    By default, Valgrind shows four levels of function call names + to help you identify program locations. You can change that + number with this option. This can help in determining the + program's location in deeply-nested call chains. Note that errors + are commoned up using only the top three function locations (the + place in the current function, and that of its two immediate + callers). So this doesn't affect the total number of errors + reported. +

    + The maximum value for this is 50. Note that higher settings + will make Valgrind run a bit more slowly and take a bit more + memory, but can be useful when working with programs with + deeply-nested call chains. +


  • + +

  • --gdb-attach=no [the default]
    + --gdb-attach=yes +

    When enabled, Valgrind will pause after every error shown, + and print the line +
    + ---- Attach to GDB ? --- [Return/N/n/Y/y/C/c] ---- +

    + Pressing Ret, or N Ret + or n Ret, causes Valgrind not to + start GDB for this error. +

    + Y Ret + or y Ret causes Valgrind to + start GDB, for the program at this point. When you have + finished with GDB, quit from it, and the program will continue. + Trying to continue from inside GDB doesn't work. +

    + C Ret + or c Ret causes Valgrind not to + start GDB, and not to ask again. +

    + --gdb-attach=yes conflicts with + --trace-children=yes. You can't use them + together. Valgrind refuses to start up in this situation. +


  • + +

  • --partial-loads-ok=yes [the default]
    + --partial-loads-ok=no +

    Controls how Valgrind handles word (4-byte) loads from + addresses for which some bytes are addressible and others + are not. When yes (the default), such loads + do not elicit an address error. Instead, the loaded V bytes + corresponding to the illegal addresses indicate undefined, and + those corresponding to legal addresses are loaded from shadow + memory, as usual. +

    + When no, loads from partially + invalid addresses are treated the same as loads from completely + invalid addresses: an illegal-address error is issued, + and the resulting V bytes indicate valid data. +


  • + +

  • --sloppy-malloc=no [the default]
    + --sloppy-malloc=yes +

    When enabled, all requests for malloc/calloc are rounded up + to a whole number of machine words -- in other words, made + divisible by 4. For example, a request for 17 bytes of space + would result in a 20-byte area being made available. This works + around bugs in sloppy libraries which assume that they can + safely rely on malloc/calloc requests being rounded up in this + fashion. Without the workaround, these libraries tend to + generate large numbers of errors when they access the ends of + these areas. Valgrind snapshots dated 17 Feb 2002 and later are + cleverer about this problem, and you should no longer need to + use this flag. +


  • + +

  • --trace-children=no [the default]
    + --trace-children=yes +

    When enabled, Valgrind will trace into child processes. This + is confusing and usually not what you want, so is disabled by + default.


  • + +

  • --freelist-vol=<number> [default: 1000000] +

    When the client program releases memory using free (in C) or + delete (C++), that memory is not immediately made available for + re-allocation. Instead it is marked inaccessible and placed in + a queue of freed blocks. The purpose is to delay the point at + which freed-up memory comes back into circulation. This + increases the chance that Valgrind will be able to detect + invalid accesses to blocks for some significant period of time + after they have been freed. +

    + This flag specifies the maximum total size, in bytes, of the + blocks in the queue. The default value is one million bytes. + Increasing this increases the total amount of memory used by + Valgrind but may detect invalid uses of freed blocks which would + otherwise go undetected.


  • + +

  • --logfile-fd=<number> [default: 2, stderr] +

    Specifies the file descriptor on which Valgrind communicates + all of its messages. The default, 2, is the standard error + channel. This may interfere with the client's own use of + stderr. To dump Valgrind's commentary in a file without using + stderr, something like the following works well (sh/bash + syntax):
    +    + valgrind --logfile-fd=9 my_prog 9> logfile
    + That is: tell Valgrind to send all output to file descriptor 9, + and ask the shell to route file descriptor 9 to "logfile". +


  • + +

  • --suppressions=<filename> [default: + /installation/directory/redhat72.supp]

    Specifies an extra + file from which to read descriptions of errors to suppress. You + may use as many extra suppressions files as you + like.


  • + +

  • --leak-check=no [default]
    + --leak-check=yes +

    When enabled, search for memory leaks when the client program + finishes. A memory leak means a malloc'd block, which has not + yet been free'd, but to which no pointer can be found. Such a + block can never be free'd by the program, since no pointer to it + exists. Leak checking is disabled by default + because it tends to generate dozens of error messages. +


  • + +

  • --show-reachable=no [default]
    + --show-reachable=yes

    When disabled, the memory + leak detector only shows blocks for which it cannot find a + pointer to at all, or it can only find a pointer to the middle + of. These blocks are prime candidates for memory leaks. When + enabled, the leak detector also reports on blocks which it could + find a pointer to. Your program could, at least in principle, + have freed such blocks before exit. Contrast this to blocks for + which no pointer, or only an interior pointer could be found: + they are more likely to indicate memory leaks, because + you do not actually have a pointer to the start of the block + which you can hand to free(), even if you wanted to. +


  • + +

  • --leak-resolution=low [default]
    + --leak-resolution=med
    + --leak-resolution=high +

    When doing leak checking, determines how willing Valgrind is + to consider different backtraces the same. When set to + low, the default, only the first two entries need + match. When med, four entries have to match. When + high, all entries need to match. +

    + For hardcore leak debugging, you probably want to use + --leak-resolution=high together with + --num-callers=40 or some such large number. Note + however that this can give an overwhelming amount of + information, which is why the defaults are 4 callers and + low-resolution matching. +

    + Note that the --leak-resolution= setting does not + affect Valgrind's ability to find leaks. It only changes how + the results are presented to you. +


  • + +

  • --workaround-gcc296-bugs=no [default]
    + --workaround-gcc296-bugs=yes

    When enabled, + assume that reads and writes some small distance below the stack + pointer %esp are due to bugs in gcc 2.96, and does + not report them. The "small distance" is 256 bytes by default. + Note that gcc 2.96 is the default compiler on some popular Linux + distributions (RedHat 7.X, Mandrake) and so you may well need to + use this flag. Do not use it if you do not have to, as it can + cause real errors to be overlooked. A better option is to use a + gcc/g++ which works properly; 2.95.3 seems to be a good choice. +

    + Unfortunately (27 Feb 02) it looks like g++ 3.0.4 is similarly + buggy, so you may need to issue this flag if you use 3.0.4. +


  • + +

  • --client-perms=no [default]
    + --client-perms=yes

    An experimental feature. +

    + When enabled, and when --instrument=yes (which is + the default), Valgrind honours client directives to set and + query address range permissions. This allows the client program + to tell Valgrind about changes in memory range permissions that + Valgrind would not otherwise know about, and so allows clients + to get Valgrind to do arbitrary custom checks. +

    + Clients need to include the header file valgrind.h + to make this work. The macros therein have the magical property + that they generate code in-line which Valgrind can spot. + However, the code does nothing when not run on Valgrind, so you + are not forced to run your program on Valgrind just because you + use the macros in this file. +

    + A brief description of the available macros: +

      +
    • VALGRIND_MAKE_NOACCESS, + VALGRIND_MAKE_WRITABLE and + VALGRIND_MAKE_READABLE. These mark address + ranges as completely inaccessible, accessible but containing + undefined data, and accessible and containing defined data, + respectively. Subsequent errors may have their faulting + addresses described in terms of these blocks. Returns a + "block handle". +

      +

    • VALGRIND_DISCARD: At some point you may want + Valgrind to stop reporting errors in terms of the blocks + defined by the previous three macros. To do this, the above + macros return a small-integer "block handle". You can pass + this block handle to VALGRIND_DISCARD. After + doing so, Valgrind will no longer be able to relate + addressing errors to the user-defined block associated with + the handle. The permissions settings associated with the + handle remain in place; this just affects how errors are + reported, not whether they are reported. Returns 1 for an + invalid handle and 0 for a valid handle (although passing + invalid handles is harmless). +

      +

    • VALGRIND_CHECK_NOACCESS, + VALGRIND_CHECK_WRITABLE and + VALGRIND_CHECK_READABLE: check immediately + whether or not the given address range has the relevant + property, and if not, print an error message. Also, for the + convenience of the client, returns zero if the relevant + property holds; otherwise, the returned value is the address + of the first byte for which the property is not true. +

      +

    • VALGRIND_CHECK_NOACCESS: a quick and easy way + to find out whether Valgrind thinks a particular variable + (lvalue, to be precise) is addressible and defined. Prints + an error message if not. Returns no value. +

      +

    • VALGRIND_MAKE_NOACCESS_STACK: a highly + experimental feature. Similarly to + VALGRIND_MAKE_NOACCESS, this marks an address + range as inaccessible, so that subsequent accesses to an + address in the range gives an error. However, this macro + does not return a block handle. Instead, all annotations + created like this are reviewed at each client + ret (subroutine return) instruction, and those + which now define an address range block the client's stack + pointer register (%esp) are automatically + deleted. +

      + In other words, this macro allows the client to tell + Valgrind about red-zones on its own stack. Valgrind + automatically discards this information when the stack + retreats past such blocks. Beware: hacky and flaky. +

    +
  • +

    + As of 17 March 02 (the time of writing this), there is a small + problem with all of these macros, which is that I haven't + figured out how to make them produce sensible (always-succeeds) + return values when the client is run on the real CPU or on + Valgrind without --client-perms=yes. So if you + write client code which depends on the return values, be aware + that it may misbehave when not run with full Valgrindification. + If you always ignore the return values you should always be + safe. I plan to fix this. +

+ +There are also some options for debugging Valgrind itself. You +shouldn't need to use them in the normal run of things. Nevertheless: + +
    + +
  • --single-step=no [default]
    + --single-step=yes +

    When enabled, each x86 insn is translated separately into + instrumented code. When disabled, translation is done on a + per-basic-block basis, giving much better translations.<br>


  • +

    + +

  • --optimise=no
    + --optimise=yes [default] +

    When enabled, various improvements are applied to the + intermediate code, mainly aimed at allowing the simulated CPU's + registers to be cached in the real CPU's registers over several + simulated instructions.


  • +

    + +

  • --instrument=no
    + --instrument=yes [default] +

    When disabled, the translations don't actually contain any + instrumentation.


  • +

    + +

  • --cleanup=no
    + --cleanup=yes [default] +

    When enabled, various improvements are applied to the + post-instrumented intermediate code, aimed at removing redundant + value checks.<br>


  • +

    + +

  • --trace-syscalls=no [default]
    + --trace-syscalls=yes +

    Enable/disable tracing of system call intercepts.


  • +

    + +

  • --trace-signals=no [default]
    + --trace-signals=yes +

    Enable/disable tracing of signal handling.


  • +

    + +

  • --trace-symtab=no [default]
    + --trace-symtab=yes +

    Enable/disable tracing of symbol table reading.


  • +

    + +

  • --trace-malloc=no [default]
    + --trace-malloc=yes +

    Enable/disable tracing of malloc/free (et al) intercepts. +


  • +

    + +

  • --stop-after=<number> + [default: infinity, more or less] +

    After <number> basic blocks have been executed, shut down + Valgrind and switch back to running the client on the real CPU. +


  • +

    + +

  • --dump-error=<number> + [default: inactive] +

    After the program has exited, show gory details of the + translation of the basic block containing the <number>'th + error context. When used with --single-step=yes, + can show the + exact x86 instruction causing an error.


  • +

    + +

  • --smc-check=none
    + --smc-check=some [default]
    + --smc-check=all +

    How carefully should Valgrind check for self-modifying code + writes, so that translations can be discarded?  When + "none", no writes are checked. When "some", only writes + resulting from moves from integer registers to memory are + checked. When "all", all memory writes are checked, even those + with which no sane program would generate code -- for + example, floating-point writes.

  • +
+ + + +

2.6  Explanation of error messages

+ +Despite considerable sophistication under the hood, Valgrind can only +really detect two kinds of errors, use of illegal addresses, and use +of undefined values. Nevertheless, this is enough to help you +discover all sorts of memory-management nasties in your code. This +section presents a quick summary of what error messages mean. The +precise behaviour of the error-checking machinery is described in +
Section 4. + + +

2.6.1  Illegal read / Illegal write errors

+For example: +
+  ==30975== Invalid read of size 4
+  ==30975==    at 0x40F6BBCC: (within /usr/lib/libpng.so.2.1.0.9)
+  ==30975==    by 0x40F6B804: (within /usr/lib/libpng.so.2.1.0.9)
+  ==30975==    by 0x40B07FF4: read_png_image__FP8QImageIO (kernel/qpngio.cpp:326)
+  ==30975==    by 0x40AC751B: QImageIO::read() (kernel/qimage.cpp:3621)
+  ==30975==    Address 0xBFFFF0E0 is not stack'd, malloc'd or free'd
+
+ +

This happens when your program reads or writes memory at a place +which Valgrind reckons it shouldn't. In this example, the program did +a 4-byte read at address 0xBFFFF0E0, somewhere within the +system-supplied library libpng.so.2.1.0.9, which was called from +somewhere else in the same library, called from line 326 of +qpngio.cpp, and so on. + +

Valgrind tries to establish what the illegal address might relate +to, since that's often useful. So, if it points into a block of +memory which has already been freed, you'll be informed of this, and +also where the block was free'd. Likewise, if it should turn out +to be just off the end of a malloc'd block, a common result of +off-by-one errors in array subscripting, you'll be informed of this +fact, and also where the block was malloc'd. + 

In this example, Valgrind can't identify the address. Actually the +address is on the stack, but, for some reason, this is not a valid +stack address -- it is below the stack pointer, %esp, and that isn't +allowed. + +

Note that Valgrind only tells you that your program is about to +access memory at an illegal address. It can't stop the access from +happening. So, if your program makes an access which normally would +result in a segmentation fault, your program will still suffer the same +fate -- but you will get a message from Valgrind immediately prior to +this. In this particular example, reading junk on the stack is +non-fatal, and the program stays alive. + + 

2.6.2  Use of uninitialised values

+For example: +
+  ==19146== Use of uninitialised CPU condition code
+  ==19146==    at 0x402DFA94: _IO_vfprintf (_itoa.h:49)
+  ==19146==    by 0x402E8476: _IO_printf (printf.c:36)
+  ==19146==    by 0x8048472: main (tests/manuel1.c:8)
+  ==19146==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+
+ +

An uninitialised-value use error is reported when your program uses +a value which hasn't been initialised -- in other words, is undefined. +Here, the undefined value is used somewhere inside the printf() +machinery of the C library. This error was reported when running the +following small program: +

+  int main()
+  {
+    int x;
+    printf ("x = %d\n", x);
+  }
+
+ +

It is important to understand that your program can copy around +junk (uninitialised) data to its heart's content. Valgrind observes +this and keeps track of the data, but does not complain. A complaint +is issued only when your program attempts to make use of uninitialised +data. In this example, x is uninitialised. Valgrind observes the +value being passed to _IO_printf and thence to +_IO_vfprintf, but makes no comment. However, +_IO_vfprintf has to examine the value of x +so it can turn it into the corresponding ASCII string, and it is at +this point that Valgrind complains. + +

Sources of uninitialised data tend to be: +

    +
  • Local variables in procedures which have not been initialised, + as in the example above.

  • + +

  • The contents of malloc'd blocks, before you write something + there. In C++, the new operator is a wrapper round malloc, so + if you create an object with new, its fields will be + uninitialised until you fill them in, which is only Right and + Proper.
  • +
+ + + +

2.6.3  Illegal frees

+For example: +
+  ==7593== Invalid free()
+  ==7593==    at 0x4004FFDF: free (ut_clientmalloc.c:577)
+  ==7593==    by 0x80484C7: main (tests/doublefree.c:10)
+  ==7593==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+  ==7593==    by 0x80483B1: (within tests/doublefree)
+  ==7593==    Address 0x3807F7B4 is 0 bytes inside a block of size 177 free'd
+  ==7593==    at 0x4004FFDF: free (ut_clientmalloc.c:577)
+  ==7593==    by 0x80484C7: main (tests/doublefree.c:10)
+  ==7593==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+  ==7593==    by 0x80483B1: (within tests/doublefree)
+
+

Valgrind keeps track of the blocks allocated by your program with +malloc/new, so it can know exactly whether or not the argument to +free/delete is legitimate. Here, this test program has +freed the same block twice. As with the illegal read/write errors, +Valgrind attempts to make sense of the address free'd. If, as +here, the address is one which has previously been freed, you will +be told that -- making duplicate frees of the same block easy to spot. + + 

2.6.4  Passing system call parameters with inadequate +read/write permissions

+ +Valgrind checks all parameters to system calls. If a system call +needs to read from a buffer provided by your program, Valgrind checks +that the entire buffer is addressible and has valid data, ie, it is +readable. And if the system call needs to write to a user-supplied +buffer, Valgrind checks that the buffer is addressible. After the +system call, Valgrind updates its administrative information to +precisely reflect any changes in memory permissions caused by the +system call. + +

Here's an example of a system call with an invalid parameter: +

+  #include <stdlib.h>
+  #include <unistd.h>
+  int main( void )
+  {
+    char* arr = malloc(10);
+    (void) write( 1 /* stdout */, arr, 10 );
+    return 0;
+  }
+
+ +

You get this complaint ... +

+  ==8230== Syscall param write(buf) lacks read permissions
+  ==8230==    at 0x4035E072: __libc_write
+  ==8230==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+  ==8230==    by 0x80483B1: (within tests/badwrite)
+  ==8230==    by <bogus frame pointer> ???
+  ==8230==    Address 0x3807E6D0 is 0 bytes inside a block of size 10 alloc'd
+  ==8230==    at 0x4004FEE6: malloc (ut_clientmalloc.c:539)
+  ==8230==    by 0x80484A0: main (tests/badwrite.c:6)
+  ==8230==    by 0x402A6E5E: __libc_start_main (libc-start.c:129)
+  ==8230==    by 0x80483B1: (within tests/badwrite)
+
+ +

... because the program has tried to write uninitialised junk from +the malloc'd block to the standard output. + + +

2.6.5  Warning messages you might see

+ +Most of these only appear if you run in verbose mode (enabled by +-v): +
    +
  • More than 50 errors detected. Subsequent errors + will still be recorded, but in less detail than before. +
    + After 50 different errors have been shown, Valgrind becomes + more conservative about collecting them. It then requires only + the program counters in the top two stack frames to match when + deciding whether or not two errors are really the same one. + Prior to this point, the PCs in the top four frames are required + to match. This hack has the effect of slowing down the + appearance of new errors after the first 50. The 50 constant can + be changed by recompiling Valgrind. +

    +

  • More than 500 errors detected. I'm not reporting any more. + Final error counts may be inaccurate. Go fix your + program! +
    + After 500 different errors have been detected, Valgrind ignores + any more. It seems unlikely that collecting even more different + ones would be of practical help to anybody, and it avoids the + danger that Valgrind spends more and more of its time comparing + new errors against an ever-growing collection. As above, the 500 + number is a compile-time constant. +

    +

  • Warning: client exiting by calling exit(<number>). + Bye! +
    + Your program has called the exit system call, which + will immediately terminate the process. You'll get no exit-time + error summaries or leak checks. Note that this is not the same + as your program calling the ANSI C function exit() + -- that causes a normal, controlled shutdown of Valgrind. +

    +

  • Warning: client switching stacks? +
    + Valgrind spotted such a large change in the stack pointer, %esp, + that it guesses the client is switching to a different stack. + At this point it makes a kludgey guess where the base of the new + stack is, and sets memory permissions accordingly. You may get + many bogus error messages following this, if Valgrind guesses + wrong. At the moment "large change" is defined as a change of + more than 2000000 in the value of the %esp (stack pointer) + register. + 

    +

  • Warning: client attempted to close Valgrind's logfile fd <number> + +
    + Valgrind doesn't allow the client + to close the logfile, because you'd never see any diagnostic + information after that point. If you see this message, + you may want to use the --logfile-fd=<number> + option to specify a different logfile file-descriptor number. +

    +

  • Warning: noted but unhandled ioctl <number> +
    + Valgrind observed a call to one of the vast family of + ioctl system calls, but did not modify its + memory status info (because I have not yet got round to it). + The call will still have gone through, but you may get spurious + errors after this as a result of the non-update of the memory info. +

    +

  • Warning: unblocking signal <number> due to + sigprocmask +
    + Really just a diagnostic from the signal simulation machinery. + This message will appear if your program handles a signal by + first longjmping out of the signal handler, + and then unblocking the signal with sigprocmask + -- a standard signal-handling idiom. +

    +

  • Warning: bad signal number <number> in __NR_sigaction. +
    + Probably indicates a bug in the signal simulation machinery. +

    +

  • Warning: set address range perms: large range <number> +
    + Diagnostic message, mostly for my benefit, to do with memory + permissions. +
+ + + +

2.7  Writing suppressions files

+ +A suppression file describes a bunch of errors which, for one reason +or another, you don't want Valgrind to tell you about. Usually the +reason is that the system libraries are buggy but unfixable, at least +within the scope of the current debugging session. Multiple +suppression files are allowed. By default, Valgrind uses +linux24.supp in the directory where it is installed. + + 

+You can ask to add suppressions from another file, by specifying +--suppressions=/path/to/file.supp. + +

Each suppression has the following components:
+

    + +
  • Its name. This merely gives a handy name to the suppression, by + which it is referred to in the summary of used suppressions + printed out when a program finishes. It's not important what + the name is; any identifying string will do. +

    + +

  • The nature of the error to suppress. Either: + Value1, + Value2, + Value4, + Value8 or + Value0, + meaning an uninitialised-value error when + using a value of 1, 2, 4 or 8 bytes, + or the CPU's condition codes, respectively. Or: + Addr1, + Addr2, + Addr4 or + Addr8, meaning an invalid address during a + memory access of 1, 2, 4 or 8 bytes respectively. Or + Param, + meaning an invalid system call parameter error. Or + Free, meaning an invalid or mismatching free.

  • +

    + +

  • The "immediate location" specification. For Value and Addr + errors, is either the name of the function in which the error + occurred, or, failing that, the full path to the .so file + containing the error location. For Param errors, is the name of + the offending system call parameter. For Free errors, is the + name of the function doing the freeing (eg, free, + __builtin_vec_delete, etc)

  • +

    + +

  • The caller of the above "immediate location". Again, either a + function or shared-object name.

  • +

    + +

  • Optionally, one or two extra calling-function or object names, + for greater precision.
  • +
+ +

+Locations may be either names of shared objects or wildcards matching +function names. They begin obj: and fun: +respectively. Function and object names to match against may use the +wildcard characters * and ?. + +A suppression only suppresses an error when the error matches all the +details in the suppression. Here's an example: +

+  {
+    __gconv_transform_ascii_internal/__mbrtowc/mbtowc
+    Value4
+    fun:__gconv_transform_ascii_internal
+    fun:__mbr*toc
+    fun:mbtowc
+  }
+
+ +

What it means is: suppress a use-of-uninitialised-value error, when +the data size is 4, when it occurs in the function +__gconv_transform_ascii_internal, when that is called +from any function of name matching __mbr*toc, +when that is called from +mbtowc. It doesn't apply under any other circumstances. +The string by which this suppression is identified to the user is +__gconv_transform_ascii_internal/__mbrtowc/mbtowc. + 

Another example: +

+  {
+    libX11.so.6.2/libX11.so.6.2/libXaw.so.7.0
+    Value4
+    obj:/usr/X11R6/lib/libX11.so.6.2
+    obj:/usr/X11R6/lib/libX11.so.6.2
+    obj:/usr/X11R6/lib/libXaw.so.7.0
+  }
+
+ +

Suppress any size 4 uninitialised-value error which occurs anywhere +in libX11.so.6.2, when called from anywhere in the same +library, when called from anywhere in libXaw.so.7.0. The +inexact specification of locations is regrettable, but is about all +you can hope for, given that the X11 libraries shipped with Red Hat +7.2 have had their symbol tables removed. + +

Note -- since the above two examples did not make it clear -- that +you can freely mix the obj: and fun: +styles of description within a single suppression record. + + + +

2.8  Building and installing

+At the moment, very rudimentary. + +

The tarball is set up for a standard Red Hat 7.1 (6.2) machine. To +build, just do "make". No configure script, no autoconf, no nothing. + +

The files needed for installation are: valgrind.so, valgrinq.so, +valgrind, VERSION, redhat72.supp (or redhat62.supp). You can copy +these to any directory you like. However, you then need to edit the +shell script "valgrind". On line 4, set the environment variable +VALGRIND to point to the directory you have copied the +installation into. + + + 

2.9  If you have problems

+Mail me (jseward@acm.org). + +

See Section 4 for the known limitations of +Valgrind, and for a list of programs which are known not to work on +it. + +

The translator/instrumentor has a lot of assertions in it. They +are permanently enabled, and I have no plans to disable them. If one +of these breaks, please mail me! + +

If you get an assertion failure on the expression +chunkSane(ch) in vg_free() in +vg_malloc.c, this may have happened because your program +wrote off the end of a malloc'd block, or before its beginning. +Valgrind should have emitted a proper message to that effect before +dying in this way. This is a known problem which I should fix. +

+ +


+ + +

3  Details of the checking machinery

+ +Read this section if you want to know, in detail, exactly what and how +Valgrind is checking. + + +

3.1  Valid-value (V) bits

+ +It is simplest to think of Valgrind implementing a synthetic Intel x86 +CPU which is identical to a real CPU, except for one crucial detail. +Every bit (literally) of data processed, stored and handled by the +real CPU has, in the synthetic CPU, an associated "valid-value" bit, +which says whether or not the accompanying bit has a legitimate value. +In the discussions which follow, this bit is referred to as the V +(valid-value) bit. + +

Each byte in the system therefore has 8 V bits which accompany +it wherever it goes. For example, when the CPU loads a word-size item +(4 bytes) from memory, it also loads the corresponding 32 V bits from +a bitmap which stores the V bits for the process' entire address +space. If the CPU should later write the whole or some part of that +value to memory at a different address, the relevant V bits will be +stored back in the V-bit bitmap. + 

In short, each bit in the system has an associated V bit, which +follows it around everywhere, even inside the CPU. Yes, the CPU's +(integer) registers have their own V bit vectors. + +

Copying values around does not cause Valgrind to check for, or +report on, errors. However, when a value is used in a way which might +conceivably affect the outcome of your program's computation, the +associated V bits are immediately checked. If any of these indicate +that the value is undefined, an error is reported. + +

Here's an (admittedly nonsensical) example: +

+  int i, j;
+  int a[10], b[10];
+  for (i = 0; i < 10; i++) {
+    j = a[i];
+    b[i] = j;
+  }
+
+ +

Valgrind emits no complaints about this, since it merely copies +uninitialised values from a[] into b[], and +doesn't use them in any way. However, if the loop is changed to +

+  for (i = 0; i < 10; i++) {
+    j += a[i];
+  }
+  if (j == 77) 
+     printf("hello there\n");
+
+then Valgrind will complain, at the if, that the +condition depends on uninitialised values. + +

Most low level operations, such as adds, cause Valgrind to +use the V bits for the operands to calculate the V bits for the +result. Even if the result is partially or wholly undefined, +it does not complain. + +

Checks on definedness only occur in two places: when a value is +used to generate a memory address, and where a control flow decision +needs to be made. Also, when a system call is detected, Valgrind +checks definedness of parameters as required. + + 

If a check should detect undefinedness, an error message is +issued. The resulting value is subsequently regarded as well-defined. +To do otherwise would give long chains of error messages. In effect, +we say that undefined values are non-infectious. + 

This sounds overcomplicated. Why not just check all reads from +memory, and complain if an undefined value is loaded into a CPU register? +Well, that doesn't work well, because perfectly legitimate C programs routinely +copy uninitialised values around in memory, and we don't want endless complaints +about that. Here's the canonical example. Consider a struct +like this: +

+  struct S { int x; char c; };
+  struct S s1, s2;
+  s1.x = 42;
+  s1.c = 'z';
+  s2 = s1;
+
+ +

The question to ask is: how large is struct S, in +bytes? An int is 4 bytes and a char one byte, so perhaps a struct S +occupies 5 bytes? Wrong. All (non-toy) compilers I know of will +round the size of struct S up to a whole number of words, +in this case 8 bytes. Not doing this forces compilers to generate +truly appalling code for subscripting arrays of struct +S's. + +

So s1 occupies 8 bytes, yet only 5 of them will be initialised. +For the assignment s2 = s1, gcc generates code to copy +all 8 bytes wholesale into s2 without regard for their +meaning. If Valgrind simply checked values as they came out of +memory, it would yelp every time a structure assignment like this +happened. So the more complicated semantics described above is +necessary. This allows gcc to copy s1 into +s2 any way it likes, and a warning will only be emitted +if the uninitialised values are later used. + +

One final twist to this story. The above scheme allows garbage to +pass through the CPU's integer registers without complaint. It does +this by giving the integer registers V tags, passing these around in +the expected way. This is complicated and computationally expensive to +do, but is necessary. Valgrind is more simplistic about +floating-point loads and stores. In particular, V bits for data read +as a result of floating-point loads are checked at the load +instruction. So if your program uses the floating-point registers to +do memory-to-memory copies, you will get complaints about +uninitialised values. Fortunately, I have not yet encountered a +program which (ab)uses the floating-point registers in this way. + + 

3.2  Valid-address (A) bits

+ +Notice that the previous section describes how the validity of values +is established and maintained without having to say whether the +program does or does not have the right to access any particular +memory location. We now consider the latter issue. + +

As described above, every bit in memory or in the CPU has an +associated valid-value (V) bit. In addition, all bytes in memory, but +not in the CPU, have an associated valid-address (A) bit. This +indicates whether or not the program can legitimately read or write +that location. It does not give any indication of the validity of the +data at that location -- that's the job of the V bits -- only whether +or not the location may be accessed. + 

Every time your program reads or writes memory, Valgrind checks the +A bits associated with the address. If any of them indicate an +invalid address, an error is emitted. Note that the reads and writes +themselves do not change the A bits, only consult them. + +

So how do the A bits get set/cleared? Like this: + +

    +
  • When the program starts, all the global data areas are marked as + accessible.

  • +

    + +

  • When the program does malloc/new, the A bits for exactly the + area allocated, and not a byte more, are marked as accessible. + Upon freeing the area the A bits are changed to indicate + inaccessibility.

  • +

    + +

  • When the stack pointer register (%esp) moves up or down, A bits + are set. The rule is that the area from %esp up to the base of + the stack is marked as accessible, and below %esp is + inaccessible. (If that sounds illogical, bear in mind that the + stack grows down, not up, on almost all Unix systems, including + GNU/Linux.) Tracking %esp like this has the useful side-effect + that the section of stack used by a function for local variables + etc is automatically marked accessible on function entry and + inaccessible on exit.

  • +

    + +

  • When doing system calls, A bits are changed appropriately. For + example, mmap() magically makes files appear in the process's + address space, so the A bits must be updated if mmap() + succeeds.

  • +
+ + + +

3.3  Putting it all together

+Valgrind's checking machinery can be summarised as follows: + +
    +
  • Each byte in memory has 8 associated V (valid-value) bits, + saying whether or not the byte has a defined value, and a single + A (valid-address) bit, saying whether or not the program + currently has the right to read/write that address.

  • +

    + +

  • When memory is read or written, the relevant A bits are + consulted. If they indicate an invalid address, Valgrind emits + an Invalid read or Invalid write error.

  • +

    + +

  • When memory is read into the CPU's integer registers, the + relevant V bits are fetched from memory and stored in the + simulated CPU. They are not consulted.

  • +

    + +

  • When an integer register is written out to memory, the V bits + for that register are written back to memory too.

  • +

    + +

  • When memory is read into the CPU's floating point registers, the + relevant V bits are read from memory and they are immediately + checked. If any are invalid, an uninitialised value error is + emitted. This precludes using the floating-point registers to + copy possibly-uninitialised memory, but simplifies Valgrind in + that it does not have to track the validity status of the + floating-point registers.

  • +

    + +

  • As a result, when a floating-point register is written to + memory, the associated V bits are set to indicate a valid + value.

  • +

    + +

  • When values in integer CPU registers are used to generate a + memory address, or to determine the outcome of a conditional + branch, the V bits for those values are checked, and an error + emitted if any of them are undefined.

  • +

    + +

  • When values in integer CPU registers are used for any other + purpose, Valgrind computes the V bits for the result, but does + not check them.

  • +

    + +

  • Once the V bits for a value in the CPU have been checked, they + are then set to indicate validity. This avoids long chains of + errors.

  • +

    + +

  • When values are loaded from memory, valgrind checks the A bits + for that location and issues an illegal-address warning if + needed. In that case, the V bits loaded are forced to indicate + Valid, despite the location being invalid. +

    + This apparently strange choice reduces the amount of confusing + information presented to the user. It avoids the + unpleasant phenomenon in which memory is read from a place which + is both unaddressible and contains invalid values, and, as a + result, you get not only an invalid-address (read/write) error, + but also a potentially large set of uninitialised-value errors, + one for every time the value is used. +

    + There is a hazy boundary case to do with multi-byte loads from + addresses which are partially valid and partially invalid. See + the description of the flag --partial-loads-ok for details. + 


  • +
+ +Valgrind intercepts calls to malloc, calloc, realloc, valloc, +memalign, free, new and delete. The behaviour you get is: + +
    + +
  • malloc/new: the returned memory is marked as addressible but not + having valid values. This means you have to write on it before + you can read it.

  • +

    + +

  • calloc: returned memory is marked both addressible and valid, + since calloc() clears the area to zero.

  • +

    + +

  • realloc: if the new size is larger than the old, the new section + is addressible but invalid, as with malloc.

  • +

    + +

  • If the new size is smaller, the dropped-off section is marked as + unaddressible. You may only pass to realloc a pointer + previously issued to you by malloc/calloc/new/realloc.

  • +

    + +

  • free/delete: you may only pass to free a pointer previously + issued to you by malloc/calloc/new/realloc, or the value + NULL. Otherwise, Valgrind complains. If the pointer is indeed + valid, Valgrind marks the entire area it points at as + unaddressible, and places the block in the freed-blocks-queue. + The aim is to defer as long as possible reallocation of this + block. Until that happens, all attempts to access it will + elicit an invalid-address error, as you would hope.

  • +
+ + + + +

3.4  Signals

+ +Valgrind provides suitable handling of signals, so, provided you stick +to POSIX stuff, you should be ok. Basic sigaction() and sigprocmask() +are handled. Signal handlers may return in the normal way or do +longjmp(); both should work ok. As specified by POSIX, a signal is +blocked in its own handler. Default actions for signals should work +as before. Etc, etc. + +

Under the hood, dealing with signals is a real pain, and Valgrind's +simulation leaves much to be desired. If your program does +way-strange stuff with signals, bad things may happen. If so, let me +know. I don't promise to fix it, but I'd at least like to be aware of +it. + + + +

3.5  Memory leak detection

+ +Valgrind keeps track of all memory blocks issued in response to calls +to malloc/calloc/realloc/new. So when the program exits, it knows +which blocks are still outstanding -- have not been returned, in other +words. Ideally, you want your program to have no blocks still in use +at exit. But many programs do. + +

For each such block, Valgrind scans the entire address space of the +process, looking for pointers to the block. One of three situations +may result: + +

    +
  • A pointer to the start of the block is found. This usually + indicates programming sloppiness; since the block is still + pointed at, the programmer could, at least in principle, have free'd + it before program exit.

  • +

    + +

  • A pointer to the interior of the block is found. The pointer + might originally have pointed to the start and have been moved + along, or it might be entirely unrelated. Valgrind deems such a + block as "dubious", that is, possibly leaked, + because it's unclear whether or + not a pointer to it still exists.

  • +

    + +

  • The worst outcome is that no pointer to the block can be found. + The block is classified as "leaked", because the + programmer could not possibly have free'd it at program exit, + since no pointer to it exists. This might be a symptom of + having lost the pointer at some earlier point in the + program.
  • +
+ +Valgrind reports summaries about leaked and dubious blocks. +For each such block, it will also tell you where the block was +allocated. This should help you figure out why the pointer to it has +been lost. In general, you should attempt to ensure your programs do +not have any leaked or dubious blocks at exit. + +

The precise area of memory in which Valgrind searches for pointers +is: all naturally-aligned 4-byte words for which all A bits indicate +addressibility and all V bits indicate that the stored value is +actually valid. + 


+ + +
+

4  Limitations

+ +The following list of limitations seems depressingly long. However, +most programs actually work fine. + +

Valgrind will run x86-GNU/Linux ELF dynamically linked binaries, on +a kernel 2.4.X system, subject to the following constraints: + +

    +
  • No MMX, SSE, SSE2, 3DNow instructions. If the translator + encounters these, Valgrind will simply give up. It may be + possible to add support for them at a later time. Intel added a + few instructions such as "cmov" to the integer instruction set + on Pentium and later processors, and these are supported. + Nevertheless it's safest to think of Valgrind as implementing + the 486 instruction set.

  • +

    + +

  • Multithreaded programs are not supported, since I haven't yet + figured out how to do this. To be more specific, it is the + "clone" system call which is not supported. A program calls + "clone" to create threads. Valgrind will abort if this + happens.
  • +

    + +

  • Valgrind assumes that the floating point registers are not used + as intermediaries in memory-to-memory copies, so it immediately + checks V bits in floating-point loads/stores. If you want to + write code which copies around possibly-uninitialised values, + you must ensure these travel through the integer registers, not + the FPU.

  • +

    + +

  • If your program does its own memory management, rather than + using malloc/new/free/delete, it should still work, but + Valgrind's error checking won't be so effective.

  • +

    + +

  • Valgrind's signal simulation is not as robust as it could be. + Basic POSIX-compliant sigaction and sigprocmask functionality is + supplied, but it's conceivable that things could go badly awry + if you do weird things with signals. Workaround: don't. + Programs that do non-POSIX signal tricks are in any case + inherently unportable, so should be avoided if + possible.

  • +

    + +

  • I have no idea what happens if programs try to handle signals on + an alternate stack (sigaltstack). YMMV.

  • +

    + +

  • Programs which switch stacks are not well handled. Valgrind + does have support for this, but I don't have great faith in it. + It's difficult -- there's no cast-iron way to decide whether a + large change in %esp is as a result of the program switching + stacks, or merely allocating a large object temporarily on the + current stack -- yet Valgrind needs to handle the two situations + differently.

  • +

    + +

  • x86 instructions, and system calls, have been implemented on + demand. So it's possible, although unlikely, that a program + will fall over with a message to that effect. If this happens, + please mail me ALL the details printed out, so I can try and + implement the missing feature.

  • +

    + +

  • x86 floating point works correctly, but floating-point code may + run even more slowly than integer code, due to my simplistic + approach to FPU emulation.

  • +

    + +

  • You can't Valgrind-ize statically linked binaries. Valgrind + relies on the dynamic-link mechanism to gain control at + startup.

  • +

    + +

  • Memory consumption of your program is majorly increased whilst + running under Valgrind. This is due to the large amount of + administrative information maintained behind the scenes. Another + cause is that Valgrind dynamically translates the original + executable and never throws any translation away, except in + those rare cases where self-modifying code is detected. + Translated, instrumented code is 8-12 times larger than the + original (!) so you can easily end up with 15+ MB of + translations when running (eg) a web browser. There's not a lot + you can do about this -- use Valgrind on a fast machine with a lot + of memory and swap space. At some point I may implement a LRU + caching scheme for translations, so as to bound the maximum + amount of memory devoted to them, to say 8 or 16 MB.
  • +
+ + +Programs which are known not to work are: + +
    +
  • Netscape 4.76 works pretty well on some platforms -- quite + nicely on my AMD K6-III (400 MHz). I can surf, do mail, etc, no + problem. On other platforms it has been observed to crash + during startup. Despite much investigation I can't figure out + why.

  • +

    + +

  • kpackage (a KDE front end to rpm) dies because the CPUID + instruction is unimplemented. Easy to fix.

  • +

    + +

  • knode (a KDE newsreader) tries to do multithreaded things, and + fails.

  • +

    + +

  • emacs starts up but immediately concludes it is out of memory + and aborts. Emacs has its own memory-management scheme, but I + don't understand why this should interact so badly with + Valgrind.

  • +

    + +

  • Gimp and Gnome and GTK-based apps die early on because + of unimplemented system call wrappers. (I'm a KDE user :) + This wouldn't be hard to fix. +

  • +

    + +

  • As a consequence of me being a KDE user, almost all KDE apps + work ok -- except those which are multithreaded. +

  • +

    +

+ + +


+ + + +

5  How it works -- a rough overview

+Some gory details, for those with a passion for gory details. You +don't need to read this section if all you want to do is use Valgrind. + + +

5.1  Getting started

+ +Valgrind is compiled into a shared object, valgrind.so. The shell +script valgrind sets the LD_PRELOAD environment variable to point to +valgrind.so. This causes the .so to be loaded as an extra library to +any subsequently executed dynamically-linked ELF binary, viz, the +program you want to debug. + +

The dynamic linker allows each .so in the process image to have an +initialisation function which is run before main(). It also allows +each .so to have a finalisation function run after main() exits. + +

When valgrind.so's initialisation function is called by the dynamic +linker, the synthetic CPU starts up. The real CPU remains locked +in valgrind.so for the entire rest of the program, but the synthetic +CPU returns from the initialisation function. Startup of the program +now continues as usual -- the dynamic linker calls all the other .so's +initialisation routines, and eventually runs main(). This all runs on +the synthetic CPU, not the real one, but the client program cannot +tell the difference. + +

Eventually main() exits, so the synthetic CPU calls valgrind.so's +finalisation function. Valgrind detects this, and uses it as its cue +to exit. It prints summaries of all errors detected, possibly checks +for memory leaks, and then exits the finalisation routine, but now on +the real CPU. The synthetic CPU has now lost control -- permanently +-- so the program exits back to the OS on the real CPU, just as it +would have done anyway. + +

On entry, Valgrind switches stacks, so it runs on its own stack. +On exit, it switches back. This means that the client program +continues to run on its own stack, so we can switch back and forth +between running it on the simulated and real CPUs without difficulty. +This was an important design decision, because it makes it easy (well, +significantly less difficult) to debug the synthetic CPU. + + + +

5.2  The translation/instrumentation engine

+ +Valgrind does not directly run any of the original program's code. Only +instrumented translations are run. Valgrind maintains a translation +table, which allows it to find the translation quickly for any branch +target (code address). If no translation has yet been made, the +translator - a just-in-time translator - is summoned. This makes an +instrumented translation, which is added to the collection of +translations. Subsequent jumps to that address will use this +translation. + +

Valgrind can optionally check writes made by the application, to +see if they are writing an address contained within code which has +been translated. Such a write invalidates translations of code +bracketing the written address. Valgrind will discard the relevant +translations, which causes them to be re-made, if they are needed +again, reflecting the new updated data stored there. In this way, +self modifying code is supported. In practice I have not found any +Linux applications which use self-modifying-code. + +

The JITter translates basic blocks -- blocks of straight-line-code +-- as single entities. To minimise the considerable difficulties of +dealing with the x86 instruction set, x86 instructions are first +translated to a RISC-like intermediate code, similar to sparc code, +but with an infinite number of virtual integer registers. Initially +each insn is translated separately, and there is no attempt at +instrumentation. + +

The intermediate code is improved, mostly so as to try and cache +the simulated machine's registers in the real machine's registers over +several simulated instructions. This is often very effective. Also, +we try to remove redundant updates of the simulated machine's +condition-code register. + +

The intermediate code is then instrumented, giving more +intermediate code. There are a few extra intermediate-code operations +to support instrumentation; it is all refreshingly simple. After +instrumentation there is a cleanup pass to remove redundant value +checks. + +

This gives instrumented intermediate code which mentions arbitrary +numbers of virtual registers. A linear-scan register allocator is +used to assign real registers and possibly generate spill code. All +of this is still phrased in terms of the intermediate code. This +machinery is inspired by the work of Reuben Thomas (MITE). + +

Then, and only then, is the final x86 code emitted. The +intermediate code is carefully designed so that x86 code can be +generated from it without need for spare registers or other +inconveniences. + +

The translations are managed using a traditional LRU-based caching +scheme. The translation cache has a default size of about 14MB. + + + +

5.3  Tracking the status of memory

Each byte in the +process' address space has nine bits associated with it: one A bit and +eight V bits. The A and V bits for each byte are stored using a +sparse array, which flexibly and efficiently covers arbitrary parts of +the 32-bit address space without imposing significant space or +performance overheads for the parts of the address space never +visited. The scheme used, and speedup hacks, are described in detail +at the top of the source file vg_memory.c, so you should read that for +the gory details. + + + +

5.4 System calls

+All system calls are intercepted. The memory status map is consulted +before and updated after each call. It's all rather tiresome. See +vg_syscall_mem.c for details. + + + +

5.5  Signals

+All system calls to sigaction() and sigprocmask() are intercepted. If +the client program is trying to set a signal handler, Valgrind makes a +note of the handler address and which signal it is for. Valgrind then +arranges for the same signal to be delivered to its own handler. + +

When such a signal arrives, Valgrind's own handler catches it, and +notes the fact. At a convenient safe point in execution, Valgrind +builds a signal delivery frame on the client's stack and runs its +handler. If the handler longjmp()s, there is nothing more to be said. +If the handler returns, Valgrind notices this, zaps the delivery +frame, and carries on where it left off before delivering the signal. + +

The purpose of this nonsense is that setting signal handlers +essentially amounts to giving callback addresses to the Linux kernel. +We can't allow this to happen, because if it did, signal handlers +would run on the real CPU, not the simulated one. This means the +checking machinery would not operate during the handler run, and, +worse, memory permissions maps would not be updated, which could cause +spurious error reports once the handler had returned. + +

An even worse thing would happen if the signal handler longjmp'd +rather than returned: Valgrind would completely lose control of the +client program. + +

Upshot: we can't allow the client to install signal handlers +directly. Instead, Valgrind must catch, on behalf of the client, any +signal the client asks to catch, and must deliver it to the client on +the simulated CPU, not the real one. This involves considerable +gruesome fakery; see vg_signals.c for details. +

+ +


+ + +

6  Example

+This is the log for a run of a small program. The program is in fact +correct, and the reported error is as the result of a potentially serious +code generation bug in GNU g++ (snapshot 20010527). +
+sewardj@phoenix:~/newmat10$
+~/Valgrind-6/valgrind -v ./bogon 
+==25832== Valgrind 0.10, a memory error detector for x86 RedHat 7.1.
+==25832== Copyright (C) 2000-2001, and GNU GPL'd, by Julian Seward.
+==25832== Startup, with flags:
+==25832== --suppressions=/home/sewardj/Valgrind/redhat71.supp
+==25832== reading syms from /lib/ld-linux.so.2
+==25832== reading syms from /lib/libc.so.6
+==25832== reading syms from /mnt/pima/jrs/Inst/lib/libgcc_s.so.0
+==25832== reading syms from /lib/libm.so.6
+==25832== reading syms from /mnt/pima/jrs/Inst/lib/libstdc++.so.3
+==25832== reading syms from /home/sewardj/Valgrind/valgrind.so
+==25832== reading syms from /proc/self/exe
+==25832== loaded 5950 symbols, 142333 line number locations
+==25832== 
+==25832== Invalid read of size 4
+==25832==    at 0x8048724: _ZN10BandMatrix6ReSizeEiii (bogon.cpp:45)
+==25832==    by 0x80487AF: main (bogon.cpp:66)
+==25832==    by 0x40371E5E: __libc_start_main (libc-start.c:129)
+==25832==    by 0x80485D1: (within /home/sewardj/newmat10/bogon)
+==25832==    Address 0xBFFFF74C is not stack'd, malloc'd or free'd
+==25832==
+==25832== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+==25832== malloc/free: in use at exit: 0 bytes in 0 blocks.
+==25832== malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+==25832== For a detailed leak analysis, rerun with: --leak-check=yes
+==25832==
+==25832== exiting, did 1881 basic blocks, 0 misses.
+==25832== 223 translations, 3626 bytes in, 56801 bytes out.
+
+

The GCC folks fixed this about a week before gcc-3.0 shipped. +


+

+ + diff --git a/memcheck/docs/nav.html b/memcheck/docs/nav.html new file mode 100644 index 000000000..686ac2bde --- /dev/null +++ b/memcheck/docs/nav.html @@ -0,0 +1,68 @@ + + + Valgrind + + + + + +
+ Contents of this manual
+ 1 Introduction
+ 1.1 What Valgrind is for
+ 1.2 What it does with + your program +

+ 2 How to use it, and how to + make sense of the results
+ 2.1 Getting started
+ 2.2 The commentary
+ 2.3 Reporting of errors
+ 2.4 Suppressing errors
+ 2.5 Command-line flags
+ 2.6 Explanation of error messages
+ 2.7 Writing suppressions files
+ 2.8 Building and installing
+ 2.9 If you have problems +

+ 3 Details of the checking machinery
+ 3.1 Valid-value (V) bits
+ 3.2 Valid-address (A) bits
+ 3.3 Putting it all together
+ 3.4 Signals
+ 3.5 Memory leak detection +

+ 4 Limitations
+

+ 5 How it works -- a rough overview
+ 5.1 Getting started
+ 5.2 The translation/instrumentation engine
+ 5.3 Tracking the status of memory
+ 5.4 System calls
+ 5.5 Signals +

+ 6 An example
+

+ 7 The design and implementation of Valgrind
+ + + diff --git a/memcheck/docs/techdocs.html b/memcheck/docs/techdocs.html new file mode 100644 index 000000000..4044d4957 --- /dev/null +++ b/memcheck/docs/techdocs.html @@ -0,0 +1,2116 @@ + + + + The design and implementation of Valgrind + + + + +  +

The design and implementation of Valgrind

+ +
+Detailed technical notes for hackers, maintainers and the +overly-curious
+These notes pertain to snapshot 20020306
+

+jseward@acm.org
+
http://developer.kde.org/~sewardj
+http://www.muraroa.demon.co.uk
+Copyright © 2000-2002 Julian Seward +

+Valgrind is licensed under the GNU General Public License, +version 2
+An open-source tool for finding memory-management problems in +x86 GNU/Linux executables. +

+ +

+ + + + +


+ +

Introduction

+ +This document contains a detailed, highly-technical description of the +internals of Valgrind. This is not the user manual; if you are an +end-user of Valgrind, you do not want to read this. Conversely, if +you really are a hacker-type and want to know how it works, I assume +that you have read the user manual thoroughly. +

+You may need to read this document several times, and carefully. Some +important things, I only say once. + + +

History

+ +Valgrind came into public view in late Feb 2002. However, it has been +under contemplation for a very long time, perhaps seriously for about +five years. Somewhat over two years ago, I started working on the x86 +code generator for the Glasgow Haskell Compiler +(http://www.haskell.org/ghc), gaining familiarity with x86 internals +on the way. I then did Cacheprof (http://www.cacheprof.org), gaining +further x86 experience. Some time around Feb 2000 I started +experimenting with a user-space x86 interpreter for x86-Linux. This +worked, but it was clear that a JIT-based scheme would be necessary to +give reasonable performance for Valgrind. Design work for the JITter +started in earnest in Oct 2000, and by early 2001 I had an x86-to-x86 +dynamic translator which could run quite large programs. This +translator was in a sense pointless, since it did not do any +instrumentation or checking. + +

+Most of the rest of 2001 was taken up designing and implementing the +instrumentation scheme. The main difficulty, which consumed a lot +of effort, was to design a scheme which did not generate large numbers +of false uninitialised-value warnings. By late 2001 a satisfactory +scheme had been arrived at, and I started to test it on ever-larger +programs, with an eventual eye to making it work well enough so that +it was helpful to folks debugging the upcoming version 3 of KDE. I've +used KDE since before version 1.0, and wanted Valgrind to be an +indirect contribution to the KDE 3 development effort. At the start of +Feb 02 the kde-core-devel crew started using it, and gave a huge +amount of helpful feedback and patches in the space of three weeks. +Snapshot 20020306 is the result. + +

+In the best Unix tradition, or perhaps in the spirit of Fred Brooks' +depressing-but-completely-accurate epitaph "build one to throw away; +you will anyway", much of Valgrind is a second or third rendition of +the initial idea. The instrumentation machinery +(vg_translate.c, vg_memory.c) and core CPU +simulation (vg_to_ucode.c, vg_from_ucode.c) +have had three redesigns and rewrites; the register allocator, +low-level memory manager (vg_malloc2.c) and symbol table +reader (vg_symtab2.c) are on the second rewrite. In a +sense, this document serves to record some of the knowledge gained as +a result. + + +

Design overview

+ +Valgrind is compiled into a Linux shared object, +valgrind.so, and also a dummy one, +valgrinq.so, of which more later. The +valgrind shell script adds valgrind.so to +the LD_PRELOAD list of extra libraries to be +loaded with any dynamically linked library. This is a standard trick, +one which I assume the LD_PRELOAD mechanism was developed +to support. + +

+valgrind.so +is linked with the -z initfirst flag, which requests that +its initialisation code is run before that of any other object in the +executable image. When this happens, valgrind gains control. The +real CPU becomes "trapped" in valgrind.so and the +translations it generates. The synthetic CPU provided by Valgrind +does, however, return from this initialisation function. So the +normal startup actions, orchestrated by the dynamic linker +ld.so, continue as usual, except on the synthetic CPU, +not the real one. Eventually main is run and returns, +and then the finalisation code of the shared objects is run, +presumably in inverse order to which they were initialised. Remember, +this is still all happening on the simulated CPU. Eventually +valgrind.so's own finalisation code is called. It spots +this event, shuts down the simulated CPU, prints any error summaries +and/or does leak detection, and returns from the initialisation code +on the real CPU. At this point, in effect the real and synthetic CPUs +have merged back into one, Valgrind has lost control of the program, +and the program finally exit()s back to the kernel in the +usual way. + +

+The normal course of activity, once Valgrind has started up, is as +follows. Valgrind never runs any part of your program (usually +referred to as the "client"), not a single byte of it, directly. +Instead it uses function VG_(translate) to translate +basic blocks (BBs, straight-line sequences of code) into instrumented +translations, and those are run instead. The translations are stored +in the translation cache (TC), vg_tc, with the +translation table (TT), vg_tt supplying the +original-to-translation code address mapping. Auxiliary array +VG_(tt_fast) is used as a direct-map cache for fast +lookups in TT; it usually achieves a hit rate of around 98% and +facilitates an orig-to-trans lookup in 4 x86 insns, which is not bad. + +

+Function VG_(dispatch) in vg_dispatch.S is +the heart of the JIT dispatcher. Once a translated code address has +been found, it is executed simply by an x86 call +to the translation. At the end of the translation, the next +original code addr is loaded into %eax, and the +translation then does a ret, taking it back to the +dispatch loop, with, interestingly, zero branch mispredictions. +The address requested in %eax is looked up first in +VG_(tt_fast), and, if not found, by calling C helper +VG_(search_transtab). If there is still no translation +available, VG_(dispatch) exits back to the top-level +C dispatcher VG_(toploop), which arranges for +VG_(translate) to make a new translation. All fairly +unsurprising, really. There are various complexities described below. + +

+The translator, orchestrated by VG_(translate), is +complicated but entirely self-contained. It is described in great +detail in subsequent sections. Translations are stored in TC, with TT +tracking administrative information. The translations are subject to +an approximate LRU-based management scheme. With the current +settings, the TC can hold at most about 15MB of translations, and LRU +passes prune it to about 13.5MB. Given that the +orig-to-translation expansion ratio is about 13:1 to 14:1, this means +TC holds translations for more or less a megabyte of original code, +which generally comes to about 70000 basic blocks for C++ compiled +with optimisation on. Generating new translations is expensive, so it +is worth having a large TC to minimise the (capacity) miss rate. + +

+The dispatcher, VG_(dispatch), receives hints from +the translations which allow it to cheaply spot all control +transfers corresponding to x86 call and ret +instructions. It has to do this in order to spot some special events: +

    +
  • Calls to VG_(shutdown). This is Valgrind's cue to + exit. NOTE: actually this is done a different way; it should be + cleaned up. +

    +

  • Returns of system call handlers, to the return address + VG_(signalreturn_bogusRA). The signal simulator + needs to know when a signal handler is returning, so we spot + jumps (returns) to this address. +

    +

  • Calls to vg_trap_here. All malloc, + free, etc calls that the client program makes are + eventually routed to a call to vg_trap_here, + and Valgrind does its own special thing with these calls. + In effect this provides a trapdoor, by which Valgrind can + intercept certain calls on the simulated CPU, run the call as it + sees fit itself (on the real CPU), and return the result to + the simulated CPU, quite transparently to the client program. +
+Valgrind intercepts the client's malloc, +free, etc, +calls, so that it can store additional information. Each block +malloc'd by the client gives rise to a shadow block +in which Valgrind stores the call stack at the time of the +malloc +call. When the client calls free, Valgrind tries to +find the shadow block corresponding to the address passed to +free, and emits an error message if none can be found. +If it is found, the block is placed on the freed blocks queue +vg_freed_list, it is marked as inaccessible, and +its shadow block now records the call stack at the time of the +free call. Keeping free'd blocks in +this queue allows Valgrind to spot all (presumably invalid) accesses +to them. However, once the volume of blocks in the free queue +exceeds VG_(clo_freelist_vol), blocks are finally +removed from the queue. + +

+Keeping track of A and V bits (note: if you don't know what these are, +you haven't read the user guide carefully enough) for memory is done +in vg_memory.c. This implements a sparse array structure +which covers the entire 4G address space in a way which is reasonably +fast and reasonably space efficient. The 4G address space is divided +up into 64K sections, each covering 64Kb of address space. Given a +32-bit address, the top 16 bits are used to select one of the 65536 +entries in VG_(primary_map). The resulting "secondary" +(SecMap) holds A and V bits for the 64k of address space +chunk corresponding to the lower 16 bits of the address. + + +

Design decisions

+ +Some design decisions were motivated by the need to make Valgrind +debuggable. Imagine you are writing a CPU simulator. It works fairly +well. However, you run some large program, like Netscape, and after +tens of millions of instructions, it crashes. How can you figure out +where in your simulator the bug is? + +

+Valgrind's answer is: cheat. Valgrind is designed so that it is +possible to switch back to running the client program on the real +CPU at any point. Using the --stop-after= flag, you can +ask Valgrind to run just some number of basic blocks, and then +run the rest of the way on the real CPU. If you are searching for +a bug in the simulated CPU, you can use this to do a binary search, +which quickly leads you to the specific basic block which is +causing the problem. + +

+This is all very handy. It does constrain the design in certain +unimportant ways. Firstly, the layout of memory, when viewed from the +client's point of view, must be identical regardless of whether it is +running on the real or simulated CPU. This means that Valgrind can't +do pointer swizzling -- well, no great loss -- and it can't run on +the same stack as the client -- again, no great loss. +Valgrind operates on its own stack, VG_(stack), which +it switches to at startup, temporarily switching back to the client's +stack when doing system calls for the client. + +

+Valgrind also receives signals on its own stack, +VG_(sigstack), but for different gruesome reasons +discussed below. + +

+This nice clean switch-back-to-the-real-CPU-whenever-you-like story +is muddied by signals. Problem is that signals arrive at arbitrary +times and tend to slightly perturb the basic block count, with the +result that you can get close to the basic block causing a problem but +can't home in on it exactly. My kludgey hack is to define +SIGNAL_SIMULATION to 1 towards the bottom of +vg_syscall_mem.c, so that signal handlers are run on the +real CPU and don't change the BB counts. + +

+A second hole in the switch-back-to-real-CPU story is that Valgrind's +way of delivering signals to the client is different from that of the +kernel. Specifically, the layout of the signal delivery frame, and +the mechanism used to detect a sighandler returning, are different. +So you can't expect to make the transition inside a sighandler and +still have things working, but in practice that's not much of a +restriction. + +

+Valgrind's implementation of malloc, free, +etc, (in vg_clientmalloc.c, not the low-level stuff in +vg_malloc2.c) is somewhat complicated by the need to +handle switching back at arbitrary points. It does work tho. + + + +

Correctness

+ +There's only one of me, and I have a Real Life (tm) as well as hacking +Valgrind [allegedly :-]. That means I don't have time to waste +chasing endless bugs in Valgrind. My emphasis is therefore on doing +everything as simply as possible, with correctness, stability and +robustness being the number one priority, more important than +performance or functionality. As a result: +
    +
  • The code is absolutely loaded with assertions, and these are + permanently enabled. I have no plan to remove or disable + them later. Over the past couple of months, as valgrind has + become more widely used, they have shown their worth, pulling + up various bugs which would otherwise have appeared as + hard-to-find segmentation faults. +

    + I am of the view that it's acceptable to spend 5% of the total + running time of your valgrindified program doing assertion checks + and other internal sanity checks. +

    +

  • Aside from the assertions, valgrind contains various sets of + internal sanity checks, which get run at varying frequencies + during normal operation. VG_(do_sanity_checks) + runs every 1000 basic blocks, which means 500 to 2000 times/second + for typical machines at present. It checks that Valgrind hasn't + overrun its private stack, and does some simple checks on the + memory permissions maps. Once every 25 calls it does some more + extensive checks on those maps. Etc, etc. +

    + The following components also have sanity check code, which can + be enabled to aid debugging: +

      +
    • The low-level memory-manager + (VG_(mallocSanityCheckArena)). This does a + complete check of all blocks and chains in an arena, which + is very slow. Is not engaged by default. +

      +

    • The symbol table reader(s): various checks to ensure + uniqueness of mappings; see VG_(read_symbols) + for a start. Is permanently engaged. +

      +

    • The A and V bit tracking stuff in vg_memory.c. + This can be compiled with cpp symbol + VG_DEBUG_MEMORY defined, which removes all the + fast, optimised cases, and uses simple-but-slow fallbacks + instead. Not engaged by default. +

      +

    • Ditto VG_DEBUG_LEAKCHECK. +

      +

    • The JITter parses x86 basic blocks into sequences of + UCode instructions. It then sanity checks each one with + VG_(saneUInstr) and sanity checks the sequence + as a whole with VG_(saneUCodeBlock). This stuff + is engaged by default, and has caught some way-obscure bugs + in the simulated CPU machinery in its time. +

      +

    • The system call wrapper does + VG_(first_and_last_secondaries_look_plausible) after + every syscall; this is known to pick up bugs in the syscall + wrappers. Engaged by default. +

      +

    • The main dispatch loop, in VG_(dispatch), checks + that translations do not set %ebp to any value + different from VG_EBP_DISPATCH_CHECKED or + & VG_(baseBlock). In effect this test is free, + and is permanently engaged. +

      +

    • There are a couple of ifdefed-out consistency checks I + inserted whilst debugging the new register allocator, + vg_do_register_allocation.
    +

    +

  • I try to avoid techniques, algorithms, mechanisms, etc, for which + I can supply neither a convincing argument that they are correct, + nor sanity-check code which might pick up bugs in my + implementation. I don't always succeed in this, but I try. + Basically the idea is: avoid techniques which are, in practice, + unverifiable, in some sense. When doing anything, always have in + mind: "how can I verify that this is correct?" +
+ +

+Some more specific things are: + +

    +
  • Valgrind runs in the same namespace as the client, at least from + ld.so's point of view, and it therefore absolutely + had better not export any symbol with a name which could clash + with that of the client or any of its libraries. Therefore, all + globally visible symbols exported from valgrind.so + are defined using the VG_ CPP macro. As you'll see + from vg_constants.h, this appends some arbitrary + prefix to the symbol, in order that it be, we hope, globally + unique. Currently the prefix is vgPlain_. For + convenience there are also VGM_, VGP_ + and VGOFF_. All locally defined symbols are declared + static and do not appear in the final shared object. +

    + To check this, I periodically do + nm valgrind.so | grep " T ", + which shows you all the globally exported text symbols. + They should all have an approved prefix, except for those like + malloc, free, etc, which we deliberately + want to shadow and take precedence over the same names exported + from glibc.so, so that valgrind can intercept those + calls easily. Similarly, nm valgrind.so | grep " D " + allows you to find any rogue data-segment symbol names. +

    +

  • Valgrind tries, and almost succeeds, in being completely + independent of all other shared objects, in particular of + glibc.so. For example, we have our own low-level + memory manager in vg_malloc2.c, which is a fairly + standard malloc/free scheme augmented with arenas, and + vg_mylibc.c exports reimplementations of various bits + and pieces you'd normally get from the C library. +

    + Why all the hassle? Because imagine the potential chaos of both + the simulated and real CPUs executing in glibc.so. + It just seems simpler and cleaner to be completely self-contained, + so that only the simulated CPU visits glibc.so. In + practice it's not much hassle anyway. Also, valgrind starts up + before glibc has a chance to initialise itself, and who knows what + difficulties that could lead to. Finally, glibc has definitions + for some types, specifically sigset_t, which conflict + (are different from) the Linux kernel's idea of same. When + Valgrind wants to fiddle around with signal stuff, it wants to + use the kernel's definitions, not glibc's definitions. So it's + simplest just to keep glibc out of the picture entirely. +

    + To find out which glibc symbols are used by Valgrind, reinstate + the link flags -nostdlib -Wl,-no-undefined. This + causes linking to fail, but will tell you what you depend on. + I have mostly, but not entirely, got rid of the glibc + dependencies; what remains is, IMO, fairly harmless. AFAIK the + current dependencies are: memset, + memcmp, stat, system, + sbrk, setjmp and longjmp. + +

    +

  • Similarly, valgrind should not really import any headers other + than the Linux kernel headers, since it knows of no API other than + the kernel interface to talk to. At the moment this is really not + in a good state, and vg_syscall_mem imports, via + vg_unsafe.h, a significant number of C-library + headers so as to know the sizes of various structs passed across + the kernel boundary. This is of course completely bogus, since + there is no guarantee that the C library's definitions of these + structs matches those of the kernel. I have started to sort this + out using vg_kerneliface.h, into which I had intended + to copy all kernel definitions which valgrind could need, but this + has not gotten very far. At the moment it mostly contains + definitions for sigset_t and struct + sigaction, since the kernel's definition for these really + does clash with glibc's. I plan to use a vki_ prefix + on all these types and constants, to denote the fact that they + pertain to Valgrind's Kernel Interface. +

    + Another advantage of having a vg_kerneliface.h file + is that it makes it simpler to interface to a different kernel. + One can, for example, easily imagine writing a new + vg_kerneliface.h for FreeBSD, or x86 NetBSD. + +

+ +

Current limitations

+ +No threads. I think fixing this is close to a research-grade problem. +

+No MMX. Fixing this should be relatively easy, using the same giant +trick used for x86 FPU instructions. See below. +

+Support for weird (non-POSIX) signal stuff is patchy. Does anybody +care? +

+ + + + +


+ +

The instrumenting JITter

+ +This really is the heart of the matter. We begin with various side +issues. + +

Run-time storage, and the use of host registers

+ +Valgrind translates client (original) basic blocks into instrumented +basic blocks, which live in the translation cache TC, until either the +client finishes or the translations are ejected from TC to make room +for newer ones. +

+Since it generates x86 code in memory, Valgrind has complete control +of the use of registers in the translations. Now pay attention. I +shall say this only once, and it is important you understand this. In +what follows I will refer to registers in the host (real) cpu using +their standard names, %eax, %edi, etc. I +refer to registers in the simulated CPU by capitalising them: +%EAX, %EDI, etc. These two sets of +registers usually bear no direct relationship to each other; there is +no fixed mapping between them. This naming scheme is used fairly +consistently in the comments in the sources. +

+Host registers, once things are up and running, are used as follows: +

    +
  • %esp, the real stack pointer, points + somewhere in Valgrind's private stack area, + VG_(stack) or, transiently, into its signal delivery + stack, VG_(sigstack). +

    +

  • %edi is used as a temporary in code generation; it + is almost always dead, except when used for the Left + value-tag operations. +

    +

  • %eax, %ebx, %ecx, + %edx and %esi are available to + Valgrind's register allocator. They are dead (carry unimportant + values) in between translations, and are live only in + translations. The one exception to this is %eax, + which, as mentioned far above, has a special significance to the + dispatch loop VG_(dispatch): when a translation + returns to the dispatch loop, %eax is expected to + contain the original-code-address of the next translation to run. + The register allocator is so good at minimising spill code that + using five regs and not having to save/restore %edi + actually gives better code than allocating to %edi + as well, but then having to push/pop it around special uses. +

    +

  • %ebp points permanently at + VG_(baseBlock). Valgrind's translations are + position-independent, partly because this is convenient, but also + because translations get moved around in TC as part of the LRUing + activity. All static entities which need to be referred to + from generated code, whether data or helper functions, are stored + starting at VG_(baseBlock) and are therefore reached + by indexing from %ebp. There is but one exception, + which is that by placing the value + VG_EBP_DISPATCH_CHECKED + in %ebp just before a return to the dispatcher, + the dispatcher is informed that the next address to run, + in %eax, requires special treatment. +

    +

  • The real machine's FPU state is pretty much unimportant, for + reasons which will become obvious. Ditto its %eflags + register. +
+ +

+The state of the simulated CPU is stored in memory, in +VG_(baseBlock), which is a block of 200 words IIRC. +Recall that %ebp points permanently at the start of this +block. Function vg_init_baseBlock decides what the +offsets of various entities in VG_(baseBlock) are to be, +and allocates word offsets for them. The code generator then emits +%ebp relative addresses to get at those things. The +sequence in which entities are allocated has been carefully chosen so +that the 32 most popular entities come first, because this means 8-bit +offsets can be used in the generated code. + +

+If I was clever, I could make %ebp point 32 words along +VG_(baseBlock), so that I'd have another 32 words of +short-form offsets available, but that's just complicated, and it's +not important -- the first 32 words take 99% (or whatever) of the +traffic. + +

+Currently, the sequence of stuff in VG_(baseBlock) is as +follows: +

    +
  • 9 words, holding the simulated integer registers, + %EAX .. %EDI, and the simulated flags, + %EFLAGS. +

    +

  • Another 9 words, holding the V bit "shadows" for the above 9 regs. +

    +

  • The addresses of various helper routines called from + generated code: + VG_(helper_value_check4_fail), + VG_(helper_value_check0_fail), + which register V-check failures, + VG_(helperc_STOREV4), + VG_(helperc_STOREV1), + VG_(helperc_LOADV4), + VG_(helperc_LOADV1), + which do stores and loads of V bits to/from the + sparse array which keeps track of V bits in memory, + and + VGM_(handle_esp_assignment), which messes with + memory addressibility resulting from changes in %ESP. +

    +

  • The simulated %EIP. +

    +

  • 24 spill words, for when the register allocator can't make it work + with 5 measly registers. +

    +

  • Addresses of helpers VG_(helperc_STOREV2), + VG_(helperc_LOADV2). These are here because 2-byte + loads and stores are relatively rare, so are placed above the + magic 32-word offset boundary. +

    +

  • For similar reasons, addresses of helper functions + VGM_(fpu_write_check) and + VGM_(fpu_read_check), which handle the A/V maps + testing and changes required by FPU writes/reads. +

    +

  • Some other boring helper addresses: + VG_(helper_value_check2_fail) and + VG_(helper_value_check1_fail). These are probably + never emitted now, and should be removed. +

    +

  • The entire state of the simulated FPU, which I believe to be + 108 bytes long. +

    +

  • Finally, the addresses of various other helper functions in + vg_helpers.S, which deal with rare situations which + are tedious or difficult to generate code in-line for. +
+ +

+As a general rule, the simulated machine's state lives permanently in +memory at VG_(baseBlock). However, the JITter does some +optimisations which allow the simulated integer registers to be +cached in real registers over multiple simulated instructions within +the same basic block. These are always flushed back into memory at +the end of every basic block, so that the in-memory state is +up-to-date between basic blocks. (This flushing is implied by the +statement above that the real machine's allocatable registers are +dead in between simulated blocks). + + +

Startup, shutdown, and system calls

+ +Getting into Valgrind (VG_(startup), called from +valgrind.so's initialisation section), really means +copying the real CPU's state into VG_(baseBlock), and +then installing our own stack pointer, etc, into the real CPU, and +then starting up the JITter. Exiting valgrind involves copying the +simulated state back to the real state. + +

+Unfortunately, there's a complication at startup time. Problem is +that at the point where we need to take a snapshot of the real CPU's +state, the offsets in VG_(baseBlock) are not set up yet, +because to do so would involve disrupting the real machine's state +significantly. The way round this is to dump the real machine's state +into a temporary, static block of memory, +VG_(m_state_static). We can then set up the +VG_(baseBlock) offsets at our leisure, and copy into it +from VG_(m_state_static) at some convenient later time. +This copying is done by +VG_(copy_m_state_static_to_baseBlock). + +

+On exit, the inverse transformation is (rather unnecessarily) used: +stuff in VG_(baseBlock) is copied to +VG_(m_state_static), and the assembly stub then copies +from VG_(m_state_static) into the real machine registers. + +

+Doing system calls on behalf of the client (vg_syscall.S) +is something of a half-way house. We have to make the world look +sufficiently like that which the client would normally have to make +the syscall actually work properly, but we can't afford to lose +control. So the trick is to copy all of the client's state, except +its program counter, into the real CPU, do the system call, and +copy the state back out. Note that the client's state includes its +stack pointer register, so one effect of this partial restoration is +to cause the system call to be run on the client's stack, as it should +be. + +

+As ever there are complications. We have to save some of our own state +somewhere when restoring the client's state into the CPU, so that we +can keep going sensibly afterwards. In fact the only thing which is +important is our own stack pointer, but for paranoia reasons I save +and restore our own FPU state as well, even though that's probably +pointless. + +

+The complication on the above complication is, that for horrible +reasons to do with signals, we may have to handle a second client +system call whilst the client is blocked inside some other system +call (unbelievable!). That means there's two sets of places to +dump Valgrind's stack pointer and FPU state across the syscall, +and we decide which to use by consulting +VG_(syscall_depth), which is in turn maintained by +VG_(wrap_syscall). + + + +

Introduction to UCode

+ +UCode lies at the heart of the x86-to-x86 JITter. The basic premise +is that dealing with the x86 instruction set head-on is just too darn +complicated, so we do the traditional compiler-writer's trick and +translate it into a simpler, easier-to-deal-with form. + +

+In normal operation, translation proceeds through six stages, +coordinated by VG_(translate): +

    +
  1. Parsing of an x86 basic block into a sequence of UCode + instructions (VG_(disBB)). +

    +

  2. UCode optimisation (vg_improve), with the aim of + caching simulated registers in real registers over multiple + simulated instructions, and removing redundant simulated + %EFLAGS saving/restoring. +

    +

  3. UCode instrumentation (vg_instrument), which adds + value and address checking code. +

    +

  4. Post-instrumentation cleanup (vg_cleanup), removing + redundant value-check computations. +

    +

  5. Register allocation (vg_do_register_allocation), + which, note, is done on UCode. +

    +

  6. Emission of final instrumented x86 code + (VG_(emit_code)). +
+ +

+Notice how steps 2, 3, 4 and 5 are simple UCode-to-UCode +transformation passes, all on straight-line blocks of UCode (type +UCodeBlock). Steps 2 and 4 are optimisation passes and +can be disabled for debugging purposes, with +--optimise=no and --cleanup=no respectively. + +

+Valgrind can also run in a no-instrumentation mode, given +--instrument=no. This is useful for debugging the JITter +quickly without having to deal with the complexity of the +instrumentation mechanism too. In this mode, steps 3 and 4 are +omitted. + +

+These flags combine, so that --instrument=no together with +--optimise=no means only steps 1, 5 and 6 are used. +--single-step=yes causes each x86 instruction to be +treated as a single basic block. The translations are terrible but +this is sometimes instructive. + +

+The --stop-after=N flag switches back to the real CPU +after N basic blocks. It also re-JITs the final basic +block executed and prints the debugging info resulting, so this +gives you a way to get a quick snapshot of how a basic block looks as +it passes through the six stages mentioned above. If you want to +see full information for every block translated (probably not, but +still ...) find, in VG_(translate), the lines +
dis = True; +
dis = debugging_translation; +
+and comment out the second line. This will spew out debugging +junk faster than you can possibly imagine. + + + +

UCode operand tags: type Tag

+ +UCode is, more or less, a simple two-address RISC-like code. In +keeping with the x86 AT&T assembly syntax, generally speaking the +first operand is the source operand, and the second is the destination +operand, which is modified when the uinstr is notionally executed. + +

+UCode instructions have up to three operand fields, each of which has +a corresponding Tag describing it. Possible values for +the tag are: + +

    +
  • NoValue: indicates that the field is not in use. +

    +

  • Lit16: the field contains a 16-bit literal. +

    +

  • Literal: the field denotes a 32-bit literal, whose + value is stored in the lit32 field of the uinstr + itself. Since there is only one lit32 for the whole + uinstr, only one operand field may contain this tag. +

    +

  • SpillNo: the field contains a spill slot number, in + the range 0 to 23 inclusive, denoting one of the spill slots + contained inside VG_(baseBlock). Such tags only + exist after register allocation. +

    +

  • RealReg: the field contains a number in the range 0 + to 7 denoting an integer x86 ("real") register on the host. The + number is the Intel encoding for integer registers. Such tags + only exist after register allocation. +

    +

  • ArchReg: the field contains a number in the range 0 + to 7 denoting an integer x86 register on the simulated CPU. In + reality this means a reference to one of the first 8 words of + VG_(baseBlock). Such tags can exist at any point in + the translation process. +

    +

  • Last, but not least, TempReg. The field contains the + number of one of an infinite set of virtual (integer) + registers. TempRegs are used everywhere throughout + the translation process; you can have as many as you want. The + register allocator maps as many as it can into + RealRegs and turns the rest into + SpillNos, so TempRegs should not exist + after the register allocation phase. +

    + TempRegs are always 32 bits long, even if the data + they hold is logically shorter. In that case the upper unused + bits are required, and, I think, generally assumed, to be zero. + TempRegs holding V bits for quantities shorter than + 32 bits are expected to have ones in the unused places, since a + one denotes "undefined". +

+ + +

UCode instructions: type UInstr

+ +

+UCode was carefully designed to make it possible to do register +allocation on UCode and then translate the result into x86 code +without needing any extra registers ... well, that was the original +plan, anyway. Things have gotten a little more complicated since +then. In what follows, UCode instructions are referred to as uinstrs, +to distinguish them from x86 instructions. Uinstrs of course have +uopcodes which are (naturally) different from x86 opcodes. + +

+A uinstr (type UInstr) contains +various fields, not all of which are used by any one uopcode: +

    +
  • Three 16-bit operand fields, val1, val2 + and val3. +

    +

  • Three tag fields, tag1, tag2 + and tag3. Each of these has a value of type + Tag, + and they describe what the val1, val2 + and val3 fields contain. +

    +

  • A 32-bit literal field. +

    +

  • Two FlagSets, specifying which x86 condition codes are + read and written by the uinstr. +

    +

  • An opcode byte, containing a value of type Opcode. +

    +

  • A size field, indicating the data transfer size (1/2/4/8/10) in + cases where this makes sense, or zero otherwise. +

    +

  • A condition-code field, which, for jumps, holds a + value of type Condcode, indicating the condition + which applies. The encoding is as it is in the x86 insn stream, + except we add a 17th value CondAlways to indicate + an unconditional transfer. +

    +

  • Various 1-bit flags, indicating whether this insn pertains to an + x86 CALL or RET instruction, whether a widening is signed or not, + etc. +
+ +

+UOpcodes (type Opcode) are divided into two groups: those +necessary merely to express the functionality of the x86 code, and +extra uopcodes needed to express the instrumentation. The former +group contains: +

    +
  • GET and PUT, which move values from the + simulated CPU's integer registers (ArchRegs) into + TempRegs, and back. GETF and + PUTF do the corresponding thing for the simulated + %EFLAGS. There are no corresponding insns for the + FPU register stack, since we don't explicitly simulate its + registers. +

    +

  • LOAD and STORE, which, in RISC-like + fashion, are the only uinstrs able to interact with memory. +

    +

  • MOV and CMOV allow unconditional and + conditional moves of values between TempRegs. +

    +

  • ALU operations. Again in RISC-like fashion, these only operate on + TempRegs (before reg-alloc) or RealRegs + (after reg-alloc). These are: ADD, ADC, + AND, OR, XOR, + SUB, SBB, SHL, + SHR, SAR, ROL, + ROR, RCL, RCR, + NOT, NEG, INC, + DEC, BSWAP, CC2VAL and + WIDEN. WIDEN does signed or unsigned + value widening. CC2VAL is used to convert condition + codes into a value, zero or one. The rest are obvious. +

    + To allow for more efficient code generation, we bend slightly the + restriction at the start of the previous para: for + ADD, ADC, XOR, + SUB and SBB, we allow the first (source) + operand to also be an ArchReg, that is, one of the + simulated machine's registers. Also, many of these ALU ops allow + the source operand to be a literal. See + VG_(saneUInstr) for the final word on the allowable + forms of uinstrs. +

    +

  • LEA1 and LEA2 are not strictly + necessary, but facilitate better translations. They + record the fancy x86 addressing modes in a direct way, which + allows those amodes to be emitted back into the final + instruction stream more or less verbatim. +

    +

  • CALLM calls a machine-code helper, one of the methods + whose address is stored at some VG_(baseBlock) + offset. PUSH and POP move values + to/from TempReg to the real (Valgrind's) stack, and + CLEAR removes values from the stack. + CALLM_S and CALLM_E delimit the + boundaries of call setups and clearings, for the benefit of the + instrumentation passes. Getting this right is critical, and so + VG_(saneUCodeBlock) makes various checks on the use + of these uopcodes. +

    + It is important to understand that these uopcodes have nothing to + do with the x86 call, return, + push or pop instructions, and are not + used to implement them. Those guys turn into combinations of + GET, PUT, LOAD, + STORE, ADD, SUB, and + JMP. What these uopcodes support is calling of + helper functions such as VG_(helper_imul_32_64), + which do stuff which is too difficult or tedious to emit inline. +

    +

  • FPU, FPU_R and FPU_W. + Valgrind doesn't attempt to simulate the internal state of the + FPU at all. Consequently it only needs to be able to distinguish + FPU ops which read and write memory from those that don't, and + for those which do, it needs to know the effective address and + data transfer size. This is made easier because the x86 FP + instruction encoding is very regular, basically consisting of + 16 bits for a non-memory FPU insn and 11 (IIRC) bits plus an address mode + for a memory FPU insn. So our FPU uinstr carries + the 16 bits in its val1 field. And + FPU_R and FPU_W carry 11 bits in that + field, together with the identity of a TempReg or + (later) RealReg which contains the address. +

    +

  • JIFZ is unique, in that it allows a control-flow + transfer which is not deemed to end a basic block. It causes a + jump to a literal (original) address if the specified argument + is zero. +

    +

  • Finally, INCEIP advances the simulated + %EIP by the specified literal amount. This supports + lazy %EIP updating, as described below. +
+ +

+Stages 1 and 2 of the 6-stage translation process mentioned above +deal purely with these uopcodes, and no others. They are +sufficient to express pretty much all the x86 32-bit protected-mode +instruction set, at +least everything understood by a pre-MMX original Pentium (P54C). + +

+ +Stages 3, 4, 5 and 6 also deal with the following extra +"instrumentation" uopcodes. They are used to express all the +definedness-tracking and -checking machinery which valgrind does. In +later sections we show how to create checking code for each of the +uopcodes above. Note that these instrumentation uopcodes, although +some appear complicated, have been carefully chosen so that +efficient x86 code can be generated for them. GNU superopt v2.5 did a +great job helping out here. Anyways, the uopcodes are as follows: + +

    +
  • GETV and PUTV are analogues to + GET and PUT above. They are identical + except that they move the V bits for the specified values back and + forth to TempRegs, rather than moving the values + themselves. +

    +

  • Similarly, LOADV and STOREV read and + write V bits from the synthesised shadow memory that Valgrind + maintains. In fact they do more than that, since they also do + address-validity checks, and emit complaints if the read/written + addresses are unaddressible. +

    +

  • TESTV, whose parameters are a TempReg + and a size, tests the V bits in the TempReg, at the + specified operation size (0/1/2/4 byte) and emits an error if any + of them indicate undefinedness. This is the only uopcode capable + of doing such tests. +

    +

  • SETV, whose parameters are also TempReg + and a size, makes the V bits in the TempReg indicate + definedness, at the specified operation size. This is usually + used to generate the correct V bits for a literal value, which is + of course fully defined. +

    +

  • GETVF and PUTVF are analogues to + GETF and PUTF. They move the single V + bit used to model definedness of %EFLAGS between its + home in VG_(baseBlock) and the specified + TempReg. +

    +

  • TAG1 denotes one of a family of unary operations on + TempRegs containing V bits. Similarly, + TAG2 denotes one in a family of binary operations on + V bits. +
+ +

+These 10 uopcodes are sufficient to express Valgrind's entire +definedness-checking semantics. In fact most of the interesting magic +is done by the TAG1 and TAG2 +suboperations. + +

+First, however, I need to explain about V-vector operation sizes. +There are 4 sizes: 1, 2 and 4, which operate on groups of 8, 16 and 32 +V bits at a time, supporting the usual 1, 2 and 4 byte x86 operations. +However there is also the mysterious size 0, which really means a +single V bit. Single V bits are used in various circumstances; in +particular, the definedness of %EFLAGS is modelled with a +single V bit. Now might be a good time to also point out that for +V bits, 1 means "undefined" and 0 means "defined". Similarly, for A +bits, 1 means "invalid address" and 0 means "valid address". This +seems counterintuitive (and so it is), but testing against zero on +x86s saves instructions compared to testing against all 1s, because +many ALU operations set the Z flag for free, so to speak. + +

+With that in mind, the tag ops are: + +

    +
  • (UNARY) Pessimising casts: VgT_PCast40, + VgT_PCast20, VgT_PCast10, + VgT_PCast01, VgT_PCast02 and + VgT_PCast04. A "pessimising cast" takes a V-bit + vector at one size, and creates a new one at another size, + pessimised in the sense that if any of the bits in the source + vector indicate undefinedness, then all the bits in the result + indicate undefinedness. In this case the casts are all to or from + a single V bit, so for example VgT_PCast40 is a + pessimising cast from 32 bits to 1, whereas + VgT_PCast04 simply copies the single source V bit + into all 32 bit positions in the result. Surprisingly, these ops + can all be implemented very efficiently. +

    + There are also the pessimising casts VgT_PCast14, + from 8 bits to 32, VgT_PCast12, from 8 bits to 16, + and VgT_PCast11, from 8 bits to 8. This last one + seems nonsensical, but in fact it isn't a no-op because, as + mentioned above, any undefined (1) bits in the source infect the + entire result. +

    +

  • (UNARY) Propagating undefinedness upwards in a word: + VgT_Left4, VgT_Left2 and + VgT_Left1. These are used to simulate the worst-case + effects of carry propagation in adds and subtracts. They return a + V vector identical to the original, except that if the original + contained any undefined bits, then it and all bits above it are + marked as undefined too. Hence the Left bit in the names. +

    +

  • (UNARY) Signed and unsigned value widening: + VgT_SWiden14, VgT_SWiden24, + VgT_SWiden12, VgT_ZWiden14, + VgT_ZWiden24 and VgT_ZWiden12. These + mimic the definedness effects of standard signed and unsigned + integer widening. Unsigned widening creates zero bits in the new + positions, so VgT_ZWiden* accordingly mark + those parts of their argument as defined. Signed widening copies + the sign bit into the new positions, so VgT_SWiden* + copies the definedness of the sign bit into the new positions. + Because 1 means undefined and 0 means defined, these operations + can (fascinatingly) be done by the same operations which they + mimic. Go figure. +

    +

  • (BINARY) Undefined-if-either-Undefined, + Defined-if-either-Defined: VgT_UifU4, + VgT_UifU2, VgT_UifU1, + VgT_UifU0, VgT_DifD4, + VgT_DifD2, VgT_DifD1. These do simple + bitwise operations on pairs of V-bit vectors, with + UifU giving undefined if either arg bit is + undefined, and DifD giving defined if either arg bit + is defined. Abstract interpretation junkies, if any make it this + far, may like to think of them as meets and joins (or is it joins + and meets) in the definedness lattices. +

    +

  • (BINARY; one value, one V bits) Generate argument improvement + terms for AND and OR: VgT_ImproveAND4_TQ, + VgT_ImproveAND2_TQ, VgT_ImproveAND1_TQ, + VgT_ImproveOR4_TQ, VgT_ImproveOR2_TQ, + VgT_ImproveOR1_TQ. These help out with AND and OR + operations. AND and OR have the inconvenient property that the + definedness of the result depends on the actual values of the + arguments as well as their definedness. At the bit level: +
    1 AND undefined = undefined, but +
    0 AND undefined = 0, and similarly +
    0 OR undefined = undefined, but +
    1 OR undefined = 1. +
    +

    + It turns out that gcc (quite legitimately) generates code which + relies on this fact, so we have to model it properly in order to + avoid flooding users with spurious value errors. The ultimate + definedness result of AND and OR is calculated using + UifU on the definedness of the arguments, but we + also DifD in some "improvement" terms which + take into account the above phenomena. +

    + ImproveAND takes as its first argument the actual + value of an argument to AND (the T) and the definedness of that + argument (the Q), and returns a V-bit vector which is defined (0) + for bits which have value 0 and are defined; this, when + DifD into the final result causes those bits to be + defined even if the corresponding bit in the other argument is undefined. +

    + The ImproveOR ops do the dual thing for OR + arguments. Note that XOR does not have this property that one + argument can make the other irrelevant, so there is no need for + such complexity for XOR. +

+ +

+That's all the tag ops. If you stare at this long enough, and then +run Valgrind and stare at the pre- and post-instrumented ucode, it +should be fairly obvious how the instrumentation machinery hangs +together. + +

+One point, if you do this: in order to make it easy to differentiate +TempRegs carrying values from TempRegs +carrying V bit vectors, Valgrind prints the former as (for example) +t28 and the latter as q28; the fact that +they carry the same number serves to indicate their relationship. +This is purely for the convenience of the human reader; the register +allocator and code generator don't regard them as different. + + +

Translation into UCode

+ +VG_(disBB) allocates a new UCodeBlock and +then uses disInstr to translate x86 instructions one at a +time into UCode, dumping the result in the UCodeBlock. +This goes on until a control-flow transfer instruction is encountered. + +

+ +Despite the large size of vg_to_ucode.c, this translation +is really very simple. Each x86 instruction is translated entirely +independently of its neighbours, merrily allocating new +TempRegs as it goes. The idea is to have a simple +translator -- in reality, no more than a macro-expander -- and the +resulting bad UCode translation is cleaned up by the UCode +optimisation phase which follows. To give you an idea of some x86 +instructions and their translations (this is a complete basic block, +as Valgrind sees it): +

+        0x40435A50:  incl %edx
+
+           0: GETL      %EDX, t0
+           1: INCL      t0  (-wOSZAP)
+           2: PUTL      t0, %EDX
+
+        0x40435A51:  movsbl (%edx),%eax
+
+           3: GETL      %EDX, t2
+           4: LDB       (t2), t2
+           5: WIDENL_Bs t2
+           6: PUTL      t2, %EAX
+
+        0x40435A54:  testb $0x20, 1(%ecx,%eax,2)
+
+           7: GETL      %EAX, t6
+           8: GETL      %ECX, t8
+           9: LEA2L     1(t8,t6,2), t4
+          10: LDB       (t4), t10
+          11: MOVB      $0x20, t12
+          12: ANDB      t12, t10  (-wOSZACP)
+          13: INCEIPo   $9
+
+        0x40435A59:  jnz-8 0x40435A50
+
+          14: Jnzo      $0x40435A50  (-rOSZACP)
+          15: JMPo      $0x40435A5B
+
+ +

+Notice how the block always ends with an unconditional jump to the +next block. This is a bit unnecessary, but makes many things simpler. + +

+Most x86 instructions turn into sequences of GET, +PUT, LEA1, LEA2, +LOAD and STORE. Some complicated ones +however rely on calling helper bits of code in +vg_helpers.S. The ucode instructions PUSH, +POP, CALL, CALLM_S and +CALLM_E support this. The calling convention is somewhat +ad-hoc and is not the C calling convention. The helper routines must +save all integer registers, and the flags, that they use. Args are +passed on the stack underneath the return address, as usual, and if +result(s) are to be returned, it (they) are either placed in dummy arg +slots created by the ucode PUSH sequence, or just +overwrite the incoming args. + +

+In order that the instrumentation mechanism can handle calls to these +helpers, VG_(saneUCodeBlock) enforces the following +restrictions on calls to helpers: + +

    +
  • Each CALL uinstr must be bracketed by a preceding + CALLM_S marker (dummy uinstr) and a trailing + CALLM_E marker. These markers are used by the + instrumentation mechanism later to establish the boundaries of the + PUSH, POP and CLEAR + sequences for the call. +

    +

  • PUSH, POP and CLEAR + may only appear inside sections bracketed by CALLM_S + and CALLM_E, and nowhere else. +

    +

  • In any such bracketed section, no two PUSH insns may + push the same TempReg. Dually, no two + POPs may pop the same TempReg. +

    +

  • Finally, although this is not checked, args should be removed from + the stack with CLEAR, rather than POPs + into a TempReg which is not subsequently used. This + is because the instrumentation mechanism assumes that all values + POPped from the stack are actually used. +
+ +Some of the translations may appear to have redundant +TempReg-to-TempReg moves. This helps the +next phase, UCode optimisation, to generate better code. + + + +

UCode optimisation

+ +UCode is then subjected to an improvement pass +(vg_improve()), which blurs the boundaries between the +translations of the original x86 instructions. It's pretty +straightforward. Three transformations are done: + +
    +
  • Redundant GET elimination. Actually, more general + than that -- eliminates redundant fetches of ArchRegs. In our + running example, uinstr 3 GETs %EDX into + t2 despite the fact that, by looking at the previous + uinstr, it is already in t0. The GET is + therefore removed, and t2 renamed to t0. + Assuming t0 is allocated to a host register, it means + the simulated %EDX will exist in a host CPU register + for more than one simulated x86 instruction, which seems to me to + be a highly desirable property. +

    + There is some mucking around to do with subregisters; + %AL vs %AH, %AX vs + %EAX etc. I can't remember how it works, but in + general we are very conservative, and these tend to invalidate the + caching. +

    +

  • Redundant PUT elimination. This annuls + PUTs of values back to simulated CPU registers if a + later PUT would overwrite the earlier + PUT value, and there is no intervening reads of the + simulated register (ArchReg). +

    + As before, we are paranoid when faced with subregister references. + Also, PUTs of %ESP are never annulled, + because it is vital the instrumenter always has an up-to-date + %ESP value available, %ESP changes + affect addressability of the memory around the simulated stack + pointer. +

    + The implication of the above paragraph is that the simulated + machine's registers are only lazily updated once the above two + optimisation phases have run, with the exception of + %ESP. TempRegs go dead at the end of + every basic block, from which it is inferrable that any + TempReg caching a simulated CPU reg is flushed (back + into the relevant VG_(baseBlock) slot) at the end of + every basic block. The further implication is that the simulated + registers are only up-to-date in between basic blocks, and not + at arbitrary points inside basic blocks. And the consequence of + that is that we can only deliver signals to the client in between + basic blocks. None of this seems any problem in practice. +

    +

  • Finally there is a simple def-use thing for condition codes. If + an earlier uinstr writes the condition codes, and the next uinsn + along which actually cares about the condition codes writes the + same or larger set of them, but does not read any, the earlier + uinsn is marked as not writing any condition codes. This saves + a lot of redundant cond-code saving and restoring. +
+ +The effect of these transformations on our short block is rather +unexciting, and shown below. On longer basic blocks they can +dramatically improve code quality. + +
+at 3: delete GET, rename t2 to t0 in (4 .. 6)
+at 7: delete GET, rename t6 to t0 in (8 .. 9)
+at 1: annul flag write OSZAP due to later OSZACP
+
+Improved code:
+           0: GETL      %EDX, t0
+           1: INCL      t0
+           2: PUTL      t0, %EDX
+           4: LDB       (t0), t0
+           5: WIDENL_Bs t0
+           6: PUTL      t0, %EAX
+           8: GETL      %ECX, t8
+           9: LEA2L     1(t8,t0,2), t4
+          10: LDB       (t4), t10
+          11: MOVB      $0x20, t12
+          12: ANDB      t12, t10  (-wOSZACP)
+          13: INCEIPo   $9
+          14: Jnzo      $0x40435A50  (-rOSZACP)
+          15: JMPo      $0x40435A5B
+
+ +

UCode instrumentation

+ +Once you understand the meaning of the instrumentation uinstrs, +discussed in detail above, the instrumentation scheme is fairly +straightforward. Each uinstr is instrumented in isolation, and the +instrumentation uinstrs are placed before the original uinstr. +Our running example continues below. I have placed a blank line +after every original ucode, to make it easier to see which +instrumentation uinstrs correspond to which originals. + +

+As mentioned somewhere above, TempRegs carrying values +have names like t28, and each one has a shadow carrying +its V bits, with names like q28. This pairing aids in +reading instrumented ucode. + +

+One decision about all this is where to have "observation points", +that is, where to check that V bits are valid. I use a minimalistic +scheme, only checking where a failure of validity could cause the +original program to (seg)fault. So the use of values as memory +addresses causes a check, as do conditional jumps (these cause a check +on the definedness of the condition codes). And arguments +PUSHed for helper calls are checked, hence the weird +restrictions on helper call preambles described above. + +

+Another decision is that once a value is tested, it is thereafter +regarded as defined, so that we do not emit multiple undefined-value +errors for the same undefined value. That means that +TESTV uinstrs are always followed by SETV +on the same (shadow) TempRegs. Most of these +SETVs are redundant and are removed by the +post-instrumentation cleanup phase. + +

+The instrumentation for calling helper functions deserves further +comment. The definedness of results from a helper is modelled using +just one V bit. So, in short, we do pessimising casts of the +definedness of all the args, down to a single bit, and then +UifU these bits together. So this single V bit will say +"undefined" if any part of any arg is undefined. This V bit is then +pessimally cast back up to the result(s) sizes, as needed. If, by +seeing that all the args are got rid of with CLEAR and +none with POP, Valgrind sees that the result of the call +is not actually used, it immediately examines the result V bit with a +TESTV -- SETV pair. If it did not do this, +there would be no observation point to detect that some of the +args to the helper were undefined. Of course, if the helper's results +are indeed used, we don't do this, since the result usage will +presumably cause the result definedness to be checked at some suitable +future point. + +

+In general Valgrind tries to track definedness on a bit-for-bit basis, +but as the above para shows, for calls to helpers we throw in the +towel and approximate down to a single bit. This is because it's too +complex and difficult to track bit-level definedness through complex +ops such as integer multiply and divide, and in any case there are no +reasonable code fragments which attempt to (eg) multiply two +partially-defined values and end up with something meaningful, so +there seems little point in modelling multiplies, divides, etc, in +that level of detail. + +

+Integer loads and stores are instrumented with firstly a test of the +definedness of the address, followed by a LOADV or +STOREV respectively. These turn into calls to +(for example) VG_(helperc_LOADV4). These helpers do two +things: they perform an address-valid check, and they load or store V +bits from/to the relevant address in the (simulated V-bit) memory. + +

+FPU loads and stores are different. As above the definedness of the +address is first tested. However, the helper routine for FPU loads +(VGM_(fpu_read_check)) emits an error if either the +address is invalid or the referenced area contains undefined values. +It has to do this because we do not simulate the FPU at all, and so +cannot track definedness of values loaded into it from memory, so we +have to check them as soon as they are loaded into the FPU, ie, at +this point. We notionally assume that everything in the FPU is +defined. + +

+It follows therefore that FPU writes first check the definedness of +the address, then the validity of the address, and finally mark the +written bytes as well-defined. + +

+If anyone is inspired to extend Valgrind to MMX/SSE insns, I suggest +you use the same trick. It works provided that the FPU/MMX unit is +not used merely as a conduit to copy partially undefined data from +one place in memory to another. Unfortunately the integer CPU is used +like that (when copying C structs with holes, for example) and this is +the cause of much of the elaborateness of the instrumentation here +described. + +

+vg_instrument() in vg_translate.c actually +does the instrumentation. There are comments explaining how each +uinstr is handled, so we do not repeat that here. As explained +already, it is bit-accurate, except for calls to helper functions. +Unfortunately the x86 insns bt/bts/btc/btr are done by +helper fns, so bit-level accuracy is lost there. This should be fixed +by doing them inline; it will probably require adding a couple new +uinstrs. Also, left and right rotates through the carry flag (x86 +rcl and rcr) are approximated via a single +V bit; so far this has not caused anyone to complain. The +non-carry rotates, rol and ror, are much +more common and are done exactly. Re-visiting the instrumentation for +AND and OR, they seem rather verbose, and I wonder if it could be done +more concisely now. + +

+The lowercase o on many of the uopcodes in the running +example indicates that the size field is zero, usually meaning a +single-bit operation. + +

+Anyroads, the post-instrumented version of our running example looks +like this: + +

+Instrumented code:
+           0: GETVL     %EDX, q0
+           1: GETL      %EDX, t0
+
+           2: TAG1o     q0 = Left4 ( q0 )
+           3: INCL      t0
+
+           4: PUTVL     q0, %EDX
+           5: PUTL      t0, %EDX
+
+           6: TESTVL    q0
+           7: SETVL     q0
+           8: LOADVB    (t0), q0
+           9: LDB       (t0), t0
+
+          10: TAG1o     q0 = SWiden14 ( q0 )
+          11: WIDENL_Bs t0
+
+          12: PUTVL     q0, %EAX
+          13: PUTL      t0, %EAX
+
+          14: GETVL     %ECX, q8
+          15: GETL      %ECX, t8
+
+          16: MOVL      q0, q4
+          17: SHLL      $0x1, q4
+          18: TAG2o     q4 = UifU4 ( q8, q4 )
+          19: TAG1o     q4 = Left4 ( q4 )
+          20: LEA2L     1(t8,t0,2), t4
+
+          21: TESTVL    q4
+          22: SETVL     q4
+          23: LOADVB    (t4), q10
+          24: LDB       (t4), t10
+
+          25: SETVB     q12
+          26: MOVB      $0x20, t12
+
+          27: MOVL      q10, q14
+          28: TAG2o     q14 = ImproveAND1_TQ ( t10, q14 )
+          29: TAG2o     q10 = UifU1 ( q12, q10 )
+          30: TAG2o     q10 = DifD1 ( q14, q10 )
+          31: MOVL      q12, q14
+          32: TAG2o     q14 = ImproveAND1_TQ ( t12, q14 )
+          33: TAG2o     q10 = DifD1 ( q14, q10 )
+          34: MOVL      q10, q16
+          35: TAG1o     q16 = PCast10 ( q16 )
+          36: PUTVFo    q16
+          37: ANDB      t12, t10  (-wOSZACP)
+
+          38: INCEIPo   $9
+
+          39: GETVFo    q18
+          40: TESTVo    q18
+          41: SETVo     q18
+          42: Jnzo      $0x40435A50  (-rOSZACP)
+
+          43: JMPo      $0x40435A5B
+
+ + +

UCode post-instrumentation cleanup

+ +

+This pass, coordinated by vg_cleanup(), removes redundant +definedness computation created by the simplistic instrumentation +pass. It consists of two passes, +vg_propagate_definedness() followed by +vg_delete_redundant_SETVs. + +

+vg_propagate_definedness() is a simple +constant-propagation and constant-folding pass. It tries to determine +which TempRegs containing V bits will always indicate +"fully defined", and it propagates this information as far as it can, +and folds out as many operations as possible. For example, the +instrumentation for an ADD of a literal to a variable quantity will be +reduced down so that the definedness of the result is simply the +definedness of the variable quantity, since the literal is by +definition fully defined. + +

+vg_delete_redundant_SETVs removes SETVs on +shadow TempRegs for which the next action is a write. +I don't think there's anything else worth saying about this; it is +simple. Read the sources for details. + +

+So the cleaned-up running example looks like this. As above, I have +inserted line breaks after every original (non-instrumentation) uinstr +to aid readability. As with straightforward ucode optimisation, the +results in this block are undramatic because it is so short; longer +blocks benefit more because they have more redundancy which gets +eliminated. + + +

+at 29: delete UifU1 due to defd arg1
+at 32: change ImproveAND1_TQ to MOV due to defd arg2
+at 41: delete SETV
+at 31: delete MOV
+at 25: delete SETV
+at 22: delete SETV
+at 7: delete SETV
+
+           0: GETVL     %EDX, q0
+           1: GETL      %EDX, t0
+
+           2: TAG1o     q0 = Left4 ( q0 )
+           3: INCL      t0
+
+           4: PUTVL     q0, %EDX
+           5: PUTL      t0, %EDX
+
+           6: TESTVL    q0
+           8: LOADVB    (t0), q0
+           9: LDB       (t0), t0
+
+          10: TAG1o     q0 = SWiden14 ( q0 )
+          11: WIDENL_Bs t0
+
+          12: PUTVL     q0, %EAX
+          13: PUTL      t0, %EAX
+
+          14: GETVL     %ECX, q8
+          15: GETL      %ECX, t8
+
+          16: MOVL      q0, q4
+          17: SHLL      $0x1, q4
+          18: TAG2o     q4 = UifU4 ( q8, q4 )
+          19: TAG1o     q4 = Left4 ( q4 )
+          20: LEA2L     1(t8,t0,2), t4
+
+          21: TESTVL    q4
+          23: LOADVB    (t4), q10
+          24: LDB       (t4), t10
+
+          26: MOVB      $0x20, t12
+
+          27: MOVL      q10, q14
+          28: TAG2o     q14 = ImproveAND1_TQ ( t10, q14 )
+          30: TAG2o     q10 = DifD1 ( q14, q10 )
+          32: MOVL      t12, q14
+          33: TAG2o     q10 = DifD1 ( q14, q10 )
+          34: MOVL      q10, q16
+          35: TAG1o     q16 = PCast10 ( q16 )
+          36: PUTVFo    q16
+          37: ANDB      t12, t10  (-wOSZACP)
+
+          38: INCEIPo   $9
+          39: GETVFo    q18
+          40: TESTVo    q18
+          42: Jnzo      $0x40435A50  (-rOSZACP)
+
+          43: JMPo      $0x40435A5B
+
+ + +

Translation from UCode

+ +This is all very simple, even though vg_from_ucode.c +is a big file. Position-independent x86 code is generated into +a dynamically allocated array emitted_code; this is +doubled in size when it overflows. Eventually the array is handed +back to the caller of VG_(translate), who must copy +the result into TC and TT, and free the array. + +

+This file is structured into four layers of abstraction, which, +thankfully, are glued back together with extensive +__inline__ directives. From the bottom upwards: + +

    +
  • Address-mode emitters, emit_amode_regmem_reg et al. +

    +

  • Emitters for specific x86 instructions. There are quite a lot of + these, with names such as emit_movv_offregmem_reg. + The v suffix is Intel parlance for a 16/32 bit insn; + there are also b suffixes for 8 bit insns. +

    +

  • The next level up are the synth_* functions, which + synthesise possibly a sequence of raw x86 instructions to do some + simple task. Some of these are quite complex because they have to + work around Intel's silly restrictions on subregister naming. See + synth_nonshiftop_reg_reg for example. +

    +

  • Finally, at the top of the heap, we have + emitUInstr(), + which emits code for a single uinstr. +
+ +

+Some comments: +

    +
  &bull; The hack for FPU instructions becomes apparent here. To do a + FPU ucode instruction, we load the simulated FPU's + state from its VG_(baseBlock) into the real FPU + using an x86 frstor insn, do the ucode + FPU insn on the real CPU, and write the updated FPU + state back into VG_(baseBlock) using an + fnsave instruction. This is pretty brutal, but is + simple and it works, and even seems tolerably efficient. There is + no attempt to cache the simulated FPU state in the real FPU over + multiple back-to-back ucode FPU instructions. +

    + FPU_R and FPU_W are also done this way, + with the minor complication that we need to patch in some + addressing mode bits so the resulting insn knows the effective + address to use. This is easy because of the regularity of the x86 + FPU instruction encodings. +

    +

  • An analogous trick is done with ucode insns which claim, in their + flags_r and flags_w fields, that they + read or write the simulated %EFLAGS. For such cases + we first copy the simulated %EFLAGS into the real + %eflags, then do the insn, then, if the insn says it + writes the flags, copy back to %EFLAGS. This is a + bit expensive, which is why the ucode optimisation pass goes to + some effort to remove redundant flag-update annotations. +
+ +

+And so ... that's the end of the documentation for the instrumenting +translator! It's really not that complex, because it's composed as a +sequence of simple(ish) self-contained transformations on +straight-line blocks of code. + + +

Top-level dispatch loop

+ +Urk. In VG_(toploop). This is basically boring and +unsurprising, not to mention fiddly and fragile. It needs to be +cleaned up. + +

+The only perhaps surprise is that the whole thing is run +on top of a setjmp-installed exception handler, because, +supposing a translation got a segfault, we have to bail out of the +Valgrind-supplied exception handler VG_(oursignalhandler) +and immediately start running the client's segfault handler, if it has +one. In particular we can't finish the current basic block and then +deliver the signal at some convenient future point, because signals +like SIGILL, SIGSEGV and SIGBUS mean that the faulting insn should not +simply be re-tried. (I'm sure there is a clearer way to explain this). + + +

Exceptions, creating new translations

+

Self-modifying code

+ +

Lazy updates of the simulated program counter

+ +Simulated %EIP is not updated after every simulated x86 +insn as this was regarded as too expensive. Instead ucode +INCEIP insns move it along as and when necessary. +Currently we don't allow it to fall more than 4 bytes behind reality +(see VG_(disBB) for the way this works). +

+Note that %EIP is always brought up to date by the inner +dispatch loop in VG_(dispatch), so that if the client +takes a fault we know at least which basic block this happened in. + + +

The translation cache and translation table

+ +

Signals

+ +Horrible, horrible. vg_signals.c. +Basically, since we have to intercept all system +calls anyway, we can see when the client tries to install a signal +handler. If it does so, we make a note of what the client asked to +happen, and ask the kernel to route the signal to our own signal +handler, VG_(oursignalhandler). This simply notes the +delivery of signals, and returns. + +

+Every 1000 basic blocks, we see if more signals have arrived. If so, +VG_(deliver_signals) builds signal delivery frames on the +client's stack, and allows their handlers to be run. Valgrind places +in these signal delivery frames a bogus return address, +VG_(signalreturn_bogusRA), and checks all jumps to see +if any jump to it. If so, this is a sign that a signal handler is +returning, and if so Valgrind removes the relevant signal frame from +the client's stack, restores from the signal frame the simulated +state before the signal was delivered, and allows the client to run +onwards. We have to do it this way because some signal handlers never +return, they just longjmp(), which nukes the signal +delivery frame. + +

+The Linux kernel has a different but equally horrible hack for +detecting signal handler returns. Discovering it is left as an +exercise for the reader. + + + +

Errors, error contexts, error reporting, suppressions

+

Client malloc/free

+

Low-level memory management

+

A and V bitmaps

+

Symbol table management

+

Dealing with system calls

+

Namespace management

+

GDB attaching

+

Non-dependence on glibc or anything else

+

The leak detector

+

Performance problems

+

Continuous sanity checking

+

Tracing, or not tracing, child processes

+

Assembly glue for syscalls

+ + +
+ +

Extensions

+ +Some comments about Stuff To Do. + +

Bugs

+ +Stephan Kulow and Marc Mutz report problems with kmail in KDE 3 CVS +(RC2 ish) when run on Valgrind. Stephan has it deadlocking; Marc has +it looping at startup. I can't repro either behaviour. Needs +repro-ing and fixing. + + +

Threads

+ +Doing a good job of thread support strikes me as almost a +research-level problem. The central issues are how to do fast cheap +locking of the VG_(primary_map) structure, whether or not +accesses to the individual secondary maps need locking, what +race-condition issues result, and whether the already-nasty mess that +is the signal simulator needs further hackery. + +

+I realise that threads are the most-frequently-requested feature, and +I am thinking about it all. If you have guru-level understanding of +fast mutual exclusion mechanisms and race conditions, I would be +interested in hearing from you. + + +

Verification suite

+ +Directory tests/ contains various ad-hoc tests for +Valgrind. However, there is no systematic verification or regression +suite, that, for example, exercises all the stuff in +vg_memory.c, to ensure that illegal memory accesses and +undefined value uses are detected as they should be. It would be good +to have such a suite. + + +

Porting to other platforms

+ +It would be great if Valgrind was ported to FreeBSD and x86 NetBSD, +and to x86 OpenBSD, if it's possible (doesn't OpenBSD use a.out-style +executables, not ELF ?) + +

+The main difficulties, for an x86-ELF platform, seem to be: + +

    +
  • You'd need to rewrite the /proc/self/maps parser + (vg_procselfmaps.c). + Easy. +

    +

  • You'd need to rewrite vg_syscall_mem.c, or, more + specifically, provide one for your OS. This is tedious, but you + can implement syscalls on demand, and the Linux kernel interface + is, for the most part, going to look very similar to the *BSD + interfaces, so it's really a copy-paste-and-modify-on-demand job. + As part of this, you'd need to supply a new + vg_kerneliface.h file. +

    +

  • You'd also need to change the syscall wrappers for Valgrind's + internal use, in vg_mylibc.c. +
+ +All in all, I think a port to x86-ELF *BSDs is not really very +difficult, and in some ways I would like to see it happen, because +that would force a more clear factoring of Valgrind into platform +dependent and independent pieces. Not to mention, *BSD folks also +deserve to use Valgrind just as much as the Linux crew do. + + +

+


+ +

Easy stuff which ought to be done

+ +

MMX instructions

+ +MMX insns should be supported, using the same trick as for FPU insns. +If the MMX registers are not used to copy uninitialised junk from one +place to another in memory, this means we don't have to actually +simulate the internal MMX unit state, so the FPU hack applies. This +should be fairly easy. + + + +

Fix stabs-info reader

+ +The machinery in vg_symtab2.c which reads "stabs" style +debugging info is pretty weak. It usually correctly translates +simulated program counter values into line numbers and procedure +names, but the file name is often completely wrong. I think the +logic used to parse "stabs" entries is weak. It should be fixed. +The simplest solution, IMO, is to copy either the logic or simply the +code out of GNU binutils which does this; since GDB can clearly get it +right, binutils (or GDB?) must have code to do this somewhere. + + + + + +

BT/BTC/BTS/BTR

+ +These are x86 instructions which test, complement, set, or reset, a +single bit in a word. At the moment they are both incorrectly +implemented and incorrectly instrumented. + +

+The incorrect instrumentation is due to use of helper functions. This +means we lose bit-level definedness tracking, which could wind up +giving spurious uninitialised-value use errors. The Right Thing to do +is to invent a couple of new UOpcodes, I think GET_BIT +and SET_BIT, which can be used to implement all 4 x86 +insns, get rid of the helpers, and give bit-accurate instrumentation +rules for the two new UOpcodes. + +

+I realised the other day that they are mis-implemented too. The x86 +insns take a bit-index and a register or memory location to access. +For registers the bit index clearly can only be in the range zero to +register-width minus 1, and I assumed the same applied to memory +locations too. But evidently not; for memory locations the index can +be arbitrary, and the processor will index arbitrarily into memory as +a result. This too should be fixed. Sigh. Presumably indexing +outside the immediate word is not actually used by any programs yet +tested on Valgrind, for otherwise they (presumably) would simply not +work at all. If you plan to hack on this, first check the Intel docs +to make sure my understanding is really correct. + + + +

Using PREFETCH instructions

+ +Here's a small but potentially interesting project for performance +junkies. Experiments with valgrind's code generator and optimiser(s) +suggest that reducing the number of instructions executed in the +translations and mem-check helpers gives disappointingly small +performance improvements. Perhaps this is because performance of +Valgrindified code is limited by cache misses. After all, each read +in the original program now gives rise to at least three reads, one +for the VG_(primary_map), one of the resulting +secondary, and the original. Not to mention, the instrumented +translations are 13 to 14 times larger than the originals. All in all +one would expect the memory system to be hammered to hell and then +some. + +

+So here's an idea. An x86 insn involving a read from memory, after +instrumentation, will turn into ucode of the following form: +

+    ... calculate effective addr, into ta and qa ...
+    TESTVL qa             -- is the addr defined?
+    LOADV (ta), qloaded   -- fetch V bits for the addr
+    LOAD  (ta), tloaded   -- do the original load
+
+At the point where the LOADV is done, we know the actual +address (ta) from which the real LOAD will +be done. We also know that the LOADV will take around +20 x86 insns to do. So it seems plausible that doing a prefetch of +ta just before the LOADV might just avoid a +miss at the LOAD point, and that might be a significant +performance win. + +

+Prefetch insns are notoriously temperamental, more often than not +making things worse rather than better, so this would require +considerable fiddling around. It's complicated because Intels and +AMDs have different prefetch insns with different semantics, so that +too needs to be taken into account. As a general rule, even placing +the prefetches before the LOADV insn is too near the +LOAD; the ideal distance is apparently circa 200 CPU +cycles. So it might be worth having another analysis/transformation +pass which pushes prefetches as far back as possible, hopefully +immediately after the effective address becomes available. + +

+Doing too many prefetches is also bad because they soak up bus +bandwidth / cpu resources, so some cleverness in deciding which loads +to prefetch and which to not might be helpful. One can imagine not +prefetching client-stack-relative (%EBP or +%ESP) accesses, since the stack in general tends to show +good locality anyway. + +

+There's quite a lot of experimentation to do here, but I think it +might make an interesting week's work for someone. + +

+As of 15-ish March 2002, I've started to experiment with this, using +the AMD prefetch/prefetchw insns. + + + +

User-defined permission ranges

+ +This is quite a large project -- perhaps a month's hacking for a +capable hacker to do a good job -- but it's potentially very +interesting. The outcome would be that Valgrind could detect a +whole class of bugs which it currently cannot. + +

+The presentation falls into two pieces. + +

+Part 1: user-defined address-range permission setting +

+ +Valgrind intercepts the client's malloc, +free, etc calls, watches system calls, and watches the +stack pointer move. This is currently the only way it knows about +which addresses are valid and which not. Sometimes the client program +knows extra information about its memory areas. For example, the +client could at some point know that all elements of an array are +out-of-date. We would like to be able to convey to Valgrind this +information that the array is now addressable-but-uninitialised, so +that Valgrind can then warn if elements are used before they get new +values. + +

+What I would like are some macros like this: +

+   VALGRIND_MAKE_NOACCESS(addr, len)
+   VALGRIND_MAKE_WRITABLE(addr, len)
+   VALGRIND_MAKE_READABLE(addr, len)
+
+and also, to check that memory is addressible/initialised, +
+   VALGRIND_CHECK_ADDRESSIBLE(addr, len)
+   VALGRIND_CHECK_INITIALISED(addr, len)
+
+ +

+I then include in my sources a header defining these macros, rebuild +my app, run under Valgrind, and get user-defined checks. + +

+Now here's a neat trick. It's a nuisance to have to re-link the app +with some new library which implements the above macros. So the idea +is to define the macros so that the resulting executable is still +completely stand-alone, and can be run without Valgrind, in which case +the macros do nothing, but when run on Valgrind, the Right Thing +happens. How to do this? The idea is for these macros to turn into a +piece of inline assembly code, which (1) has no effect when run on the +real CPU, (2) is easily spotted by Valgrind's JITter, and (3) no sane +person would ever write, which is important for avoiding false matches +in (2). So here's a suggestion: +

+   VALGRIND_MAKE_NOACCESS(addr, len)
+
+becomes (roughly speaking) +
+   movl addr, %eax
+   movl len,  %ebx
+   movl $1,   %ecx   -- 1 describes the action; MAKE_WRITABLE might be
+                     -- 2, etc
+   rorl $13, %ecx
+   rorl $19, %ecx
+   rorl $11, %eax
+   rorl $21, %eax
+
+The rotate sequences have no effect, and it's unlikely they would +appear for any other reason, but they define a unique byte-sequence +which the JITter can easily spot. Using the operand constraints +section at the end of a gcc inline-assembly statement, we can tell gcc +that the assembly fragment kills %eax, %ebx, +%ecx and the condition codes, so this fragment is made +harmless when not running on Valgrind, runs quickly when not on +Valgrind, and does not require any other library support. + + +

+Part 2: using it to detect interference between stack variables +

+ +Currently Valgrind cannot detect errors of the following form: +

+void fooble ( void )
+{
+   int a[10];
+   int b[10];
+   a[10] = 99;
+}
+
+Now imagine rewriting this as +
+void fooble ( void )
+{
+   int spacer0;
+   int a[10];
+   int spacer1;
+   int b[10];
+   int spacer2;
+   VALGRIND_MAKE_NOACCESS(&spacer0, sizeof(int));
+   VALGRIND_MAKE_NOACCESS(&spacer1, sizeof(int));
+   VALGRIND_MAKE_NOACCESS(&spacer2, sizeof(int));
+   a[10] = 99;
+}
+
+Now the invalid write is certain to hit spacer0 or +spacer1, so Valgrind will spot the error. + +

+There are two complications. + +

+The first is that we don't want to annotate sources by hand, so the +Right Thing to do is to write a C/C++ parser, annotator, prettyprinter +which does this automatically, and run it on post-CPP'd C/C++ source. +See http://www.cacheprof.org for an example of a system which +transparently inserts another phase into the gcc/g++ compilation +route. The parser/prettyprinter is probably not as hard as it sounds; +I would write it in Haskell, a powerful functional language well +suited to doing symbolic computation, with which I am intimately +familiar. There is already a C parser written in Haskell by someone in +the Haskell community, and that would probably be a good starting +point. + +

+The second complication is how to get rid of these +NOACCESS records inside Valgrind when the instrumented +function exits; after all, these refer to stack addresses and will +make no sense whatever when some other function happens to re-use the +same stack address range, probably shortly afterwards. I think I +would be inclined to define a special stack-specific macro +

+   VALGRIND_MAKE_NOACCESS_STACK(addr, len)
+
+which causes Valgrind to record the client's %ESP at the +time it is executed. Valgrind will then watch for changes in +%ESP and discard such records as soon as the protected +area is uncovered by an increase in %ESP. I hesitate +with this scheme only because it is potentially expensive, if there +are hundreds of such records, and considering that changes in +%ESP already require expensive messing with stack access +permissions. + +

+This is probably easier and more robust than for the instrumenter +program to try and spot all exit points for the procedure and place +suitable deallocation annotations there. Plus C++ procedures can +bomb out at any point if they get an exception, so spotting return +points at the source level just won't work at all. + +

+Although some work, it's all eminently doable, and it would make +Valgrind into an even-more-useful tool. + +

+Update: as of 17 March 2002, this (these hooks) are done. + + +

+ + diff --git a/missing b/missing new file mode 100755 index 000000000..7789652e8 --- /dev/null +++ b/missing @@ -0,0 +1,190 @@ +#! /bin/sh +# Common stub for a few missing GNU programs while installing. +# Copyright (C) 1996, 1997 Free Software Foundation, Inc. +# Franc,ois Pinard , 1996. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +# 02111-1307, USA. + +if test $# -eq 0; then + echo 1>&2 "Try \`$0 --help' for more information" + exit 1 +fi + +case "$1" in + + -h|--h|--he|--hel|--help) + echo "\ +$0 [OPTION]... PROGRAM [ARGUMENT]... + +Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an +error status if there is no known handling for PROGRAM. 
+ +Options: + -h, --help display this help and exit + -v, --version output version information and exit + +Supported PROGRAM values: + aclocal touch file \`aclocal.m4' + autoconf touch file \`configure' + autoheader touch file \`config.h.in' + automake touch all \`Makefile.in' files + bison create \`y.tab.[ch]', if possible, from existing .[ch] + flex create \`lex.yy.c', if possible, from existing .c + lex create \`lex.yy.c', if possible, from existing .c + makeinfo touch the output file + yacc create \`y.tab.[ch]', if possible, from existing .[ch]" + ;; + + -v|--v|--ve|--ver|--vers|--versi|--versio|--version) + echo "missing - GNU libit 0.0" + ;; + + -*) + echo 1>&2 "$0: Unknown \`$1' option" + echo 1>&2 "Try \`$0 --help' for more information" + exit 1 + ;; + + aclocal) + echo 1>&2 "\ +WARNING: \`$1' is missing on your system. You should only need it if + you modified \`acinclude.m4' or \`configure.in'. You might want + to install the \`Automake' and \`Perl' packages. Grab them from + any GNU archive site." + touch aclocal.m4 + ;; + + autoconf) + echo 1>&2 "\ +WARNING: \`$1' is missing on your system. You should only need it if + you modified \`configure.in'. You might want to install the + \`Autoconf' and \`GNU m4' packages. Grab them from any GNU + archive site." + touch configure + ;; + + autoheader) + echo 1>&2 "\ +WARNING: \`$1' is missing on your system. You should only need it if + you modified \`acconfig.h' or \`configure.in'. You might want + to install the \`Autoconf' and \`GNU m4' packages. Grab them + from any GNU archive site." + files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' configure.in` + test -z "$files" && files="config.h" + touch_files= + for f in $files; do + case "$f" in + *:*) touch_files="$touch_files "`echo "$f" | + sed -e 's/^[^:]*://' -e 's/:.*//'`;; + *) touch_files="$touch_files $f.in";; + esac + done + touch $touch_files + ;; + + automake) + echo 1>&2 "\ +WARNING: \`$1' is missing on your system. 
You should only need it if + you modified \`Makefile.am', \`acinclude.m4' or \`configure.in'. + You might want to install the \`Automake' and \`Perl' packages. + Grab them from any GNU archive site." + find . -type f -name Makefile.am -print | + sed 's/\.am$/.in/' | + while read f; do touch "$f"; done + ;; + + bison|yacc) + echo 1>&2 "\ +WARNING: \`$1' is missing on your system. You should only need it if + you modified a \`.y' file. You may need the \`Bison' package + in order for those modifications to take effect. You can get + \`Bison' from any GNU archive site." + rm -f y.tab.c y.tab.h + if [ $# -ne 1 ]; then + eval LASTARG="\${$#}" + case "$LASTARG" in + *.y) + SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` + if [ -f "$SRCFILE" ]; then + cp "$SRCFILE" y.tab.c + fi + SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` + if [ -f "$SRCFILE" ]; then + cp "$SRCFILE" y.tab.h + fi + ;; + esac + fi + if [ ! -f y.tab.h ]; then + echo >y.tab.h + fi + if [ ! -f y.tab.c ]; then + echo 'main() { return 0; }' >y.tab.c + fi + ;; + + lex|flex) + echo 1>&2 "\ +WARNING: \`$1' is missing on your system. You should only need it if + you modified a \`.l' file. You may need the \`Flex' package + in order for those modifications to take effect. You can get + \`Flex' from any GNU archive site." + rm -f lex.yy.c + if [ $# -ne 1 ]; then + eval LASTARG="\${$#}" + case "$LASTARG" in + *.l) + SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` + if [ -f "$SRCFILE" ]; then + cp "$SRCFILE" lex.yy.c + fi + ;; + esac + fi + if [ ! -f lex.yy.c ]; then + echo 'main() { return 0; }' >lex.yy.c + fi + ;; + + makeinfo) + echo 1>&2 "\ +WARNING: \`$1' is missing on your system. You should only need it if + you modified a \`.texi' or \`.texinfo' file, or any other file + indirectly affecting the aspect of the manual. The spurious + call might also be the consequence of using a buggy \`make' (AIX, + DU, IRIX). You might want to install the \`Texinfo' package or + the \`GNU make' package. 
Grab either from any GNU archive site." + file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` + if test -z "$file"; then + file=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` + file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $file` + fi + touch $file + ;; + + *) + echo 1>&2 "\ +WARNING: \`$1' is needed, and you do not seem to have it handy on your + system. You might have modified some files without having the + proper tools for further handling them. Check the \`README' file, + it often tells you about the needed prerequirements for installing + this package. You may also peek at any GNU archive site, in case + some other package would contain this missing \`$1' program." + exit 1 + ;; +esac + +exit 0 diff --git a/mkinstalldirs b/mkinstalldirs new file mode 100755 index 000000000..0fdc11546 --- /dev/null +++ b/mkinstalldirs @@ -0,0 +1,40 @@ +#! /bin/sh +# mkinstalldirs --- make directory hierarchy +# Author: Noah Friedman +# Created: 1993-05-16 +# Public domain + +# $Id: mkinstalldirs,v 1.1 2002/03/22 01:28:22 sewardj Exp $ + +errstatus=0 + +for file +do + set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'` + shift + + pathcomp= + for d + do + pathcomp="$pathcomp$d" + case "$pathcomp" in + -* ) pathcomp=./$pathcomp ;; + esac + + if test ! -d "$pathcomp"; then + echo "mkdir $pathcomp" + + mkdir "$pathcomp" || lasterr=$? + + if test ! -d "$pathcomp"; then + errstatus=$lasterr + fi + fi + + pathcomp="$pathcomp/" + done +done + +exit $errstatus + +# mkinstalldirs ends here diff --git a/none/Makefile.am b/none/Makefile.am new file mode 100644 index 000000000..00387927b --- /dev/null +++ b/none/Makefile.am @@ -0,0 +1,80 @@ +SUBDIRS = demangle . 
docs tests + +valdir = $(libdir)/valgrind + +LDFLAGS = -Wl,-z -Wl,initfirst + +INCLUDES += -I$(srcdir)/demangle + +bin_SCRIPTS = valgrind + +val_DATA = linux22.supp linux24.supp + +EXTRA_DIST = $(val_DATA) \ + PATCHES_APPLIED ACKNOWLEDGEMENTS \ + README_KDE3_FOLKS \ + README_MISSING_SYSCALL_OR_IOCTL TODO + +val_PROGRAMS = valgrind.so valgrinq.so + +valgrinq_so_SOURCES = vg_valgrinq_dummy.c + +valgrind_so_SOURCES = \ + vg_clientmalloc.c \ + vg_clientperms.c \ + vg_demangle.c \ + vg_dispatch.S \ + vg_errcontext.c \ + vg_execontext.c \ + vg_from_ucode.c \ + vg_helpers.S \ + vg_main.c \ + vg_malloc2.c \ + vg_memory.c \ + vg_messages.c \ + vg_mylibc.c \ + vg_procselfmaps.c \ + vg_profile.c \ + vg_signals.c \ + vg_startup.S \ + vg_symtab2.c \ + vg_syscall_mem.c \ + vg_syscall.S \ + vg_to_ucode.c \ + vg_translate.c \ + vg_transtab.c \ + vg_valgrinq_dummy.c \ + vg_vtagops.c + +valgrind_so_LDADD = \ + demangle/cp-demangle.o \ + demangle/cplus-dem.o \ + demangle/dyn-string.o \ + demangle/safe-ctype.o + +include_HEADERS = valgrind.h + +noinst_HEADERS = \ + vg_kerneliface.h \ + vg_include.h \ + vg_version.h \ + vg_constants.h \ + vg_unsafe.h + + +install-data-hook: + cd ${valdir} && rm -f default.supp && $(LN_S) $(DEFAULT_SUPP) default.supp + +vg_memory.o: + $(COMPILE) -O2 -mpreferred-stack-boundary=2 -c $< + +vg_clientmalloc.o: + $(COMPILE) -fno-omit-frame-pointer -c $< + + +valgrind.so: $(valgrind_so_OBJECTS) + $(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \ + $(valgrind_so_OBJECTS) $(valgrind_so_LDADD) + +valgrinq.so: $(valgrinq_so_OBJECTS) + $(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS) diff --git a/stamp-h.in b/stamp-h.in new file mode 100644 index 000000000..9788f7023 --- /dev/null +++ b/stamp-h.in @@ -0,0 +1 @@ +timestamp diff --git a/tests/Makefile.am b/tests/Makefile.am new file mode 100644 index 000000000..d12254858 --- /dev/null +++ b/tests/Makefile.am @@ -0,0 +1,26 @@ +EXTRA_DIST = \ + badaddrvalue.c badjump.c \ + badloop.c bitfield1.c \ + 
blocked_syscall.c clientperm.c \ + clientstackperm.c coolo_sigaction.cpp \ + coolo_strlen.c coolo_strlen.s \ + cpuid_c.c cpuid_s.s \ + doublefree.c errs1.c \ + exitprog.c floored.c \ + fprw.c fwrite.c \ + inline.c inlineh.c \ + inlineh.h malloc1.c \ + malloc2.c manuel1.c \ + manuel2.c manuel3.c \ + memalign_test.c memcmptest.c \ + memtests.cpp mmaptest.c \ + oneparam.c pushfpopf_c.c \ + pushfpopf.s rcl_assert.s \ + rcrl.c readline1.c \ + realloc1.c sha1.test.c \ + shortpush.c shorts.c \ + signal1.c signal2.c \ + signal3.c smc1.c \ + suppfree.c tronical.c \ + tronical.s twoparams.c \ + twoparams.s \ No newline at end of file diff --git a/tests/Makefile.in b/tests/Makefile.in new file mode 100644 index 000000000..93747f1fa --- /dev/null +++ b/tests/Makefile.in @@ -0,0 +1,175 @@ +# Makefile.in generated automatically by automake 1.4-p4 from Makefile.am + +# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + + +SHELL = @SHELL@ + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ + +bindir = @bindir@ +sbindir = @sbindir@ +libexecdir = @libexecdir@ +datadir = @datadir@ +sysconfdir = @sysconfdir@ +sharedstatedir = @sharedstatedir@ +localstatedir = @localstatedir@ +libdir = @libdir@ +infodir = @infodir@ +mandir = @mandir@ +includedir = @includedir@ +oldincludedir = /usr/include + +DESTDIR = + +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ + +top_builddir = .. 
+ +ACLOCAL = @ACLOCAL@ +AUTOCONF = @AUTOCONF@ +AUTOMAKE = @AUTOMAKE@ +AUTOHEADER = @AUTOHEADER@ + +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS) +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +transform = @program_transform_name@ + +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +host_alias = @host_alias@ +host_triplet = @host@ +CC = @CC@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +DEFAULT_SUPP = @DEFAULT_SUPP@ +LN_S = @LN_S@ +MAKEINFO = @MAKEINFO@ +PACKAGE = @PACKAGE@ +RANLIB = @RANLIB@ +VERSION = @VERSION@ + +EXTRA_DIST = badaddrvalue.c badjump.c badloop.c bitfield1.c blocked_syscall.c clientperm.c clientstackperm.c coolo_sigaction.cpp coolo_strlen.c coolo_strlen.s cpuid_c.c cpuid_s.s doublefree.c errs1.c exitprog.c floored.c fprw.c fwrite.c inline.c inlineh.c inlineh.h malloc1.c malloc2.c manuel1.c manuel2.c manuel3.c memalign_test.c memcmptest.c memtests.cpp mmaptest.c oneparam.c pushfpopf_c.c pushfpopf.s rcl_assert.s rcrl.c readline1.c realloc1.c sha1.test.c shortpush.c shorts.c signal1.c signal2.c signal3.c smc1.c suppfree.c tronical.c tronical.s twoparams.c twoparams.s + +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = ../config.h +CONFIG_CLEAN_FILES = +DIST_COMMON = Makefile.am Makefile.in + + +DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) + +TAR = tar +GZIP_ENV = --best +all: all-redirect +.SUFFIXES: +$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) + cd $(top_srcdir) && $(AUTOMAKE) --gnu --include-deps tests/Makefile + +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) \ + && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status + +tags: TAGS +TAGS: + + +distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) + +subdir = tests + +distdir: $(DISTFILES) + @for file in $(DISTFILES); do \ + d=$(srcdir); \ + if test -d 
$$d/$$file; then \ + cp -pr $$d/$$file $(distdir)/$$file; \ + else \ + test -f $(distdir)/$$file \ + || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ + || cp -p $$d/$$file $(distdir)/$$file || :; \ + fi; \ + done +info-am: +info: info-am +dvi-am: +dvi: dvi-am +check-am: all-am +check: check-am +installcheck-am: +installcheck: installcheck-am +install-exec-am: +install-exec: install-exec-am + +install-data-am: +install-data: install-data-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am +install: install-am +uninstall-am: +uninstall: uninstall-am +all-am: Makefile +all-redirect: all-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install +installdirs: + + +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + -rm -f config.cache config.log stamp-h stamp-h[0-9]* + +maintainer-clean-generic: +mostlyclean-am: mostlyclean-generic + +mostlyclean: mostlyclean-am + +clean-am: clean-generic mostlyclean-am + +clean: clean-am + +distclean-am: distclean-generic clean-am + +distclean: distclean-am + +maintainer-clean-am: maintainer-clean-generic distclean-am + @echo "This command is intended for maintainers to use;" + @echo "it deletes files that may require special tools to rebuild." + +maintainer-clean: maintainer-clean-am + +.PHONY: tags distdir info-am info dvi-am dvi check check-am \ +installcheck-am installcheck install-exec-am install-exec \ +install-data-am install-data install-am install uninstall-am uninstall \ +all-redirect all-am all installdirs mostlyclean-generic \ +distclean-generic clean-generic maintainer-clean-generic clean \ +mostlyclean distclean maintainer-clean + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/tests/badaddrvalue.c b/tests/badaddrvalue.c new file mode 100644 index 000000000..1bb204747 --- /dev/null +++ b/tests/badaddrvalue.c @@ -0,0 +1,12 @@ + +#include +#include + +int main ( void ) +{ + char* aa = malloc(8); + aa[-1] = 17; + if (aa[-1] == 17) + printf("17\n"); else printf("not 17\n"); + return 0; +} diff --git a/tests/badjump.c b/tests/badjump.c new file mode 100644 index 000000000..053663be4 --- /dev/null +++ b/tests/badjump.c @@ -0,0 +1,6 @@ + +int main ( void ) +{ + char* p = (char*)0xE000000; + return ((int(*)(void)) p) (); +} diff --git a/tests/badloop.c b/tests/badloop.c new file mode 100644 index 000000000..8780cf7f8 --- /dev/null +++ b/tests/badloop.c @@ -0,0 +1,15 @@ + +#include + +int main ( void ) +{ + int a[5]; + int i, s; + a[0] = a[1] = a[3] = a[4] = 0; + s = 0; + for (i = 0; i < 5; i++) + s += a[i]; + if (s == 377) + printf("sum is %d\n", s); + return 0; +} diff --git a/tests/bitfield1.c b/tests/bitfield1.c new file mode 100644 index 000000000..4a7a61a97 --- /dev/null +++ b/tests/bitfield1.c @@ -0,0 +1,18 @@ + +#include + +typedef + struct { + int x; + unsigned int y:1; + int z; + } + Fooble; + +void main ( void ) +{ + Fooble* f = malloc(sizeof(Fooble)); + f->x = 1; + f->z = 1; + f->y = (f == (Fooble*)17 ? 
1 : 0); +} diff --git a/tests/blocked_syscall.c b/tests/blocked_syscall.c new file mode 100644 index 000000000..0ac9d5cc9 --- /dev/null +++ b/tests/blocked_syscall.c @@ -0,0 +1,32 @@ + +#include +#include +#include +#include + +int fds[2]; + +void the_sighandler ( int signo ) +{ + int nw; + // assert(signo == SIGUSR1); + // printf("sighandler running; should unblock now\n"); + nw = write(fds[1], "zzz", 1); + // assert(nw == 1); +} + +int main ( void ) +{ + char buf[10]; + int res, nr; + void* oldsh = signal(SIGUSR1, the_sighandler); + assert(oldsh != SIG_ERR); + printf("pid = %d\n", getpid()); + res = pipe(fds); + assert (res == 0); + printf("doing read(); this should block\n"); + nr = read(fds[0], buf, 1); + /* blocks */ + printf("read returned %d\n", nr); + return 0; +} diff --git a/tests/clientperm.c b/tests/clientperm.c new file mode 100644 index 000000000..ee7809d80 --- /dev/null +++ b/tests/clientperm.c @@ -0,0 +1,39 @@ + +#include +#include + +#include "valgrind.h" + +int main1 ( void ) +{ + int xxx, i; + for (i = 0; i < 10; i++) VALGRIND_CHECK_DEFINED(xxx); + return 0; +} + +int main ( void ) +{ + int i, sum, m; + char* aa = calloc(100,1); + sum = 0; + + VALGRIND_CHECK_READABLE(aa,100); + + m = VALGRIND_MAKE_WRITABLE( &aa[49], 1 ); + VALGRIND_CHECK_WRITABLE(aa,100); + + printf("m_na: returned value is %d\n", m ); + + for (i = 0; i < 100; i++) + sum += aa[i]; + printf("sum is %d\n", sum); + + m = VALGRIND_DISCARD(m); + printf("m_rm: returned value is %d\n", m ); + + for (i = 0; i < 100; i++) + sum += aa[i]; + printf("sum is %d\n", sum); + + return 0; +} diff --git a/tests/clientstackperm.c b/tests/clientstackperm.c new file mode 100644 index 000000000..31a50101f --- /dev/null +++ b/tests/clientstackperm.c @@ -0,0 +1,36 @@ + +#include +#include +#include + +#include "valgrind.h" + + +int baaaad ( void ) +{ + int i; + int spacer0[10]; + int aaa[10]; + int spacer1[10]; + int bbb[10]; + int spacer2[10]; + int ccc[10]; + int spacer3[10]; + 
VALGRIND_MAKE_NOACCESS_STACK(spacer0, sizeof spacer0); + VALGRIND_MAKE_NOACCESS_STACK(spacer1, sizeof spacer1); + VALGRIND_MAKE_NOACCESS_STACK(spacer2, sizeof spacer2); + VALGRIND_MAKE_NOACCESS_STACK(spacer3, sizeof spacer3); + printf("reading %p\n", &aaa[-3]); + return aaa[-3]; + for (i = 0; i < 10; i++) { + printf("poking addr %p\n", & spacer1[i]); + spacer0[i] = spacer1[i] = spacer2[i] = spacer3[i] = 0; + } +} + + +int main ( void ) +{ + int z = baaaad(); + return 0; +} diff --git a/tests/coolo_sigaction.cpp b/tests/coolo_sigaction.cpp new file mode 100644 index 000000000..b41938b06 --- /dev/null +++ b/tests/coolo_sigaction.cpp @@ -0,0 +1,54 @@ + +#include +#include +#include +#include + +static struct sigaction oldChildHandlerData; + +void theHandler(int arg) +{ + printf("handled %d\n", arg); +} + +void setupHandlers() +{ + struct sigaction act; + act.sa_handler=theHandler; + sigemptyset(&(act.sa_mask)); + sigaddset(&(act.sa_mask), SIGCHLD); + // Make sure we don't block this signal. 
gdb tends to do that :-( + sigprocmask(SIG_UNBLOCK, &(act.sa_mask), 0); + + act.sa_flags = SA_NOCLDSTOP; + + // CC: take care of SunOS which automatically restarts interrupted system + // calls (and thus does not have SA_RESTART) + +#ifdef SA_RESTART + act.sa_flags |= SA_RESTART; +#endif + + sigaction( SIGCHLD, &act, &oldChildHandlerData ); + + act.sa_handler=SIG_IGN; + sigemptyset(&(act.sa_mask)); + sigaddset(&(act.sa_mask), SIGPIPE); + act.sa_flags = 0; + sigaction( SIGPIPE, &act, 0L); +} + +int main() +{ + int i; + char buffer[200]; + setupHandlers(); + FILE *p = popen("echo Hallo World", "r"); + while (!feof(p)) { + int n = fread(buffer, 200, 1, p); + write(2, buffer, n); + } + fclose(p); + for (i = 0; i < 1000000; i++) ; + return 0; +} diff --git a/tests/coolo_strlen.c b/tests/coolo_strlen.c new file mode 100644 index 000000000..16e987025 --- /dev/null +++ b/tests/coolo_strlen.c @@ -0,0 +1,13 @@ + +#include + +int main() { + char buffer[200] = "HALLO"; + char *b2 = strdup(strcat(buffer, "THis is a very long strings")); + int len = strlen(b2); + if (len < 12) + return 0; + else + return 1; +} + diff --git a/tests/coolo_strlen.s b/tests/coolo_strlen.s new file mode 100644 index 000000000..6cadfccfa --- /dev/null +++ b/tests/coolo_strlen.s @@ -0,0 +1,90 @@ + .file "coolo_strlen.c" + .version "01.01" +gcc2_compiled.: +.section .rodata +.LC0: + .string "HALLO" +.globl memset +.LC1: + .string "THis is a very long strings" +.text + .align 4 +.globl main + .type main,@function +main: + movl .LC0,%eax + pushl %ebp + movl %esp,%ebp + subl $216,%esp + movl %eax,-200(%ebp) + movw .LC0+4,%ax + movw %ax,-196(%ebp) + leal -194(%ebp),%eax + addl $-4,%esp + pushl $194 + pushl $0 + pushl %eax + call memset + addl $16,%esp + addl $-12,%esp + addl $-8,%esp + pushl $.LC1 + leal -200(%ebp),%eax + pushl %eax + call strcat + addl $16,%esp + pushl %eax + call __strdup + movl %eax,%edx + movl %edx,%ecx + andl $3,%ecx + je .L105 + jp .L110 + cmpl $2,%ecx + je .L111 + cmpb %ch,(%eax) + 
je .L109 + incl %eax +.L111: + cmpb %ch,(%eax) + je .L109 + incl %eax +.L110: + cmpb %ch,(%eax) + je .L109 + incl %eax +.L105: + movl (%eax),%ecx + testb %ch,%cl + jne .L106 + testb %cl,%cl + je .L109 + testb %ch,%ch + je .L108 +.L106: + testl $16711680,%ecx + je .L107 + addl $4,%eax + testl $-16777216,%ecx + jne .L105 + subl $3,%eax +.L107: + incl %eax +.L108: + incl %eax +.L109: + subl %edx,%eax + cmpl $11,%eax + jle .L102 + movl $1,%eax + jmp .L104 + .p2align 4,,7 +.L102: + xorl %eax,%eax +.L104: + movl %ebp,%esp + popl %ebp + ret +.Lfe1: + .size main,.Lfe1-main + .ident "GCC: (GNU) 2.95.3 20010315 (release)" diff --git a/tests/cpuid_c.c b/tests/cpuid_c.c new file mode 100644 index 000000000..333ff1127 --- /dev/null +++ b/tests/cpuid_c.c @@ -0,0 +1,21 @@ + +#include + +// in cpuid_s.s +extern void get_cpuid0 ( unsigned int* buf ); +extern void get_cpuid1 ( unsigned int* buf ); + +unsigned int buf[4]; + +int main ( void ) +{ + get_cpuid0(&buf[0]); + printf("cpuid words (0): 0x%x 0x%x 0x%x 0x%x\n", + buf[0], buf[1], buf[2], buf[3] ); + + get_cpuid1(&buf[0]); + printf("cpuid words (1): 0x%x 0x%x 0x%x 0x%x\n", + buf[0], buf[1], buf[2], buf[3] ); + + return 0; +} diff --git a/tests/cpuid_s.s b/tests/cpuid_s.s new file mode 100644 index 000000000..27bcc935d --- /dev/null +++ b/tests/cpuid_s.s @@ -0,0 +1,77 @@ + + + .file "oneparam.c" + .version "01.01" +gcc2_compiled.: +.text + .align 4 + +.globl get_cpuid0 + .type get_cpuid0,@function +get_cpuid0: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + + pushl %edi + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + + movl %eax, %edi + movl $0, %eax + cpuid + movl %eax, (%edi) + movl %ebx, 4(%edi) + movl %ecx, 8(%edi) + movl %edx, 12(%edi) + + popl %edx + popl %ecx + popl %ebx + popl %eax + popl %edi + + popl %ebp + ret +.Lfe1: + .size get_cpuid0,.Lfe1-get_cpuid0 + + +.globl get_cpuid1 + .type get_cpuid1,@function +get_cpuid1: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + + pushl %edi + pushl %eax + pushl 
%ebx + pushl %ecx + pushl %edx + + movl %eax, %edi + movl $1, %eax + cpuid + movl %eax, (%edi) + movl %ebx, 4(%edi) + movl %ecx, 8(%edi) + movl %edx, 12(%edi) + + popl %edx + popl %ecx + popl %ebx + popl %eax + popl %edi + + popl %ebp + ret +.Lfe2: + .size get_cpuid1,.Lfe2-get_cpuid1 + + + + + .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-98)" diff --git a/tests/doublefree.c b/tests/doublefree.c new file mode 100644 index 000000000..3c2705081 --- /dev/null +++ b/tests/doublefree.c @@ -0,0 +1,12 @@ + +#include +#include + +int main ( void ) +{ + int i; + void* p = malloc(177); + for (i = 0; i < 2; i++) + free(p); + return 0; +} diff --git a/tests/errs1.c b/tests/errs1.c new file mode 100644 index 000000000..30d6cf6ab --- /dev/null +++ b/tests/errs1.c @@ -0,0 +1,17 @@ + +#include +#include + +char* p; + +void ddd ( void ) { p[-1] += 'z'; } +void ccc ( void ) { ddd(); } +void bbb ( void ) { ccc(); } +void aaa ( void ) { bbb(); } + +void zzzzzzz ( void ) { p = malloc(10); } +void yyy ( void ) { zzzzzzz(); } +void xxx ( void ) { yyy(); } +void www ( void ) { xxx(); } + +void main ( void ) { www(); aaa(); } diff --git a/tests/exitprog.c b/tests/exitprog.c new file mode 100644 index 000000000..a3c2f4f6b --- /dev/null +++ b/tests/exitprog.c @@ -0,0 +1,15 @@ + + +#include +#include + +#define ZILLION 1000000 + +void main ( void ) +{ + int i; + char* a = malloc(ZILLION * sizeof(char)); + for (i = 0; i <= ZILLION; i++) a[i] = 0; + a = (char*)177; + _exit(1); +} diff --git a/tests/floored.c b/tests/floored.c new file mode 100644 index 000000000..9cdf8a6c2 --- /dev/null +++ b/tests/floored.c @@ -0,0 +1,17 @@ + +#include +#include + +int xToI ( ); + +void main ( void ) +{ + printf ( "the answer is %d\n", xToI () ); +} + + +int xToI() +{ + return (int)floor(2.90) + 1; +} + diff --git a/tests/fprw.c b/tests/fprw.c new file mode 100644 index 000000000..556d8a035 --- /dev/null +++ b/tests/fprw.c @@ -0,0 +1,26 @@ + +/* most of the nasties in this are in the same bb, so you 
need to run +with --single-step=yes to get them properly distinguished. */ + +#include + +int main ( void ) +{ + volatile double d; + volatile float f; + double* dp = malloc(sizeof(double)); + float* fp = malloc(sizeof(float)); + int* ip = (int*)0x1234567; + d += 1.0; + f += 10.0; + *dp += 2.0; + *fp += 20.0; + free(dp); + free(fp); + *dp += 3.0; + *fp += 30.0; + free(ip); + ip = malloc(sizeof(int)); + * ((double*)ip) = 1.2 + d; + return 0; +} diff --git a/tests/fwrite.c b/tests/fwrite.c new file mode 100644 index 000000000..1eec4a4f8 --- /dev/null +++ b/tests/fwrite.c @@ -0,0 +1,9 @@ + +#include +#include +int main ( void ) +{ + char* arr = malloc(10); + (void) write( 1 /* stdout */, arr, 10 ); + return 0; +} diff --git a/tests/inline.c b/tests/inline.c new file mode 100644 index 000000000..6db2fdd8b --- /dev/null +++ b/tests/inline.c @@ -0,0 +1,20 @@ + +#include +#include + +__inline__ +static int addemup ( int* arr ) +{ + int i, j = 0; + for (i = 0; i <= 10; i++) + j += arr[i]; + return j; +} + +void main ( void ) +{ + int sum; + int* a = calloc(10, sizeof(int)); + sum = addemup(a); + printf("sum is %d\n", sum); +} diff --git a/tests/inlineh.c b/tests/inlineh.c new file mode 100644 index 000000000..f65806567 --- /dev/null +++ b/tests/inlineh.c @@ -0,0 +1,23 @@ + +#include + +extern int burble ( int ); + +__inline__ +static int inline_thisfile ( void ) +{ + return burble(17); +} + +#include "inlineh.h" + +void main ( void ) +{ + int a; + a = 0; + a += inline_thisfile(); + a *= 100; + a += inline_otherfile(); + a /= 100; + printf("answer is %d\n", a); +} diff --git a/tests/inlineh.h b/tests/inlineh.h new file mode 100644 index 000000000..e34172a1c --- /dev/null +++ b/tests/inlineh.h @@ -0,0 +1,6 @@ + +__inline__ +static int inline_otherfile ( void ) +{ + return burble(19); +} diff --git a/tests/malloc1.c b/tests/malloc1.c new file mode 100644 index 000000000..dff5250eb --- /dev/null +++ b/tests/malloc1.c @@ -0,0 +1,24 @@ + +#include +#include + +void really ( void 
); + +int main ( void ) +{ + really(); + return 0; +} + +void really ( void ) +{ + int i; + char* p = malloc(10); + for (i = 0; i < 10; i++) + p[i] = 'z'; + free(p); + p[1] = 'z'; + p = malloc(10); + p[2] = 'z'; + p[-1] = 'z'; +} diff --git a/tests/malloc2.c b/tests/malloc2.c new file mode 100644 index 000000000..2d6a0ab85 --- /dev/null +++ b/tests/malloc2.c @@ -0,0 +1,50 @@ + +#include +#include + +/* The original test driver machinery. */ +#define N_TEST_TRANSACTIONS 50000 +#define N_TEST_ARR 10000 + +#define M_TEST_MALLOC 1000 + +void* test_arr[N_TEST_ARR]; + +int main ( int argc, char** argv ) +{ + int i, j, k, nbytes; + unsigned char* chp; + char z; + + for (i = 0; i < N_TEST_ARR; i++) + test_arr[i] = NULL; + + for (i = 0; i < N_TEST_TRANSACTIONS; i++) { + j = random() % N_TEST_ARR; + if (test_arr[j]) { + free(test_arr[j]); + test_arr[j] = NULL; + } else { + nbytes = 1 + random() % M_TEST_MALLOC; + if (random()%64 == 32) + nbytes *= 17; + test_arr[j] = malloc( nbytes ); + chp = test_arr[j]; + for (k = 1; k < nbytes; k++) + chp[k] = (unsigned char)(k + 99); + } + } + + for (i = 0; test_arr[i] == NULL; i++) ; + free(test_arr[i]); + ((char*)test_arr[i])[0] = 0; + + for (i = 0; i < N_TEST_ARR; i++) { + if (test_arr[i]) { + free(test_arr[i]); + test_arr[i] = NULL; + } + } + + return 0; +} diff --git a/tests/manuel1.c b/tests/manuel1.c new file mode 100644 index 000000000..d56dfe240 --- /dev/null +++ b/tests/manuel1.c @@ -0,0 +1,9 @@ +#include +#include + +int main () +{ + int x; + + printf ("x = %d\n", x); +} diff --git a/tests/manuel2.c b/tests/manuel2.c new file mode 100644 index 000000000..af362b44f --- /dev/null +++ b/tests/manuel2.c @@ -0,0 +1,9 @@ +#include +#include + +int main () +{ + int *x; + + printf ("x = %d\n", *x); +} diff --git a/tests/manuel3.c b/tests/manuel3.c new file mode 100644 index 000000000..44ab712e4 --- /dev/null +++ b/tests/manuel3.c @@ -0,0 +1,13 @@ +#include +#include + +int main () +{ + int *x, y; + + x = (int *) malloc (sizeof (int)); 
+ + y = *x == 173; + + printf ("x = %d\n", y); +} diff --git a/tests/memalign_test.c b/tests/memalign_test.c new file mode 100644 index 000000000..a24808c55 --- /dev/null +++ b/tests/memalign_test.c @@ -0,0 +1,19 @@ + +#include +#include + +int main ( void ) +{ + void* a[10]; + int i; + for (i = 0; i < 10; i++) { + a[i] = valloc(11111 * (i+1)); + // printf("I acquire %p\n", a[i]); + } + for (i = 0; i < 10; i++) { + // printf("I release %p\n", a[i]); + free(a[i]); + } + free(a[9]); + return 0; +} diff --git a/tests/memcmptest.c b/tests/memcmptest.c new file mode 100644 index 000000000..56dd85fbd --- /dev/null +++ b/tests/memcmptest.c @@ -0,0 +1,19 @@ + +#include +#include + +char* s1; +char* s2; + +int main ( void ) +{ + s1 = malloc(10); strcpy(s1,"fooble"); + s2 = malloc(10); strcpy(s2,"fooble"); + if (memcmp(s1, s2, 8) != 0) + printf("different\n"); + else + printf("same (?!)\n"); + return 0; +} + + diff --git a/tests/memtests.cpp b/tests/memtests.cpp new file mode 100644 index 000000000..b5f2f3ff3 --- /dev/null +++ b/tests/memtests.cpp @@ -0,0 +1,29 @@ +#include + +int main() +{ + int* fpointer = (int*)malloc(10); + delete fpointer; // should give warning + fpointer = (int*)malloc(10); + delete [] fpointer; // should give warning + fpointer = (int*)malloc(10); + free (fpointer); // should work! + + int* nvec = new int[10]; + delete nvec; // should give a warning + nvec = new int[10]; + free (nvec); // should give a warning + nvec = new int[10]; + delete [] nvec; // should work! + + int* n = new int; + delete [] n; // should give a warning + n = new int; + free(n); // should give a warning + n = new int; + delete n; // should work! 
+ + free(0); + + return 0; +} diff --git a/tests/mmaptest.c b/tests/mmaptest.c new file mode 100644 index 000000000..4e52b1a6d --- /dev/null +++ b/tests/mmaptest.c @@ -0,0 +1,15 @@ +#include +#include +#include +#include +#include + +int main() +{ + int fd; + + mkdir("dir", 0666); + fd = open("dir", O_RDONLY); + mmap(NULL, 4711, PROT_READ, MAP_PRIVATE, fd, 0); + return 0; +} diff --git a/tests/oneparam.c b/tests/oneparam.c new file mode 100644 index 000000000..648e304b4 --- /dev/null +++ b/tests/oneparam.c @@ -0,0 +1,10 @@ + +/* general simple function to use as a template for assembly hacks */ + +void fooble ( int* a ) +{ + a[0] = 33; + a[1] = 44; + a[2] = 55; + a[3] = 66; +} diff --git a/tests/pushfpopf.s b/tests/pushfpopf.s new file mode 100644 index 000000000..f1409950b --- /dev/null +++ b/tests/pushfpopf.s @@ -0,0 +1,38 @@ + .file "twoparams.c" + .version "01.01" +gcc2_compiled.: +.text + .align 4 +.globl fooble + .type fooble,@function +fooble: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + + subl 12(%ebp), %eax + # flags are now undef if either operand is + # save possibly undef flags on stack + pushfl + + movl $0, %eax + addl $0, %eax + # flags are now definitely defined + + popfl + # resulting flag definedness depends on outcome of sub above + # should override that created by 0 + 0 above + + # now use the condition codes to generate a value + # in a way which will cause undefinedness to get reported + jz labelz + movl $22, %eax + jmp theend +labelz: + movl $33, %eax +theend: + popl %ebp + ret +.Lfe1: + .size fooble,.Lfe1-fooble + .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-98)" diff --git a/tests/pushfpopf_c.c b/tests/pushfpopf_c.c new file mode 100644 index 000000000..f45271e0f --- /dev/null +++ b/tests/pushfpopf_c.c @@ -0,0 +1,14 @@ + +#include + +// in pushfpopf.s +extern int fooble ( int, int ); + +int main ( void ) +{ + int arr[2]; + arr[0] = 3; + // arr[1] = 45; + printf("fooble: result is %d\n", fooble(arr[0], arr[1])); + return 
0; +} diff --git a/tests/rcl_assert.s b/tests/rcl_assert.s new file mode 100644 index 000000000..303feae53 --- /dev/null +++ b/tests/rcl_assert.s @@ -0,0 +1,8 @@ + + .text + .globl main + main: + xorl %eax, %eax + rcll %eax + imull %eax, %eax + ret diff --git a/tests/rcrl.c b/tests/rcrl.c new file mode 100644 index 000000000..115118d12 --- /dev/null +++ b/tests/rcrl.c @@ -0,0 +1,12 @@ +#include + + int main (void) + { + double U = + ((double) 10586 / (double) 16119) + + ((double) 37190 / (double) 108363); + printf ("x = %f\n", U); + + return 0; + } + diff --git a/tests/readline1.c b/tests/readline1.c new file mode 100644 index 000000000..0d2c1a5b9 --- /dev/null +++ b/tests/readline1.c @@ -0,0 +1,25 @@ + +#include +#include +#include + +int rl_insert ( int, int ); + +void main ( void ) +{ + rl_insert(1, 'z'); +} + +int zzzstrlen ( char* str ) +{ + if (str[1] == 0) return 2; else return 10; +} + +int rl_insert ( int count, int c ) +{ + char str[2]; + str[1] = 0; + str[0] = c; + printf("HERE strlen is %d\n", zzzstrlen(str)); + return 0; +} diff --git a/tests/realloc1.c b/tests/realloc1.c new file mode 100644 index 000000000..a5d1edc64 --- /dev/null +++ b/tests/realloc1.c @@ -0,0 +1,14 @@ + +#include +#include + +int main ( void ) +{ + int i; + char* p = malloc(1); + for (i = 2; i < 50; i++) { + p = realloc(p, i); + p[i-1] = 'z'; + } + return 0; +} diff --git a/tests/sha1.test.c b/tests/sha1.test.c new file mode 100644 index 000000000..28b339fd6 --- /dev/null +++ b/tests/sha1.test.c @@ -0,0 +1,250 @@ + +/* +Stick the enclosed text in a file "sha1.test.c". + +Here is a sample run: + hugh $ cc sha1.test.c + hugh $ valgrind ./a.out + ==26189== valgrind-20020227, a memory error detector for x86 GNU/Linux. + ==26189== Copyright (C) 2000-2002, and GNU GPL'd, by Julian Seward. + ==26189== For more details, rerun with: -v + ==26189== + + valgrind: vg_main.c:495 (vgPlain_create_translation_for): Assertion `trans_size > 0 && trans_size < 65536' failed. 
+ Please report this bug to me at: jseward@acm.org + + hugh $ valgrind --instrument=no ./a.out + ==26209== valgrind-20020227, a memory error detector for x86 GNU/Linux. + ==26209== Copyright (C) 2000-2002, and GNU GPL'd, by Julian Seward. + ==26209== For more details, rerun with: -v + ==26209== + ==26209== + +*/ + +/* ================ sha1.c ================ */ +/* +SHA-1 in C +By Steve Reid +100% Public Domain + +Test Vectors (from FIPS PUB 180-1) +"abc" + A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D +"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" + 84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1 +A million repetitions of "a" + 34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F +*/ + +/* #define LITTLE_ENDIAN * This should be #define'd already, if true. */ +/* #define SHA1HANDSOFF * Copies data before messing with it. */ + +#define SHA1HANDSOFF + +#include +#include /* for u_int*_t */ + +/* ================ sha1.h ================ */ +/* +SHA-1 in C +By Steve Reid +100% Public Domain +*/ + +typedef struct { + u_int32_t state[5]; + u_int32_t count[2]; + unsigned char buffer[64]; +} SHA1_CTX; + +void SHA1Transform(u_int32_t state[5], const unsigned char buffer[64]); +void SHA1Init(SHA1_CTX* context); +void SHA1Update(SHA1_CTX* context, const unsigned char* data, u_int32_t len); +void SHA1Final(unsigned char digest[20], SHA1_CTX* context); +/* ================ end of sha1.h ================ */ +#include + +#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits)))) + +/* blk0() and blk() perform the initial expand. */ +/* I got the idea of expanding during the round function from SSLeay */ +#if BYTE_ORDER == LITTLE_ENDIAN +#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \ + |(rol(block->l[i],8)&0x00FF00FF)) +#elif BYTE_ORDER == BIG_ENDIAN +#define blk0(i) block->l[i] +#else +#error "Endianness not defined!" 
+#endif +#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] \ + ^block->l[(i+2)&15]^block->l[i&15],1)) + +/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */ +#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30); +#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30); +#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30); +#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30); +#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30); + + +/* Hash a single 512-bit block. This is the core of the algorithm. */ + +void SHA1Transform(u_int32_t state[5], const unsigned char buffer[64]) +{ +u_int32_t a, b, c, d, e; +typedef union { + unsigned char c[64]; + u_int32_t l[16]; +} CHAR64LONG16; +#ifdef SHA1HANDSOFF +CHAR64LONG16 block[1]; /* use array to appear as a pointer */ + memcpy(block, buffer, 64); +#else + /* The following had better never be used because it causes the + * pointer-to-const buffer to be cast into a pointer to non-const. + * And the result is written through. I threw a "const" in, hoping + * this will cause a diagnostic. + */ +CHAR64LONG16* block = (const CHAR64LONG16*)buffer; +#endif + /* Copy context->state[] to working vars */ + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + /* 4 rounds of 20 operations each. Loop unrolled. 
*/ + R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3); + R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7); + R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11); + R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15); + R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19); + R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23); + R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27); + R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31); + R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35); + R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39); + R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43); + R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47); + R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51); + R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55); + R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59); + R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63); + R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67); + R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71); + R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75); + R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79); + /* Add the working vars back into context.state[] */ + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + state[4] += e; + /* Wipe variables */ + a = b = c = d = e = 0; +#ifdef SHA1HANDSOFF + memset(block, '\0', sizeof(block)); +#endif +} + + +/* SHA1Init - Initialize new context */ + +void SHA1Init(SHA1_CTX* context) +{ + /* SHA1 initialization constants */ + context->state[0] = 0x67452301; + context->state[1] = 0xEFCDAB89; + context->state[2] = 0x98BADCFE; + context->state[3] = 0x10325476; 
+ context->state[4] = 0xC3D2E1F0; + context->count[0] = context->count[1] = 0; +} + + +/* Run your data through this. */ + +void SHA1Update(SHA1_CTX* context, const unsigned char* data, u_int32_t len) +{ +u_int32_t i; +u_int32_t j; + + j = context->count[0]; + if ((context->count[0] += len << 3) < j) + context->count[1]++; + context->count[1] += (len>>29); + j = (j >> 3) & 63; + if ((j + len) > 63) { + memcpy(&context->buffer[j], data, (i = 64-j)); + SHA1Transform(context->state, context->buffer); + for ( ; i + 63 < len; i += 64) { + SHA1Transform(context->state, &data[i]); + } + j = 0; + } + else i = 0; + memcpy(&context->buffer[j], &data[i], len - i); +} + + +/* Add padding and return the message digest. */ + +void SHA1Final(unsigned char digest[20], SHA1_CTX* context) +{ +unsigned i; +unsigned char finalcount[8]; +unsigned char c; + +#if 0 /* untested "improvement" by DHR */ + /* Convert context->count to a sequence of bytes + * in finalcount. Second element first, but + * big-endian order within element. + * But we do it all backwards. + */ + unsigned char *fcp = &finalcount[8]; + + for (i = 0; i < 2; i++) + { + u_int32_t t = context->count[i]; + int j; + + for (j = 0; j < 4; t >>= 8, j++) + *--fcp = (unsigned char) t + } +#else + for (i = 0; i < 8; i++) { + finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 
0 : 1)] + >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */ + } +#endif + c = 0200; + SHA1Update(context, &c, 1); + while ((context->count[0] & 504) != 448) { + c = 0000; + SHA1Update(context, &c, 1); + } + SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */ + for (i = 0; i < 20; i++) { + digest[i] = (unsigned char) + ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255); + } + /* Wipe variables */ + memset(context, '\0', sizeof(*context)); + memset(&finalcount, '\0', sizeof(finalcount)); +} +/* ================ end of sha1.c ================ */ + +int +main(int argc, char **argv) +{ + SHA1_CTX ctx; + unsigned char hash[20]; + + SHA1Init(&ctx); + SHA1Update(&ctx, "abc", 3); + SHA1Final(hash, &ctx); + return 0; +} + + diff --git a/tests/shortpush.c b/tests/shortpush.c new file mode 100644 index 000000000..e91a26820 --- /dev/null +++ b/tests/shortpush.c @@ -0,0 +1,15 @@ + +typedef unsigned short UShort; + +UShort mul16 ( UShort a, UShort b ); + +int main ( int argc, char** argv ) +{ + UShort x = mul16 ( 10, 20 ); + return ((int)x) - 200; +} + +UShort mul16 ( UShort a, UShort b ) +{ + return a * b; +} diff --git a/tests/shorts.c b/tests/shorts.c new file mode 100644 index 000000000..034c7068f --- /dev/null +++ b/tests/shorts.c @@ -0,0 +1,36 @@ + +#include + +typedef struct { short ot; short ob; short nt; short nb; } Stuff; + +void PaintThumb( Stuff* w ) +{ + short oldtop = w->ot; + short oldbot = w->ob; + short newtop = w->nt; + short newbot = w->nb; + + if (newtop < oldtop) { fprintf(stderr,"case1\n"); + // FillArea(w, newtop, XawMin(newbot, oldtop), 1); + } + if (newtop > oldtop) { fprintf(stderr,"case2\n"); + // FillArea(w, oldtop, XawMin(newtop, oldbot), 0); + } + if (newbot < oldbot) { fprintf(stderr,"case3\n"); + // FillArea(w, XawMax(newbot, oldtop), oldbot, 0); + } + if (newbot > oldbot) { fprintf(stderr,"case4\n"); + // FillArea(w, XawMax(newtop, oldbot), newbot, 1); + } +} + +int main ( void ) +{ + Stuff st; + st.ot = -332; + st.ob 
= -301; + st.nt = 0; + st.nb = 31; + PaintThumb( &st ); + return 0; +} diff --git a/tests/signal1.c b/tests/signal1.c new file mode 100644 index 000000000..68a5cade6 --- /dev/null +++ b/tests/signal1.c @@ -0,0 +1,22 @@ + +#include +#include + +volatile int spin; + +void sig_hdlr ( int signo ) +{ + printf ( "caught signal\n" ); + spin = 0; + printf ( "signal returns\n" ); +} + +void main ( void ) +{ + spin = 1; + printf ( "installing sig handler\n" ); + signal(SIGINT, sig_hdlr); + printf ( "entering busy wait\n" ); + while (spin) { }; + printf ( "exited\n" ); +} diff --git a/tests/signal2.c b/tests/signal2.c new file mode 100644 index 000000000..6892d3347 --- /dev/null +++ b/tests/signal2.c @@ -0,0 +1,18 @@ + +#include +#include + +void sig_hdlr ( int signo ) +{ + printf ( "caught sig segv\n" ); + exit(1); +} + +void main ( void ) +{ + printf ( "installing sig handler\n" ); + signal(SIGSEGV, sig_hdlr); + printf ( "doing bad thing\n" ); + * (int*) 0 = 0; + printf ( "exited normally ?!\n" ); +} diff --git a/tests/signal3.c b/tests/signal3.c new file mode 100644 index 000000000..e2b4d17b3 --- /dev/null +++ b/tests/signal3.c @@ -0,0 +1,33 @@ + +#include +#include +#include +#include + +void hdp_tick ( int sigNo ) +{ + int j; + printf("tick "); fflush(stdout); + for (j = 0; j < 10 * 5000; j++) ; + printf("tock\n"); +} + +void hdp_init_profiling ( void ) +{ + struct itimerval value; + int ret; + + value.it_interval.tv_sec = 0; + value.it_interval.tv_usec = 50 * 1000; + value.it_value = value.it_interval; + + signal(SIGPROF, hdp_tick); + ret = setitimer(ITIMER_PROF, &value, NULL); + assert(ret == 0); +} + +void main ( void ) +{ + hdp_init_profiling(); + while (1) {} +} diff --git a/tests/smc1.c b/tests/smc1.c new file mode 100644 index 000000000..398f88d90 --- /dev/null +++ b/tests/smc1.c @@ -0,0 +1,72 @@ + +/* Test Heimdall's ability to spot writes to code which has been + translated, and discard the out-of-date translations. 
+ + CORRECT output is + + in p 0 + in q 1 + in p 2 + in q 3 + in p 4 + in q 5 + in p 6 + in q 7 + in p 8 + in q 9 + + WRONG output (if you fail to spot code-writes to code[0 .. 4]) is + + in p 0 + in p 1 + in p 2 + in p 3 + in p 4 + in p 5 + in p 6 + in p 7 + in p 8 + in p 9 +*/ + +#include + +typedef unsigned int Addr; +typedef unsigned char UChar; + +void q ( int n ) +{ + printf("in q %d\n", n); +} + +void p ( int n ) +{ + printf("in p %d\n", n); +} + +UChar code[100]; + +/* Make `code' be JMP-32 dest */ +void set_dest ( Addr dest ) +{ + unsigned int delta; + delta = dest - ((Addr)(&code[0])); + delta -= 5; + + code[0] = 0xE9; /* JMP d32 */ + code[1] = (delta & 0xFF); + code[2] = ((delta >> 8) & 0xFF); + code[3] = ((delta >> 16) & 0xFF); + code[4] = ((delta >> 24) & 0xFF); +} + +void main ( void ) +{ + int i; + for (i = 0; i < 10; i += 2) { + set_dest ( (Addr)&p ); + ( (void (*)(int)) (&code[0]) ) (i); + set_dest ( (Addr)&q ); + ( (void (*)(int)) (&code[0]) ) (i+1); + } +} diff --git a/tests/suppfree.c b/tests/suppfree.c new file mode 100644 index 000000000..8298f021a --- /dev/null +++ b/tests/suppfree.c @@ -0,0 +1,30 @@ + +#include + +void ddd ( char* x ) +{ + free(x); + free(x); +} + +void ccc (char* x) +{ + ddd(x); +} + +void bbb (char* x) +{ + ccc(x); +} + +void aaa (char* x) +{ + bbb(x); +} + +int main ( void ) +{ + char* x = malloc(10); + aaa(x); + return 0; +} diff --git a/tests/tronical.c b/tests/tronical.c new file mode 100644 index 000000000..dabd83d7e --- /dev/null +++ b/tests/tronical.c @@ -0,0 +1,37 @@ +#include + +struct Foo +{ + int a1 : 1; + int a2 : 1; + int a3 : 1; + int a4 : 1; + int a5 : 1; + int a6 : 1; + int a7 : 1; + int bleh : 1; +}; + +struct Foo* foo; + +void set() +{ + foo->bleh = 1; +} + +void get() +{ + if ( foo->bleh == 0 ) + printf( "blieb\n" ); +} + +int main() +{ + foo = malloc(sizeof(struct Foo)); + set(); + + get(); + + return 0; +} + diff --git a/tests/tronical.s b/tests/tronical.s new file mode 100644 index 
000000000..ee17c099e --- /dev/null +++ b/tests/tronical.s @@ -0,0 +1,58 @@ + .file "tronical.c" + .version "01.01" +gcc2_compiled.: +.text + .align 4 +.globl set + .type set,@function +set: + pushl %ebp + movl foo, %eax + orb $128, (%eax) + movl %esp, %ebp + popl %ebp + ret +.Lfe1: + .size set,.Lfe1-set + .section .rodata.str1.1,"ams",@progbits,1 +.LC0: + .string "blieb\n" +.text + .align 4 +.globl get + .type get,@function +get: + pushl %ebp + movl %esp, %ebp + subl $8, %esp + movl foo, %eax + cmpb $0, (%eax) + js .L4 + subl $12, %esp + pushl $.LC0 + call printf + addl $16, %esp +.L4: + leave + ret +.Lfe2: + .size get,.Lfe2-get + .align 4 +.globl main + .type main,@function +main: + pushl %ebp + movl %esp, %ebp + subl $20, %esp + pushl $4 + call malloc + movl %eax, foo + call set + call get + xorl %eax, %eax + leave + ret +.Lfe3: + .size main,.Lfe3-main + .comm foo,4,4 + .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-98)" diff --git a/tests/twoparams.c b/tests/twoparams.c new file mode 100644 index 000000000..91c966a26 --- /dev/null +++ b/tests/twoparams.c @@ -0,0 +1,7 @@ + +/* general simple function to use as a template for assembly hacks */ + +int fooble ( int a, int b ) +{ + return a - b; +} diff --git a/tests/twoparams.s b/tests/twoparams.s new file mode 100644 index 000000000..5adfec50f --- /dev/null +++ b/tests/twoparams.s @@ -0,0 +1,17 @@ + .file "twoparams.c" + .version "01.01" +gcc2_compiled.: +.text + .align 4 +.globl fooble + .type fooble,@function +fooble: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + subl 12(%ebp), %eax + popl %ebp + ret +.Lfe1: + .size fooble,.Lfe1-fooble + .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-98)" diff --git a/valgrind.h b/valgrind.h new file mode 100644 index 000000000..5a32ab565 --- /dev/null +++ b/valgrind.h @@ -0,0 +1,156 @@ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. 
+ + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + + +#ifndef __VALGRIND_H +#define __VALGRIND_H + + +/* This file is for inclusion into client (your!) code. + + You can use these macros to manipulate and query memory permissions + inside your own programs. + + The resulting executables will still run without Valgrind, just a + little bit more slowly than they otherwise would, but otherwise + unchanged. + + When run on Valgrind with --client-perms=yes, Valgrind observes + these macro calls and takes appropriate action. When run on + Valgrind with --client-perms=no (the default), Valgrind observes + these macro calls but does not take any action as a result. */ + + + +/* This defines the magic code sequence which the JITter spots and + handles magically. Don't look too closely at this; it will rot + your brain. 
+*/ +#define VALGRIND_MAGIC_SEQUENCE(_zzq_res,_zzq_code,_zzq_addr,_zzq_len) \ + asm volatile("movl %1, %%eax\n\t" \ + "movl %2, %%ebx\n\t" \ + "movl %3, %%ecx\n\t" \ + "roll $29, %%eax ; roll $3, %%eax\n\t" \ + "roll $27, %%eax ; roll $5, %%eax\n\t" \ + "movl %%eax, %0\t" \ + : "=r" (_zzq_res) \ + : "r" (_zzq_code), "r" (_zzq_addr), "r" (_zzq_len) \ + : "eax", "ebx", "ecx", "cc", "memory" \ + ); + + + +/* Client-code macros to manipulate the state of memory. */ + +/* Mark memory at _qzz_addr as unaddressible and undefined for + _qzz_len bytes. Returns an int handle pertaining to the block + descriptions Valgrind will use in subsequent error messages. */ +#define VALGRIND_MAKE_NOACCESS(_qzz_addr,_qzz_len) \ + ({unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res,1001,_qzz_addr,_qzz_len); \ + _qzz_res; \ + }) + +/* Similarly, mark memory at _qzz_addr as addressible but undefined + for _qzz_len bytes. */ +#define VALGRIND_MAKE_WRITABLE(_qzz_addr,_qzz_len) \ + ({unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res,1002,_qzz_addr,_qzz_len); \ + _qzz_res; \ + }) + +/* Similarly, mark memory at _qzz_addr as addressible and defined + for _qzz_len bytes. */ +#define VALGRIND_MAKE_READABLE(_qzz_addr,_qzz_len) \ + ({unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res,1003,_qzz_addr,_qzz_len); \ + _qzz_res; \ + }) + +/* Discard a block-description-handle obtained from the above three + macros. After this, Valgrind will no longer be able to relate + addressing errors to the user-defined block associated with the + handle. The permissions settings associated with the handle remain + in place. Returns 1 for an invalid handle, 0 for a valid + handle. */ +#define VALGRIND_DISCARD(_qzz_blkindex) \ + ({unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res,2004,0,_qzz_blkindex); \ + _qzz_res; \ + }) + + + +/* Client-code macros to check the state of memory. */ + +/* Check that memory at _qzz_addr is addressible for _qzz_len bytes. 
+ If suitable addressibility is not established, Valgrind prints an + error message and returns the address of the first offending byte. + Otherwise it returns zero. */ +#define VALGRIND_CHECK_WRITABLE(_qzz_addr,_qzz_len) \ + ({unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res,2002,_qzz_addr,_qzz_len); \ + _qzz_res; \ + }) + +/* Check that memory at _qzz_addr is addressible and defined for + _qzz_len bytes. If suitable addressibility and definedness are not + established, Valgrind prints an error message and returns the + address of the first offending byte. Otherwise it returns zero. */ +#define VALGRIND_CHECK_READABLE(_qzz_addr,_qzz_len) \ + ({unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res,2003,_qzz_addr,_qzz_len); \ + _qzz_res; \ + }) + + +/* Use this macro to force the definedness and addressibility of a + value to be checked. If suitable addressibility and definedness + are not established, Valgrind prints an error message and returns + the address of the first offending byte. Otherwise it returns + zero. */ +#define VALGRIND_CHECK_DEFINED(__lvalue) \ + (void) \ + VALGRIND_CHECK_READABLE( \ + (volatile unsigned char *)&(__lvalue), \ + (unsigned int)(sizeof (__lvalue))) + + + +/* Mark memory, intended to be on the client's stack, at _qzz_addr as + unaddressible and undefined for _qzz_len bytes. Does not return a + value. The record associated with this setting will be + automatically removed by Valgrind when the containing routine + exits. */ +#define VALGRIND_MAKE_NOACCESS_STACK(_qzz_addr,_qzz_len) \ + ({unsigned int _qzz_res; \ + VALGRIND_MAGIC_SEQUENCE(_qzz_res,3001,_qzz_addr,_qzz_len); \ + _qzz_res; \ + }) + + +#endif diff --git a/valgrind.in b/valgrind.in new file mode 100755 index 000000000..f791c663a --- /dev/null +++ b/valgrind.in @@ -0,0 +1,167 @@ +#!/bin/sh + +# Should point to the installation directory +prefix="@prefix@" +exec_prefix="@exec_prefix@" +VALGRIND="@libdir@/valgrind" + + +# Other stuff ... 
+version="@VERSION@" +emailto="jseward@acm.org" + +# The default name of the suppressions file +vgsupp="--suppressions=$VALGRIND/default.supp" + +# name we were invoked with +vgname=`echo $0 | sed 's,^.*/,,'` + +# Valgrind options +vgopts= + +# Prog and arg to run +argopts= + +# Show usage info? +dousage=0 + +# show version info? +doversion=0 + +# Collect up args for Valgrind +for arg +do + case "$arg" in +# options for the user + --help) dousage=1; break;; + --version) doversion=1; break;; + --logfile-fd=*) vgopts="$vgopts $arg"; shift;; + -v) vgopts="$vgopts $arg"; shift;; + --verbose) vgopts="$vgopts -v"; shift;; + -q) vgopts="$vgopts $arg"; shift;; + --quiet) vgopts="$vgopts $arg"; shift;; + --gdb-attach=no) vgopts="$vgopts $arg"; shift;; + --gdb-attach=yes) vgopts="$vgopts $arg"; shift;; + --demangle=no) vgopts="$vgopts $arg"; shift;; + --demangle=yes) vgopts="$vgopts $arg"; shift;; + --num-callers=*) vgopts="$vgopts $arg"; shift;; + --partial-loads-ok=no) vgopts="$vgopts $arg"; shift;; + --partial-loads-ok=yes) vgopts="$vgopts $arg"; shift;; + --leak-check=no) vgopts="$vgopts $arg"; shift;; + --leak-check=yes) vgopts="$vgopts $arg"; shift;; + --show-reachable=no) vgopts="$vgopts $arg"; shift;; + --show-reachable=yes) vgopts="$vgopts $arg"; shift;; + --leak-resolution=low) vgopts="$vgopts $arg"; shift;; + --leak-resolution=med) vgopts="$vgopts $arg"; shift;; + --leak-resolution=high) vgopts="$vgopts $arg"; shift;; + --sloppy-malloc=no) vgopts="$vgopts $arg"; shift;; + --sloppy-malloc=yes) vgopts="$vgopts $arg"; shift;; + --trace-children=no) vgopts="$vgopts $arg"; shift;; + --trace-children=yes) vgopts="$vgopts $arg"; shift;; + --workaround-gcc296-bugs=no) vgopts="$vgopts $arg"; shift;; + --workaround-gcc296-bugs=yes) vgopts="$vgopts $arg"; shift;; + --freelist-vol=*) vgopts="$vgopts $arg"; shift;; + --suppressions=*) vgopts="$vgopts $arg"; shift;; +# options for debugging Valgrind + --sanity-level=*) vgopts="$vgopts $arg"; shift;; + --single-step=yes) 
vgopts="$vgopts $arg"; shift;; + --single-step=no) vgopts="$vgopts $arg"; shift;; + --optimise=yes) vgopts="$vgopts $arg"; shift;; + --optimise=no) vgopts="$vgopts $arg"; shift;; + --instrument=yes) vgopts="$vgopts $arg"; shift;; + --instrument=no) vgopts="$vgopts $arg"; shift;; + --cleanup=yes) vgopts="$vgopts $arg"; shift;; + --cleanup=no) vgopts="$vgopts $arg"; shift;; + --client-perms=yes) vgopts="$vgopts $arg"; shift;; + --client-perms=no) vgopts="$vgopts $arg"; shift;; + --smc-check=none) vgopts="$vgopts $arg"; shift;; + --smc-check=some) vgopts="$vgopts $arg"; shift;; + --smc-check=all) vgopts="$vgopts $arg"; shift;; + --trace-syscalls=yes) vgopts="$vgopts $arg"; shift;; + --trace-syscalls=no) vgopts="$vgopts $arg"; shift;; + --trace-signals=yes) vgopts="$vgopts $arg"; shift;; + --trace-signals=no) vgopts="$vgopts $arg"; shift;; + --trace-symtab=yes) vgopts="$vgopts $arg"; shift;; + --trace-symtab=no) vgopts="$vgopts $arg"; shift;; + --trace-malloc=yes) vgopts="$vgopts $arg"; shift;; + --trace-malloc=no) vgopts="$vgopts $arg"; shift;; + --stop-after=*) vgopts="$vgopts $arg"; shift;; + --dump-error=*) vgopts="$vgopts $arg"; shift;; + -*) dousage=1; break;; + *) break;; + esac +done + +# Collect up the prog and args to run +for arg +do + case "$arg" in + *) argopts="$argopts $arg"; shift;; + esac +done + +if [ z"$doversion" = z1 ]; then + echo "valgrind-$version" + exit 1 +fi + +if [ z"$argopts" = z -o z"$dousage" = z1 ]; then + echo + echo "usage: $vgname [options] prog-and-args" + echo + echo " options for the user, with defaults in [ ], are:" + echo " --help show this message" + echo " --version show version" + echo " -q --quiet run silently; only print error msgs" + echo " -v --verbose be more verbose, incl counts of errors" + echo " --gdb-attach=no|yes start GDB when errors detected? [no]" + echo " --demangle=no|yes automatically demangle C++ names? 
[yes]" + echo " --num-callers= show callers in stack traces [4]" + echo " --partial-loads-ok=no|yes too hard to explain here; see manual [yes]" + echo " --leak-check=no|yes search for memory leaks at exit? [no]" + echo " --leak-resolution=low|med|high" + echo " amount of bt merging in leak check [low]" + echo " --show-reachable=no|yes show reachable blocks in leak check? [no]" + echo " --sloppy-malloc=no|yes round malloc sizes to next word? [no]" + echo " --trace-children=no|yes Valgrind-ise child processes? [no]" + echo " --logfile-fd= file descriptor for messages [2=stderr]" + echo " --freelist-vol= volume of freed blocks queue [1000000]" + echo " --workaround-gcc296-bugs=no|yes self explanatory [no]" + echo " --suppressions= suppress errors described in" + echo " suppressions file " + echo " --client-perms=no|yes handle client VG_MAKE_* requests? [no]" + echo + echo " options for debugging Valgrind itself are:" + echo " --sanity-level= level of sanity checking to do [1]" + echo " --single-step=no|yes translate each instr separately? [no]" + echo " --optimise=no|yes improve intermediate code? [yes]" + echo " --instrument=no|yes actually do memory checks? [yes]" + echo " --cleanup=no|yes improve after instrumentation? [yes]" + echo " --smc-check=none|some|all check writes for s-m-c? [some]" + echo " --trace-syscalls=no|yes show all system calls? [no]" + echo " --trace-signals=no|yes show signal handling details? [no]" + echo " --trace-symtab=no|yes show symbol table details? [no]" + echo " --trace-malloc=no|yes show client malloc details? 
[no]" + echo " --stop-after= switch to real CPU after executing" + echo " basic blocks [infinity]" + echo " --dump-error= show translation for basic block" + echo " associated with 'th" + echo " error context [0=don't show any]" + echo + echo " Extra options are read from env variable \$VALGRIND_OPTS" + echo + echo " Valgrind is Copyright (C) 2000-2002 Julian Seward" + echo " and licensed under the GNU General Public License, version 2." + echo " Bug reports, feedback, admiration, abuse, etc, to: $emailto." + echo + exit 1 +fi + + +VG_ARGS="$VALGRIND_OPTS $vgsupp $vgopts" +export VG_ARGS +LD_PRELOAD=$VALGRIND/valgrind.so:$LD_PRELOAD +export LD_PRELOAD +exec $argopts + + diff --git a/vg_clientmalloc.c b/vg_clientmalloc.c new file mode 100644 index 000000000..d2be752d0 --- /dev/null +++ b/vg_clientmalloc.c @@ -0,0 +1,937 @@ + +/*--------------------------------------------------------------------*/ +/*--- An implementation of malloc/free for the client. ---*/ +/*--- vg_clientmalloc.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" + + +/*------------------------------------------------------------*/ +/*--- Defns ---*/ +/*------------------------------------------------------------*/ + +/* #define DEBUG_CLIENTMALLOC */ + +/* Holds malloc'd but not freed blocks. */ +#define VG_MALLOCLIST_NO(aa) (((UInt)(aa)) % VG_N_MALLOCLISTS) +static ShadowChunk* vg_malloclist[VG_N_MALLOCLISTS]; +static Bool vg_client_malloc_init_done = False; + +/* Holds blocks after freeing. */ +static ShadowChunk* vg_freed_list_start = NULL; +static ShadowChunk* vg_freed_list_end = NULL; +static Int vg_freed_list_volume = 0; + +/* Stats ... */ +static UInt vg_cmalloc_n_mallocs = 0; +static UInt vg_cmalloc_n_frees = 0; +static UInt vg_cmalloc_bs_mallocd = 0; + +static UInt vg_mlist_frees = 0; +static UInt vg_mlist_tries = 0; + + +/*------------------------------------------------------------*/ +/*--- Fns ---*/ +/*------------------------------------------------------------*/ + +/* Allocate a suitably-sized array, copy all the malloc-d block + shadows into it, and return both the array and the size of it. + This is used by the memory-leak detector. 
+*/ +ShadowChunk** VG_(get_malloc_shadows) ( /*OUT*/ UInt* n_shadows ) +{ + UInt i, scn; + ShadowChunk** arr; + ShadowChunk* sc; + *n_shadows = 0; + for (scn = 0; scn < VG_N_MALLOCLISTS; scn++) { + for (sc = vg_malloclist[scn]; sc != NULL; sc = sc->next) { + (*n_shadows)++; + } + } + if (*n_shadows == 0) return NULL; + + arr = VG_(malloc)( VG_AR_PRIVATE, + *n_shadows * sizeof(ShadowChunk*) ); + + i = 0; + for (scn = 0; scn < VG_N_MALLOCLISTS; scn++) { + for (sc = vg_malloclist[scn]; sc != NULL; sc = sc->next) { + arr[i++] = sc; + } + } + vg_assert(i == *n_shadows); + return arr; +} + +static void client_malloc_init ( void ) +{ + UInt ml_no; + if (vg_client_malloc_init_done) return; + for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) + vg_malloclist[ml_no] = NULL; + vg_client_malloc_init_done = True; +} + + +static __attribute__ ((unused)) + Int count_freelist ( void ) +{ + ShadowChunk* sc; + Int n = 0; + for (sc = vg_freed_list_start; sc != NULL; sc = sc->next) + n++; + return n; +} + +static __attribute__ ((unused)) + Int count_malloclists ( void ) +{ + ShadowChunk* sc; + UInt ml_no; + Int n = 0; + for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) + for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) + n++; + return n; +} + +static __attribute__ ((unused)) + void freelist_sanity ( void ) +{ + ShadowChunk* sc; + Int n = 0; + /* VG_(printf)("freelist sanity\n"); */ + for (sc = vg_freed_list_start; sc != NULL; sc = sc->next) + n += sc->size; + vg_assert(n == vg_freed_list_volume); +} + +/* Remove sc from malloc list # sc. It is an unchecked error for + sc not to be present in the list. 
+*/ +static void remove_from_malloclist ( UInt ml_no, ShadowChunk* sc ) +{ + ShadowChunk *sc1, *sc2; + if (sc == vg_malloclist[ml_no]) { + vg_malloclist[ml_no] = vg_malloclist[ml_no]->next; + } else { + sc1 = vg_malloclist[ml_no]; + vg_assert(sc1 != NULL); + sc2 = sc1->next; + while (sc2 != sc) { + vg_assert(sc2 != NULL); + sc1 = sc2; + sc2 = sc2->next; + } + vg_assert(sc1->next == sc); + vg_assert(sc2 == sc); + sc1->next = sc2->next; + } +} + + +/* Put a shadow chunk on the freed blocks queue, possibly freeing up + some of the oldest blocks in the queue at the same time. */ + +static void add_to_freed_queue ( ShadowChunk* sc ) +{ + ShadowChunk* sc1; + + /* Put it at the end of the freed list */ + if (vg_freed_list_end == NULL) { + vg_assert(vg_freed_list_start == NULL); + vg_freed_list_end = vg_freed_list_start = sc; + vg_freed_list_volume = sc->size; + } else { + vg_assert(vg_freed_list_end->next == NULL); + vg_freed_list_end->next = sc; + vg_freed_list_end = sc; + vg_freed_list_volume += sc->size; + } + sc->next = NULL; + + /* Release enough of the oldest blocks to bring the free queue + volume below vg_clo_freelist_vol. */ + + while (vg_freed_list_volume > VG_(clo_freelist_vol)) { + /* freelist_sanity(); */ + vg_assert(vg_freed_list_start != NULL); + vg_assert(vg_freed_list_end != NULL); + + sc1 = vg_freed_list_start; + vg_freed_list_volume -= sc1->size; + /* VG_(printf)("volume now %d\n", vg_freed_list_volume); */ + vg_assert(vg_freed_list_volume >= 0); + + if (vg_freed_list_start == vg_freed_list_end) { + vg_freed_list_start = vg_freed_list_end = NULL; + } else { + vg_freed_list_start = sc1->next; + } + sc1->next = NULL; /* just paranoia */ + VG_(free)(VG_AR_CLIENT, (void*)(sc1->data)); + VG_(free)(VG_AR_PRIVATE, sc1); + } +} + + +/* Allocate a user-chunk of size bytes. Also allocate its shadow + block, make the shadow block point at the user block. Put the + shadow chunk on the appropriate list, and set all memory + protections correctly. 
*/ + +static ShadowChunk* client_malloc_shadow ( UInt align, UInt size, + VgAllocKind kind ) +{ + ShadowChunk* sc; + Addr p; + UInt ml_no; + +# ifdef DEBUG_CLIENTMALLOC + VG_(printf)("[m %d, f %d (%d)] client_malloc_shadow ( al %d, sz %d )\n", + count_malloclists(), + count_freelist(), vg_freed_list_volume, + align, size ); +# endif + + if (align == 0) + p = (Addr)VG_(malloc)(VG_AR_CLIENT, size); + else + p = (Addr)VG_(malloc_aligned)(VG_AR_CLIENT, align, size); + + sc = VG_(malloc)(VG_AR_PRIVATE, sizeof(ShadowChunk)); + sc->where = VG_(get_ExeContext)(True); + sc->size = size; + sc->allockind = kind; + sc->data = p; + ml_no = VG_MALLOCLIST_NO(p); + sc->next = vg_malloclist[ml_no]; + vg_malloclist[ml_no] = sc; + + VGM_(make_writable)(p, size); + VGM_(make_noaccess)(p + size, + VG_AR_CLIENT_REDZONE_SZB); + VGM_(make_noaccess)(p - VG_AR_CLIENT_REDZONE_SZB, + VG_AR_CLIENT_REDZONE_SZB); + + return sc; +} + + +/* Allocate memory, noticing whether or not we are doing the full + instrumentation thing. 
*/ + +void* VG_(client_malloc) ( UInt size, UInt raw_alloc_kind ) +{ + ShadowChunk* sc; + VgAllocKind kind; + + VGP_PUSHCC(VgpCliMalloc); + client_malloc_init(); +# ifdef DEBUG_CLIENTMALLOC + VG_(printf)("[m %d, f %d (%d)] client_malloc ( %d, %x )\n", + count_malloclists(), + count_freelist(), vg_freed_list_volume, + size, raw_alloc_kind ); +# endif + if (!VG_(clo_instrument)) { + VGP_POPCC; + return VG_(malloc) ( VG_AR_CLIENT, size ); + } + switch (raw_alloc_kind) { + case 0x4002: kind = Vg_AllocNewVec; break; + case 0x4001: kind = Vg_AllocNew; break; + case 0x4000: /* malloc */ + case 6666: /* calloc */ + kind = Vg_AllocMalloc; break; + default: /* should not happen */ + /* therefore we make sure it doesn't -- JRS */ + VG_(panic)("VG_(client_malloc): raw_alloc_kind"); + break; /*NOTREACHED*/ + } + sc = client_malloc_shadow ( 0, size, kind ); + VGP_POPCC; + return (void*)(sc->data); +} + + +void* VG_(client_memalign) ( UInt align, UInt size ) +{ + ShadowChunk* sc; + VGP_PUSHCC(VgpCliMalloc); + client_malloc_init(); +# ifdef DEBUG_CLIENTMALLOC + VG_(printf)("[m %d, f %d (%d)] client_memalign ( al %d, sz %d )\n", + count_malloclists(), + count_freelist(), vg_freed_list_volume, + align, size ); +# endif + if (!VG_(clo_instrument)) { + VGP_POPCC; + return VG_(malloc_aligned) ( VG_AR_CLIENT, align, size ); + } + sc = client_malloc_shadow ( align, size, Vg_AllocMalloc ); + VGP_POPCC; + return (void*)(sc->data); +} + + +void VG_(client_free) ( void* ptrV, UInt raw_alloc_kind ) +{ + ShadowChunk* sc; + UInt ml_no; + VgAllocKind kind; + + VGP_PUSHCC(VgpCliMalloc); + client_malloc_init(); +# ifdef DEBUG_CLIENTMALLOC + VG_(printf)("[m %d, f %d (%d)] client_free ( %p, %x )\n", + count_malloclists(), + count_freelist(), vg_freed_list_volume, + ptrV, raw_alloc_kind ); +# endif + if (!VG_(clo_instrument)) { + VGP_POPCC; + VG_(free) ( VG_AR_CLIENT, ptrV ); + return; + } + + /* first, see if ptrV is one vg_client_malloc gave out. 
*/ + ml_no = VG_MALLOCLIST_NO(ptrV); + vg_mlist_frees++; + for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) { + vg_mlist_tries++; + if ((Addr)ptrV == sc->data) + break; + } + + if (sc == NULL) { + VG_(record_free_error) ( (Addr)ptrV ); + VGP_POPCC; + return; + } + + switch (raw_alloc_kind) { + case 0x5002: kind = Vg_AllocNewVec; break; + case 0x5001: kind = Vg_AllocNew; break; + case 0x5000: + default: + kind = Vg_AllocMalloc; + /* should only happen if bug in client code */ + break; + } + + /* check if its a matching free() / delete / delete [] */ + if (kind != sc->allockind) + VG_(record_freemismatch_error) ( (Addr) ptrV ); + + /* Remove the shadow chunk from the mallocd list. */ + remove_from_malloclist ( ml_no, sc ); + + /* Declare it inaccessible. */ + VGM_(make_noaccess) ( sc->data - VG_AR_CLIENT_REDZONE_SZB, + sc->size + 2*VG_AR_CLIENT_REDZONE_SZB ); + VGM_(make_noaccess) ( (Addr)sc, sizeof(ShadowChunk) ); + sc->where = VG_(get_ExeContext)(True); + + /* Put it out of harm's way for a while. */ + add_to_freed_queue ( sc ); + VGP_POPCC; +} + + + +void* VG_(client_calloc) ( UInt nmemb, UInt size1 ) +{ + ShadowChunk* sc; + Addr p; + UInt size, i, ml_no; + + VGP_PUSHCC(VgpCliMalloc); + client_malloc_init(); + +# ifdef DEBUG_CLIENTMALLOC + VG_(printf)("[m %d, f %d (%d)] client_calloc ( %d, %d )\n", + count_malloclists(), + count_freelist(), vg_freed_list_volume, + nmemb, size1 ); +# endif + + if (!VG_(clo_instrument)) { + VGP_POPCC; + return VG_(calloc) ( VG_AR_CLIENT, nmemb, size1 ); + } + + size = nmemb * size1; + p = (Addr)VG_(malloc)(VG_AR_CLIENT, size); + sc = VG_(malloc)(VG_AR_PRIVATE, sizeof(ShadowChunk)); + sc->where = VG_(get_ExeContext)(True); + sc->size = size; + sc->allockind = Vg_AllocMalloc; /* its a lie - but true. 
eat this :) */ + sc->data = p; + ml_no = VG_MALLOCLIST_NO(p); + sc->next = vg_malloclist[ml_no]; + vg_malloclist[ml_no] = sc; + + VGM_(make_readable)(p, size); + VGM_(make_noaccess)(p + size, + VG_AR_CLIENT_REDZONE_SZB); + VGM_(make_noaccess)(p - VG_AR_CLIENT_REDZONE_SZB, + VG_AR_CLIENT_REDZONE_SZB); + + for (i = 0; i < size; i++) ((UChar*)p)[i] = 0; + + VGP_POPCC; + return (void*)p; +} + + +void* VG_(client_realloc) ( void* ptrV, UInt size_new ) +{ + ShadowChunk *sc, *sc_new; + UInt i, ml_no; + + VGP_PUSHCC(VgpCliMalloc); + client_malloc_init(); + +# ifdef DEBUG_CLIENTMALLOC + VG_(printf)("[m %d, f %d (%d)] client_realloc ( %p, %d )\n", + count_malloclists(), + count_freelist(), vg_freed_list_volume, + ptrV, size_new ); +# endif + + if (!VG_(clo_instrument)) { + vg_assert(ptrV != NULL && size_new != 0); + VGP_POPCC; + return VG_(realloc) ( VG_AR_CLIENT, ptrV, size_new ); + } + + /* First try and find the block. */ + ml_no = VG_MALLOCLIST_NO(ptrV); + for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) { + if ((Addr)ptrV == sc->data) + break; + } + + if (sc == NULL) { + VG_(record_free_error) ( (Addr)ptrV ); + /* Perhaps we should keep going regardless. 
*/ + VGP_POPCC; + return NULL; + } + + if (sc->allockind != Vg_AllocMalloc) { + /* can not realloc a range that was allocated with new or new [] */ + VG_(record_freemismatch_error) ( (Addr)ptrV ); + /* but keep going anyway */ + } + + if (sc->size == size_new) { + /* size unchanged */ + VGP_POPCC; + return ptrV; + } + if (sc->size > size_new) { + /* new size is smaller */ + VGM_(make_noaccess)( sc->data + size_new, + sc->size - size_new ); + sc->size = size_new; + VGP_POPCC; + return ptrV; + } else { + /* new size is bigger */ + sc_new = client_malloc_shadow ( 0, size_new, Vg_AllocMalloc ); + for (i = 0; i < sc->size; i++) + ((UChar*)(sc_new->data))[i] = ((UChar*)(sc->data))[i]; + VGM_(copy_address_range_perms) ( + sc->data, sc_new->data, sc->size ); + remove_from_malloclist ( VG_MALLOCLIST_NO(sc->data), sc ); + VGM_(make_noaccess) ( sc->data - VG_AR_CLIENT_REDZONE_SZB, + sc->size + 2*VG_AR_CLIENT_REDZONE_SZB ); + VGM_(make_noaccess) ( (Addr)sc, sizeof(ShadowChunk) ); + add_to_freed_queue ( sc ); + VGP_POPCC; + return (void*)sc_new->data; + } +} + + +void VG_(clientmalloc_done) ( void ) +{ + UInt nblocks, nbytes, ml_no; + ShadowChunk* sc; + + client_malloc_init(); + + nblocks = nbytes = 0; + + for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) { + for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) { + nblocks ++; + nbytes += sc->size; + } + } + + if (VG_(clo_verbosity) == 0) + return; + + VG_(message)(Vg_UserMsg, + "malloc/free: in use at exit: %d bytes in %d blocks.", + nbytes, nblocks); + VG_(message)(Vg_UserMsg, + "malloc/free: %d allocs, %d frees, %d bytes allocated.", + vg_cmalloc_n_mallocs, + vg_cmalloc_n_frees, vg_cmalloc_bs_mallocd); + if (!VG_(clo_leak_check)) + VG_(message)(Vg_UserMsg, + "For a detailed leak analysis, rerun with: --leak-check=yes"); + if (0) + VG_(message)(Vg_DebugMsg, + "free search: %d tries, %d frees", + vg_mlist_tries, + vg_mlist_frees ); + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg, ""); +} + + +/* Describe an 
address as best you can, for error messages, + putting the result in ai. */ + +void VG_(describe_addr) ( Addr a, AddrInfo* ai ) +{ + ShadowChunk* sc; + UInt ml_no; + Bool ok; + + /* Perhaps it's a user-def'd block ? */ + ok = VG_(client_perm_maybe_describe)( a, ai ); + if (ok) + return; + /* Perhaps it's on the stack? */ + if (VG_(is_plausible_stack_addr)(a) + && a >= (Addr)VG_(baseBlock)[VGOFF_(m_esp)]) { + ai->akind = Stack; + return; + } + /* Search for a freed block which might bracket it. */ + for (sc = vg_freed_list_start; sc != NULL; sc = sc->next) { + if (sc->data - VG_AR_CLIENT_REDZONE_SZB <= a + && a < sc->data + sc->size + VG_AR_CLIENT_REDZONE_SZB) { + ai->akind = Freed; + ai->blksize = sc->size; + ai->rwoffset = (Int)(a) - (Int)(sc->data); + ai->lastchange = sc->where; + return; + } + } + /* Search for a mallocd block which might bracket it. */ + for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) { + for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) { + if (sc->data - VG_AR_CLIENT_REDZONE_SZB <= a + && a < sc->data + sc->size + VG_AR_CLIENT_REDZONE_SZB) { + ai->akind = Mallocd; + ai->blksize = sc->size; + ai->rwoffset = (Int)(a) - (Int)(sc->data); + ai->lastchange = sc->where; + return; + } + } + } + /* Clueless ... */ + ai->akind = Unknown; + return; +} + +/*------------------------------------------------------------*/ +/*--- Replace the C library versions with our own. Hairy. ---*/ +/*------------------------------------------------------------*/ + +/* Below are new versions of malloc, __builtin_new, free, + __builtin_delete, calloc and realloc. + + malloc, __builtin_new, free, __builtin_delete, calloc and realloc + can be entered either on the real CPU or the simulated one. If on + the real one, this is because the dynamic linker is running the + static initialisers for C++, before starting up Valgrind itself. + In this case it is safe to route calls through to + VG_(malloc)/vg_free, since that is self-initialising. 
+ + Once Valgrind is initialised, vg_running_on_simd_CPU becomes True. + The call needs to be transferred from the simulated CPU back to the + real one and routed to the vg_client_* functions. To do that, the + args are passed to vg_trap_here, which the simulator detects. The + bogus epilogue fn call is to guarantee that gcc doesn't tailcall + vg_trap_here, since that would cause the simulator's detection to + fail -- it only checks the targets of call transfers, not jumps. + And of course we have to be sure gcc won't inline either the + vg_trap_here or vg_bogus_epilogue. Ha ha ha. What a mess. +*/ + +/* Place afterwards to guarantee it won't get inlined ... */ +static UInt vg_trap_here_WRAPPER ( UInt arg1, UInt arg2, UInt what_to_do ); +static void vg_bogus_epilogue ( void ); + +/* ALL calls to malloc wind up here. */ +void* malloc ( UInt n ) +{ + if (VG_(clo_trace_malloc)) + VG_(printf)("malloc[simd=%d](%d)", + (UInt)VG_(running_on_simd_CPU), n ); + + if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; } + + vg_cmalloc_n_mallocs ++; + vg_cmalloc_bs_mallocd += n; + + if (VG_(running_on_simd_CPU)) { + UInt v = vg_trap_here_WRAPPER ( 0, n, 0x4000 ); + vg_bogus_epilogue(); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return (void*)v; + } else { + void* v = VG_(malloc)(VG_AR_CLIENT, n); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return (void*)v; + } +} + +void* __builtin_new ( UInt n ) +{ + if (VG_(clo_trace_malloc)) + VG_(printf)("__builtin_new[simd=%d](%d)", + (UInt)VG_(running_on_simd_CPU), n ); + + if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; } + + vg_cmalloc_n_mallocs++; + vg_cmalloc_bs_mallocd += n; + + if (VG_(running_on_simd_CPU)) { + UInt v = vg_trap_here_WRAPPER ( 0, n, 0x4001 ); + vg_bogus_epilogue(); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return (void*)v; + } else { + void* v = VG_(malloc)(VG_AR_CLIENT, n); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return v; + } +} + 
+void* __builtin_vec_new ( Int n ) +{ + if (VG_(clo_trace_malloc)) + VG_(printf)("__builtin_vec_new[simd=%d](%d)", + (UInt)VG_(running_on_simd_CPU), n ); + + if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; } + + vg_cmalloc_n_mallocs++; + vg_cmalloc_bs_mallocd += n; + + if (VG_(running_on_simd_CPU)) { + UInt v = vg_trap_here_WRAPPER ( 0, n, 0x4002 ); + vg_bogus_epilogue(); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return (void*)v; + } else { + void* v = VG_(malloc)(VG_AR_CLIENT, n); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return v; + } +} + +void free ( void* p ) +{ + if (VG_(clo_trace_malloc)) + VG_(printf)("free[simd=%d](%p)\n", + (UInt)VG_(running_on_simd_CPU), p ); + vg_cmalloc_n_frees ++; + + if (p == NULL) + return; + if (VG_(running_on_simd_CPU)) { + (void)vg_trap_here_WRAPPER ( 0, (UInt)p, 0x5000 ); + vg_bogus_epilogue(); + } else { + VG_(free)(VG_AR_CLIENT, p); + } +} + +void __builtin_delete ( void* p ) +{ + if (VG_(clo_trace_malloc)) + VG_(printf)("__builtin_delete[simd=%d](%p)\n", + (UInt)VG_(running_on_simd_CPU), p ); + vg_cmalloc_n_frees ++; + + if (p == NULL) + return; + if (VG_(running_on_simd_CPU)) { + (void)vg_trap_here_WRAPPER ( 0, (UInt)p, 0x5001 ); + vg_bogus_epilogue(); + } else { + VG_(free)(VG_AR_CLIENT, p); + } +} + +void __builtin_vec_delete ( void* p ) +{ + if (VG_(clo_trace_malloc)) + VG_(printf)("__builtin_vec_delete[simd=%d](%p)\n", + (UInt)VG_(running_on_simd_CPU), p ); + vg_cmalloc_n_frees ++; + + if (p == NULL) + return; + if (VG_(running_on_simd_CPU)) { + (void)vg_trap_here_WRAPPER ( 0, (UInt)p, 0x5002 ); + vg_bogus_epilogue(); + } else { + VG_(free)(VG_AR_CLIENT, p); + } +} + +void* calloc ( UInt nmemb, UInt size ) +{ + if (VG_(clo_trace_malloc)) + VG_(printf)("calloc[simd=%d](%d,%d)", + (UInt)VG_(running_on_simd_CPU), nmemb, size ); + vg_cmalloc_n_mallocs ++; + vg_cmalloc_bs_mallocd += size * nmemb; + + if (VG_(running_on_simd_CPU)) { + UInt v = vg_trap_here_WRAPPER ( nmemb, size, 
6666 ); + vg_bogus_epilogue(); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return (void*)v; + } else { + void* v = VG_(calloc)(VG_AR_CLIENT, nmemb, size); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return v; + } +} + +void* realloc ( void* ptrV, UInt new_size ) +{ + if (VG_(clo_trace_malloc)) + VG_(printf)("realloc[simd=%d](%p,%d)", + (UInt)VG_(running_on_simd_CPU), ptrV, new_size ); + + if (VG_(clo_sloppy_malloc)) + { while ((new_size % 4) > 0) new_size++; } + + vg_cmalloc_n_frees ++; + vg_cmalloc_n_mallocs ++; + vg_cmalloc_bs_mallocd += new_size; + + if (ptrV == NULL) + return malloc(new_size); + if (new_size == 0) { + free(ptrV); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = 0\n" ); + return NULL; + } + if (VG_(running_on_simd_CPU)) { + UInt v = vg_trap_here_WRAPPER ( (UInt)ptrV, new_size, 7777 ); + vg_bogus_epilogue(); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return (void*)v; + } else { + void* v = VG_(realloc)(VG_AR_CLIENT, ptrV, new_size); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return v; + } +} + +void* memalign ( Int alignment, Int n ) +{ + if (VG_(clo_trace_malloc)) + VG_(printf)("memalign[simd=%d](al %d, size %d)", + (UInt)VG_(running_on_simd_CPU), alignment, n ); + + if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; } + + vg_cmalloc_n_mallocs ++; + vg_cmalloc_bs_mallocd += n; + + if (VG_(running_on_simd_CPU)) { + UInt v = vg_trap_here_WRAPPER ( alignment, n, 8888 ); + vg_bogus_epilogue(); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return (void*)v; + } else { + void* v = VG_(malloc_aligned)(VG_AR_CLIENT, alignment, n); + if (VG_(clo_trace_malloc)) + VG_(printf)(" = %p\n", v ); + return (void*)v; + } +} + +void* valloc ( Int size ) +{ + return memalign(VKI_BYTES_PER_PAGE, size); +} + + +/* Various compatibility wrapper functions, for glibc and libstdc++. 
*/ +void cfree ( void* p ) +{ + free ( p ); +} + +void* mallinfo ( void ) +{ + VG_(message)(Vg_UserMsg, + "Warning: incorrectly-handled call to mallinfo()"); + return NULL; +} + + + +int mallopt ( int cmd, int value ) +{ + /* In glibc-2.2.4, 1 denoted a successful return value for mallopt */ + return 1; +} + + +/* Bomb out if we get any of these. */ +void pvalloc ( void ) +{ VG_(panic)("call to pvalloc\n"); } + +void malloc_stats ( void ) +{ VG_(panic)("call to malloc_stats\n"); } +void malloc_usable_size ( void ) +{ VG_(panic)("call to malloc_usable_size\n"); } +void malloc_trim ( void ) +{ VG_(panic)("call to malloc_trim\n"); } +void malloc_get_state ( void ) +{ VG_(panic)("call to malloc_get_state\n"); } +void malloc_set_state ( void ) +{ VG_(panic)("call to malloc_set_state\n"); } + + +int __posix_memalign ( void **memptr, UInt alignment, UInt size ) +{ + void *mem; + + /* Test whether the SIZE argument is valid. It must be a power of + two multiple of sizeof (void *). */ + if (size % sizeof (void *) != 0 || (size & (size - 1)) != 0) + return 22 /*EINVAL*/; + + mem = memalign (alignment, size); + + if (mem != NULL) { + *memptr = mem; + return 0; + } + + return 12 /*ENOMEM*/; +} + + +/*------------------------------------------------------------*/ +/*--- Magic supporting hacks. ---*/ +/*------------------------------------------------------------*/ + +extern UInt VG_(trap_here) ( UInt arg1, UInt arg2, UInt what_to_do ); + +static +UInt vg_trap_here_WRAPPER ( UInt arg1, UInt arg2, UInt what_to_do ) +{ + /* The point of this idiocy is to make a plain, ordinary call to + vg_trap_here which vg_dispatch_when_CALL can spot. Left to + itself, with -fpic, gcc generates "call vg_trap_here@PLT" which + doesn't get spotted, for whatever reason. I guess I could check + _all_ control flow transfers, but that would be an undesirable + performance overhead. 
+ + If you compile without -fpic, gcc generates the obvious call + insn, so the wrappers below will work if they just call + vg_trap_here. But I don't want to rule out building with -fpic, + hence this hack. Sigh. + */ + UInt v; + +# define WHERE_TO VG_(trap_here) +# define STRINGIFY(xx) __STRING(xx) + + asm("# call to vg_trap_here\n" + "\t pushl %3\n" + "\t pushl %2\n" + "\t pushl %1\n" + "\t call " STRINGIFY(WHERE_TO) "\n" + "\t addl $12, %%esp\n" + "\t movl %%eax, %0\n" + : "=r" (v) + : "r" (arg1), "r" (arg2), "r" (what_to_do) + : "eax", "esp", "cc", "memory"); + return v; + +# undef WHERE_TO +# undef STRINGIFY +} + +/* Last, but not least ... */ +void vg_bogus_epilogue ( void ) +{ + /* Runs on simulated CPU only. */ +} + +UInt VG_(trap_here) ( UInt arg1, UInt arg2, UInt what_to_do ) +{ + /* Calls to this fn are detected in vg_dispatch.S and are handled + specially. So this fn should never be entered. */ + VG_(panic)("vg_trap_here called!"); + return 0; /*NOTREACHED*/ +} + + +/*--------------------------------------------------------------------*/ +/*--- end vg_clientmalloc.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_clientperms.c b/vg_clientperms.c new file mode 100644 index 000000000..5f19e4b9c --- /dev/null +++ b/vg_clientperms.c @@ -0,0 +1,364 @@ + +/*--------------------------------------------------------------------*/ +/*--- For when the client advises Valgrind about permissions. ---*/ +/*--- vg_clientperms.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. 
+ + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" +#include "vg_constants.h" + + +/*------------------------------------------------------------*/ +/*--- General client block management. ---*/ +/*------------------------------------------------------------*/ + +/* This is managed as an expanding array of client block descriptors. + Indices of live descriptors are issued to the client, so it can ask + to free them later. Therefore we cannot slide live entries down + over dead ones. Instead we must use free/inuse flags and scan for + an empty slot at allocation time. This in turn means allocation is + relatively expensive, so we hope this does not happen too often. +*/ + +typedef + enum { CG_NotInUse, CG_NoAccess, CG_Writable, CG_Readable } + CGenBlockKind; + +typedef + struct { + Addr start; + UInt size; + ExeContext* where; + CGenBlockKind kind; + } + CGenBlock; + +/* This subsystem is self-initialising. */ +static UInt vg_cgb_size = 0; +static UInt vg_cgb_used = 0; +static CGenBlock* vg_cgbs = NULL; + +/* Stats for this subsystem. */ +static UInt vg_cgb_used_MAX = 0; /* Max in use. 
*/ +static UInt vg_cgb_allocs = 0; /* Number of allocs. */ +static UInt vg_cgb_discards = 0; /* Number of discards. */ +static UInt vg_cgb_search = 0; /* Number of searches. */ + + +static +Int vg_alloc_client_block ( void ) +{ + Int i, sz_new; + CGenBlock* cgbs_new; + + vg_cgb_allocs++; + + for (i = 0; i < vg_cgb_used; i++) { + vg_cgb_search++; + if (vg_cgbs[i].kind == CG_NotInUse) + return i; + } + + /* Not found. Try to allocate one at the end. */ + if (vg_cgb_used < vg_cgb_size) { + vg_cgb_used++; + return vg_cgb_used-1; + } + + /* Ok, we have to allocate a new one. */ + vg_assert(vg_cgb_used == vg_cgb_size); + sz_new = (vg_cgbs == NULL) ? 10 : (2 * vg_cgb_size); + + cgbs_new = VG_(malloc)( VG_AR_PRIVATE, sz_new * sizeof(CGenBlock) ); + for (i = 0; i < vg_cgb_used; i++) + cgbs_new[i] = vg_cgbs[i]; + + if (vg_cgbs != NULL) + VG_(free)( VG_AR_PRIVATE, vg_cgbs ); + vg_cgbs = cgbs_new; + + vg_cgb_size = sz_new; + vg_cgb_used++; + if (vg_cgb_used > vg_cgb_used_MAX) + vg_cgb_used_MAX = vg_cgb_used; + return vg_cgb_used-1; +} + + +/*------------------------------------------------------------*/ +/*--- Stack block management. ---*/ +/*------------------------------------------------------------*/ + +/* This is managed as an expanding array of CStackBlocks. They are + packed up against the left-hand end of the array, with no holes. + They are kept sorted by the start field, with the [0] having the + highest value. This means it's pretty cheap to put new blocks at + the end, corresponding to stack pushes, since the additions put + blocks on in what is presumably fairly close to strictly descending + order. If this assumption doesn't hold the performance + consequences will be horrible. + + When the client's %ESP jumps back upwards as the result of a RET + insn, we shrink the array backwards from the end, in a + guaranteed-cheap linear scan. 
+*/ + +typedef + struct { + Addr start; + UInt size; + ExeContext* where; + } + CStackBlock; + +/* This subsystem is self-initialising. */ +static UInt vg_csb_size = 0; +static UInt vg_csb_used = 0; +static CStackBlock* vg_csbs = NULL; + +/* Stats for this subsystem. */ +static UInt vg_csb_used_MAX = 0; /* Max in use. */ +static UInt vg_csb_allocs = 0; /* Number of allocs. */ +static UInt vg_csb_discards = 0; /* Number of discards. */ +static UInt vg_csb_swaps = 0; /* Number of searches. */ + +static +void vg_add_client_stack_block ( Addr aa, UInt sz ) +{ + UInt i, sz_new; + CStackBlock* csbs_new; + vg_csb_allocs++; + + /* Ensure there is space for a new block. */ + + if (vg_csb_used >= vg_csb_size) { + + /* No; we have to expand the array. */ + vg_assert(vg_csb_used == vg_csb_size); + + sz_new = (vg_csbs == NULL) ? 10 : (2 * vg_csb_size); + + csbs_new = VG_(malloc)( VG_AR_PRIVATE, sz_new * sizeof(CStackBlock) ); + for (i = 0; i < vg_csb_used; i++) + csbs_new[i] = vg_csbs[i]; + + if (vg_csbs != NULL) + VG_(free)( VG_AR_PRIVATE, vg_csbs ); + vg_csbs = csbs_new; + + vg_csb_size = sz_new; + } + + /* Ok, we can use [vg_csb_used]. */ + vg_csbs[vg_csb_used].start = aa; + vg_csbs[vg_csb_used].size = sz; + vg_csbs[vg_csb_used].where = VG_(get_ExeContext) ( False ); + vg_csb_used++; + + if (vg_csb_used > vg_csb_used_MAX) + vg_csb_used_MAX = vg_csb_used; + + vg_assert(vg_csb_used <= vg_csb_size); + + /* VG_(printf)("acsb %p %d\n", aa, sz); */ + VGM_(make_noaccess) ( aa, sz ); + + /* And make sure that they are in descending order of address. */ + i = vg_csb_used; + while (i > 0 && vg_csbs[i-1].start < vg_csbs[i].start) { + CStackBlock tmp = vg_csbs[i-1]; + vg_csbs[i-1] = vg_csbs[i]; + vg_csbs[i] = tmp; + vg_csb_swaps++; + } + +# if 1 + for (i = 1; i < vg_csb_used; i++) + vg_assert(vg_csbs[i-1].start >= vg_csbs[i].start); +# endif +} + + +/*------------------------------------------------------------*/ +/*--- Externally visible functions. 
---*/ +/*------------------------------------------------------------*/ + +void VG_(show_client_block_stats) ( void ) +{ + VG_(message)(Vg_DebugMsg, + "general CBs: %d allocs, %d discards, %d maxinuse, %d search", + vg_cgb_allocs, vg_cgb_discards, vg_cgb_used_MAX, vg_cgb_search + ); + VG_(message)(Vg_DebugMsg, + " stack CBs: %d allocs, %d discards, %d maxinuse, %d swap", + vg_csb_allocs, vg_csb_discards, vg_csb_used_MAX, vg_csb_swaps + ); +} + + +Bool VG_(client_perm_maybe_describe)( Addr a, AddrInfo* ai ) +{ + Int i; + /* VG_(printf)("try to identify %d\n", a); */ + + /* First see if it's a stack block. We do two passes, one exact + and one with a bit of slop, so as to try and get the most + accurate fix. */ + for (i = 0; i < vg_csb_used; i++) { + if (vg_csbs[i].start <= a + && a < vg_csbs[i].start + vg_csbs[i].size) { + ai->akind = UserS; + ai->blksize = vg_csbs[i].size; + ai->rwoffset = (Int)(a) - (Int)(vg_csbs[i].start); + ai->lastchange = vg_csbs[i].where; + return True; + } + } + + /* No exact match on the stack. Re-do the stack scan with a bit of + slop. */ + for (i = 0; i < vg_csb_used; i++) { + if (vg_csbs[i].start - 8 <= a + && a < vg_csbs[i].start + vg_csbs[i].size + 8) { + ai->akind = UserS; + ai->blksize = vg_csbs[i].size; + ai->rwoffset = (Int)(a) - (Int)(vg_csbs[i].start); + ai->lastchange = vg_csbs[i].where; + return True; + } + } + + /* No match on the stack. Perhaps it's a general block ? 
*/ + for (i = 0; i < vg_cgb_used; i++) { + if (vg_cgbs[i].kind == CG_NotInUse) + continue; + if (vg_cgbs[i].start - VG_AR_CLIENT_REDZONE_SZB <= a + && a < vg_cgbs[i].start + vg_cgbs[i].size + VG_AR_CLIENT_REDZONE_SZB) { + ai->akind = UserG; + ai->blksize = vg_cgbs[i].size; + ai->rwoffset = (Int)(a) - (Int)(vg_cgbs[i].start); + ai->lastchange = vg_cgbs[i].where; + return True; + } + } + return False; +} + + +void VG_(delete_client_stack_blocks_following_ESP_change) ( void ) +{ + Addr newESP; + if (!VG_(clo_client_perms)) return; + newESP = VG_(baseBlock)[VGOFF_(m_esp)]; + while (vg_csb_used > 0 && + vg_csbs[vg_csb_used-1].start + vg_csbs[vg_csb_used-1].size <= newESP) { + vg_csb_used--; + vg_csb_discards++; + if (VG_(clo_verbosity) > 2) + VG_(printf)("discarding stack block %p for %d\n", + vg_csbs[vg_csb_used].start, vg_csbs[vg_csb_used].size); + } +} + + +UInt VG_(handle_client_request) ( UInt code, Addr aa, UInt nn ) +{ + Int i; + Bool ok; + Addr bad_addr; + + if (VG_(clo_verbosity) > 2) + VG_(printf)("client request: code %d, addr %p, len %d\n", + code, aa, nn ); + + vg_assert(VG_(clo_client_perms)); + vg_assert(VG_(clo_instrument)); + + switch (code) { + case 1001: /* make no access */ + i = vg_alloc_client_block(); + /* VG_(printf)("allocated %d %p\n", i, vg_cgbs); */ + vg_cgbs[i].kind = CG_NoAccess; + vg_cgbs[i].start = aa; + vg_cgbs[i].size = nn; + vg_cgbs[i].where = VG_(get_ExeContext) ( False ); + VGM_(make_noaccess) ( aa, nn ); + return i; + case 1002: /* make writable */ + i = vg_alloc_client_block(); + vg_cgbs[i].kind = CG_Writable; + vg_cgbs[i].start = aa; + vg_cgbs[i].size = nn; + vg_cgbs[i].where = VG_(get_ExeContext) ( False ); + VGM_(make_writable) ( aa, nn ); + return i; + case 1003: /* make readable */ + i = vg_alloc_client_block(); + vg_cgbs[i].kind = CG_Readable; + vg_cgbs[i].start = aa; + vg_cgbs[i].size = nn; + vg_cgbs[i].where = VG_(get_ExeContext) ( False ); + VGM_(make_readable) ( aa, nn ); + return i; + + case 2002: /* check writable */ + 
ok = VGM_(check_writable) ( aa, nn, &bad_addr ); + if (!ok) + VG_(record_user_err) ( bad_addr, True ); + return ok ? (UInt)NULL : bad_addr; + case 2003: /* check readable */ + ok = VGM_(check_readable) ( aa, nn, &bad_addr ); + if (!ok) + VG_(record_user_err) ( bad_addr, False ); + return ok ? (UInt)NULL : bad_addr; + + case 2004: /* discard */ + if (vg_cgbs == NULL + || nn >= vg_cgb_used || vg_cgbs[nn].kind == CG_NotInUse) + return 1; + vg_assert(nn >= 0 && nn < vg_cgb_used); + vg_cgbs[nn].kind = CG_NotInUse; + vg_cgb_discards++; + return 0; + + case 3001: /* make noaccess stack block */ + vg_add_client_stack_block ( aa, nn ); + return 0; + + default: + VG_(message)(Vg_UserMsg, + "Warning: unknown client request code %d", code); + return 1; + } +} + + +/*--------------------------------------------------------------------*/ +/*--- end vg_clientperms.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_constants.h b/vg_constants.h new file mode 100644 index 000000000..ef48ef009 --- /dev/null +++ b/vg_constants.h @@ -0,0 +1,105 @@ + +/*--------------------------------------------------------------------*/ +/*--- A header file containing constants (for assembly code). ---*/ +/*--- vg_constants.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#ifndef __VG_CONSTANTS_H +#define __VG_CONSTANTS_H + + +/* This file is included in all Valgrind source files, including + assembly ones. */ + +/* All symbols externally visible from valgrind.so are prefixed + as specified here. The prefix can be changed, so as to avoid + namespace conflict problems. +*/ +#define VGAPPEND(str1,str2) str1##str2 + +/* These macros should add different prefixes so the same base + name can safely be used across different macros. */ +#define VG_(str) VGAPPEND(vgPlain_,str) +#define VGM_(str) VGAPPEND(vgMem_,str) +#define VGP_(str) VGAPPEND(vgProf_,str) +#define VGOFF_(str) VGAPPEND(vgOff_,str) + +/* Reasons why the inner simulation loop might stop (i.e. why has + vg_dispatch_ctr reached zero? */ +#define VG_Y_SIGCHECK 0 /* signal check due */ +#define VG_Y_SMC 1 /* write to code detected */ +#define VG_Y_EXIT 2 /* natural or debug end to simulation */ +#define VG_Y_TRANSLATE 3 /* translation of vg_m_eip needed */ + +/* Check for pending signals every this-many jumps. Since this + happens in the region of once per millisecond, we also take the + opportunity do do a bit of quick sanity checking at the same time. + Look at the call sites of VG_(deliver_signals). */ +#define VG_SIGCHECK_INTERVAL 1000 + +/* A ,agic values that %ebp might be set to when returning to the + dispatcher. The only other legitimate value is to point to the + start of VG_(baseBlock). 
*/ +#define VG_EBP_DISPATCH_CHECKED 17 + +/* Debugging hack for assembly code ... sigh. */ +#if 0 +#define OYNK(nnn) pushal; pushl $nnn; call VG_(oynk) ; addl $4,%esp; popal +#else +#define OYNK(nnn) +#endif + +#if 1 +#define OYNNK(nnn) pushal; pushl $nnn; call VG_(oynk) ; addl $4,%esp; popal +#else +#define OYNNK(nnn) +#endif + +/* Constants for the fast translation lookup cache. */ +#define VG_TT_FAST_BITS 15 +#define VG_TT_FAST_SIZE (1 << VG_TT_FAST_BITS) +#define VG_TT_FAST_MASK ((VG_TT_FAST_SIZE) - 1) + +/* Constants for the fast original-code-write check cache. */ + +/* Usually you want this to be zero. */ +#define VG_SMC_FASTCHECK_IN_C 0 + +#define VG_SMC_CACHE_BITS 19 +#define VG_SMC_CACHE_SIZE (1 << VG_SMC_CACHE_BITS) +#define VG_SMC_CACHE_MASK ((VG_SMC_CACHE_SIZE) - 1) + +#define VG_SMC_CACHE_SHIFT 6 + + +#endif /* ndef __VG_INCLUDE_H */ + +/*--------------------------------------------------------------------*/ +/*--- end vg_constants.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_demangle.c b/vg_demangle.c new file mode 100644 index 000000000..cb141f60a --- /dev/null +++ b/vg_demangle.c @@ -0,0 +1,70 @@ + +/*--------------------------------------------------------------------*/ +/*--- Demangling of C++ mangled names. ---*/ +/*--- vg_demangle.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" +#include "demangle.h" + +#define ADD_TO_RESULT(zzstr,zzn) \ +{ \ + Char* zz = (zzstr); \ + Int nn = (zzn); \ + Int ii; \ + for (ii = 0; ii < nn; ii++) { \ + result[n_result] = zz[ii]; \ + if (n_result < result_size-1) n_result++; \ + result[n_result] = 0; \ + } \ +} + +void VG_(demangle) ( Char* orig, Char* result, Int result_size ) +{ + Int n_result = 0; + Char* demangled = VG_(cplus_demangle) ( orig, DMGL_ANSI | DMGL_PARAMS ); + if (demangled) { + ADD_TO_RESULT(demangled, VG_(strlen)(demangled)); + VG_(free) (VG_AR_DEMANGLE, demangled); + } else { + ADD_TO_RESULT(orig, VG_(strlen)(orig)); + } + + /* Check that the demangler isn't leaking. */ + /* 15 Feb 02: if this assertion fails, this is not a disaster. + Comment it out, and let me know. (jseward@acm.org). */ + vg_assert(VG_(is_empty_arena)(VG_AR_DEMANGLE)); + + /* VG_(show_all_arena_stats)(); */ +} + + +/*--------------------------------------------------------------------*/ +/*--- end vg_demangle.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_dispatch.S b/vg_dispatch.S new file mode 100644 index 000000000..52231946e --- /dev/null +++ b/vg_dispatch.S @@ -0,0 +1,379 @@ + +##--------------------------------------------------------------------## +##--- The core dispatch loop, for jumping to a code address. 
---## +##--- vg_dispatch.S ---## +##--------------------------------------------------------------------## + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_constants.h" + + +/*------------------------------------------------------------*/ +/*--- The normal-case dispatch machinery. ---*/ +/*------------------------------------------------------------*/ + +/* To transfer to an (original) code address, load it into %eax and + jump to vg_dispatch. This fragment of code tries to find the + address of the corresponding translation by searching the translation + table. If it fails, a new translation is made, added to the + translation table, and then jumped to. Almost all the hard + work is done by C routines; this code simply handles the + common case fast -- when the translation address is found in + the translation cache. + + At entry, %eax is the only live (real-machine) register; the + entire simulated state is tidily saved in vg_m_state. +*/ + + +/* The C world needs a way to get started simulating. 
So we provide + a function void vg_run_innerloop ( void ), which starts running + from vg_m_eip, and exits when the counter reaches zero. This loop + can also exit if vg_oursignalhandler() catches a non-resumable + signal, for example SIGSEGV. It then longjmp()s back past here. +*/ + +.globl VG_(run_innerloop) +VG_(run_innerloop): + #OYNK(1000) + # ----- entry point to VG_(run_innerloop) ----- + pushal + # Set up the baseBlock pointer + movl $VG_(baseBlock), %ebp + + # fetch m_eip into %eax + movl VGOFF_(m_eip), %esi + movl (%ebp, %esi, 4), %eax + + # fall thru to vg_dispatch + +.globl VG_(dispatch) +VG_(dispatch): + # %eax holds destination (original) address + # To signal any kind of interruption, set vg_dispatch_ctr + # to 1, and vg_interrupt_reason to the appropriate value + # before jumping here. + + # %ebp indicates further details of the control transfer + # requested to the address in %eax. The idea is that we + # want to check all jump targets to see if they are either + # VG_(signalreturn_bogusRA) or VG_(trap_here), both of which + # require special treatment. However, testing all branch + # targets is expensive, and anyway in most cases JITter knows + # that a jump cannot be to either of these two. We therefore + # adopt the following trick. + # + # If ebp == & VG_(baseBlock), which is what it started out as, + # this is a jump for which the JITter knows no check need be + # made. + # + # If it is ebp == VG_EBP_DISPATCH_CHECKED, we had better make + # the check. + # + # If %ebp has any other value, we panic. + # + # What the JITter assumes is that VG_(signalreturn_bogusRA) can + # only be arrived at from an x86 ret insn, and dually that + # VG_(trap_here) can only be arrived at from an x86 call insn. + # The net effect is that all call and return targets are checked + # but straightforward jumps are not. + # + # Thinks ... is this safe if the client happens to tailcall + # VG_(trap_here) ? 
I dont think that can happen -- if it did + # it would be a problem. + # + cmpl $VG_(baseBlock), %ebp + jnz dispatch_checked_maybe + +dispatch_unchecked: + # save the jump address at VG_(baseBlock)[VGOFF_(m_eip)], + # so that if this block takes a fault, we later know where we were. + movl VGOFF_(m_eip), %esi + movl %eax, (%ebp, %esi, 4) + + # do we require attention? + # this check has to be after the call/ret transfer checks, because + # we have to ensure that any control transfer following a syscall + # return is an ordinary transfer. By the time we get here, we have + # established that the next transfer, which might get delayed till + # after a syscall return, is an ordinary one. + # All a bit subtle ... + #OYNK(1001) + decl VG_(dispatch_ctr) + jz counter_is_zero + + #OYNK(1002) + # try a fast lookup in the translation cache + movl %eax, %ebx + andl $VG_TT_FAST_MASK, %ebx + # ebx = tt_fast index + movl VG_(tt_fast)(,%ebx,4), %ebx + # ebx points at a tt entry + # now compare target with the tte.orig_addr field (+0) + cmpl %eax, (%ebx) + jnz full_search + # Found a match. Set the tte.mru_epoch field (+8) + # and call the tte.trans_addr field (+4) + movl VG_(current_epoch), %ecx + movl %ecx, 8(%ebx) + call *4(%ebx) + jmp VG_(dispatch) + +full_search: + #no luck? try the full table search + pushl %eax + call VG_(search_transtab) + addl $4, %esp + + # %eax has trans addr or zero + cmpl $0, %eax + jz need_translation + # full table search also zeroes the tte.last_use field, + # so we dont have to do so here. + call *%eax + jmp VG_(dispatch) + +need_translation: + OYNK(1003) + movl $VG_Y_TRANSLATE, VG_(interrupt_reason) +counter_is_zero: + OYNK(1004) + popal + # ----- (the only) exit point from VG_(run_innerloop) ----- + # ----- unless of course vg_oursignalhandler longjmp()s + # ----- back through it, due to an unmanagable signal + ret + + +/* The normal way to get back to the translation loop is to put + the address of the next (original) address and return. 
+ However, simulation of a RET insn requires a check as to whether + the next address is vg_signalreturn_bogusRA. If so, a signal + handler is returning, so we need to invoke our own mechanism to + deal with that, by calling vg_signal_returns(). This restores + the simulated machine state from the VgSigContext structure on + the stack, including the (simulated, of course) %eip saved when + the signal was delivered. We then arrange to jump to the + restored %eip. +*/ +dispatch_checked_maybe: + # Possibly a checked dispatch. Sanity check ... + cmpl $VG_EBP_DISPATCH_CHECKED, %ebp + jz dispatch_checked + # ebp has an invalid value ... crap out. + pushl $panic_msg_ebp + call VG_(panic) + # (never returns) + +dispatch_checked: + OYNK(2000) + # first off, restore %ebp -- since it is currently wrong + movl $VG_(baseBlock), %ebp + + # see if we need to mess with stack blocks + pushl %ebp + pushl %eax + call VG_(delete_client_stack_blocks_following_ESP_change) + popl %eax + popl %ebp + + # is this a signal return? + cmpl $VG_(signalreturn_bogusRA), %eax + jz dispatch_to_signalreturn_bogusRA + # should we intercept this call? + cmpl $VG_(trap_here), %eax + jz dispatch_to_trap_here + # ok, its not interesting. Handle the normal way. + jmp dispatch_unchecked + +dispatch_to_signalreturn_bogusRA: + OYNK(2001) + pushal + call VG_(signal_returns) + popal + # %EIP will now point to the insn which should have followed + # the signal delivery. Jump to it. Since we no longer have any + # hint from the JITter about whether or not it is checkable, + # go via the conservative route. + movl VGOFF_(m_eip), %esi + movl (%ebp, %esi, 4), %eax + jmp dispatch_checked + + +/* Similarly, check CALL targets to see if it is the ultra-magical + vg_trap_here(), and, if so, act accordingly. See vg_clientmalloc.c. + Be careful not to get the real and simulated CPUs, + stacks and regs mixed up ... 
+*/ +dispatch_to_trap_here: + OYNK(111) + /* Considering the params to vg_trap_here(), we should have: + 12(%ESP) is what_to_do + 8(%ESP) is arg2 + 4(%ESP) is arg1 + 0(%ESP) is return address + */ + movl VGOFF_(m_esp), %esi + movl (%ebp, %esi, 4), %ebx + # %ebx now holds simulated %ESP + cmpl $0x4000, 12(%ebx) + jz handle_malloc + cmpl $0x4001, 12(%ebx) + jz handle_malloc + cmpl $0x4002, 12(%ebx) + jz handle_malloc + cmpl $0x5000, 12(%ebx) + jz handle_free + cmpl $0x5001, 12(%ebx) + jz handle_free + cmpl $0x5002, 12(%ebx) + jz handle_free + cmpl $6666, 12(%ebx) + jz handle_calloc + cmpl $7777, 12(%ebx) + jz handle_realloc + cmpl $8888, 12(%ebx) + jz handle_memalign + push $panic_msg_trap + call VG_(panic) + # vg_panic never returns + +handle_malloc: + # %ESP is in %ebx + pushl 12(%ebx) + pushl 8(%ebx) + call VG_(client_malloc) + addl $8, %esp + # returned value is in %eax + jmp save_eax_and_simulate_RET + +handle_free: + # %ESP is in %ebx + pushl 12(%ebx) + pushl 8(%ebx) + call VG_(client_free) + addl $8, %esp + jmp simulate_RET + +handle_calloc: + # %ESP is in %ebx + pushl 8(%ebx) + pushl 4(%ebx) + call VG_(client_calloc) + addl $8, %esp + # returned value is in %eax + jmp save_eax_and_simulate_RET + +handle_realloc: + # %ESP is in %ebx + pushl 8(%ebx) + pushl 4(%ebx) + call VG_(client_realloc) + addl $8, %esp + # returned value is in %eax + jmp save_eax_and_simulate_RET + +handle_memalign: + # %ESP is in %ebx + pushl 8(%ebx) + pushl 4(%ebx) + call VG_(client_memalign) + addl $8, %esp + # returned value is in %eax + jmp save_eax_and_simulate_RET + +save_eax_and_simulate_RET: + movl VGOFF_(m_eax), %esi + movl %eax, (%ebp, %esi, 4) # %eax -> %EAX + # set %EAX bits to VALID + movl VGOFF_(sh_eax), %esi + movl $0x0 /* All 32 bits VALID */, (%ebp, %esi, 4) + # fall thru ... 
+simulate_RET: + # standard return + movl VGOFF_(m_esp), %esi + movl (%ebp, %esi, 4), %ebx # %ESP -> %ebx + movl 0(%ebx), %eax # RA -> %eax + addl $4, %ebx # %ESP += 4 + movl %ebx, (%ebp, %esi, 4) # %ebx -> %ESP + jmp dispatch_checked # jump to %eax + +.data +panic_msg_trap: +.ascii "dispatch_to_trap_here: unknown what_to_do" +.byte 0 +panic_msg_ebp: +.ascii "vg_dispatch: %ebp has invalid value!" +.byte 0 +.text + + +/*------------------------------------------------------------*/ +/*--- A helper for delivering signals when the client is ---*/ +/*--- (presumably) blocked in a system call. ---*/ +/*------------------------------------------------------------*/ + +/* Returns, in %eax, the next orig_addr to run. + The caller needs to decide whether the returned orig_addr + requires special handling. + + extern Addr VG_(run_singleton_translation) ( Addr trans_addr ) +*/ + +/* should we take care to save the FPU state here? */ + +.globl VG_(run_singleton_translation) +VG_(run_singleton_translation): + movl 4(%esp), %eax # eax = trans_addr + pushl %ebx + pushl %ecx + pushl %edx + pushl %esi + pushl %edi + pushl %ebp + + # set up ebp correctly for translations + movl $VG_(baseBlock), %ebp + + # run the translation + call *%eax + + # next orig_addr is correctly in %eax already + + popl %ebp + popl %edi + popl %esi + popl %edx + popl %ecx + popl %ebx + + ret + +##--------------------------------------------------------------------## +##--- end vg_dispatch.S ---## +##--------------------------------------------------------------------## diff --git a/vg_errcontext.c b/vg_errcontext.c new file mode 100644 index 000000000..42e09b53a --- /dev/null +++ b/vg_errcontext.c @@ -0,0 +1,1070 @@ + +/*--------------------------------------------------------------------*/ +/*--- Management of error messages. 
vg_errcontext.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" +#include "vg_constants.h" + + +/*------------------------------------------------------------*/ +/*--- Defns ---*/ +/*------------------------------------------------------------*/ + +/* Suppression is a type describing an error which we want to + suppress, ie, not show the user, usually because it is caused by a + problem in a library which we can't fix, replace or work around. + Suppressions are read from a file at startup time, specified by + vg_clo_suppressions, and placed in the vg_suppressions list. This + gives flexibility so that new suppressions can be added to the file + as and when needed. 
+*/ +typedef + enum { + /* Bad syscall params */ + Param, + /* Use of invalid values of given size */ + Value0, Value1, Value2, Value4, Value8, + /* Invalid read/write attempt at given size */ + Addr1, Addr2, Addr4, Addr8, + /* Invalid or mismatching free */ + FreeS + } + SuppressionKind; + + +/* For each caller specified for a suppression, record the nature of + the caller name. */ +typedef + enum { + /* Name is of an shared object file. */ + ObjName, + /* Name is of a function. */ + FunName + } + SuppressionLocTy; + + +/* A complete suppression record. */ +typedef + struct _Suppression { + struct _Suppression* next; + /* The number of times this error has been suppressed. */ + Int count; + /* The name by which the suppression is referred to. */ + Char* sname; + /* What kind of suppression. */ + SuppressionKind skind; + /* Name of syscall param if skind==Param */ + Char* param; + /* Name of fn where err occurs, and immediate caller (mandatory). */ + SuppressionLocTy caller0_ty; + Char* caller0; + SuppressionLocTy caller1_ty; + Char* caller1; + /* Optional extra callers. */ + SuppressionLocTy caller2_ty; + Char* caller2; + SuppressionLocTy caller3_ty; + Char* caller3; + } + Suppression; + + +/* ErrContext is a type for recording just enough info to generate an + error report for an illegal memory access. The idea is that + (typically) the same few points in the program generate thousands + of illegal accesses, and we don't want to spew out a fresh error + message for each one. Instead, we use these structures to common + up duplicates. +*/ + +/* What kind of error it is. */ +typedef + enum { ValueErr, AddrErr, + ParamErr, UserErr, /* behaves like an anonymous ParamErr */ + FreeErr, FreeMismatchErr } + ErrKind; + +/* What kind of memory access is involved in the error? */ +typedef + enum { ReadAxs, WriteAxs, ExecAxs } + AxsKind; + +/* Top-level struct for recording errors. 
*/ +typedef + struct _ErrContext { + /* ALL */ + struct _ErrContext* next; + /* ALL */ + /* NULL if unsuppressed; or ptr to suppression record. */ + Suppression* supp; + /* ALL */ + Int count; + /* ALL */ + ErrKind ekind; + /* ALL */ + ExeContext* where; + /* Addr */ + AxsKind axskind; + /* Addr, Value */ + Int size; + /* Addr, Free, Param, User */ + Addr addr; + /* Addr, Free, Param, User */ + AddrInfo addrinfo; + /* Param */ + Char* syscall_param; + /* Param, User */ + Bool isWriteableLack; + } + ErrContext; + +/* The list of error contexts found, both suppressed and unsuppressed. + Initially empty, and grows as errors are detected. */ +static ErrContext* vg_err_contexts = NULL; + +/* The list of suppression directives, as read from the specified + suppressions file. */ +static Suppression* vg_suppressions = NULL; + +/* Running count of unsuppressed errors detected. */ +static UInt vg_n_errs_found = 0; + +/* Running count of suppressed errors detected. */ +static UInt vg_n_errs_suppressed = 0; + +/* forwards ... */ +static Suppression* is_suppressible_error ( ErrContext* ec ); + + +/*------------------------------------------------------------*/ +/*--- Helper fns ---*/ +/*------------------------------------------------------------*/ + + +static void clear_AddrInfo ( AddrInfo* ai ) +{ + ai->akind = Unknown; + ai->blksize = 0; + ai->rwoffset = 0; + ai->lastchange = NULL; +} + +static void clear_ErrContext ( ErrContext* ec ) +{ + ec->next = NULL; + ec->supp = NULL; + ec->count = 0; + ec->ekind = ValueErr; + ec->where = NULL; + ec->axskind = ReadAxs; + ec->size = 0; + ec->addr = 0; + clear_AddrInfo ( &ec->addrinfo ); + ec->syscall_param = NULL; + ec->isWriteableLack = False; +} + + +static __inline__ +Bool vg_eq_ExeContext ( Bool top_2_only, + ExeContext* e1, ExeContext* e2 ) +{ + /* Note that frames after the 4th are always ignored. 
*/ + if (top_2_only) { + return VG_(eq_ExeContext_top2(e1, e2)); + } else { + return VG_(eq_ExeContext_top4(e1, e2)); + } +} + + +static Bool eq_AddrInfo ( Bool cheap_addr_cmp, + AddrInfo* ai1, AddrInfo* ai2 ) +{ + if (ai1->akind != ai2->akind) + return False; + if (ai1->akind == Freed || ai1->akind == Mallocd) { + if (ai1->blksize != ai2->blksize) + return False; + if (!vg_eq_ExeContext(cheap_addr_cmp, + ai1->lastchange, ai2->lastchange)) + return False; + } + return True; +} + +/* Compare error contexts, to detect duplicates. Note that if they + are otherwise the same, the faulting addrs and associated rwoffsets + are allowed to be different. */ + +static Bool eq_ErrContext ( Bool cheap_addr_cmp, + ErrContext* e1, ErrContext* e2 ) +{ + if (e1->ekind != e2->ekind) + return False; + if (!vg_eq_ExeContext(cheap_addr_cmp, e1->where, e2->where)) + return False; + + switch (e1->ekind) { + case UserErr: + case ParamErr: + if (e1->isWriteableLack != e2->isWriteableLack) return False; + if (e1->ekind == ParamErr + && 0 != VG_(strcmp)(e1->syscall_param, e2->syscall_param)) + return False; + return True; + case FreeErr: + case FreeMismatchErr: + if (e1->addr != e2->addr) return False; + if (!eq_AddrInfo(cheap_addr_cmp, &e1->addrinfo, &e2->addrinfo)) + return False; + return True; + case AddrErr: + if (e1->axskind != e2->axskind) return False; + if (e1->size != e2->size) return False; + if (!eq_AddrInfo(cheap_addr_cmp, &e1->addrinfo, &e2->addrinfo)) + return False; + return True; + case ValueErr: + if (e1->size != e2->size) return False; + return True; + default: + VG_(panic)("eq_ErrContext"); + } +} + +static void pp_AddrInfo ( Addr a, AddrInfo* ai ) +{ + switch (ai->akind) { + case Stack: + VG_(message)(Vg_UserMsg, " Address 0x%x is on the stack", a); + break; + case Unknown: + VG_(message)(Vg_UserMsg, + " Address 0x%x is not stack'd, malloc'd or free'd", a); + break; + case Freed: case Mallocd: case UserG: case UserS: { + UInt delta; + UChar* relative; + if (ai->rwoffset 
< 0) { + delta = (UInt)(- ai->rwoffset); + relative = "before"; + } else if (ai->rwoffset >= ai->blksize) { + delta = ai->rwoffset - ai->blksize; + relative = "after"; + } else { + delta = ai->rwoffset; + relative = "inside"; + } + if (ai->akind == UserS) { + VG_(message)(Vg_UserMsg, + " Address 0x%x is %d bytes %s a %d-byte stack red-zone created", + a, delta, relative, + ai->blksize ); + } else { + VG_(message)(Vg_UserMsg, + " Address 0x%x is %d bytes %s a block of size %d %s", + a, delta, relative, + ai->blksize, + ai->akind==Mallocd ? "alloc'd" + : ai->akind==Freed ? "free'd" + : "client-defined"); + } + VG_(pp_ExeContext)(ai->lastchange); + break; + } + default: + VG_(panic)("pp_AddrInfo"); + } +} + +static void pp_ErrContext ( ErrContext* ec, Bool printCount ) +{ + if (printCount) + VG_(message)(Vg_UserMsg, "Observed %d times:", ec->count ); + switch (ec->ekind) { + case ValueErr: + if (ec->size == 0) { + VG_(message)(Vg_UserMsg, + "Use of uninitialised CPU condition code"); + } else { + VG_(message)(Vg_UserMsg, + "Use of uninitialised value of size %d", + ec->size); + } + VG_(pp_ExeContext)(ec->where); + break; + case AddrErr: + switch (ec->axskind) { + case ReadAxs: + VG_(message)(Vg_UserMsg, "Invalid read of size %d", + ec->size ); + break; + case WriteAxs: + VG_(message)(Vg_UserMsg, "Invalid write of size %d", + ec->size ); + break; + case ExecAxs: + VG_(message)(Vg_UserMsg, "Jump to the invalid address " + "stated on the next line"); + break; + default: + VG_(panic)("pp_ErrContext(axskind)"); + } + VG_(pp_ExeContext)(ec->where); + pp_AddrInfo(ec->addr, &ec->addrinfo); + break; + case FreeErr: + VG_(message)(Vg_UserMsg,"Invalid free() / delete / delete[]"); + /* fall through */ + case FreeMismatchErr: + if (ec->ekind == FreeMismatchErr) + VG_(message)(Vg_UserMsg, + "Mismatched free() / delete / delete []"); + VG_(pp_ExeContext)(ec->where); + pp_AddrInfo(ec->addr, &ec->addrinfo); + break; + case ParamErr: + if (ec->isWriteableLack) { + 
VG_(message)(Vg_UserMsg, + "Syscall param %s contains unaddressable byte(s)", + ec->syscall_param ); + } else { + VG_(message)(Vg_UserMsg, + "Syscall param %s contains uninitialised or " + "unaddressable byte(s)", + ec->syscall_param); + } + VG_(pp_ExeContext)(ec->where); + pp_AddrInfo(ec->addr, &ec->addrinfo); + break; + case UserErr: + if (ec->isWriteableLack) { + VG_(message)(Vg_UserMsg, + "Unaddressable byte(s) found during client check request"); + } else { + VG_(message)(Vg_UserMsg, + "Uninitialised or " + "unaddressable byte(s) found during client check request"); + } + VG_(pp_ExeContext)(ec->where); + pp_AddrInfo(ec->addr, &ec->addrinfo); + break; + default: + VG_(panic)("pp_ErrContext"); + } +} + + +/* Figure out if we want to attach for GDB for this error, possibly + by asking the user. */ +static +Bool vg_is_GDB_attach_requested ( void ) +{ + Char ch, ch2; + Int res; + + if (VG_(clo_GDB_attach) == False) + return False; + + VG_(message)(Vg_UserMsg, ""); + + again: + VG_(printf)( + "==%d== " + "---- Attach to GDB ? --- [Return/N/n/Y/y/C/c] ---- ", + VG_(getpid)() + ); + + res = VG_(read)(0 /*stdin*/, &ch, 1); + if (res != 1) goto ioerror; + /* res == 1 */ + if (ch == '\n') return False; + if (ch != 'N' && ch != 'n' && ch != 'Y' && ch != 'y' + && ch != 'C' && ch != 'c') goto again; + + res = VG_(read)(0 /*stdin*/, &ch2, 1); + if (res != 1) goto ioerror; + if (ch2 != '\n') goto again; + + /* No, don't want to attach. */ + if (ch == 'n' || ch == 'N') return False; + /* Yes, want to attach. */ + if (ch == 'y' || ch == 'Y') return True; + /* No, don't want to attach, and don't ask again either. */ + vg_assert(ch == 'c' || ch == 'C'); + + ioerror: + VG_(clo_GDB_attach) = False; + return False; +} + + +/* Top-level entry point to the error management subsystem. All + detected errors are notified here; this routine decides if/when the + user should see the error. 
*/ +static void VG_(maybe_add_context) ( ErrContext* ec ) +{ + ErrContext* p; + ErrContext* p_prev; + Bool cheap_addr_cmp = False; + static Bool is_first_shown_context = True; + static Bool stopping_message = False; + static Bool slowdown_message = False; + static Int vg_n_errs_shown = 0; + + /* After M_VG_COLLECT_NO_ERRORS_AFTER different errors have been + found, just refuse to collect any more. */ + if (vg_n_errs_shown >= M_VG_COLLECT_NO_ERRORS_AFTER) { + if (!stopping_message) { + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, + "More than %d errors detected. I'm not reporting any more.", + M_VG_COLLECT_NO_ERRORS_AFTER); + VG_(message)(Vg_UserMsg, + "Final error counts may be inaccurate. Go fix your program!"); + VG_(message)(Vg_UserMsg, ""); + stopping_message = True; + } + return; + } + + /* After M_VG_COLLECT_ERRORS_SLOWLY_AFTER different errors have + been found, be much more conservative about collecting new + ones. */ + if (vg_n_errs_shown >= M_VG_COLLECT_ERRORS_SLOWLY_AFTER) { + cheap_addr_cmp = True; + if (!slowdown_message) { + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, + "More than %d errors detected. Subsequent errors", + M_VG_COLLECT_ERRORS_SLOWLY_AFTER); + VG_(message)(Vg_UserMsg, + "will still be recorded, but in less detail than before."); + slowdown_message = True; + } + } + + + /* First, see if we've got an error record matching this one. */ + p = vg_err_contexts; + p_prev = NULL; + while (p != NULL) { + if (eq_ErrContext(cheap_addr_cmp, p, ec)) { + /* Found it. */ + p->count++; + if (p->supp != NULL) { + /* Deal correctly with suppressed errors. */ + p->supp->count++; + vg_n_errs_suppressed++; + } else { + vg_n_errs_found++; + } + + /* Move p to the front of the list so that future searches + for it are faster. */ + if (p_prev != NULL) { + vg_assert(p_prev->next == p); + p_prev->next = p->next; + p->next = vg_err_contexts; + vg_err_contexts = p; + } + return; + } + p_prev = p; + p = p->next; + } + + /* Didn't see it. 
Copy and add. */ + + /* OK, we're really going to collect it. */ + + p = VG_(malloc)(VG_AR_ERRCTXT, sizeof(ErrContext)); + *p = *ec; + p->next = vg_err_contexts; + p->supp = is_suppressible_error(ec); + vg_err_contexts = p; + if (p->supp == NULL) { + vg_n_errs_found++; + if (!is_first_shown_context) + VG_(message)(Vg_UserMsg, ""); + pp_ErrContext(p, False); + is_first_shown_context = False; + vg_n_errs_shown++; + /* Perhaps we want a GDB attach at this point? */ + if (vg_is_GDB_attach_requested()) { + VG_(swizzle_esp_then_start_GDB)(); + } + } else { + vg_n_errs_suppressed++; + p->supp->count++; + } +} + + + + +/*------------------------------------------------------------*/ +/*--- Exported fns ---*/ +/*------------------------------------------------------------*/ + +void VG_(record_value_error) ( Int size ) +{ + ErrContext ec; + clear_ErrContext( &ec ); + ec.count = 1; + ec.next = NULL; + ec.where = VG_(get_ExeContext)( False ); + ec.ekind = ValueErr; + ec.size = size; + VG_(maybe_add_context) ( &ec ); +} + +void VG_(record_address_error) ( Addr a, Int size, Bool isWrite ) +{ + ErrContext ec; + + /* If this is caused by an access immediately below %ESP, and the + user asks nicely, we just ignore it. */ + if (VG_(clo_workaround_gcc296_bugs) && VG_(is_just_below_ESP)(a)) + return; + + clear_ErrContext( &ec ); + ec.count = 1; + ec.next = NULL; + ec.where = VG_(get_ExeContext)( False ); + ec.ekind = AddrErr; + ec.axskind = isWrite ? 
WriteAxs : ReadAxs; + ec.size = size; + ec.addr = a; + VG_(describe_addr) ( a, &ec.addrinfo ); + VG_(maybe_add_context) ( &ec ); +} + +void VG_(record_jump_error) ( Addr a ) +{ + ErrContext ec; + clear_ErrContext( &ec ); + ec.count = 1; + ec.next = NULL; + ec.where = VG_(get_ExeContext)( False ); + ec.ekind = AddrErr; + ec.axskind = ExecAxs; + ec.addr = a; + VG_(describe_addr) ( a, &ec.addrinfo ); + VG_(maybe_add_context) ( &ec ); +} + +void VG_(record_free_error) ( Addr a ) +{ + ErrContext ec; + clear_ErrContext( &ec ); + ec.count = 1; + ec.next = NULL; + ec.where = VG_(get_ExeContext)( True ); + ec.ekind = FreeErr; + ec.addr = a; + VG_(describe_addr) ( a, &ec.addrinfo ); + VG_(maybe_add_context) ( &ec ); +} + +void VG_(record_freemismatch_error) ( Addr a ) +{ + ErrContext ec; + clear_ErrContext( &ec ); + ec.count = 1; + ec.next = NULL; + ec.where = VG_(get_ExeContext)( True ); + ec.ekind = FreeMismatchErr; + ec.addr = a; + VG_(describe_addr) ( a, &ec.addrinfo ); + VG_(maybe_add_context) ( &ec ); +} + +void VG_(record_param_err) ( Addr a, Bool isWriteLack, Char* msg ) +{ + ErrContext ec; + clear_ErrContext( &ec ); + ec.count = 1; + ec.next = NULL; + ec.where = VG_(get_ExeContext)( False ); + ec.ekind = ParamErr; + ec.addr = a; + VG_(describe_addr) ( a, &ec.addrinfo ); + ec.syscall_param = msg; + ec.isWriteableLack = isWriteLack; + VG_(maybe_add_context) ( &ec ); +} + + +void VG_(record_user_err) ( Addr a, Bool isWriteLack ) +{ + ErrContext ec; + clear_ErrContext( &ec ); + ec.count = 1; + ec.next = NULL; + ec.where = VG_(get_ExeContext)( False ); + ec.ekind = UserErr; + ec.addr = a; + VG_(describe_addr) ( a, &ec.addrinfo ); + ec.isWriteableLack = isWriteLack; + VG_(maybe_add_context) ( &ec ); +} + + +void VG_(show_all_errors) ( void ) +{ + Int i, n_min; + Int n_err_contexts, n_supp_contexts; + ErrContext *p, *p_min; + Suppression *su; + Bool any_supp; + + if (VG_(clo_verbosity) == 0) + return; + + n_err_contexts = 0; + for (p = vg_err_contexts; p != NULL; p = 
p->next) { + if (p->supp == NULL) + n_err_contexts++; + } + + n_supp_contexts = 0; + for (su = vg_suppressions; su != NULL; su = su->next) { + if (su->count > 0) + n_supp_contexts++; + } + + VG_(message)(Vg_UserMsg, + "ERROR SUMMARY: " + "%d errors from %d contexts (suppressed: %d from %d)", + vg_n_errs_found, n_err_contexts, + vg_n_errs_suppressed, n_supp_contexts ); + + if (VG_(clo_verbosity) <= 1) + return; + + /* Print the contexts in order of increasing error count. */ + for (i = 0; i < n_err_contexts; i++) { + n_min = (1 << 30) - 1; + p_min = NULL; + for (p = vg_err_contexts; p != NULL; p = p->next) { + if (p->supp != NULL) continue; + if (p->count < n_min) { + n_min = p->count; + p_min = p; + } + } + if (p_min == NULL) VG_(panic)("pp_AllErrContexts"); + + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, "%d errors in context %d of %d:", + p_min->count, + i+1, n_err_contexts); + pp_ErrContext( p_min, False ); + + if ((i+1 == VG_(clo_dump_error))) { + VG_(translate) ( p_min->where->eips[0], NULL, NULL, NULL ); + } + + p_min->count = 1 << 30; + } + + if (n_supp_contexts > 0) + VG_(message)(Vg_DebugMsg, ""); + any_supp = False; + for (su = vg_suppressions; su != NULL; su = su->next) { + if (su->count > 0) { + any_supp = True; + VG_(message)(Vg_DebugMsg, "supp: %4d %s", su->count, + su->sname); + } + } + + if (n_err_contexts > 0) { + if (any_supp) + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, + "IN SUMMARY: " + "%d errors from %d contexts (suppressed: %d from %d)", + vg_n_errs_found, n_err_contexts, + vg_n_errs_suppressed, + n_supp_contexts ); + VG_(message)(Vg_UserMsg, ""); + } +} + +/*------------------------------------------------------------*/ +/*--- Standard suppressions ---*/ +/*------------------------------------------------------------*/ + +/* Get a non-blank, non-comment line of at most nBuf chars from fd. + Skips leading spaces on the line. Return True if EOF was hit instead. 
+*/ + +#define VG_ISSPACE(ch) (((ch)==' ') || ((ch)=='\n') || ((ch)=='\t')) + +static Bool getLine ( Int fd, Char* buf, Int nBuf ) +{ + Char ch; + Int n, i; + while (True) { + /* First, read until a non-blank char appears. */ + while (True) { + n = VG_(read)(fd, &ch, 1); + if (n == 1 && !VG_ISSPACE(ch)) break; + if (n == 0) return True; + } + + /* Now, read the line into buf. */ + i = 0; + buf[i++] = ch; buf[i] = 0; + while (True) { + n = VG_(read)(fd, &ch, 1); + if (n == 0) return False; /* the next call will return True */ + if (ch == '\n') break; + if (i > 0 && i == nBuf-1) i--; + buf[i++] = ch; buf[i] = 0; + } + while (i > 1 && VG_ISSPACE(buf[i-1])) { + i--; buf[i] = 0; + }; + + /* VG_(printf)("The line is `%s'\n", buf); */ + /* Ok, we have a line. If a non-comment line, return. + If a comment line, start all over again. */ + if (buf[0] != '#') return False; + } +} + + +/* *p_caller contains the raw name of a caller, supposedly either + fun:some_function_name or + obj:some_object_name. + Set *p_ty accordingly and advance *p_caller over the descriptor + (fun: or obj:) part. + Returns False if failed. +*/ +static Bool setLocationTy ( Char** p_caller, SuppressionLocTy* p_ty ) +{ + if (VG_(strncmp)(*p_caller, "fun:", 4) == 0) { + (*p_caller) += 4; + *p_ty = FunName; + return True; + } + if (VG_(strncmp)(*p_caller, "obj:", 4) == 0) { + (*p_caller) += 4; + *p_ty = ObjName; + return True; + } + VG_(printf)("location should start with fun: or obj:\n"); + return False; +} + + +/* Read suppressions from the file specified in vg_clo_suppressions + and place them in the suppressions list. If there's any difficulty + doing this, just give up -- there's no point in trying to recover. 
+*/ +#define STREQ(s1,s2) (s1 != NULL && s2 != NULL \ + && VG_(strcmp)((s1),(s2))==0) + +static Char* copyStr ( Char* str ) +{ + Int n, i; + Char* str2; + n = VG_(strlen)(str); + str2 = VG_(malloc)(VG_AR_PRIVATE, n+1); + vg_assert(n > 0); + for (i = 0; i < n+1; i++) str2[i] = str[i]; + return str2; +} + +static void load_one_suppressions_file ( Char* filename ) +{ +# define N_BUF 200 + Int fd; + Bool eof; + Char buf[N_BUF+1]; + fd = VG_(open_read)( filename ); + if (fd == -1) { + VG_(message)(Vg_UserMsg, + "FATAL: can't open suppressions file `%s'", + filename ); + VG_(exit)(1); + } + + while (True) { + Suppression* supp; + supp = VG_(malloc)(VG_AR_PRIVATE, sizeof(Suppression)); + supp->count = 0; + supp->param = supp->caller0 = supp->caller1 + = supp->caller2 = supp->caller3 = NULL; + + eof = getLine ( fd, buf, N_BUF ); + if (eof) break; + + if (!STREQ(buf, "{")) goto syntax_error; + + eof = getLine ( fd, buf, N_BUF ); + if (eof || STREQ(buf, "}")) goto syntax_error; + supp->sname = copyStr(buf); + + eof = getLine ( fd, buf, N_BUF ); + if (eof) goto syntax_error; + else if (STREQ(buf, "Param")) supp->skind = Param; + else if (STREQ(buf, "Value0")) supp->skind = Value0; + else if (STREQ(buf, "Value1")) supp->skind = Value1; + else if (STREQ(buf, "Value2")) supp->skind = Value2; + else if (STREQ(buf, "Value4")) supp->skind = Value4; + else if (STREQ(buf, "Value8")) supp->skind = Value8; + else if (STREQ(buf, "Addr1")) supp->skind = Addr1; + else if (STREQ(buf, "Addr2")) supp->skind = Addr2; + else if (STREQ(buf, "Addr4")) supp->skind = Addr4; + else if (STREQ(buf, "Addr8")) supp->skind = Addr8; + else if (STREQ(buf, "Free")) supp->skind = FreeS; + else goto syntax_error; + + if (supp->skind == Param) { + eof = getLine ( fd, buf, N_BUF ); + if (eof) goto syntax_error; + supp->param = copyStr(buf); + } + + eof = getLine ( fd, buf, N_BUF ); + if (eof) goto syntax_error; + supp->caller0 = copyStr(buf); + if (!setLocationTy(&(supp->caller0), &(supp->caller0_ty))) + goto 
syntax_error; + + eof = getLine ( fd, buf, N_BUF ); + if (eof) goto syntax_error; + supp->caller1 = copyStr(buf); + if (!setLocationTy(&(supp->caller1), &(supp->caller1_ty))) + goto syntax_error; + + eof = getLine ( fd, buf, N_BUF ); + if (eof) goto syntax_error; + if (!STREQ(buf, "}")) { + supp->caller2 = copyStr(buf); + if (!setLocationTy(&(supp->caller2), &(supp->caller2_ty))) + goto syntax_error; + eof = getLine ( fd, buf, N_BUF ); + if (eof) goto syntax_error; + if (!STREQ(buf, "}")) { + supp->caller3 = copyStr(buf); + if (!setLocationTy(&(supp->caller3), &(supp->caller3_ty))) + goto syntax_error; + eof = getLine ( fd, buf, N_BUF ); + if (eof || !STREQ(buf, "}")) goto syntax_error; + } + } + + supp->next = vg_suppressions; + vg_suppressions = supp; + } + + VG_(close)(fd); + return; + + syntax_error: + if (eof) { + VG_(message)(Vg_UserMsg, + "FATAL: in suppressions file `%s': unexpected EOF", + filename ); + } else { + VG_(message)(Vg_UserMsg, + "FATAL: in suppressions file `%s': syntax error on: %s", + filename, buf ); + } + VG_(close)(fd); + VG_(message)(Vg_UserMsg, "exiting now."); + VG_(exit)(1); + +# undef N_BUF +} + + +void VG_(load_suppressions) ( void ) +{ + Int i; + vg_suppressions = NULL; + for (i = 0; i < VG_(clo_n_suppressions); i++) { + if (VG_(clo_verbosity) > 1) { + VG_(message)(Vg_UserMsg, "Reading suppressions file: %s", + VG_(clo_suppressions)[i] ); + } + load_one_suppressions_file( VG_(clo_suppressions)[i] ); + } +} + + +/* Does an error context match a suppression? ie is this a + suppressible error? If so, return a pointer to the Suppression + record, otherwise NULL. + Tries to minimise the number of calls to what_fn_is_this since they + are expensive. 
+*/ +static Suppression* is_suppressible_error ( ErrContext* ec ) +{ +# define STREQ(s1,s2) (s1 != NULL && s2 != NULL \ + && VG_(strcmp)((s1),(s2))==0) + + Char caller0_obj[M_VG_ERRTXT]; + Char caller0_fun[M_VG_ERRTXT]; + Char caller1_obj[M_VG_ERRTXT]; + Char caller1_fun[M_VG_ERRTXT]; + Char caller2_obj[M_VG_ERRTXT]; + Char caller2_fun[M_VG_ERRTXT]; + Char caller3_obj[M_VG_ERRTXT]; + Char caller3_fun[M_VG_ERRTXT]; + + Suppression* su; + Int su_size; + + /* vg_what_fn_or_object_is_this returns: + or + or + ??? + so the strings in the suppression file should match these. + */ + + /* Initialise these strs so they are always safe to compare, even + if what_fn_or_object_is_this doesn't write anything to them. */ + caller0_obj[0] = caller1_obj[0] = caller2_obj[0] = caller3_obj[0] = 0; + caller0_fun[0] = caller1_fun[0] = caller2_obj[0] = caller3_obj[0] = 0; + + VG_(what_obj_and_fun_is_this) + ( ec->where->eips[0], caller0_obj, M_VG_ERRTXT, + caller0_fun, M_VG_ERRTXT ); + VG_(what_obj_and_fun_is_this) + ( ec->where->eips[1], caller1_obj, M_VG_ERRTXT, + caller1_fun, M_VG_ERRTXT ); + + if (VG_(clo_backtrace_size) > 2) { + VG_(what_obj_and_fun_is_this) + ( ec->where->eips[2], caller2_obj, M_VG_ERRTXT, + caller2_fun, M_VG_ERRTXT ); + + if (VG_(clo_backtrace_size) > 3) { + VG_(what_obj_and_fun_is_this) + ( ec->where->eips[3], caller3_obj, M_VG_ERRTXT, + caller3_fun, M_VG_ERRTXT ); + } + } + + /* See if the error context matches any suppression. 
*/ + for (su = vg_suppressions; su != NULL; su = su->next) { + switch (su->skind) { + case FreeS: + case Param: case Value0: su_size = 0; break; + case Value1: case Addr1: su_size = 1; break; + case Value2: case Addr2: su_size = 2; break; + case Value4: case Addr4: su_size = 4; break; + case Value8: case Addr8: su_size = 8; break; + default: VG_(panic)("errcontext_matches_suppression"); + } + switch (su->skind) { + case Param: + if (ec->ekind != ParamErr) continue; + if (!STREQ(su->param, ec->syscall_param)) continue; + break; + case Value0: case Value1: case Value2: case Value4: case Value8: + if (ec->ekind != ValueErr) continue; + if (ec->size != su_size) continue; + break; + case Addr1: case Addr2: case Addr4: case Addr8: + if (ec->ekind != AddrErr) continue; + if (ec->size != su_size) continue; + break; + case FreeS: + if (ec->ekind != FreeErr && ec->ekind != FreeMismatchErr) continue; + break; + } + + switch (su->caller0_ty) { + case ObjName: if (!VG_(stringMatch)(su->caller0, + caller0_obj)) continue; + break; + case FunName: if (!VG_(stringMatch)(su->caller0, + caller0_fun)) continue; + break; + default: goto baaaad; + } + + switch (su->caller1_ty) { + case ObjName: if (!VG_(stringMatch)(su->caller1, + caller1_obj)) continue; + break; + case FunName: if (!VG_(stringMatch)(su->caller1, + caller1_fun)) continue; + break; + default: goto baaaad; + } + + if (VG_(clo_backtrace_size) > 2 && su->caller2 != NULL) { + switch (su->caller2_ty) { + case ObjName: if (!VG_(stringMatch)(su->caller2, + caller2_obj)) continue; + break; + case FunName: if (!VG_(stringMatch)(su->caller2, + caller2_fun)) continue; + break; + default: goto baaaad; + } + } + + if (VG_(clo_backtrace_size) > 3 && su->caller3 != NULL) { + switch (su->caller3_ty) { + case ObjName: if (!VG_(stringMatch)(su->caller3, + caller3_obj)) continue; + break; + case FunName: if (!VG_(stringMatch)(su->caller3, + caller3_fun)) continue; + break; + default: goto baaaad; + } + } + + return su; + } + + return NULL; 
+ + baaaad: + VG_(panic)("is_suppressible_error"); + +# undef STREQ +} + +/*--------------------------------------------------------------------*/ +/*--- end vg_errcontext.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_execontext.c b/vg_execontext.c new file mode 100644 index 000000000..759345b7a --- /dev/null +++ b/vg_execontext.c @@ -0,0 +1,259 @@ + +/*--------------------------------------------------------------------*/ +/*--- Storage, and equality on, execution contexts (backtraces). ---*/ +/*--- vg_execontext.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" +#include "vg_constants.h" + + +/*------------------------------------------------------------*/ +/*--- Low-level ExeContext storage. 
---*/ +/*------------------------------------------------------------*/ + +/* The idea is only to ever store any one context once, so as to save + space and make exact comparisons faster. */ + +static ExeContext* vg_ec_list[VG_N_EC_LISTS]; + +/* Stats only: the number of times the system was searched to locate a + context. */ +static UInt vg_ec_searchreqs; + +/* Stats only: the number of full context comparisons done. */ +static UInt vg_ec_searchcmps; + +/* Stats only: total number of stored contexts. */ +static UInt vg_ec_totstored; + +/* Number of 2, 4 and (fast) full cmps done. */ +static UInt vg_ec_cmp2s; +static UInt vg_ec_cmp4s; +static UInt vg_ec_cmpAlls; + + +/*------------------------------------------------------------*/ +/*--- Exported functions. ---*/ +/*------------------------------------------------------------*/ + + +/* Initialise this subsystem. */ +void VG_(init_ExeContext_storage) ( void ) +{ + Int i; + vg_ec_searchreqs = 0; + vg_ec_searchcmps = 0; + vg_ec_totstored = 0; + vg_ec_cmp2s = 0; + vg_ec_cmp4s = 0; + vg_ec_cmpAlls = 0; + for (i = 0; i < VG_N_EC_LISTS; i++) + vg_ec_list[i] = NULL; +} + + +/* Show stats. */ +void VG_(show_ExeContext_stats) ( void ) +{ + VG_(message)(Vg_DebugMsg, + "exectx: %d lists, %d contexts (avg %d per list)", + VG_N_EC_LISTS, vg_ec_totstored, + vg_ec_totstored / VG_N_EC_LISTS + ); + VG_(message)(Vg_DebugMsg, + "exectx: %d searches, %d full compares (%d per 1000)", + vg_ec_searchreqs, vg_ec_searchcmps, + vg_ec_searchreqs == 0 + ? 0 + : (UInt)( (((ULong)vg_ec_searchcmps) * 1000) + / ((ULong)vg_ec_searchreqs )) + ); + VG_(message)(Vg_DebugMsg, + "exectx: %d cmp2, %d cmp4, %d cmpAll", + vg_ec_cmp2s, vg_ec_cmp4s, vg_ec_cmpAlls + ); +} + + +/* Print an ExeContext. */ +void VG_(pp_ExeContext) ( ExeContext* e ) +{ + VG_(mini_stack_dump) ( e ); +} + + +/* Compare two ExeContexts, comparing all callers. */ +Bool VG_(eq_ExeContext_all) ( ExeContext* e1, ExeContext* e2 ) +{ + vg_ec_cmpAlls++; + /* Just do pointer comparison. 
*/ + if (e1 != e2) return False; + return True; +} + + +/* Compare two ExeContexts, just comparing the top two callers. */ +Bool VG_(eq_ExeContext_top2) ( ExeContext* e1, ExeContext* e2 ) +{ + vg_ec_cmp2s++; + if (e1->eips[0] != e2->eips[0] + || e1->eips[1] != e2->eips[1]) return False; + return True; +} + + +/* Compare two ExeContexts, just comparing the top four callers. */ +Bool VG_(eq_ExeContext_top4) ( ExeContext* e1, ExeContext* e2 ) +{ + vg_ec_cmp4s++; + if (e1->eips[0] != e2->eips[0] + || e1->eips[1] != e2->eips[1]) return False; + + if (VG_(clo_backtrace_size) < 3) return True; + if (e1->eips[2] != e2->eips[2]) return False; + + if (VG_(clo_backtrace_size) < 4) return True; + if (e1->eips[3] != e2->eips[3]) return False; + + return True; +} + + +/* This guy is the head honcho here. Take a snapshot of the client's + stack. Search our collection of ExeContexts to see if we already + have it, and if not, allocate a new one. Either way, return a + pointer to the context. If there is a matching context we + guarantee to not allocate a new one. Thus we never store + duplicates, and so exact equality can be quickly done as equality + on the returned ExeContext* values themselves. Inspired by Hugs's + Text type. +*/ +ExeContext* VG_(get_ExeContext) ( Bool skip_top_frame ) +{ + Int i; + UInt ebp; + Addr eips[VG_DEEPEST_BACKTRACE]; + Bool same; + UInt hash; + ExeContext* new_ec; + ExeContext* list; + + VGP_PUSHCC(VgpExeContext); + + vg_assert(VG_(clo_backtrace_size) >= 2 + && VG_(clo_backtrace_size) <= VG_DEEPEST_BACKTRACE); + + /* First snaffle %EIPs from the client's stack into eips[0 + .. VG_(clo_backtrace_size)-1], putting zeroes in when the trail + goes cold. 
*/ + + for (i = 0; i < VG_(clo_backtrace_size); i++) + eips[i] = 0; + +# define GET_CALLER(lval) \ + if (ebp != 0 && VGM_(check_readable)(ebp, 8, NULL)) { \ + lval = ((UInt*)ebp)[1]; /* ret addr */ \ + ebp = ((UInt*)ebp)[0]; /* old ebp */ \ + } else { \ + lval = ebp = 0; \ + } + + ebp = VG_(baseBlock)[VGOFF_(m_ebp)]; + + if (skip_top_frame) { + for (i = 0; i < VG_(clo_backtrace_size); i++) + GET_CALLER(eips[i]); + } else { + eips[0] = VG_(baseBlock)[VGOFF_(m_eip)]; + for (i = 1; i < VG_(clo_backtrace_size); i++) + GET_CALLER(eips[i]); + } +# undef GET_CALLER + + /* Now figure out if we've seen this one before. First hash it so + as to determine the list number. */ + + hash = 0; + for (i = 0; i < VG_(clo_backtrace_size); i++) { + hash ^= (UInt)eips[i]; + hash = (hash << 29) | (hash >> 3); + } + hash = hash % VG_N_EC_LISTS; + + /* And (the expensive bit) look a matching entry in the list. */ + + vg_ec_searchreqs++; + + list = vg_ec_list[hash]; + + while (True) { + if (list == NULL) break; + vg_ec_searchcmps++; + same = True; + for (i = 0; i < VG_(clo_backtrace_size); i++) { + if (list->eips[i] != eips[i]) { + same = False; + break; + } + } + if (same) break; + list = list->next; + } + + if (list != NULL) { + /* Yay! We found it. */ + VGP_POPCC; + return list; + } + + /* Bummer. We have to allocate a new context record. 
*/ + vg_ec_totstored++; + + new_ec + = VG_(malloc)( + VG_AR_EXECTXT, + sizeof(struct _ExeContextRec *) + + VG_(clo_backtrace_size) * sizeof(Addr) + ); + + for (i = 0; i < VG_(clo_backtrace_size); i++) + new_ec->eips[i] = eips[i]; + + new_ec->next = vg_ec_list[hash]; + vg_ec_list[hash] = new_ec; + + VGP_POPCC; + return new_ec; +} + + +/*--------------------------------------------------------------------*/ +/*--- end vg_execontext.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_from_ucode.c b/vg_from_ucode.c new file mode 100644 index 000000000..4d0429355 --- /dev/null +++ b/vg_from_ucode.c @@ -0,0 +1,2682 @@ + +/*--------------------------------------------------------------------*/ +/*--- The JITter: translate ucode back to x86 code. ---*/ +/*--- vg_from_ucode.c ---*/ +/*--------------------------------------------------------------------*/ +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. 
+*/ + +#include "vg_include.h" + + +/*------------------------------------------------------------*/ +/*--- Renamings of frequently-used global functions. ---*/ +/*------------------------------------------------------------*/ + +#define dis VG_(disassemble) +#define nameIReg VG_(nameOfIntReg) +#define nameISize VG_(nameOfIntSize) + + +/*------------------------------------------------------------*/ +/*--- Instruction emission -- turning final uinstrs back ---*/ +/*--- into x86 code. ---*/ +/*------------------------------------------------------------*/ + +/* [2001-07-08 This comment is now somewhat out of date.] + + This is straightforward but for one thing: to facilitate generating + code in a single pass, we generate position-independent code. To + do this, calls and jmps to fixed addresses must specify the address + by first loading it into a register, and jump to/call that + register. Fortunately, the only jump to a literal is the jump back + to vg_dispatch, and only %eax is live then, conveniently. Ucode + call insns may only have a register as target anyway, so there's no + need to do anything fancy for them. + + The emit_* routines constitute the lowest level of instruction + emission. They simply emit the sequence of bytes corresponding to + the relevant instruction, with no further ado. In particular there + is no checking about whether uses of byte registers makes sense, + nor whether shift insns have their first operand in %cl, etc. + + These issues are taken care of by the level above, the synth_* + routines. These detect impossible operand combinations and turn + them into sequences of legal instructions. Finally, emitUInstr is + phrased in terms of the synth_* abstraction layer. 
*/ + +static UChar* emitted_code; +static Int emitted_code_used; +static Int emitted_code_size; + +static void expandEmittedCode ( void ) +{ + Int i; + UChar* tmp = VG_(jitmalloc)(2 * emitted_code_size); + /* VG_(printf)("expand to %d\n", 2 * emitted_code_size); */ + for (i = 0; i < emitted_code_size; i++) + tmp[i] = emitted_code[i]; + VG_(jitfree)(emitted_code); + emitted_code = tmp; + emitted_code_size *= 2; +} + +static __inline__ void emitB ( UInt b ) +{ + if (dis) { + if (b < 16) VG_(printf)("0%x ", b); else VG_(printf)("%2x ", b); + } + if (emitted_code_used == emitted_code_size) + expandEmittedCode(); + + emitted_code[emitted_code_used] = (UChar)b; + emitted_code_used++; +} + +static __inline__ void emitW ( UInt l ) +{ + emitB ( (l) & 0x000000FF ); + emitB ( (l >> 8) & 0x000000FF ); +} + +static __inline__ void emitL ( UInt l ) +{ + emitB ( (l) & 0x000000FF ); + emitB ( (l >> 8) & 0x000000FF ); + emitB ( (l >> 16) & 0x000000FF ); + emitB ( (l >> 24) & 0x000000FF ); +} + +static __inline__ void newEmit ( void ) +{ + if (dis) + VG_(printf)("\t %4d: ", emitted_code_used ); +} + +/* Is this a callee-save register, in the normal C calling convention? 
*/ +#define VG_CALLEE_SAVED(reg) (reg == R_EBX || reg == R_ESI || reg == R_EDI) + + +/*----------------------------------------------------*/ +/*--- Addressing modes ---*/ +/*----------------------------------------------------*/ + +static __inline__ UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem ) +{ + return ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7); +} + +static __inline__ UChar mkSIB ( Int scale, Int regindex, Int regbase ) +{ + Int shift; + switch (scale) { + case 1: shift = 0; break; + case 2: shift = 1; break; + case 4: shift = 2; break; + case 8: shift = 3; break; + default: VG_(panic)( "mkSIB" ); + } + return ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7); +} + +static __inline__ void emit_amode_litmem_reg ( Addr addr, Int reg ) +{ + /* ($ADDR), reg */ + emitB ( mkModRegRM(0, reg, 5) ); + emitL ( addr ); +} + +static __inline__ void emit_amode_regmem_reg ( Int regmem, Int reg ) +{ + /* (regmem), reg */ + if (regmem == R_ESP) + VG_(panic)("emit_amode_regmem_reg"); + if (regmem == R_EBP) { + emitB ( mkModRegRM(1, reg, 5) ); + emitB ( 0x00 ); + } else { + emitB( mkModRegRM(0, reg, regmem) ); + } +} + +static __inline__ void emit_amode_offregmem_reg ( Int off, Int regmem, Int reg ) +{ + if (regmem == R_ESP) + VG_(panic)("emit_amode_offregmem_reg(ESP)"); + if (off < -128 || off > 127) { + /* Use a large offset */ + /* d32(regmem), reg */ + emitB ( mkModRegRM(2, reg, regmem) ); + emitL ( off ); + } else { + /* d8(regmem), reg */ + emitB ( mkModRegRM(1, reg, regmem) ); + emitB ( off & 0xFF ); + } +} + +static __inline__ void emit_amode_sib_reg ( Int off, Int scale, Int regbase, + Int regindex, Int reg ) +{ + if (regindex == R_ESP) + VG_(panic)("emit_amode_sib_reg(ESP)"); + if (off < -128 || off > 127) { + /* Use a 32-bit offset */ + emitB ( mkModRegRM(2, reg, 4) ); /* SIB with 32-bit displacement */ + emitB ( mkSIB( scale, regindex, regbase ) ); + emitL ( off ); + } else { + /* Use an 8-bit offset */ + emitB ( mkModRegRM(1, reg, 4) ); 
/* SIB with 8-bit displacement */ + emitB ( mkSIB( scale, regindex, regbase ) ); + emitB ( off & 0xFF ); + } +} + +static __inline__ void emit_amode_ereg_greg ( Int e_reg, Int g_reg ) +{ + /* other_reg, reg */ + emitB ( mkModRegRM(3, g_reg, e_reg) ); +} + +static __inline__ void emit_amode_greg_ereg ( Int g_reg, Int e_reg ) +{ + /* other_reg, reg */ + emitB ( mkModRegRM(3, g_reg, e_reg) ); +} + + +/*----------------------------------------------------*/ +/*--- Opcode translation ---*/ +/*----------------------------------------------------*/ + +static __inline__ Int mkGrp1opcode ( Opcode opc ) +{ + switch (opc) { + case ADD: return 0; + case OR: return 1; + case ADC: return 2; + case SBB: return 3; + case AND: return 4; + case SUB: return 5; + case XOR: return 6; + default: VG_(panic)("mkGrp1opcode"); + } +} + +static __inline__ Int mkGrp2opcode ( Opcode opc ) +{ + switch (opc) { + case ROL: return 0; + case ROR: return 1; + case RCL: return 2; + case RCR: return 3; + case SHL: return 4; + case SHR: return 5; + case SAR: return 7; + default: VG_(panic)("mkGrp2opcode"); + } +} + +static __inline__ Int mkGrp3opcode ( Opcode opc ) +{ + switch (opc) { + case NOT: return 2; + case NEG: return 3; + default: VG_(panic)("mkGrp3opcode"); + } +} + +static __inline__ Int mkGrp4opcode ( Opcode opc ) +{ + switch (opc) { + case INC: return 0; + case DEC: return 1; + default: VG_(panic)("mkGrp4opcode"); + } +} + +static __inline__ Int mkGrp5opcode ( Opcode opc ) +{ + switch (opc) { + case CALLM: return 2; + case JMP: return 4; + default: VG_(panic)("mkGrp5opcode"); + } +} + +static __inline__ UChar mkPrimaryOpcode ( Opcode opc ) +{ + switch (opc) { + case ADD: return 0x00; + case ADC: return 0x10; + case AND: return 0x20; + case XOR: return 0x30; + case OR: return 0x08; + case SBB: return 0x18; + case SUB: return 0x28; + default: VG_(panic)("mkPrimaryOpcode"); + } +} + +/*----------------------------------------------------*/ +/*--- v-size (4, or 2 with OSO) insn emitters ---*/ 
+/*----------------------------------------------------*/ + +static void emit_movv_offregmem_reg ( Int sz, Int off, Int areg, Int reg ) +{ + newEmit(); + if (sz == 2) emitB ( 0x66 ); + emitB ( 0x8B ); /* MOV Ev, Gv */ + emit_amode_offregmem_reg ( off, areg, reg ); + if (dis) + VG_(printf)( "\n\t\tmov%c\t0x%x(%s), %s\n", + nameISize(sz), off, nameIReg(4,areg), nameIReg(sz,reg)); +} + +static void emit_movv_reg_offregmem ( Int sz, Int reg, Int off, Int areg ) +{ + newEmit(); + if (sz == 2) emitB ( 0x66 ); + emitB ( 0x89 ); /* MOV Gv, Ev */ + emit_amode_offregmem_reg ( off, areg, reg ); + if (dis) + VG_(printf)( "\n\t\tmov%c\t%s, 0x%x(%s)\n", + nameISize(sz), nameIReg(sz,reg), off, nameIReg(4,areg)); +} + +static void emit_movv_regmem_reg ( Int sz, Int reg1, Int reg2 ) +{ + newEmit(); + if (sz == 2) emitB ( 0x66 ); + emitB ( 0x8B ); /* MOV Ev, Gv */ + emit_amode_regmem_reg ( reg1, reg2 ); + if (dis) + VG_(printf)( "\n\t\tmov%c\t(%s), %s\n", + nameISize(sz), nameIReg(4,reg1), nameIReg(sz,reg2)); +} + +static void emit_movv_reg_regmem ( Int sz, Int reg1, Int reg2 ) +{ + newEmit(); + if (sz == 2) emitB ( 0x66 ); + emitB ( 0x89 ); /* MOV Gv, Ev */ + emit_amode_regmem_reg ( reg2, reg1 ); + if (dis) + VG_(printf)( "\n\t\tmov%c\t%s, (%s)\n", + nameISize(sz), nameIReg(sz,reg1), nameIReg(4,reg2)); +} + +static void emit_movv_reg_reg ( Int sz, Int reg1, Int reg2 ) +{ + newEmit(); + if (sz == 2) emitB ( 0x66 ); + emitB ( 0x89 ); /* MOV Gv, Ev */ + emit_amode_ereg_greg ( reg2, reg1 ); + if (dis) + VG_(printf)( "\n\t\tmov%c\t%s, %s\n", + nameISize(sz), nameIReg(sz,reg1), nameIReg(sz,reg2)); +} + +static void emit_nonshiftopv_lit_reg ( Int sz, Opcode opc, + UInt lit, Int reg ) +{ + newEmit(); + if (sz == 2) emitB ( 0x66 ); + if (lit == VG_(extend_s_8to32)(lit & 0x000000FF)) { + /* short form OK */ + emitB ( 0x83 ); /* Grp1 Ib,Ev */ + emit_amode_ereg_greg ( reg, mkGrp1opcode(opc) ); + emitB ( lit & 0x000000FF ); + } else { + emitB ( 0x81 ); /* Grp1 Iv,Ev */ + emit_amode_ereg_greg ( 
reg, mkGrp1opcode(opc) ); + if (sz == 2) emitW ( lit ); else emitL ( lit ); + } + if (dis) + VG_(printf)( "\n\t\t%s%c\t$0x%x, %s\n", + VG_(nameUOpcode)(False,opc), nameISize(sz), + lit, nameIReg(sz,reg)); +} + +static void emit_shiftopv_lit_reg ( Int sz, Opcode opc, UInt lit, Int reg ) +{ + newEmit(); + if (sz == 2) emitB ( 0x66 ); + emitB ( 0xC1 ); /* Grp2 Ib,Ev */ + emit_amode_ereg_greg ( reg, mkGrp2opcode(opc) ); + emitB ( lit ); + if (dis) + VG_(printf)( "\n\t\t%s%c\t$%d, %s\n", + VG_(nameUOpcode)(False,opc), nameISize(sz), + lit, nameIReg(sz,reg)); +} + +static void emit_shiftopv_cl_stack0 ( Int sz, Opcode opc ) +{ + newEmit(); + if (sz == 2) emitB ( 0x66 ); + emitB ( 0xD3 ); /* Grp2 CL,Ev */ + emitB ( mkModRegRM ( 1, mkGrp2opcode(opc), 4 ) ); + emitB ( 0x24 ); /* a SIB, I think `d8(%esp)' */ + emitB ( 0x00 ); /* the d8 displacement */ + if (dis) + VG_(printf)("\n\t\t%s%c %%cl, 0(%%esp)\n", + VG_(nameUOpcode)(False,opc), nameISize(sz) ); +} + +static void emit_shiftopb_cl_stack0 ( Opcode opc ) +{ + newEmit(); + emitB ( 0xD2 ); /* Grp2 CL,Eb */ + emitB ( mkModRegRM ( 1, mkGrp2opcode(opc), 4 ) ); + emitB ( 0x24 ); /* a SIB, I think `d8(%esp)' */ + emitB ( 0x00 ); /* the d8 displacement */ + if (dis) + VG_(printf)("\n\t\t%s%c %%cl, 0(%%esp)\n", + VG_(nameUOpcode)(False,opc), nameISize(1) ); +} + +static void emit_nonshiftopv_offregmem_reg ( Int sz, Opcode opc, + Int off, Int areg, Int reg ) +{ + newEmit(); + if (sz == 2) emitB ( 0x66 ); + emitB ( 3 + mkPrimaryOpcode(opc) ); /* op Ev, Gv */ + emit_amode_offregmem_reg ( off, areg, reg ); + if (dis) + VG_(printf)( "\n\t\t%s%c\t0x%x(%s), %s\n", + VG_(nameUOpcode)(False,opc), nameISize(sz), + off, nameIReg(4,areg), nameIReg(sz,reg)); +} + +static void emit_nonshiftopv_reg_reg ( Int sz, Opcode opc, + Int reg1, Int reg2 ) +{ + newEmit(); + if (sz == 2) emitB ( 0x66 ); +# if 0 + /* Perfectly correct, but the GNU assembler uses the other form. + Therefore we too use the other form, to aid verification. 
*/ + emitB ( 3 + mkPrimaryOpcode(opc) ); /* op Ev, Gv */ + emit_amode_ereg_greg ( reg1, reg2 ); +# else + emitB ( 1 + mkPrimaryOpcode(opc) ); /* op Gv, Ev */ + emit_amode_greg_ereg ( reg1, reg2 ); +# endif + if (dis) + VG_(printf)( "\n\t\t%s%c\t%s, %s\n", + VG_(nameUOpcode)(False,opc), nameISize(sz), + nameIReg(sz,reg1), nameIReg(sz,reg2)); +} + +static void emit_movv_lit_reg ( Int sz, UInt lit, Int reg ) +{ + if (lit == 0) { + emit_nonshiftopv_reg_reg ( sz, XOR, reg, reg ); + return; + } + newEmit(); + if (sz == 2) emitB ( 0x66 ); + emitB ( 0xB8+reg ); /* MOV imm, Gv */ + if (sz == 2) emitW ( lit ); else emitL ( lit ); + if (dis) + VG_(printf)( "\n\t\tmov%c\t$0x%x, %s\n", + nameISize(sz), lit, nameIReg(sz,reg)); +} + +static void emit_unaryopv_reg ( Int sz, Opcode opc, Int reg ) +{ + newEmit(); + if (sz == 2) emitB ( 0x66 ); + switch (opc) { + case NEG: + emitB ( 0xF7 ); + emit_amode_ereg_greg ( reg, mkGrp3opcode(NEG) ); + if (dis) + VG_(printf)( "\n\t\tneg%c\t%s\n", + nameISize(sz), nameIReg(sz,reg)); + break; + case NOT: + emitB ( 0xF7 ); + emit_amode_ereg_greg ( reg, mkGrp3opcode(NOT) ); + if (dis) + VG_(printf)( "\n\t\tnot%c\t%s\n", + nameISize(sz), nameIReg(sz,reg)); + break; + case DEC: + emitB ( 0x48 + reg ); + if (dis) + VG_(printf)( "\n\t\tdec%c\t%s\n", + nameISize(sz), nameIReg(sz,reg)); + break; + case INC: + emitB ( 0x40 + reg ); + if (dis) + VG_(printf)( "\n\t\tinc%c\t%s\n", + nameISize(sz), nameIReg(sz,reg)); + break; + default: + VG_(panic)("emit_unaryopv_reg"); + } +} + +static void emit_pushv_reg ( Int sz, Int reg ) +{ + newEmit(); + if (sz == 2) { + emitB ( 0x66 ); + } else { + vg_assert(sz == 4); + } + emitB ( 0x50 + reg ); + if (dis) + VG_(printf)("\n\t\tpush%c %s\n", nameISize(sz), nameIReg(sz,reg)); +} + +static void emit_popv_reg ( Int sz, Int reg ) +{ + newEmit(); + if (sz == 2) { + emitB ( 0x66 ); + } else { + vg_assert(sz == 4); + } + emitB ( 0x58 + reg ); + if (dis) + VG_(printf)("\n\t\tpop%c %s\n", nameISize(sz), nameIReg(sz,reg)); +} + 
+static void emit_pushl_lit8 ( Int lit8 ) +{ + vg_assert(lit8 >= -128 && lit8 < 128); + newEmit(); + emitB ( 0x6A ); + emitB ( (UChar)((UInt)lit8) ); + if (dis) + VG_(printf)("\n\t\tpushl $%d\n", lit8 ); +} + +static void emit_pushl_lit32 ( UInt int32 ) +{ + newEmit(); + emitB ( 0x68 ); + emitL ( int32 ); + if (dis) + VG_(printf)("\n\t\tpushl $0x%x\n", int32 ); +} + +static void emit_cmpl_zero_reg ( Int reg ) +{ + newEmit(); + emitB ( 0x83 ); + emit_amode_ereg_greg ( reg, 7 /* Grp 3 opcode for CMP */ ); + emitB ( 0x00 ); + if (dis) + VG_(printf)("\n\t\tcmpl $0, %s\n", nameIReg(4,reg)); +} + +static void emit_swapl_reg_ECX ( Int reg ) +{ + newEmit(); + emitB ( 0x87 ); /* XCHG Gv,Ev */ + emit_amode_ereg_greg ( reg, R_ECX ); + if (dis) + VG_(printf)("\n\t\txchgl %%ecx, %s\n", nameIReg(4,reg)); +} + +static void emit_swapl_reg_EAX ( Int reg ) +{ + newEmit(); + emitB ( 0x90 + reg ); /* XCHG Gv,eAX */ + if (dis) + VG_(printf)("\n\t\txchgl %%eax, %s\n", nameIReg(4,reg)); +} + +static void emit_swapl_reg_reg ( Int reg1, Int reg2 ) +{ + newEmit(); + emitB ( 0x87 ); /* XCHG Gv,Ev */ + emit_amode_ereg_greg ( reg1, reg2 ); + if (dis) + VG_(printf)("\n\t\txchgl %s, %s\n", nameIReg(4,reg1), + nameIReg(4,reg2)); +} + +static void emit_bswapl_reg ( Int reg ) +{ + newEmit(); + emitB ( 0x0F ); + emitB ( 0xC8 + reg ); /* BSWAP r32 */ + if (dis) + VG_(printf)("\n\t\tbswapl %s\n", nameIReg(4,reg)); +} + +static void emit_movl_reg_reg ( Int regs, Int regd ) +{ + newEmit(); + emitB ( 0x89 ); /* MOV Gv,Ev */ + emit_amode_ereg_greg ( regd, regs ); + if (dis) + VG_(printf)("\n\t\tmovl %s, %s\n", nameIReg(4,regs), nameIReg(4,regd)); +} + +static void emit_testv_lit_reg ( Int sz, UInt lit, Int reg ) +{ + newEmit(); + if (sz == 2) { + emitB ( 0x66 ); + } else { + vg_assert(sz == 4); + } + emitB ( 0xF7 ); /* Grp3 Ev */ + emit_amode_ereg_greg ( reg, 0 /* Grp3 subopcode for TEST */ ); + if (sz == 2) emitW ( lit ); else emitL ( lit ); + if (dis) + VG_(printf)("\n\t\ttest%c $0x%x, %s\n", 
nameISize(sz), + lit, nameIReg(sz,reg)); +} + +static void emit_testv_lit_offregmem ( Int sz, UInt lit, Int off, Int reg ) +{ + newEmit(); + if (sz == 2) { + emitB ( 0x66 ); + } else { + vg_assert(sz == 4); + } + emitB ( 0xF7 ); /* Grp3 Ev */ + emit_amode_offregmem_reg ( off, reg, 0 /* Grp3 subopcode for TEST */ ); + if (sz == 2) emitW ( lit ); else emitL ( lit ); + if (dis) + VG_(printf)("\n\t\ttest%c $%d, 0x%x(%s)\n", + nameISize(sz), lit, off, nameIReg(4,reg) ); +} + +static void emit_movv_lit_offregmem ( Int sz, UInt lit, Int off, Int memreg ) +{ + newEmit(); + if (sz == 2) { + emitB ( 0x66 ); + } else { + vg_assert(sz == 4); + } + emitB ( 0xC7 ); /* Grp11 Ev */ + emit_amode_offregmem_reg ( off, memreg, 0 /* Grp11 subopcode for MOV */ ); + if (sz == 2) emitW ( lit ); else emitL ( lit ); + if (dis) + VG_(printf)( "\n\t\tmov%c\t$0x%x, 0x%x(%s)\n", + nameISize(sz), lit, off, nameIReg(4,memreg) ); +} + + +/*----------------------------------------------------*/ +/*--- b-size (1 byte) instruction emitters ---*/ +/*----------------------------------------------------*/ + +/* There is some doubt as to whether C6 (Grp 11) is in the + 486 insn set. ToDo: investigate. */ +static void emit_movb_lit_offregmem ( UInt lit, Int off, Int memreg ) +{ + newEmit(); + emitB ( 0xC6 ); /* Grp11 Eb */ + emit_amode_offregmem_reg ( off, memreg, 0 /* Grp11 subopcode for MOV */ ); + emitB ( lit ); + if (dis) + VG_(printf)( "\n\t\tmovb\t$0x%x, 0x%x(%s)\n", + lit, off, nameIReg(4,memreg) ); +} + +static void emit_nonshiftopb_offregmem_reg ( Opcode opc, + Int off, Int areg, Int reg ) +{ + newEmit(); + emitB ( 2 + mkPrimaryOpcode(opc) ); /* op Eb, Gb */ + emit_amode_offregmem_reg ( off, areg, reg ); + if (dis) + VG_(printf)( "\n\t\t%sb\t0x%x(%s), %s\n", + VG_(nameUOpcode)(False,opc), off, nameIReg(4,areg), + nameIReg(1,reg)); +} + +static void emit_movb_reg_offregmem ( Int reg, Int off, Int areg ) +{ + /* Could do better when reg == %al. 
*/ + newEmit(); + emitB ( 0x88 ); /* MOV G1, E1 */ + emit_amode_offregmem_reg ( off, areg, reg ); + if (dis) + VG_(printf)( "\n\t\tmovb\t%s, 0x%x(%s)\n", + nameIReg(1,reg), off, nameIReg(4,areg)); +} + +static void emit_nonshiftopb_reg_reg ( Opcode opc, Int reg1, Int reg2 ) +{ + newEmit(); + emitB ( 2 + mkPrimaryOpcode(opc) ); /* op Eb, Gb */ + emit_amode_ereg_greg ( reg1, reg2 ); + if (dis) + VG_(printf)( "\n\t\t%sb\t%s, %s\n", + VG_(nameUOpcode)(False,opc), + nameIReg(1,reg1), nameIReg(1,reg2)); +} + +static void emit_movb_reg_regmem ( Int reg1, Int reg2 ) +{ + newEmit(); + emitB ( 0x88 ); /* MOV G1, E1 */ + emit_amode_regmem_reg ( reg2, reg1 ); + if (dis) + VG_(printf)( "\n\t\tmovb\t%s, (%s)\n", nameIReg(1,reg1), + nameIReg(4,reg2)); +} + +static void emit_nonshiftopb_lit_reg ( Opcode opc, UInt lit, Int reg ) +{ + newEmit(); + emitB ( 0x80 ); /* Grp1 Ib,Eb */ + emit_amode_ereg_greg ( reg, mkGrp1opcode(opc) ); + emitB ( lit & 0x000000FF ); + if (dis) + VG_(printf)( "\n\t\t%sb\t$0x%x, %s\n", VG_(nameUOpcode)(False,opc), + lit, nameIReg(1,reg)); +} + +static void emit_shiftopb_lit_reg ( Opcode opc, UInt lit, Int reg ) +{ + newEmit(); + emitB ( 0xC0 ); /* Grp2 Ib,Eb */ + emit_amode_ereg_greg ( reg, mkGrp2opcode(opc) ); + emitB ( lit ); + if (dis) + VG_(printf)( "\n\t\t%sb\t$%d, %s\n", + VG_(nameUOpcode)(False,opc), + lit, nameIReg(1,reg)); +} + +static void emit_unaryopb_reg ( Opcode opc, Int reg ) +{ + newEmit(); + switch (opc) { + case INC: + emitB ( 0xFE ); + emit_amode_ereg_greg ( reg, mkGrp4opcode(INC) ); + if (dis) + VG_(printf)( "\n\t\tincb\t%s\n", nameIReg(1,reg)); + break; + case DEC: + emitB ( 0xFE ); + emit_amode_ereg_greg ( reg, mkGrp4opcode(DEC) ); + if (dis) + VG_(printf)( "\n\t\tdecb\t%s\n", nameIReg(1,reg)); + break; + case NOT: + emitB ( 0xF6 ); + emit_amode_ereg_greg ( reg, mkGrp3opcode(NOT) ); + if (dis) + VG_(printf)( "\n\t\tnotb\t%s\n", nameIReg(1,reg)); + break; + case NEG: + emitB ( 0xF6 ); + emit_amode_ereg_greg ( reg, mkGrp3opcode(NEG) ); + 
if (dis) + VG_(printf)( "\n\t\tnegb\t%s\n", nameIReg(1,reg)); + break; + default: + VG_(panic)("emit_unaryopb_reg"); + } +} + +static void emit_testb_lit_reg ( UInt lit, Int reg ) +{ + newEmit(); + emitB ( 0xF6 ); /* Grp3 Eb */ + emit_amode_ereg_greg ( reg, 0 /* Grp3 subopcode for TEST */ ); + emitB ( lit ); + if (dis) + VG_(printf)("\n\t\ttestb $0x%x, %s\n", lit, nameIReg(1,reg)); +} + + +/*----------------------------------------------------*/ +/*--- zero-extended load emitters ---*/ +/*----------------------------------------------------*/ + +static void emit_movzbl_offregmem_reg ( Int off, Int regmem, Int reg ) +{ + newEmit(); + emitB ( 0x0F ); emitB ( 0xB6 ); /* MOVZBL */ + emit_amode_offregmem_reg ( off, regmem, reg ); + if (dis) + VG_(printf)( "\n\t\tmovzbl\t0x%x(%s), %s\n", + off, nameIReg(4,regmem), nameIReg(4,reg)); +} + +static void emit_movzbl_regmem_reg ( Int reg1, Int reg2 ) +{ + newEmit(); + emitB ( 0x0F ); emitB ( 0xB6 ); /* MOVZBL */ + emit_amode_regmem_reg ( reg1, reg2 ); + if (dis) + VG_(printf)( "\n\t\tmovzbl\t(%s), %s\n", nameIReg(4,reg1), + nameIReg(4,reg2)); +} + +static void emit_movzwl_offregmem_reg ( Int off, Int areg, Int reg ) +{ + newEmit(); + emitB ( 0x0F ); emitB ( 0xB7 ); /* MOVZWL */ + emit_amode_offregmem_reg ( off, areg, reg ); + if (dis) + VG_(printf)( "\n\t\tmovzwl\t0x%x(%s), %s\n", + off, nameIReg(4,areg), nameIReg(4,reg)); +} + +static void emit_movzwl_regmem_reg ( Int reg1, Int reg2 ) +{ + newEmit(); + emitB ( 0x0F ); emitB ( 0xB7 ); /* MOVZWL */ + emit_amode_regmem_reg ( reg1, reg2 ); + if (dis) + VG_(printf)( "\n\t\tmovzwl\t(%s), %s\n", nameIReg(4,reg1), + nameIReg(4,reg2)); +} + +/*----------------------------------------------------*/ +/*--- FPU instruction emitters ---*/ +/*----------------------------------------------------*/ + +static void emit_get_fpu_state ( void ) +{ + Int off = 4 * VGOFF_(m_fpustate); + newEmit(); + emitB ( 0xDD ); emitB ( 0xA5 ); /* frstor d32(%ebp) */ + emitL ( off ); + if (dis) + 
VG_(printf)("\n\t\tfrstor\t%d(%%ebp)\n", off ); +} + +static void emit_put_fpu_state ( void ) +{ + Int off = 4 * VGOFF_(m_fpustate); + newEmit(); + emitB ( 0xDD ); emitB ( 0xB5 ); /* fnsave d32(%ebp) */ + emitL ( off ); + if (dis) + VG_(printf)("\n\t\tfnsave\t%d(%%ebp)\n", off ); +} + +static void emit_fpu_no_mem ( UChar first_byte, + UChar second_byte ) +{ + newEmit(); + emitB ( first_byte ); + emitB ( second_byte ); + if (dis) + VG_(printf)("\n\t\tfpu-0x%x:0x%x\n", + (UInt)first_byte, (UInt)second_byte ); +} + +static void emit_fpu_regmem ( UChar first_byte, + UChar second_byte_masked, + Int reg ) +{ + newEmit(); + emitB ( first_byte ); + emit_amode_regmem_reg ( reg, second_byte_masked >> 3 ); + if (dis) + VG_(printf)("\n\t\tfpu-0x%x:0x%x-(%s)\n", + (UInt)first_byte, (UInt)second_byte_masked, + nameIReg(4,reg) ); +} + + +/*----------------------------------------------------*/ +/*--- misc instruction emitters ---*/ +/*----------------------------------------------------*/ + +static void emit_call_reg ( Int reg ) +{ + newEmit(); + emitB ( 0xFF ); /* Grp5 */ + emit_amode_ereg_greg ( reg, mkGrp5opcode(CALLM) ); + if (dis) + VG_(printf)( "\n\t\tcall\t*%s\n", nameIReg(4,reg) ); +} + + +static void emit_call_star_EBP_off ( Int byte_off ) +{ + newEmit(); + if (byte_off < -128 || byte_off > 127) { + emitB ( 0xFF ); + emitB ( 0x95 ); + emitL ( byte_off ); + } else { + emitB ( 0xFF ); + emitB ( 0x55 ); + emitB ( byte_off ); + } + if (dis) + VG_(printf)( "\n\t\tcall * %d(%%ebp)\n", byte_off ); +} + + +static void emit_addlit8_offregmem ( Int lit8, Int regmem, Int off ) +{ + vg_assert(lit8 >= -128 && lit8 < 128); + newEmit(); + emitB ( 0x83 ); /* Grp1 Ib,Ev */ + emit_amode_offregmem_reg ( off, regmem, + 0 /* Grp1 subopcode for ADD */ ); + emitB ( lit8 & 0xFF ); + if (dis) + VG_(printf)( "\n\t\taddl $%d, %d(%s)\n", lit8, off, + nameIReg(4,regmem)); +} + + +static void emit_add_lit_to_esp ( Int lit ) +{ + if (lit < -128 || lit > 127) VG_(panic)("emit_add_lit_to_esp"); + 
newEmit(); + emitB ( 0x83 ); + emitB ( 0xC4 ); + emitB ( lit & 0xFF ); + if (dis) + VG_(printf)( "\n\t\taddl $%d, %%esp\n", lit ); +} + + +static void emit_movb_AL_zeroESPmem ( void ) +{ + /* movb %al, 0(%esp) */ + /* 88442400 movb %al, 0(%esp) */ + newEmit(); + emitB ( 0x88 ); + emitB ( 0x44 ); + emitB ( 0x24 ); + emitB ( 0x00 ); + if (dis) + VG_(printf)( "\n\t\tmovb %%al, 0(%%esp)\n" ); +} + +static void emit_movb_zeroESPmem_AL ( void ) +{ + /* movb 0(%esp), %al */ + /* 8A442400 movb 0(%esp), %al */ + newEmit(); + emitB ( 0x8A ); + emitB ( 0x44 ); + emitB ( 0x24 ); + emitB ( 0x00 ); + if (dis) + VG_(printf)( "\n\t\tmovb 0(%%esp), %%al\n" ); +} + + +/* Emit a jump short with an 8-bit signed offset. Note that the + offset is that which should be added to %eip once %eip has been + advanced over this insn. */ +static void emit_jcondshort_delta ( Condcode cond, Int delta ) +{ + vg_assert(delta >= -128 && delta <= 127); + newEmit(); + emitB ( 0x70 + (UInt)cond ); + emitB ( (UChar)delta ); + if (dis) + VG_(printf)( "\n\t\tj%s-8\t%%eip+%d\n", + VG_(nameCondcode)(cond), delta ); +} + +static void emit_get_eflags ( void ) +{ + Int off = 4 * VGOFF_(m_eflags); + vg_assert(off >= 0 && off < 128); + newEmit(); + emitB ( 0xFF ); /* PUSHL off(%ebp) */ + emitB ( 0x75 ); + emitB ( off ); + emitB ( 0x9D ); /* POPFL */ + if (dis) + VG_(printf)( "\n\t\tpushl %d(%%ebp) ; popfl\n", off ); +} + +static void emit_put_eflags ( void ) +{ + Int off = 4 * VGOFF_(m_eflags); + vg_assert(off >= 0 && off < 128); + newEmit(); + emitB ( 0x9C ); /* PUSHFL */ + emitB ( 0x8F ); /* POPL vg_m_state.m_eflags */ + emitB ( 0x45 ); + emitB ( off ); + if (dis) + VG_(printf)( "\n\t\tpushfl ; popl %d(%%ebp)\n", off ); +} + +static void emit_setb_reg ( Int reg, Condcode cond ) +{ + newEmit(); + emitB ( 0x0F ); emitB ( 0x90 + (UChar)cond ); + emit_amode_ereg_greg ( reg, 0 ); + if (dis) + VG_(printf)("\n\t\tset%s %s\n", + VG_(nameCondcode)(cond), nameIReg(1,reg)); +} + +static void emit_ret ( void ) +{ + 
newEmit(); + emitB ( 0xC3 ); /* RET */ + if (dis) + VG_(printf)("\n\t\tret\n"); +} + +static void emit_pushal ( void ) +{ + newEmit(); + emitB ( 0x60 ); /* PUSHAL */ + if (dis) + VG_(printf)("\n\t\tpushal\n"); +} + +static void emit_popal ( void ) +{ + newEmit(); + emitB ( 0x61 ); /* POPAL */ + if (dis) + VG_(printf)("\n\t\tpopal\n"); +} + +static void emit_lea_litreg_reg ( UInt lit, Int regmem, Int reg ) +{ + newEmit(); + emitB ( 0x8D ); /* LEA M,Gv */ + emit_amode_offregmem_reg ( (Int)lit, regmem, reg ); + if (dis) + VG_(printf)("\n\t\tleal 0x%x(%s), %s\n", + lit, nameIReg(4,regmem), nameIReg(4,reg) ); +} + +static void emit_lea_sib_reg ( UInt lit, Int scale, + Int regbase, Int regindex, Int reg ) +{ + newEmit(); + emitB ( 0x8D ); /* LEA M,Gv */ + emit_amode_sib_reg ( (Int)lit, scale, regbase, regindex, reg ); + if (dis) + VG_(printf)("\n\t\tleal 0x%x(%s,%s,%d), %s\n", + lit, nameIReg(4,regbase), + nameIReg(4,regindex), scale, + nameIReg(4,reg) ); +} + +static void emit_AMD_prefetch_reg ( Int reg ) +{ + newEmit(); + emitB ( 0x0F ); + emitB ( 0x0D ); + emit_amode_regmem_reg ( reg, 1 /* 0 is prefetch; 1 is prefetchw */ ); + if (dis) + VG_(printf)("\n\t\tamd-prefetch (%s)\n", nameIReg(4,reg) ); +} + +/*----------------------------------------------------*/ +/*--- Instruction synthesisers ---*/ +/*----------------------------------------------------*/ + +static Condcode invertCondition ( Condcode cond ) +{ + return (Condcode)(1 ^ (UInt)cond); +} + + +/* Synthesise a call to *baseBlock[offset], ie, + call * (4 x offset)(%ebp). +*/ +static void synth_call_baseBlock_method ( Bool ensure_shortform, + Int word_offset ) +{ + vg_assert(word_offset >= 0); + vg_assert(word_offset < VG_BASEBLOCK_WORDS); + if (ensure_shortform) + vg_assert(word_offset < 32); + emit_call_star_EBP_off ( 4 * word_offset ); +} + + +/* Jump to the next translation, by loading its original addr into + %eax and returning to the scheduler. 
Or, if is a RET transfer, + don't return; instead jump to vg_dispatch_when_RET, which checks + whether this is a signal handler returning, and takes suitable + evasive action. +*/ +static void synth_jmp_reg ( Int reg, + Bool is_ret_dispatch, + Bool is_call_dispatch ) +{ + if (reg != R_EAX) + emit_movv_reg_reg ( 4, reg, R_EAX ); + if (is_ret_dispatch || is_call_dispatch) { + /* The (hopefully) rare case. */ + vg_assert(!(is_ret_dispatch && is_call_dispatch)); + emit_movv_lit_reg ( 4, VG_EBP_DISPATCH_CHECKED, R_EBP ); + } + emit_ret(); +} + + +/* Same deal as synth_jmp_reg. */ +static void synth_jmp_lit ( Addr addr ) +{ + emit_movv_lit_reg ( 4, addr, R_EAX ); + emit_ret(); +} + + +/* Dispatch, but with a call-target check. */ +static void synth_jmp_lit_call_dispatch ( Addr addr ) +{ + emit_movv_lit_reg ( 4, addr, R_EAX ); + emit_movv_lit_reg ( 4, VG_EBP_DISPATCH_CHECKED, R_EBP ); + emit_ret(); +} + + +static void synth_jcond_lit ( Condcode cond, Addr addr ) +{ + /* Do the following: + get eflags + jmp short if not cond to xyxyxy + addr -> eax + ret + xyxyxy + + 2 0000 750C jnz xyxyxy + 3 0002 B877665544 movl $0x44556677, %eax + 4 0007 C3 ret + 5 0008 FFE3 jmp *%ebx + 6 xyxyxy: + */ + emit_get_eflags(); + emit_jcondshort_delta ( invertCondition(cond), 5+1 ); + synth_jmp_lit ( addr ); +} + + +static void synth_jmp_ifzero_reg_lit ( Int reg, Addr addr ) +{ + /* 0000 83FF00 cmpl $0, %edi + 0003 750A jnz next + 0005 B844332211 movl $0x11223344, %eax + 000a C3 ret + next: + */ + emit_cmpl_zero_reg ( reg ); + emit_jcondshort_delta ( CondNZ, 5+1 ); + synth_jmp_lit ( addr ); +} + + +static void synth_mov_lit_reg ( Int size, UInt lit, Int reg ) +{ + /* Load the zero-extended literal into reg, at size l, + regardless of the request size. 
*/ + emit_movv_lit_reg ( 4, lit, reg ); +} + + +static void synth_mov_regmem_reg ( Int size, Int reg1, Int reg2 ) +{ + switch (size) { + case 4: emit_movv_regmem_reg ( 4, reg1, reg2 ); break; + case 2: emit_movzwl_regmem_reg ( reg1, reg2 ); break; + case 1: emit_movzbl_regmem_reg ( reg1, reg2 ); break; + default: VG_(panic)("synth_mov_regmem_reg"); + } +} + + +static void synth_mov_offregmem_reg ( Int size, Int off, Int areg, Int reg ) +{ + switch (size) { + case 4: emit_movv_offregmem_reg ( 4, off, areg, reg ); break; + case 2: emit_movzwl_offregmem_reg ( off, areg, reg ); break; + case 1: emit_movzbl_offregmem_reg ( off, areg, reg ); break; + default: VG_(panic)("synth_mov_offregmem_reg"); + } +} + + +static void synth_mov_reg_offregmem ( Int size, Int reg, + Int off, Int areg ) +{ + switch (size) { + case 4: emit_movv_reg_offregmem ( 4, reg, off, areg ); break; + case 2: emit_movv_reg_offregmem ( 2, reg, off, areg ); break; + case 1: if (reg < 4) { + emit_movb_reg_offregmem ( reg, off, areg ); + } + else { + emit_swapl_reg_EAX ( reg ); + emit_movb_reg_offregmem ( R_AL, off, areg ); + emit_swapl_reg_EAX ( reg ); + } + break; + default: VG_(panic)("synth_mov_reg_offregmem"); + } +} + + +static void synth_mov_reg_memreg ( Int size, Int reg1, Int reg2 ) +{ + Int s1; + switch (size) { + case 4: emit_movv_reg_regmem ( 4, reg1, reg2 ); break; + case 2: emit_movv_reg_regmem ( 2, reg1, reg2 ); break; + case 1: if (reg1 < 4) { + emit_movb_reg_regmem ( reg1, reg2 ); + } + else { + /* Choose a swap reg which is < 4 and not reg1 or reg2. */ + for (s1 = 0; s1 == reg1 || s1 == reg2; s1++) ; + emit_swapl_reg_reg ( s1, reg1 ); + emit_movb_reg_regmem ( s1, reg2 ); + emit_swapl_reg_reg ( s1, reg1 ); + } + break; + default: VG_(panic)("synth_mov_reg_litmem"); + } +} + + +static void synth_unaryop_reg ( Bool upd_cc, + Opcode opcode, Int size, + Int reg ) +{ + /* NB! opcode is a uinstr opcode, not an x86 one! 
*/ + switch (size) { + case 4: if (upd_cc) emit_get_eflags(); + emit_unaryopv_reg ( 4, opcode, reg ); + if (upd_cc) emit_put_eflags(); + break; + case 2: if (upd_cc) emit_get_eflags(); + emit_unaryopv_reg ( 2, opcode, reg ); + if (upd_cc) emit_put_eflags(); + break; + case 1: if (reg < 4) { + if (upd_cc) emit_get_eflags(); + emit_unaryopb_reg ( opcode, reg ); + if (upd_cc) emit_put_eflags(); + } else { + emit_swapl_reg_EAX ( reg ); + if (upd_cc) emit_get_eflags(); + emit_unaryopb_reg ( opcode, R_AL ); + if (upd_cc) emit_put_eflags(); + emit_swapl_reg_EAX ( reg ); + } + break; + default: VG_(panic)("synth_unaryop_reg"); + } +} + + + +static void synth_nonshiftop_reg_reg ( Bool upd_cc, + Opcode opcode, Int size, + Int reg1, Int reg2 ) +{ + /* NB! opcode is a uinstr opcode, not an x86 one! */ + switch (size) { + case 4: if (upd_cc) emit_get_eflags(); + emit_nonshiftopv_reg_reg ( 4, opcode, reg1, reg2 ); + if (upd_cc) emit_put_eflags(); + break; + case 2: if (upd_cc) emit_get_eflags(); + emit_nonshiftopv_reg_reg ( 2, opcode, reg1, reg2 ); + if (upd_cc) emit_put_eflags(); + break; + case 1: { /* Horrible ... */ + Int s1, s2; + /* Choose s1 and s2 to be x86 regs which we can talk about the + lowest 8 bits, ie either %eax, %ebx, %ecx or %edx. Make + sure s1 != s2 and that neither of them equal either reg1 or + reg2. Then use them as temporaries to make things work. 
*/ + if (reg1 < 4 && reg2 < 4) { + if (upd_cc) emit_get_eflags(); + emit_nonshiftopb_reg_reg(opcode, reg1, reg2); + if (upd_cc) emit_put_eflags(); + break; + } + for (s1 = 0; s1 == reg1 || s1 == reg2; s1++) ; + if (reg1 >= 4 && reg2 < 4) { + emit_swapl_reg_reg ( reg1, s1 ); + if (upd_cc) emit_get_eflags(); + emit_nonshiftopb_reg_reg(opcode, s1, reg2); + if (upd_cc) emit_put_eflags(); + emit_swapl_reg_reg ( reg1, s1 ); + break; + } + for (s2 = 0; s2 == reg1 || s2 == reg2 || s2 == s1; s2++) ; + if (reg1 < 4 && reg2 >= 4) { + emit_swapl_reg_reg ( reg2, s2 ); + if (upd_cc) emit_get_eflags(); + emit_nonshiftopb_reg_reg(opcode, reg1, s2); + if (upd_cc) emit_put_eflags(); + emit_swapl_reg_reg ( reg2, s2 ); + break; + } + if (reg1 >= 4 && reg2 >= 4 && reg1 != reg2) { + emit_swapl_reg_reg ( reg1, s1 ); + emit_swapl_reg_reg ( reg2, s2 ); + if (upd_cc) emit_get_eflags(); + emit_nonshiftopb_reg_reg(opcode, s1, s2); + if (upd_cc) emit_put_eflags(); + emit_swapl_reg_reg ( reg1, s1 ); + emit_swapl_reg_reg ( reg2, s2 ); + break; + } + if (reg1 >= 4 && reg2 >= 4 && reg1 == reg2) { + emit_swapl_reg_reg ( reg1, s1 ); + if (upd_cc) emit_get_eflags(); + emit_nonshiftopb_reg_reg(opcode, s1, s1); + if (upd_cc) emit_put_eflags(); + emit_swapl_reg_reg ( reg1, s1 ); + break; + } + VG_(panic)("synth_nonshiftopb_reg_reg"); + } + default: VG_(panic)("synth_nonshiftop_reg_reg"); + } +} + + +static void synth_nonshiftop_offregmem_reg ( + Bool upd_cc, + Opcode opcode, Int size, + Int off, Int areg, Int reg ) +{ + switch (size) { + case 4: + if (upd_cc) emit_get_eflags(); + emit_nonshiftopv_offregmem_reg ( 4, opcode, off, areg, reg ); + if (upd_cc) emit_put_eflags(); + break; + case 2: + if (upd_cc) emit_get_eflags(); + emit_nonshiftopv_offregmem_reg ( 2, opcode, off, areg, reg ); + if (upd_cc) emit_put_eflags(); + break; + case 1: + if (reg < 4) { + if (upd_cc) emit_get_eflags(); + emit_nonshiftopb_offregmem_reg ( opcode, off, areg, reg ); + if (upd_cc) emit_put_eflags(); + } else { + 
emit_swapl_reg_EAX ( reg ); + if (upd_cc) emit_get_eflags(); + emit_nonshiftopb_offregmem_reg ( opcode, off, areg, R_AL ); + if (upd_cc) emit_put_eflags(); + emit_swapl_reg_EAX ( reg ); + } + break; + default: + VG_(panic)("synth_nonshiftop_litmem_reg"); + } +} + + +static void synth_nonshiftop_lit_reg ( Bool upd_cc, + Opcode opcode, Int size, + UInt lit, Int reg ) +{ + switch (size) { + case 4: if (upd_cc) emit_get_eflags(); + emit_nonshiftopv_lit_reg ( 4, opcode, lit, reg ); + if (upd_cc) emit_put_eflags(); + break; + case 2: if (upd_cc) emit_get_eflags(); + emit_nonshiftopv_lit_reg ( 2, opcode, lit, reg ); + if (upd_cc) emit_put_eflags(); + break; + case 1: if (reg < 4) { + if (upd_cc) emit_get_eflags(); + emit_nonshiftopb_lit_reg ( opcode, lit, reg ); + if (upd_cc) emit_put_eflags(); + } else { + emit_swapl_reg_EAX ( reg ); + if (upd_cc) emit_get_eflags(); + emit_nonshiftopb_lit_reg ( opcode, lit, R_AL ); + if (upd_cc) emit_put_eflags(); + emit_swapl_reg_EAX ( reg ); + } + break; + default: VG_(panic)("synth_nonshiftop_lit_reg"); + } +} + + +static void synth_push_reg ( Int size, Int reg ) +{ + switch (size) { + case 4: + emit_pushv_reg ( 4, reg ); + break; + case 2: + emit_pushv_reg ( 2, reg ); + break; + /* Pray that we don't have to generate this really cruddy bit of + code very often. Could do better, but can I be bothered? */ + case 1: + vg_assert(reg != R_ESP); /* duh */ + emit_add_lit_to_esp(-1); + if (reg != R_EAX) emit_swapl_reg_EAX ( reg ); + emit_movb_AL_zeroESPmem(); + if (reg != R_EAX) emit_swapl_reg_EAX ( reg ); + break; + default: + VG_(panic)("synth_push_reg"); + } +} + + +static void synth_pop_reg ( Int size, Int reg ) +{ + switch (size) { + case 4: + emit_popv_reg ( 4, reg ); + break; + case 2: + emit_popv_reg ( 2, reg ); + break; + case 1: + /* Same comment as above applies. 
*/ + vg_assert(reg != R_ESP); /* duh */ + if (reg != R_EAX) emit_swapl_reg_EAX ( reg ); + emit_movb_zeroESPmem_AL(); + if (reg != R_EAX) emit_swapl_reg_EAX ( reg ); + emit_add_lit_to_esp(1); + break; + default: VG_(panic)("synth_pop_reg"); + } +} + + +static void synth_shiftop_reg_reg ( Bool upd_cc, + Opcode opcode, Int size, + Int regs, Int regd ) +{ + synth_push_reg ( size, regd ); + if (regs != R_ECX) emit_swapl_reg_ECX ( regs ); + if (upd_cc) emit_get_eflags(); + switch (size) { + case 4: emit_shiftopv_cl_stack0 ( 4, opcode ); break; + case 2: emit_shiftopv_cl_stack0 ( 2, opcode ); break; + case 1: emit_shiftopb_cl_stack0 ( opcode ); break; + default: VG_(panic)("synth_shiftop_reg_reg"); + } + if (upd_cc) emit_put_eflags(); + if (regs != R_ECX) emit_swapl_reg_ECX ( regs ); + synth_pop_reg ( size, regd ); +} + + +static void synth_shiftop_lit_reg ( Bool upd_cc, + Opcode opcode, Int size, + UInt lit, Int reg ) +{ + switch (size) { + case 4: if (upd_cc) emit_get_eflags(); + emit_shiftopv_lit_reg ( 4, opcode, lit, reg ); + if (upd_cc) emit_put_eflags(); + break; + case 2: if (upd_cc) emit_get_eflags(); + emit_shiftopv_lit_reg ( 2, opcode, lit, reg ); + if (upd_cc) emit_put_eflags(); + break; + case 1: if (reg < 4) { + if (upd_cc) emit_get_eflags(); + emit_shiftopb_lit_reg ( opcode, lit, reg ); + if (upd_cc) emit_put_eflags(); + } else { + emit_swapl_reg_EAX ( reg ); + if (upd_cc) emit_get_eflags(); + emit_shiftopb_lit_reg ( opcode, lit, R_AL ); + if (upd_cc) emit_put_eflags(); + emit_swapl_reg_EAX ( reg ); + } + break; + default: VG_(panic)("synth_nonshiftop_lit_reg"); + } +} + + +static void synth_setb_reg ( Int reg, Condcode cond ) +{ + emit_get_eflags(); + if (reg < 4) { + emit_setb_reg ( reg, cond ); + } else { + emit_swapl_reg_EAX ( reg ); + emit_setb_reg ( R_AL, cond ); + emit_swapl_reg_EAX ( reg ); + } +} + + +static void synth_fpu_regmem ( UChar first_byte, + UChar second_byte_masked, + Int reg ) +{ + emit_get_fpu_state(); + emit_fpu_regmem ( first_byte, 
second_byte_masked, reg ); + emit_put_fpu_state(); +} + + +static void synth_fpu_no_mem ( UChar first_byte, + UChar second_byte ) +{ + emit_get_fpu_state(); + emit_fpu_no_mem ( first_byte, second_byte ); + emit_put_fpu_state(); +} + + +static void synth_movl_reg_reg ( Int src, Int dst ) +{ + emit_movl_reg_reg ( src, dst ); +} + +static void synth_cmovl_reg_reg ( Condcode cond, Int src, Int dst ) +{ + emit_get_eflags(); + emit_jcondshort_delta ( invertCondition(cond), + 2 /* length of the next insn */ ); + emit_movl_reg_reg ( src, dst ); +} + + +/* A word in memory containing a pointer to vg_helper_smc_check4. + Never changes. +*/ +static const Addr vg_helper_smc_check4_ADDR + = (Addr)&VG_(helper_smc_check4); + +static void synth_orig_code_write_check ( Int sz, Int reg ) +{ + UInt offset; + + /* + In this example, reg is %eax and sz == 8: + + -- check the first four bytes + 0087 89C5 movl %eax, %ebp + 0089 FF1544332211 call * 0x11223344 + + -- check the second four + 008f 89C5 movl %eax, %ebp + 0091 83C504 addl $4, %ebp + 0094 FF1544332211 call * 0x11223344 + + Because we can't call an absolute address (alas), the + address called is stored in memory at 0x11223344 in this + example, and it just contains the address of + vg_helper_smc_check4 -- which is where we really want + to get to. + */ + vg_assert(0); + + if (sz < 4) sz = 4; + + for (offset = 0; offset < sz; offset += 4) { + + emit_movl_reg_reg ( reg, R_EBP ); + + if (offset > 0) { + newEmit(); + emitB ( 0x83 ); emitB ( 0xC5 ); emitB ( offset ); + if (dis) VG_(printf)("\n"); + } + + newEmit(); + emitB ( 0xFF ); emitB ( 0x15 ); + emitL ( (Addr)&vg_helper_smc_check4_ADDR ); + if (dis) VG_(printf)("\n"); + } +} + + +/* Synthesise a minimal test (and which discards result) of reg32 + against lit. It's always safe do simply + emit_testv_lit_reg ( 4, lit, reg32 ) + but we try to do better when possible. 
+*/ +static void synth_minimal_test_lit_reg ( UInt lit, Int reg32 ) +{ + if ((lit & 0xFFFFFF00) == 0 && reg32 < 4) { + /* We can get away with a byte insn. */ + emit_testb_lit_reg ( lit, reg32 ); + } + else + if ((lit & 0xFFFF0000) == 0) { + /* Literal fits in 16 bits; do a word insn. */ + emit_testv_lit_reg ( 2, lit, reg32 ); + } + else { + /* Totally general ... */ + emit_testv_lit_reg ( 4, lit, reg32 ); + } +} + + +/*----------------------------------------------------*/ +/*--- Top level of the uinstr -> x86 translation. ---*/ +/*----------------------------------------------------*/ + +/* Return the byte offset from %ebp (ie, into baseBlock) + for the specified ArchReg or SpillNo. */ + +static Int spillOrArchOffset ( Int size, Tag tag, UInt value ) +{ + if (tag == SpillNo) { + vg_assert(size == 4); + vg_assert(value >= 0 && value < VG_MAX_SPILLSLOTS); + return 4 * (value + VGOFF_(spillslots)); + } + if (tag == ArchReg) { + switch (value) { + case R_EAX: return 4 * VGOFF_(m_eax); + case R_ECX: return 4 * VGOFF_(m_ecx); + case R_EDX: return 4 * VGOFF_(m_edx); + case R_EBX: return 4 * VGOFF_(m_ebx); + case R_ESP: + if (size == 1) return 4 * VGOFF_(m_eax) + 1; + else return 4 * VGOFF_(m_esp); + case R_EBP: + if (size == 1) return 4 * VGOFF_(m_ecx) + 1; + else return 4 * VGOFF_(m_ebp); + case R_ESI: + if (size == 1) return 4 * VGOFF_(m_edx) + 1; + else return 4 * VGOFF_(m_esi); + case R_EDI: + if (size == 1) return 4 * VGOFF_(m_ebx) + 1; + else return 4 * VGOFF_(m_edi); + } + } + VG_(panic)("spillOrArchOffset"); +} + + +static Int eflagsOffset ( void ) +{ + return 4 * VGOFF_(m_eflags); +} + + +static Int shadowOffset ( Int arch ) +{ + switch (arch) { + case R_EAX: return 4 * VGOFF_(sh_eax); + case R_ECX: return 4 * VGOFF_(sh_ecx); + case R_EDX: return 4 * VGOFF_(sh_edx); + case R_EBX: return 4 * VGOFF_(sh_ebx); + case R_ESP: return 4 * VGOFF_(sh_esp); + case R_EBP: return 4 * VGOFF_(sh_ebp); + case R_ESI: return 4 * VGOFF_(sh_esi); + case R_EDI: return 4 * 
VGOFF_(sh_edi); + default: VG_(panic)( "shadowOffset"); + } +} + + +static Int shadowFlagsOffset ( void ) +{ + return 4 * VGOFF_(sh_eflags); +} + + +static void synth_LOADV ( Int sz, Int a_reg, Int tv_reg ) +{ + Int i, j, helper_offw; + Int pushed[VG_MAX_REALREGS+2]; + Int n_pushed; + switch (sz) { + case 4: helper_offw = VGOFF_(helperc_LOADV4); break; + case 2: helper_offw = VGOFF_(helperc_LOADV2); break; + case 1: helper_offw = VGOFF_(helperc_LOADV1); break; + default: VG_(panic)("synth_LOADV"); + } + n_pushed = 0; + for (i = 0; i < VG_MAX_REALREGS; i++) { + j = VG_(rankToRealRegNo) ( i ); + if (VG_CALLEE_SAVED(j)) continue; + if (j == tv_reg || j == a_reg) continue; + emit_pushv_reg ( 4, j ); + pushed[n_pushed++] = j; + } + emit_pushv_reg ( 4, a_reg ); + pushed[n_pushed++] = a_reg; + vg_assert(n_pushed <= VG_MAX_REALREGS+1); + + synth_call_baseBlock_method ( False, helper_offw ); + /* Result is in %eax; we need to get it to tv_reg. */ + if (tv_reg != R_EAX) + emit_movv_reg_reg ( 4, R_EAX, tv_reg ); + + while (n_pushed > 0) { + n_pushed--; + if (pushed[n_pushed] == tv_reg) { + emit_add_lit_to_esp ( 4 ); + } else { + emit_popv_reg ( 4, pushed[n_pushed] ); + } + } +} + + +static void synth_STOREV ( Int sz, + Int tv_tag, Int tv_val, + Int a_reg ) +{ + Int i, j, helper_offw; + vg_assert(tv_tag == RealReg || tv_tag == Literal); + switch (sz) { + case 4: helper_offw = VGOFF_(helperc_STOREV4); break; + case 2: helper_offw = VGOFF_(helperc_STOREV2); break; + case 1: helper_offw = VGOFF_(helperc_STOREV1); break; + default: VG_(panic)("synth_STOREV"); + } + for (i = 0; i < VG_MAX_REALREGS; i++) { + j = VG_(rankToRealRegNo) ( i ); + if (VG_CALLEE_SAVED(j)) continue; + if ((tv_tag == RealReg && j == tv_val) || j == a_reg) continue; + emit_pushv_reg ( 4, j ); + } + if (tv_tag == RealReg) { + emit_pushv_reg ( 4, tv_val ); + } else { + if (tv_val == VG_(extend_s_8to32)(tv_val)) + emit_pushl_lit8 ( VG_(extend_s_8to32)(tv_val) ); + else + emit_pushl_lit32(tv_val); + } + 
emit_pushv_reg ( 4, a_reg ); + synth_call_baseBlock_method ( False, helper_offw ); + emit_popv_reg ( 4, a_reg ); + if (tv_tag == RealReg) { + emit_popv_reg ( 4, tv_val ); + } else { + emit_add_lit_to_esp ( 4 ); + } + for (i = VG_MAX_REALREGS-1; i >= 0; i--) { + j = VG_(rankToRealRegNo) ( i ); + if (VG_CALLEE_SAVED(j)) continue; + if ((tv_tag == RealReg && j == tv_val) || j == a_reg) continue; + emit_popv_reg ( 4, j ); + } +} + + +static void synth_WIDEN_signed ( Int sz_src, Int sz_dst, Int reg ) +{ + if (sz_src == 1 && sz_dst == 4) { + emit_shiftopv_lit_reg ( 4, SHL, 24, reg ); + emit_shiftopv_lit_reg ( 4, SAR, 24, reg ); + } + else if (sz_src == 2 && sz_dst == 4) { + emit_shiftopv_lit_reg ( 4, SHL, 16, reg ); + emit_shiftopv_lit_reg ( 4, SAR, 16, reg ); + } + else if (sz_src == 1 && sz_dst == 2) { + emit_shiftopv_lit_reg ( 2, SHL, 8, reg ); + emit_shiftopv_lit_reg ( 2, SAR, 8, reg ); + } + else + VG_(panic)("synth_WIDEN"); +} + + +static void synth_SETV ( Int sz, Int reg ) +{ + UInt val; + switch (sz) { + case 4: val = 0x00000000; break; + case 2: val = 0xFFFF0000; break; + case 1: val = 0xFFFFFF00; break; + case 0: val = 0xFFFFFFFE; break; + default: VG_(panic)("synth_SETV"); + } + emit_movv_lit_reg ( 4, val, reg ); +} + + +static void synth_TESTV ( Int sz, Int tag, Int val ) +{ + vg_assert(tag == ArchReg || tag == RealReg); + if (tag == ArchReg) { + switch (sz) { + case 4: + emit_testv_lit_offregmem ( + 4, 0xFFFFFFFF, shadowOffset(val), R_EBP ); + break; + case 2: + emit_testv_lit_offregmem ( + 4, 0x0000FFFF, shadowOffset(val), R_EBP ); + break; + case 1: + if (val < 4) { + emit_testv_lit_offregmem ( + 4, 0x000000FF, shadowOffset(val), R_EBP ); + } else { + emit_testv_lit_offregmem ( + 4, 0x0000FF00, shadowOffset(val-4), R_EBP ); + } + break; + case 0: + /* should never happen */ + default: + VG_(panic)("synth_TESTV(ArchReg)"); + } + } else { + switch (sz) { + case 4: + /* Works, but holds the entire 32-bit literal, hence + generating a 6-byte insn. 
We want to know if any bits + in the reg are set, but since this is for the full reg, + we might as well compare it against zero, which can be + done with a shorter insn. */ + /* synth_minimal_test_lit_reg ( 0xFFFFFFFF, val ); */ + emit_cmpl_zero_reg ( val ); + break; + case 2: + synth_minimal_test_lit_reg ( 0x0000FFFF, val ); + break; + case 1: + synth_minimal_test_lit_reg ( 0x000000FF, val ); + break; + case 0: + synth_minimal_test_lit_reg ( 0x00000001, val ); + break; + default: + VG_(panic)("synth_TESTV(RealReg)"); + } + } + emit_jcondshort_delta ( CondZ, 3 ); + synth_call_baseBlock_method ( + True, /* needed to guarantee that this insn is indeed 3 bytes long */ + (sz==4 ? VGOFF_(helper_value_check4_fail) + : (sz==2 ? VGOFF_(helper_value_check2_fail) + : sz == 1 ? VGOFF_(helper_value_check1_fail) + : VGOFF_(helper_value_check0_fail))) + ); +} + + +static void synth_GETV ( Int sz, Int arch, Int reg ) +{ + /* VG_(printf)("synth_GETV %d of Arch %s\n", sz, nameIReg(sz, arch)); */ + switch (sz) { + case 4: + emit_movv_offregmem_reg ( 4, shadowOffset(arch), R_EBP, reg ); + break; + case 2: + emit_movzwl_offregmem_reg ( shadowOffset(arch), R_EBP, reg ); + emit_nonshiftopv_lit_reg ( 4, OR, 0xFFFF0000, reg ); + break; + case 1: + if (arch < 4) { + emit_movzbl_offregmem_reg ( shadowOffset(arch), R_EBP, reg ); + } else { + emit_movzbl_offregmem_reg ( shadowOffset(arch-4)+1, R_EBP, reg ); + } + emit_nonshiftopv_lit_reg ( 4, OR, 0xFFFFFF00, reg ); + break; + default: + VG_(panic)("synth_GETV"); + } +} + + +static void synth_PUTV ( Int sz, Int srcTag, UInt lit_or_reg, Int arch ) +{ + if (srcTag == Literal) { + /* PUTV with a Literal is only ever used to set the corresponding + ArchReg to `all valid'. Should really be a kind of SETV. 
*/ + UInt lit = lit_or_reg; + switch (sz) { + case 4: + vg_assert(lit == 0x00000000); + emit_movv_lit_offregmem ( 4, 0x00000000, + shadowOffset(arch), R_EBP ); + break; + case 2: + vg_assert(lit == 0xFFFF0000); + emit_movv_lit_offregmem ( 2, 0x0000, + shadowOffset(arch), R_EBP ); + break; + case 1: + vg_assert(lit == 0xFFFFFF00); + if (arch < 4) { + emit_movb_lit_offregmem ( 0x00, + shadowOffset(arch), R_EBP ); + } else { + emit_movb_lit_offregmem ( 0x00, + shadowOffset(arch-4)+1, R_EBP ); + } + break; + default: + VG_(panic)("synth_PUTV(lit)"); + } + + } else { + + UInt reg; + vg_assert(srcTag == RealReg); + + if (sz == 1 && lit_or_reg >= 4) { + emit_swapl_reg_EAX ( lit_or_reg ); + reg = R_EAX; + } else { + reg = lit_or_reg; + } + + if (sz == 1) vg_assert(reg < 4); + + switch (sz) { + case 4: + emit_movv_reg_offregmem ( 4, reg, + shadowOffset(arch), R_EBP ); + break; + case 2: + emit_movv_reg_offregmem ( 2, reg, + shadowOffset(arch), R_EBP ); + break; + case 1: + if (arch < 4) { + emit_movb_reg_offregmem ( reg, + shadowOffset(arch), R_EBP ); + } else { + emit_movb_reg_offregmem ( reg, + shadowOffset(arch-4)+1, R_EBP ); + } + break; + default: + VG_(panic)("synth_PUTV(reg)"); + } + + if (sz == 1 && lit_or_reg >= 4) { + emit_swapl_reg_EAX ( lit_or_reg ); + } + } +} + + +static void synth_GETVF ( Int reg ) +{ + emit_movv_offregmem_reg ( 4, shadowFlagsOffset(), R_EBP, reg ); + /* paranoia only; should be unnecessary ... */ + /* emit_nonshiftopv_lit_reg ( 4, OR, 0xFFFFFFFE, reg ); */ +} + + +static void synth_PUTVF ( UInt reg ) +{ + emit_movv_reg_offregmem ( 4, reg, shadowFlagsOffset(), R_EBP ); +} + + +static void synth_handle_esp_assignment ( Int reg ) +{ + emit_pushal(); + emit_pushv_reg ( 4, reg ); + synth_call_baseBlock_method ( False, VGOFF_(handle_esp_assignment) ); + emit_add_lit_to_esp ( 4 ); + emit_popal(); +} + + +static void synth_fpu_mem_check_actions ( Bool isWrite, + Int size, Int a_reg ) +{ + Int helper_offw + = isWrite ? 
VGOFF_(fpu_write_check) + : VGOFF_(fpu_read_check); + emit_pushal(); + emit_pushl_lit8 ( size ); + emit_pushv_reg ( 4, a_reg ); + synth_call_baseBlock_method ( False, helper_offw ); + emit_add_lit_to_esp ( 8 ); + emit_popal(); +} + + +#if 0 +/* FixMe. Useful for debugging. */ +void VG_(oink) ( Int n ) +{ + VG_(printf)("OiNk(%d): ", n ); + VG_(show_reg_tags)( &VG_(m_shadow) ); +} + +static void synth_OINK ( Int n ) +{ + emit_pushal(); + emit_movv_lit_reg ( 4, n, R_EBP ); + emit_pushl_reg ( R_EBP ); + emit_movv_lit_reg ( 4, (Addr)&VG_(oink), R_EBP ); + emit_call_reg ( R_EBP ); + emit_add_lit_to_esp ( 4 ); + emit_popal(); +} +#endif + +static void synth_TAG1_op ( VgTagOp op, Int reg ) +{ + switch (op) { + + /* Scheme is + neg %reg -- CF = %reg==0 ? 0 : 1 + sbbl %reg, %reg -- %reg = -CF + or 0xFFFFFFFE, %reg -- invalidate all bits except lowest + */ + case VgT_PCast40: + emit_unaryopv_reg(4, NEG, reg); + emit_nonshiftopv_reg_reg(4, SBB, reg, reg); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFFFE, reg); + break; + case VgT_PCast20: + emit_unaryopv_reg(2, NEG, reg); + emit_nonshiftopv_reg_reg(4, SBB, reg, reg); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFFFE, reg); + break; + case VgT_PCast10: + if (reg >= 4) { + emit_swapl_reg_EAX(reg); + emit_unaryopb_reg(NEG, R_EAX); + emit_swapl_reg_EAX(reg); + } else { + emit_unaryopb_reg(NEG, reg); + } + emit_nonshiftopv_reg_reg(4, SBB, reg, reg); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFFFE, reg); + break; + + /* Scheme is + andl $1, %reg -- %reg is 0 or 1 + negl %reg -- %reg is 0 or 0xFFFFFFFF + and possibly an OR to invalidate unused bits. 
+ */ + case VgT_PCast04: + emit_nonshiftopv_lit_reg(4, AND, 0x00000001, reg); + emit_unaryopv_reg(4, NEG, reg); + break; + case VgT_PCast02: + emit_nonshiftopv_lit_reg(4, AND, 0x00000001, reg); + emit_unaryopv_reg(4, NEG, reg); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg); + break; + case VgT_PCast01: + emit_nonshiftopv_lit_reg(4, AND, 0x00000001, reg); + emit_unaryopv_reg(4, NEG, reg); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, reg); + break; + + /* Scheme is + shl $24, %reg -- make irrelevant bits disappear + negl %reg -- CF = %reg==0 ? 0 : 1 + sbbl %reg, %reg -- %reg = -CF + and possibly an OR to invalidate unused bits. + */ + case VgT_PCast14: + emit_shiftopv_lit_reg(4, SHL, 24, reg); + emit_unaryopv_reg(4, NEG, reg); + emit_nonshiftopv_reg_reg(4, SBB, reg, reg); + break; + case VgT_PCast12: + emit_shiftopv_lit_reg(4, SHL, 24, reg); + emit_unaryopv_reg(4, NEG, reg); + emit_nonshiftopv_reg_reg(4, SBB, reg, reg); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg); + break; + case VgT_PCast11: + emit_shiftopv_lit_reg(4, SHL, 24, reg); + emit_unaryopv_reg(4, NEG, reg); + emit_nonshiftopv_reg_reg(4, SBB, reg, reg); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, reg); + break; + + /* We steal %ebp (a non-allocable reg) as a temporary: + pushl %ebp + movl %reg, %ebp + negl %ebp + orl %ebp, %reg + popl %ebp + This sequence turns out to be correct regardless of the + operation width. + */ + case VgT_Left4: + case VgT_Left2: + case VgT_Left1: + vg_assert(reg != R_EDI); + emit_movv_reg_reg(4, reg, R_EDI); + emit_unaryopv_reg(4, NEG, R_EDI); + emit_nonshiftopv_reg_reg(4, OR, R_EDI, reg); + break; + + /* These are all fairly obvious; do the op and then, if + necessary, invalidate unused bits. 
*/ + case VgT_SWiden14: + emit_shiftopv_lit_reg(4, SHL, 24, reg); + emit_shiftopv_lit_reg(4, SAR, 24, reg); + break; + case VgT_SWiden24: + emit_shiftopv_lit_reg(4, SHL, 16, reg); + emit_shiftopv_lit_reg(4, SAR, 16, reg); + break; + case VgT_SWiden12: + emit_shiftopv_lit_reg(4, SHL, 24, reg); + emit_shiftopv_lit_reg(4, SAR, 24, reg); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg); + break; + case VgT_ZWiden14: + emit_nonshiftopv_lit_reg(4, AND, 0x000000FF, reg); + break; + case VgT_ZWiden24: + emit_nonshiftopv_lit_reg(4, AND, 0x0000FFFF, reg); + break; + case VgT_ZWiden12: + emit_nonshiftopv_lit_reg(4, AND, 0x000000FF, reg); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg); + break; + + default: + VG_(panic)("synth_TAG1_op"); + } +} + + +static void synth_TAG2_op ( VgTagOp op, Int regs, Int regd ) +{ + switch (op) { + + /* UifU is implemented by OR, since 1 means Undefined. */ + case VgT_UifU4: + case VgT_UifU2: + case VgT_UifU1: + case VgT_UifU0: + emit_nonshiftopv_reg_reg(4, OR, regs, regd); + break; + + /* DifD is implemented by AND, since 0 means Defined. */ + case VgT_DifD4: + case VgT_DifD2: + case VgT_DifD1: + emit_nonshiftopv_reg_reg(4, AND, regs, regd); + break; + + /* ImproveAND(value, tags) = value OR tags. + Defined (0) value 0s give defined (0); all other -> undefined (1). + value is in regs; tags is in regd. + Be paranoid and invalidate unused bits; I don't know whether + or not this is actually necessary. */ + case VgT_ImproveAND4_TQ: + emit_nonshiftopv_reg_reg(4, OR, regs, regd); + break; + case VgT_ImproveAND2_TQ: + emit_nonshiftopv_reg_reg(4, OR, regs, regd); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, regd); + break; + case VgT_ImproveAND1_TQ: + emit_nonshiftopv_reg_reg(4, OR, regs, regd); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, regd); + break; + + /* ImproveOR(value, tags) = (not value) OR tags. + Defined (0) value 1s give defined (0); all other -> undefined (1). + value is in regs; tags is in regd. 
+ To avoid trashing value, this is implemented (re de Morgan) as + not (value AND (not tags)) + Be paranoid and invalidate unused bits; I don't know whether + or not this is actually necessary. */ + case VgT_ImproveOR4_TQ: + emit_unaryopv_reg(4, NOT, regd); + emit_nonshiftopv_reg_reg(4, AND, regs, regd); + emit_unaryopv_reg(4, NOT, regd); + break; + case VgT_ImproveOR2_TQ: + emit_unaryopv_reg(4, NOT, regd); + emit_nonshiftopv_reg_reg(4, AND, regs, regd); + emit_unaryopv_reg(4, NOT, regd); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, regd); + break; + case VgT_ImproveOR1_TQ: + emit_unaryopv_reg(4, NOT, regd); + emit_nonshiftopv_reg_reg(4, AND, regs, regd); + emit_unaryopv_reg(4, NOT, regd); + emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, regd); + break; + + default: + VG_(panic)("synth_TAG2_op"); + } +} + +/*----------------------------------------------------*/ +/*--- Generate code for a single UInstr. ---*/ +/*----------------------------------------------------*/ + +static void emitUInstr ( Int i, UInstr* u ) +{ + if (dis) + VG_(ppUInstr)(i, u); + +# if 0 + if (0&& VG_(translations_done) >= 600) { + Bool old_dis = dis; + dis = False; + synth_OINK(i); + dis = old_dis; + } +# endif + + switch (u->opcode) { + + case NOP: case CALLM_S: case CALLM_E: break; + + case INCEIP: { + vg_assert(u->tag1 == Lit16); + emit_addlit8_offregmem ( u->val1, R_EBP, 4 * VGOFF_(m_eip) ); + break; + } + + case LEA1: { + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == RealReg); + emit_lea_litreg_reg ( u->lit32, u->val1, u->val2 ); + break; + } + + case LEA2: { + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == RealReg); + vg_assert(u->tag3 == RealReg); + emit_lea_sib_reg ( u->lit32, u->extra4b, + u->val1, u->val2, u->val3 ); + break; + } + + case WIDEN: { + vg_assert(u->tag1 == RealReg); + if (u->signed_widen) { + synth_WIDEN_signed ( u->extra4b, u->size, u->val1 ); + } else { + /* no need to generate any code. 
*/ + } + break; + } + + case SETV: { + vg_assert(VG_(clo_instrument)); + vg_assert(u->tag1 == RealReg); + synth_SETV ( u->size, u->val1 ); + break; + } + + case STOREV: { + vg_assert(VG_(clo_instrument)); + vg_assert(u->tag1 == RealReg || u->tag1 == Literal); + vg_assert(u->tag2 == RealReg); + synth_STOREV ( u->size, u->tag1, + u->tag1==Literal ? u->lit32 : u->val1, + u->val2 ); + break; + } + + case STORE: { + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == RealReg); + synth_mov_reg_memreg ( u->size, u->val1, u->val2 ); + if (u->smc_check) + synth_orig_code_write_check ( u->size, u->val2 ); + break; + } + + case LOADV: { + vg_assert(VG_(clo_instrument)); + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == RealReg); + if (0 && VG_(clo_instrument)) + emit_AMD_prefetch_reg ( u->val1 ); + synth_LOADV ( u->size, u->val1, u->val2 ); + break; + } + + case LOAD: { + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == RealReg); + synth_mov_regmem_reg ( u->size, u->val1, u->val2 ); + break; + } + + case TESTV: { + vg_assert(VG_(clo_instrument)); + vg_assert(u->tag1 == RealReg || u->tag1 == ArchReg); + synth_TESTV(u->size, u->tag1, u->val1); + break; + } + + case GETV: { + vg_assert(VG_(clo_instrument)); + vg_assert(u->tag1 == ArchReg); + vg_assert(u->tag2 == RealReg); + synth_GETV(u->size, u->val1, u->val2); + break; + } + + case GETVF: { + vg_assert(VG_(clo_instrument)); + vg_assert(u->tag1 == RealReg); + vg_assert(u->size == 0); + synth_GETVF(u->val1); + break; + } + + case PUTV: { + vg_assert(VG_(clo_instrument)); + vg_assert(u->tag1 == RealReg || u->tag1 == Literal); + vg_assert(u->tag2 == ArchReg); + synth_PUTV(u->size, u->tag1, + u->tag1==Literal ? 
u->lit32 : u->val1, + u->val2 ); + break; + } + + case PUTVF: { + vg_assert(VG_(clo_instrument)); + vg_assert(u->tag1 == RealReg); + vg_assert(u->size == 0); + synth_PUTVF(u->val1); + break; + } + + case GET: { + vg_assert(u->tag1 == ArchReg || u->tag1 == SpillNo); + vg_assert(u->tag2 == RealReg); + synth_mov_offregmem_reg ( + u->size, + spillOrArchOffset( u->size, u->tag1, u->val1 ), + R_EBP, + u->val2 + ); + break; + } + + case PUT: { + vg_assert(u->tag2 == ArchReg || u->tag2 == SpillNo); + vg_assert(u->tag1 == RealReg); + if (u->tag2 == ArchReg + && u->val2 == R_ESP + && u->size == 4 + && VG_(clo_instrument)) { + synth_handle_esp_assignment ( u->val1 ); + } + synth_mov_reg_offregmem ( + u->size, + u->val1, + spillOrArchOffset( u->size, u->tag2, u->val2 ), + R_EBP + ); + break; + } + + case GETF: { + vg_assert(u->size == 2 || u->size == 4); + vg_assert(u->tag1 == RealReg); + synth_mov_offregmem_reg ( + u->size, + eflagsOffset(), + R_EBP, + u->val1 + ); + break; + } + + case PUTF: { + vg_assert(u->size == 2 || u->size == 4); + vg_assert(u->tag1 == RealReg); + synth_mov_reg_offregmem ( + u->size, + u->val1, + eflagsOffset(), + R_EBP + ); + break; + } + + case MOV: { + vg_assert(u->tag1 == RealReg || u->tag1 == Literal); + vg_assert(u->tag2 == RealReg); + switch (u->tag1) { + case RealReg: vg_assert(u->size == 4); + if (u->val1 != u->val2) + synth_movl_reg_reg ( u->val1, u->val2 ); + break; + case Literal: synth_mov_lit_reg ( u->size, u->lit32, u->val2 ); + break; + default: VG_(panic)("emitUInstr:mov"); + } + break; + } + + case SBB: + case ADC: + case XOR: + case OR: + case AND: + case SUB: + case ADD: { + vg_assert(u->tag2 == RealReg); + switch (u->tag1) { + case Literal: synth_nonshiftop_lit_reg ( + VG_(anyFlagUse)(u), + u->opcode, u->size, u->lit32, u->val2 ); + break; + case RealReg: synth_nonshiftop_reg_reg ( + VG_(anyFlagUse)(u), + u->opcode, u->size, u->val1, u->val2 ); + break; + case ArchReg: synth_nonshiftop_offregmem_reg ( + VG_(anyFlagUse)(u), + 
u->opcode, u->size, + spillOrArchOffset( u->size, u->tag1, u->val1 ), + R_EBP, + u->val2 ); + break; + default: VG_(panic)("emitUInstr:non-shift-op"); + } + break; + } + + case RCR: + case RCL: + case ROR: + case ROL: + case SAR: + case SHR: + case SHL: { + vg_assert(u->tag2 == RealReg); + switch (u->tag1) { + case Literal: synth_shiftop_lit_reg ( + VG_(anyFlagUse)(u), + u->opcode, u->size, u->lit32, u->val2 ); + break; + case RealReg: synth_shiftop_reg_reg ( + VG_(anyFlagUse)(u), + u->opcode, u->size, u->val1, u->val2 ); + break; + default: VG_(panic)("emitUInstr:non-shift-op"); + } + break; + } + + case INC: + case DEC: + case NEG: + case NOT: + vg_assert(u->tag1 == RealReg); + synth_unaryop_reg ( + VG_(anyFlagUse)(u), u->opcode, u->size, u->val1 ); + break; + + case BSWAP: + vg_assert(u->tag1 == RealReg); + vg_assert(u->size == 4); + vg_assert(!VG_(anyFlagUse)(u)); + emit_bswapl_reg ( u->val1 ); + break; + + case CMOV: + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == RealReg); + vg_assert(u->cond != CondAlways); + vg_assert(u->size == 4); + synth_cmovl_reg_reg ( u->cond, u->val1, u->val2 ); + break; + + case JMP: { + vg_assert(u->tag2 == NoValue); + vg_assert(u->tag1 == RealReg || u->tag1 == Literal); + if (u->cond == CondAlways) { + if (u->tag1 == RealReg) { + synth_jmp_reg ( u->val1, u->ret_dispatch, u->call_dispatch ); + } else { + vg_assert(!u->ret_dispatch); + if (u->call_dispatch) + synth_jmp_lit_call_dispatch ( + u->tag1==Literal ? u->lit32 : u->val1 ); + else + synth_jmp_lit ( + u->tag1==Literal ? u->lit32 : u->val1 ); + } + } else { + if (u->tag1 == RealReg) { + VG_(panic)("emitUInstr: conditional jump to reg"); + } else { + vg_assert(!u->ret_dispatch); + vg_assert(!u->call_dispatch); + synth_jcond_lit ( u->cond, + u->tag1==Literal ? 
u->lit32 : u->val1 ); + } + } + break; + } + + case JIFZ: + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == Literal); + vg_assert(u->size == 4); + synth_jmp_ifzero_reg_lit ( u->val1, u->lit32 ); + break; + + case TAG1: + synth_TAG1_op ( u->val3, u->val1 ); + break; + + case TAG2: + if (u->val3 != VgT_DebugFn) { + synth_TAG2_op ( u->val3, u->val1, u->val2 ); + } else { + /* Assume a call to VgT_DebugFn passing both args + and placing the result back in the second. */ + Int j, k; + /* u->val2 is the reg into which the result is written. So + don't save/restore it. And it can be used at a temp for + the call target, too. Since %eax is used for the return + value from the C procedure, it is preserved only by + virtue of not being mentioned as a VG_CALLEE_SAVED reg. */ + for (k = 0; k < VG_MAX_REALREGS; k++) { + j = VG_(rankToRealRegNo) ( k ); + if (VG_CALLEE_SAVED(j)) continue; + if (j == u->val2) continue; + emit_pushv_reg ( 4, j ); + } + emit_pushv_reg(4, u->val2); + emit_pushv_reg(4, u->val1); + emit_movv_lit_reg ( 4, (UInt)(&VG_(DebugFn)), u->val2 ); + emit_call_reg ( u->val2 ); + if (u->val2 != R_EAX) + emit_movv_reg_reg ( 4, R_EAX, u->val2 ); + /* nuke args */ + emit_add_lit_to_esp(8); + for (k = VG_MAX_REALREGS-1; k >= 0; k--) { + j = VG_(rankToRealRegNo) ( k ); + if (VG_CALLEE_SAVED(j)) continue; + if (j == u->val2) continue; + emit_popv_reg ( 4, j ); + } + } + break; + + case PUSH: + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == NoValue); + emit_pushv_reg ( 4, u->val1 ); + break; + + case POP: + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == NoValue); + emit_popv_reg ( 4, u->val1 ); + break; + + case CALLM: + vg_assert(u->tag1 == Lit16); + vg_assert(u->tag2 == NoValue); + vg_assert(u->size == 0); + if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) + emit_get_eflags(); + synth_call_baseBlock_method ( False, u->val1 ); + if (u->flags_w != FlagsEmpty) + emit_put_eflags(); + break; + + case CLEAR: + vg_assert(u->tag1 == Lit16); + 
vg_assert(u->tag2 == NoValue); + emit_add_lit_to_esp ( u->val1 ); + break; + + case CC2VAL: + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == NoValue); + vg_assert(VG_(anyFlagUse)(u)); + synth_setb_reg ( u->val1, u->cond ); + break; + + /* We assume that writes to memory done by FPU_Ws are not going + to be used to create new code, so there's no orig-code-write + checks done by default. */ + case FPU_R: + case FPU_W: + vg_assert(u->tag1 == Lit16); + vg_assert(u->tag2 == RealReg); + if (VG_(clo_instrument)) + synth_fpu_mem_check_actions ( + u->opcode==FPU_W, u->size, u->val2 ); + synth_fpu_regmem ( (u->val1 >> 8) & 0xFF, + u->val1 & 0xFF, + u->val2 ); + if (u->opcode == FPU_W && u->smc_check) + synth_orig_code_write_check ( u->size, u->val2 ); + break; + + case FPU: + vg_assert(u->tag1 == Lit16); + vg_assert(u->tag2 == NoValue); + synth_fpu_no_mem ( (u->val1 >> 8) & 0xFF, + u->val1 & 0xFF ); + break; + + default: + VG_(printf)("emitUInstr: unhandled insn:\n"); + VG_(ppUInstr)(0,u); + VG_(panic)("emitUInstr: unimplemented opcode"); + } + +} + + +/* Emit x86 for the ucode in cb, returning the address of the + generated code and setting *nbytes to its size. */ +UChar* VG_(emit_code) ( UCodeBlock* cb, Int* nbytes ) +{ + Int i; + emitted_code_used = 0; + emitted_code_size = 500; /* reasonable initial size */ + emitted_code = VG_(jitmalloc)(emitted_code_size); + + if (dis) VG_(printf)("Generated code:\n"); + + for (i = 0; i < cb->used; i++) { + if (cb->instrs[i].opcode != NOP) { + UInstr* u = &cb->instrs[i]; +# if 1 + /* Check on the sanity of this insn. */ + Bool sane = VG_(saneUInstr)( False, u ); + if (!sane) { + VG_(printf)("\ninsane instruction\n"); + VG_(ppUInstr)( i, u ); + } + vg_assert(sane); +# endif +# if 0 + /* Pass args to TAG1/TAG2 to vg_DebugFn for sanity checking. + Requires a suitable definition of vg_DebugFn. 
*/ + if (u->opcode == TAG1) { + UInstr t1; + vg_assert(u->tag1 == RealReg); + VG_(emptyUInstr)( &t1 ); + t1.opcode = TAG2; + t1.tag1 = t1.tag2 = RealReg; + t1.val1 = t1.val2 = u->val1; + t1.tag3 = Lit16; + t1.val3 = VgT_DebugFn; + emitUInstr( i, &t1 ); + } + if (u->opcode == TAG2) { + UInstr t1; + vg_assert(u->tag1 == RealReg); + vg_assert(u->tag2 == RealReg); + VG_(emptyUInstr)( &t1 ); + t1.opcode = TAG2; + t1.tag1 = t1.tag2 = RealReg; + t1.val1 = t1.val2 = u->val1; + t1.tag3 = Lit16; + t1.val3 = VgT_DebugFn; + if (u->val3 == VgT_UifU1 || u->val3 == VgT_UifU2 + || u->val3 == VgT_UifU4 || u->val3 == VgT_DifD1 + || u->val3 == VgT_DifD2 || u->val3 == VgT_DifD4) + emitUInstr( i, &t1 ); + t1.val1 = t1.val2 = u->val2; + emitUInstr( i, &t1 ); + } +# endif + emitUInstr( i, u ); + } + } + + /* Returns a pointer to the emitted code. This will have to be + copied by the caller into the translation cache, and then freed + using VG_(jitfree). */ + *nbytes = emitted_code_used; + return emitted_code; +} + +/*--------------------------------------------------------------------*/ +/*--- end vg_from_ucode.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_helpers.S b/vg_helpers.S new file mode 100644 index 000000000..781175d49 --- /dev/null +++ b/vg_helpers.S @@ -0,0 +1,625 @@ + +##--------------------------------------------------------------------## +##--- Support routines for the JITter output. ---## +##--- vg_helpers.S ---## +##--------------------------------------------------------------------## + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. 
+ + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_constants.h" + + +/* Various helper routines, for instructions which are just too + darn tedious for the JITter to output code in-line: + + * integer division + * integer multiplication + * setting and getting obscure eflags + * double-length shifts + + All routines use a standard calling convention designed for + calling from translations, in which the incoming args are + underneath the return address, the callee saves _all_ registers, + and the incoming parameters can be modified, to return results. 
+*/ + + +/* + On entry: + %ECX value + %EBX value + %EAX value -- also the result + RA <- %esp -- after pushal+pushfl is 36(%esp) +*/ +.global VG_(helper_do_client_request) +VG_(helper_do_client_request): + pushal + pushfl + + movl 48(%esp), %eax + pushl %eax + movl 48(%esp), %eax + pushl %eax + movl 48(%esp), %eax + pushl %eax + + call VG_(handle_client_request) + movl %eax, 52(%esp) + + addl $12, %esp + + popfl + popal + ret + + +.global VG_(helper_do_syscall) +VG_(helper_do_syscall): + pushal + call VG_(wrap_syscall) + popal +# movl $VG_(baseBlock), %ebp + ret + + + +.global VG_(helper_value_check0_fail) +VG_(helper_value_check0_fail): + pushal + call VG_(helperc_value_check0_fail) + popal + ret + +.global VG_(helper_value_check1_fail) +VG_(helper_value_check1_fail): + pushal + call VG_(helperc_value_check1_fail) + popal + ret + +.global VG_(helper_value_check2_fail) +VG_(helper_value_check2_fail): + pushal + call VG_(helperc_value_check2_fail) + popal + ret + +.global VG_(helper_value_check4_fail) +VG_(helper_value_check4_fail): + pushal + call VG_(helperc_value_check4_fail) + popal + ret + + +/* Set things up so the dispatch loop exits normally. Used when it is + detected that the program wants to finish, ie it has called + vg_shutdown. +*/ +.global VG_(helper_request_normal_exit) +VG_(helper_request_normal_exit): + pushl %eax + movl VG_(dispatch_ctr), %eax + movl %eax, VG_(dispatch_ctr_SAVED) + movl $1, VG_(dispatch_ctr) + movl $VG_Y_EXIT, VG_(interrupt_reason) + popl %eax + ret + + +/* Do a original-code-write check for the address in %ebp. 
*/ +.global VG_(helper_smc_check4) +VG_(helper_smc_check4): +#if VG_SMC_FASTCHECK_IN_C + + # save the live regs + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + pushl %esi + pushl %edi + + pushl %ebp + call VG_(smc_check4) + addl $4, %esp + + popl %edi + popl %esi + popl %edx + popl %ecx + popl %ebx + popl %eax + + ret +#else + incl VG_(smc_total_check4s) + pushl %ebp + shrl $VG_SMC_CACHE_SHIFT, %ebp + andl $VG_SMC_CACHE_MASK, %ebp + cmpb $0, VG_(smc_cache)(%ebp) + jnz vg_smc_cache_failure + addl $4, %esp + ret + vg_smc_cache_failure: + popl %ebp + pushal + pushl %ebp + call VG_(smc_check4) + addl $4, %esp + popal + ret +#endif + + +/* Fetch the time-stamp-ctr reg. + On entry: + dummy, replaced by %EAX value + dummy, replaced by %EDX value + RA <- %esp +*/ +.global VG_(helper_RDTSC) +VG_(helper_RDTSC): + pushl %eax + pushl %edx + rdtsc + movl %edx, 12(%esp) + movl %eax, 16(%esp) + popl %edx + popl %eax + ret + + +/* Do the CPUID instruction. + On entry: + dummy, replaced by %EAX value + dummy, replaced by %EBX value + dummy, replaced by %ECX value + dummy, replaced by %EDX value + RA <- %esp + + As emulating a real CPUID is kinda hard, as it + has to return different values depending on EAX, + we just pretend to not support CPUID at all until + it becomes a problem. This will for sure disable + all MMX / 3dnow checks so they don't bother us + with code we don't understand. (Dirk ) + + http://www.sandpile.org/ia32/cpuid.htm + + (Later: we instead pretend to be like Werner's P54C P133, that is + an original pre-MMX Pentium). 
+ cpuid words (0): 0x1 0x756e6547 0x6c65746e 0x49656e69 + cpuid words (1): 0x52b 0x0 0x0 0x1bf +*/ +.global VG_(helper_CPUID) +VG_(helper_CPUID): + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 32(%esp), %eax +/* + cpuid +*/ +/* + xor %eax,%eax + xor %ebx,%ebx + xor %ecx,%ecx + xor %edx,%edx +*/ + cmpl $0, %eax + jz cpuid__0 + movl $0x52b, %eax + movl $0x0, %ebx + movl $0x0, %ecx + movl $0x1bf, %edx + jmp cpuid__99 +cpuid__0: + movl $0x1, %eax + movl $0x756e6547, %ebx + movl $0x6c65746e, %ecx + movl $0x49656e69, %edx +cpuid__99: + + movl %edx, 20(%esp) + movl %ecx, 24(%esp) + movl %ebx, 28(%esp) + movl %eax, 32(%esp) + popl %edx + popl %ecx + popl %ebx + popl %eax + ret + + +/* Fetch the FPU status register. + On entry: + dummy, replaced by result + RA <- %esp +*/ +.global VG_(helper_fstsw_AX) +VG_(helper_fstsw_AX): + pushl %eax + pushl %esi + movl VGOFF_(m_fpustate), %esi + frstor (%ebp, %esi, 4) + fstsw %ax + popl %esi + movw %ax, 8(%esp) + popl %eax + ret + + +/* Copy %ah into %eflags. + On entry: + value of %eax + RA <- %esp +*/ +.global VG_(helper_SAHF) +VG_(helper_SAHF): + pushl %eax + movl 8(%esp), %eax + sahf + popl %eax + ret + + +/* Bit scan forwards/reverse. Sets flags (??). + On entry: + value, replaced by result + RA <- %esp +*/ +.global VG_(helper_bsr) +VG_(helper_bsr): + pushl %eax + bsrl 8(%esp), %eax + movl %eax, 8(%esp) + popl %eax + ret + +.global VG_(helper_bsf) +VG_(helper_bsf): + pushl %eax + bsfl 8(%esp), %eax + movl %eax, 8(%esp) + popl %eax + ret + + +/* Bit test and set/reset/complement. Sets flags. 
+ On entry: + src + dst + RA <- %esp +*/ +.global VG_(helper_bt) +VG_(helper_bt): + pushl %eax + movl 12(%esp), %eax + btl %eax, 8(%esp) + popl %eax + ret +.global VG_(helper_bts) +VG_(helper_bts): + pushl %eax + movl 12(%esp), %eax + btsl %eax, 8(%esp) + popl %eax + ret +.global VG_(helper_btr) +VG_(helper_btr): + pushl %eax + movl 12(%esp), %eax + btrl %eax, 8(%esp) + popl %eax + ret +.global VG_(helper_btc) +VG_(helper_btc): + pushl %eax + movl 12(%esp), %eax + btcl %eax, 8(%esp) + popl %eax + ret + + +/* 32-bit double-length shift left/right. + On entry: + amount + src + dst + RA <- %esp +*/ +.global VG_(helper_shldl) +VG_(helper_shldl): + pushl %eax + pushl %ebx + pushl %ecx + + movb 24(%esp), %cl + movl 20(%esp), %ebx + movl 16(%esp), %eax + shldl %cl, %ebx, %eax + movl %eax, 16(%esp) + + popl %ecx + popl %ebx + popl %eax + ret + +.global VG_(helper_shldw) +VG_(helper_shldw): + pushl %eax + pushl %ebx + pushl %ecx + + movb 24(%esp), %cl + movw 20(%esp), %bx + movw 16(%esp), %ax + shldw %cl, %bx, %ax + movw %ax, 16(%esp) + + popl %ecx + popl %ebx + popl %eax + ret + +.global VG_(helper_shrdl) +VG_(helper_shrdl): + pushl %eax + pushl %ebx + pushl %ecx + + movb 24(%esp), %cl + movl 20(%esp), %ebx + movl 16(%esp), %eax + shrdl %cl, %ebx, %eax + movl %eax, 16(%esp) + + popl %ecx + popl %ebx + popl %eax + ret + +.global VG_(helper_shrdw) +VG_(helper_shrdw): + pushl %eax + pushl %ebx + pushl %ecx + + movb 24(%esp), %cl + movw 20(%esp), %bx + movw 16(%esp), %ax + shrdw %cl, %bx, %ax + movw %ax, 16(%esp) + + popl %ecx + popl %ebx + popl %eax + ret + + +/* Get the direction flag, and return either 1 or -1. */ +.global VG_(helper_get_dirflag) +VG_(helper_get_dirflag): + pushfl + pushl %eax + + pushfl + popl %eax + shrl $10, %eax + andl $1, %eax + jnz L1 + movl $1, %eax + jmp L2 +L1: movl $-1, %eax +L2: movl %eax, 12(%esp) + + popl %eax + popfl + ret + + +/* Clear/set the direction flag. 
*/ +.global VG_(helper_CLD) +VG_(helper_CLD): + cld + ret + +.global VG_(helper_STD) +VG_(helper_STD): + std + ret + + + +/* Signed 32-to-64 multiply. */ +.globl VG_(helper_imul_32_64) +VG_(helper_imul_32_64): + pushl %eax + pushl %edx + movl 16(%esp), %eax + imull 12(%esp) + movl %eax, 16(%esp) + movl %edx, 12(%esp) + popl %edx + popl %eax + ret + +/* Signed 16-to-32 multiply. */ +.globl VG_(helper_imul_16_32) +VG_(helper_imul_16_32): + pushl %eax + pushl %edx + movw 16(%esp), %ax + imulw 12(%esp) + movw %ax, 16(%esp) + movw %dx, 12(%esp) + popl %edx + popl %eax + ret + +/* Signed 8-to-16 multiply. */ +.globl VG_(helper_imul_8_16) +VG_(helper_imul_8_16): + pushl %eax + pushl %edx + movb 16(%esp), %al + imulb 12(%esp) + movw %ax, 16(%esp) + popl %edx + popl %eax + ret + + + + + + +/* Unsigned 32-to-64 multiply. */ +.globl VG_(helper_mul_32_64) +VG_(helper_mul_32_64): + pushl %eax + pushl %edx + movl 16(%esp), %eax + mull 12(%esp) + movl %eax, 16(%esp) + movl %edx, 12(%esp) + popl %edx + popl %eax + ret + +/* Unsigned 16-to-32 multiply. */ +.globl VG_(helper_mul_16_32) +VG_(helper_mul_16_32): + pushl %eax + pushl %edx + movw 16(%esp), %ax + mulw 12(%esp) + movw %ax, 16(%esp) + movw %dx, 12(%esp) + popl %edx + popl %eax + ret + +/* Unsigned 8-to-16 multiply. */ +.globl VG_(helper_mul_8_16) +VG_(helper_mul_8_16): + pushl %eax + pushl %edx + movb 16(%esp), %al + mulb 12(%esp) + movw %ax, 16(%esp) + popl %edx + popl %eax + ret + + + + +/* Unsigned 64-into-32 divide. */ +.globl VG_(helper_div_64_32) +VG_(helper_div_64_32): + pushl %eax + pushl %edx + movl 16(%esp),%eax + movl 12(%esp),%edx + divl 20(%esp) + movl %eax,16(%esp) + movl %edx,12(%esp) + popl %edx + popl %eax + ret + +/* Signed 64-into-32 divide. */ +.globl VG_(helper_idiv_64_32) +VG_(helper_idiv_64_32): + pushl %eax + pushl %edx + movl 16(%esp),%eax + movl 12(%esp),%edx + idivl 20(%esp) + movl %eax,16(%esp) + movl %edx,12(%esp) + popl %edx + popl %eax + ret + +/* Unsigned 32-into-16 divide. 
*/ +.globl VG_(helper_div_32_16) +VG_(helper_div_32_16): + pushl %eax + pushl %edx + movw 16(%esp),%ax + movw 12(%esp),%dx + divw 20(%esp) + movw %ax,16(%esp) + movw %dx,12(%esp) + popl %edx + popl %eax + ret + +/* Signed 32-into-16 divide. */ +.globl VG_(helper_idiv_32_16) +VG_(helper_idiv_32_16): + pushl %eax + pushl %edx + movw 16(%esp),%ax + movw 12(%esp),%dx + idivw 20(%esp) + movw %ax,16(%esp) + movw %dx,12(%esp) + popl %edx + popl %eax + ret + +/* Unsigned 16-into-8 divide. */ +.globl VG_(helper_div_16_8) +VG_(helper_div_16_8): + pushl %eax + movw 12(%esp),%ax + divb 16(%esp) + movb %ah,12(%esp) + movb %al,8(%esp) + popl %eax + ret + +/* Signed 16-into-8 divide. */ +.globl VG_(helper_idiv_16_8) +VG_(helper_idiv_16_8): + pushl %eax + movw 12(%esp),%ax + idivb 16(%esp) + movb %ah,12(%esp) + movb %al,8(%esp) + popl %eax + ret + + +##--------------------------------------------------------------------## +##--- end vg_helpers.S ---## +##--------------------------------------------------------------------## diff --git a/vg_include.h b/vg_include.h new file mode 100644 index 000000000..83d6eae01 --- /dev/null +++ b/vg_include.h @@ -0,0 +1,1452 @@ + +/*--------------------------------------------------------------------*/ +/*--- A header file for all parts of Valgrind. ---*/ +/*--- Include no other! ---*/ +/*--- vg_include.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#ifndef __VG_INCLUDE_H +#define __VG_INCLUDE_H + + +#include /* ANSI varargs stuff */ +#include /* for jmp_buf */ + + +/* --------------------------------------------------------------------- + Build options and table sizes. You should be able to change these + options or sizes, recompile, and still have a working system. + ------------------------------------------------------------------ */ + +#include "vg_constants.h" + + +/* Set to 1 to enable time profiling. Since this uses SIGPROF, we + don't want this permanently enabled -- only for profiling + builds. */ +#if 0 +# define VG_PROFILE +#endif + + +/* Total number of integer registers available for allocation. That's + all of them except %esp, %edi and %ebp. %edi is a general spare + temporary. %ebp permanently points at VG_(baseBlock). Note that + it's important that this tie in with what rankToRealRegNo() says. + DO NOT CHANGE THIS VALUE FROM 5. ! */ +#define VG_MAX_REALREGS 5 + +/* Total number of spill slots available for allocation, if a TempReg + doesn't make it into a RealReg. Just bomb the entire system if + this value is too small; we don't expect it will ever get + particularly high. */ +#define VG_MAX_SPILLSLOTS 24 + + +/* Constants for the slow translation lookup cache. */ +#define VG_TRANSTAB_SLOW_BITS 11 +#define VG_TRANSTAB_SLOW_SIZE (1 << VG_TRANSTAB_SLOW_BITS) +#define VG_TRANSTAB_SLOW_MASK ((VG_TRANSTAB_SLOW_SIZE) - 1) + +/* Size of a buffer used for creating messages. 
*/ +#define M_VG_MSGBUF 10000 + +/* Size of a smallish table used to read /proc/self/map entries. */ +#define M_PROCMAP_BUF 20000 + +/* Max length of pathname to a .so/executable file. */ +#define M_VG_LIBNAMESTR 100 + +/* Max length of a text fragment used to construct error messages. */ +#define M_VG_ERRTXT 512 + +/* Max length of the string copied from env var VG_ARGS at startup. */ +#define M_VG_CMDLINE_STRLEN 1000 + +/* Max number of options for Valgrind which we can handle. */ +#define M_VG_CMDLINE_OPTS 100 + +/* After this many different unsuppressed errors have been observed, + be more conservative about collecting new ones. */ +#define M_VG_COLLECT_ERRORS_SLOWLY_AFTER 50 + +/* After this many different unsuppressed errors have been observed, + stop collecting errors at all, and tell the user their program is + evidently a steaming pile of camel dung. */ +#define M_VG_COLLECT_NO_ERRORS_AFTER 500 + +/* These many bytes below %ESP are considered addressible if we're + doing the --workaround-gcc296-bugs hack. */ +#define VG_GCC296_BUG_STACK_SLOP 256 + +/* The maximum number of calls we're prepared to save in a + backtrace. */ +#define VG_DEEPEST_BACKTRACE 50 + +/* Number of lists in which we keep track of malloc'd but not free'd + blocks. Should be prime. */ +#define VG_N_MALLOCLISTS 997 + +/* Number of lists in which we keep track of ExeContexts. Should be + prime. 
*/ +#define VG_N_EC_LISTS /*997*/ 4999 + + +/* --------------------------------------------------------------------- + Basic types + ------------------------------------------------------------------ */ + +typedef unsigned char UChar; +typedef unsigned short UShort; +typedef unsigned int UInt; +typedef unsigned long long int ULong; + +typedef signed char Char; +typedef signed short Short; +typedef signed int Int; +typedef signed long long int Long; + +typedef unsigned int Addr; + +typedef unsigned char Bool; +#define False ((Bool)0) +#define True ((Bool)1) + +#define mycat_wrk(aaa,bbb) aaa##bbb +#define mycat(aaa,bbb) mycat_wrk(aaa,bbb) + +/* Just pray that gcc's constant folding works properly ... */ +#define BITS(bit7,bit6,bit5,bit4,bit3,bit2,bit1,bit0) \ + ( ((bit7) << 7) | ((bit6) << 6) | ((bit5) << 5) | ((bit4) << 4) \ + | ((bit3) << 3) | ((bit2) << 2) | ((bit1) << 1) | (bit0)) + + +/* --------------------------------------------------------------------- + Now the basic types are set up, we can haul in the kernel-interface + definitions. + ------------------------------------------------------------------ */ + +#include "./vg_kerneliface.h" + + +/* --------------------------------------------------------------------- + Command-line-settable options + ------------------------------------------------------------------ */ + +#define VG_CLO_SMC_NONE 0 +#define VG_CLO_SMC_SOME 1 +#define VG_CLO_SMC_ALL 2 + +#define VG_CLO_MAX_SFILES 10 + +/* Enquire about whether to attach to GDB at errors? default: NO */ +extern Bool VG_(clo_GDB_attach); +/* Sanity-check level: 0 = none, 1 (default), > 1 = expensive. */ +extern Int VG_(sanity_level); +/* Verbosity level: 0 = silent, 1 (default), > 1 = more verbose. */ +extern Int VG_(clo_verbosity); +/* Automatically attempt to demangle C++ names? default: YES */ +extern Bool VG_(clo_demangle); +/* Do leak check at exit? default: NO */ +extern Bool VG_(clo_leak_check); +/* In leak check, show reachable-but-not-freed blocks? 
default: NO */ +extern Bool VG_(clo_show_reachable); +/* How closely should we compare ExeContexts in leak records? default: 2 */ +extern Int VG_(clo_leak_resolution); +/* Round malloc sizes upwards to integral number of words? default: + NO */ +extern Bool VG_(clo_sloppy_malloc); +/* Allow loads from partially-valid addresses? default: YES */ +extern Bool VG_(clo_partial_loads_ok); +/* Simulate child processes? default: NO */ +extern Bool VG_(clo_trace_children); +/* The file id on which we send all messages. default: 2 (stderr). */ +extern Int VG_(clo_logfile_fd); +/* Max volume of the freed blocks queue. */ +extern Int VG_(clo_freelist_vol); +/* Assume accesses immediately below %esp are due to gcc-2.96 bugs. + default: NO */ +extern Bool VG_(clo_workaround_gcc296_bugs); + +/* The number of suppression files specified. */ +extern Int VG_(clo_n_suppressions); +/* The names of the suppression files. */ +extern Char* VG_(clo_suppressions)[VG_CLO_MAX_SFILES]; + +/* Single stepping? default: NO */ +extern Bool VG_(clo_single_step); +/* Code improvement? default: YES */ +extern Bool VG_(clo_optimise); +/* Memory-check instrumentation? default: YES */ +extern Bool VG_(clo_instrument); +/* DEBUG: clean up instrumented code? default: YES */ +extern Bool VG_(clo_cleanup); +/* Handle client memory-range-permissions-setting requests? default: NO */ +extern Bool VG_(clo_client_perms); +/* SMC write checks? default: SOME (1,2,4 byte movs to mem) */ +extern Int VG_(clo_smc_check); +/* DEBUG: print system calls? default: NO */ +extern Bool VG_(clo_trace_syscalls); +/* DEBUG: print signal details? default: NO */ +extern Bool VG_(clo_trace_signals); +/* DEBUG: print symtab details? default: NO */ +extern Bool VG_(clo_trace_symtab); +/* DEBUG: print malloc details? default: NO */ +extern Bool VG_(clo_trace_malloc); +/* Stop after this many basic blocks. default: Infinity. */ +extern ULong VG_(clo_stop_after); +/* Display gory details for the k'th most popular error. 
default: + Infinity. */ +extern Int VG_(clo_dump_error); +/* Number of parents of a backtrace. Default: 8. */ +extern Int VG_(clo_backtrace_size); + + +/* --------------------------------------------------------------------- + Debugging and profiling stuff + ------------------------------------------------------------------ */ + +/* No, really. I _am_ that strange. */ +#define OINK(nnn) VG_(message)(Vg_DebugMsg, "OINK %d",nnn) + +/* Tools for building messages from multiple parts. */ +typedef + enum { Vg_UserMsg, Vg_DebugMsg, Vg_DebugExtraMsg } + VgMsgKind; + +extern void VG_(start_msg) ( VgMsgKind kind ); +extern void VG_(add_to_msg) ( Char* format, ... ); +extern void VG_(end_msg) ( void ); + +/* Send a simple, single-part message. */ +extern void VG_(message) ( VgMsgKind kind, Char* format, ... ); + +/* Create a logfile into which messages can be dumped. */ +extern void VG_(startup_logging) ( void ); +extern void VG_(shutdown_logging) ( void ); + + +/* Profiling stuff */ +#ifdef VG_PROFILE + +#define VGP_M_STACK 10 + +#define VGP_M_CCS 20 /* == the # of elems in VGP_LIST */ +#define VGP_LIST \ + VGP_PAIR(VgpRun=0, "running"), \ + VGP_PAIR(VgpMalloc, "low-lev malloc/free"), \ + VGP_PAIR(VgpCliMalloc, "client malloc/free"), \ + VGP_PAIR(VgpTranslate, "translate-main"), \ + VGP_PAIR(VgpToUCode, "to-ucode"), \ + VGP_PAIR(VgpFromUcode, "from-ucode"), \ + VGP_PAIR(VgpImprove, "improve"), \ + VGP_PAIR(VgpInstrument, "instrument"), \ + VGP_PAIR(VgpCleanup, "cleanup"), \ + VGP_PAIR(VgpRegAlloc, "reg-alloc"), \ + VGP_PAIR(VgpDoLRU, "do-lru"), \ + VGP_PAIR(VgpSlowFindT, "slow-search-transtab"), \ + VGP_PAIR(VgpInitAudit, "init-mem-audit"), \ + VGP_PAIR(VgpExeContext, "exe-context"), \ + VGP_PAIR(VgpReadSyms, "read-syms"), \ + VGP_PAIR(VgpAddToT, "add-to-transtab"), \ + VGP_PAIR(VgpSARP, "set-addr-range-perms"), \ + VGP_PAIR(VgpSyscall, "syscall wrapper"), \ + VGP_PAIR(VgpSpare1, "spare 1"), \ + VGP_PAIR(VgpSpare2, "spare 2") + +#define VGP_PAIR(enumname,str) enumname 
+typedef enum { VGP_LIST } VgpCC; +#undef VGP_PAIR + +extern void VGP_(init_profiling) ( void ); +extern void VGP_(done_profiling) ( void ); +extern void VGP_(pushcc) ( VgpCC ); +extern void VGP_(popcc) ( void ); + +#define VGP_PUSHCC(cc) VGP_(pushcc)(cc) +#define VGP_POPCC VGP_(popcc)() + +#else + +#define VGP_PUSHCC(cc) /* */ +#define VGP_POPCC /* */ + +#endif /* VG_PROFILE */ + + +/* --------------------------------------------------------------------- + Exports of vg_malloc2.c + ------------------------------------------------------------------ */ + +/* Allocation arenas. + SYMTAB is for Valgrind's symbol table storage. + CLIENT is for the client's mallocs/frees. + DEMANGLE is for the C++ demangler. + EXECTXT is for storing ExeContexts. + ERRCTXT is for storing ErrContexts. + PRIVATE is for Valgrind general stuff. + TRANSIENT is for very short-term use. It should be empty + in between uses. + When adding a new arena, remember also to add it + to ensure_mm_init(). +*/ +typedef Int ArenaId; + +#define VG_N_ARENAS 7 + +#define VG_AR_PRIVATE 0 /* :: ArenaId */ +#define VG_AR_SYMTAB 1 /* :: ArenaId */ +#define VG_AR_CLIENT 2 /* :: ArenaId */ +#define VG_AR_DEMANGLE 3 /* :: ArenaId */ +#define VG_AR_EXECTXT 4 /* :: ArenaId */ +#define VG_AR_ERRCTXT 5 /* :: ArenaId */ +#define VG_AR_TRANSIENT 6 /* :: ArenaId */ + +extern void* VG_(malloc) ( ArenaId arena, Int nbytes ); +extern void VG_(free) ( ArenaId arena, void* ptr ); +extern void* VG_(calloc) ( ArenaId arena, Int nmemb, Int nbytes ); +extern void* VG_(realloc) ( ArenaId arena, void* ptr, Int size ); +extern void* VG_(malloc_aligned) ( ArenaId aid, Int req_alignB, + Int req_pszB ); + +extern void VG_(mallocSanityCheckArena) ( ArenaId arena ); +extern void VG_(mallocSanityCheckAll) ( void ); + +extern void VG_(show_all_arena_stats) ( void ); +extern Bool VG_(is_empty_arena) ( ArenaId aid ); + + +/* The red-zone size for the client. This can be arbitrary, but + unfortunately must be set at compile time. 
*/ +#define VG_AR_CLIENT_REDZONE_SZW 4 + +#define VG_AR_CLIENT_REDZONE_SZB \ + (VG_AR_CLIENT_REDZONE_SZW * VKI_BYTES_PER_WORD) + + +/* --------------------------------------------------------------------- + Exports of vg_signals.c + ------------------------------------------------------------------ */ + +/* The maximum number of basic blocks that we're prepared to run in a + signal handler which is called when the client is stuck in a + blocking system call. The purpose of this is to check that such a + signal handler doesn't merely do a longjmp() and keep going + forever; it should return instead. NOTE that this doesn't apply to + signals delivered under normal conditions, only when they are + delivered and the client is already blocked in a system call. */ +#define VG_MAX_BBS_IN_IMMEDIATE_SIGNAL 50000 + +extern void VG_(sigstartup_actions) ( void ); + +extern void VG_(deliver_signals) ( void ); +extern void VG_(unblock_host_signal) ( Int sigNo ); + + +/* Fake system calls for signal handling. */ +extern void VG_(do__NR_sigaction) ( void ); +extern void VG_(do__NR_sigprocmask) ( Int how, vki_ksigset_t* set ); + + + + +/* --------------------------------------------------------------------- + Exports of vg_mylibc.c + ------------------------------------------------------------------ */ + + +#define NULL ((void*)0) + +extern void VG_(exit)( Int status ) + __attribute__ ((__noreturn__)); + +extern void VG_(printf) ( const char *format, ... ); +/* too noisy ... __attribute__ ((format (printf, 1, 2))) ; */ + +extern void VG_(sprintf) ( Char* buf, Char *format, ... 
); + +extern void VG_(vprintf) ( void(*send)(Char), + const Char *format, va_list vargs ); + +extern Bool VG_(isspace) ( Char c ); + +extern Int VG_(strlen) ( const Char* str ); + +extern Long VG_(atoll) ( Char* str ); + +extern Char* VG_(strcat) ( Char* dest, const Char* src ); +extern Char* VG_(strncat) ( Char* dest, const Char* src, Int n ); +extern Char* VG_(strpbrk) ( const Char* s, const Char* accept ); + +extern Char* VG_(strcpy) ( Char* dest, const Char* src ); + +extern Int VG_(strcmp) ( const Char* s1, const Char* s2 ); +extern Int VG_(strcmp_ws) ( const Char* s1, const Char* s2 ); + +extern Int VG_(strncmp) ( const Char* s1, const Char* s2, Int nmax ); +extern Int VG_(strncmp_ws) ( const Char* s1, const Char* s2, Int nmax ); + +extern Char* VG_(strstr) ( const Char* haystack, Char* needle ); +extern Char* VG_(strchr) ( const Char* s, Char c ); +extern Char* VG_(strdup) ( ArenaId aid, const Char* s); + +extern Char* VG_(getenv) ( Char* name ); +extern Int VG_(getpid) ( void ); + + +extern Char VG_(toupper) ( Char c ); + +extern void VG_(strncpy_safely) ( Char* dest, const Char* src, Int ndest ); + +extern void VG_(strncpy) ( Char* dest, const Char* src, Int ndest ); + +extern Bool VG_(stringMatch) ( Char* pat, Char* str ); + + +#define __STRING(x) #x + +/* Asserts are permanently enabled. Hurrah! */ +#define vg_assert(expr) \ + ((void) ((expr) ? 0 : \ + (VG_(assert_fail) (__STRING(expr), \ + __FILE__, __LINE__, \ + __PRETTY_FUNCTION__), 0))) + +extern void VG_(assert_fail) ( Char* expr, Char* file, + Int line, Char* fn ) + __attribute__ ((__noreturn__)); + +/* Later ... extern void vg_restore_SIGABRT ( void ); */ + +/* Reading files. */ +extern Int VG_(open_read) ( Char* pathname ); +extern void VG_(close) ( Int fd ); +extern Int VG_(read) ( Int fd, void* buf, Int count); +extern Int VG_(write) ( Int fd, void* buf, Int count); + +/* mmap-ery ... 
*/ +extern void* VG_(mmap)( void* start, UInt length, + UInt prot, UInt flags, UInt fd, UInt offset ); + +extern Int VG_(munmap)( void* start, Int length ); + + +/* Print a (panic) message, and abort. */ +extern void VG_(panic) ( Char* str ) + __attribute__ ((__noreturn__)); + +/* Get memory by anonymous mmap. */ +void* VG_(get_memory_from_mmap) ( Int nBytes ); + +/* Signal stuff. Note that these use the vk_ (kernel) structure + definitions, which are different in places from those that glibc + defines. Since we're operating right at the kernel interface, + glibc's view of the world is entirely irrelevant. */ +extern Int VG_(ksigfillset)( vki_ksigset_t* set ); +extern Int VG_(ksigemptyset)( vki_ksigset_t* set ); +extern Int VG_(ksigaddset)( vki_ksigset_t* set, Int signum ); + +extern Int VG_(ksigprocmask)( Int how, const vki_ksigset_t* set, + vki_ksigset_t* oldset ); +extern Int VG_(ksigaction) ( Int signum, + const vki_ksigaction* act, + vki_ksigaction* oldact ); +extern Int VG_(ksigismember) ( vki_ksigset_t* set, Int signum ); + +extern Int VG_(ksignal)(Int signum, void (*sighandler)(Int)); + +extern Int VG_(ksigaltstack)( const vki_kstack_t* ss, vki_kstack_t* oss ); + + + +/* --------------------------------------------------------------------- + Definitions for the JITter (vg_translate.c, vg_to_ucode.c, + vg_from_ucode.c). + ------------------------------------------------------------------ */ + +/* Tags which describe what operands are. */ +typedef + enum { TempReg=0, ArchReg=1, RealReg=2, + SpillNo=3, Literal=4, Lit16=5, + NoValue=6 } + Tag; + + +/* Microinstruction opcodes. */ +typedef + enum { + NOP, + GET, + PUT, + LOAD, + STORE, + MOV, + CMOV, /* Used for cmpxchg and cmov */ + WIDEN, + JMP, + + /* Read/write the %EFLAGS register into a TempReg. 
*/ + GETF, PUTF, + + ADD, ADC, AND, OR, XOR, SUB, SBB, + SHL, SHR, SAR, ROL, ROR, RCL, RCR, + NOT, NEG, INC, DEC, BSWAP, + CC2VAL, + + /* Not strictly needed, but useful for making better + translations of address calculations. */ + LEA1, /* reg2 := const + reg1 */ + LEA2, /* reg3 := const + reg1 + reg2 * 1,2,4 or 8 */ + + /* not for translating x86 calls -- only to call helpers */ + CALLM_S, CALLM_E, /* Mark start and end of push/pop sequences + for CALLM. */ + PUSH, POP, CLEAR, /* Add/remove/zap args for helpers. */ + CALLM, /* call to a machine-code helper */ + + /* Hack for translating string (REP-) insns. Jump to literal if + TempReg/RealReg is zero. */ + JIFZ, + + /* FPU ops which read/write mem or don't touch mem at all. */ + FPU_R, + FPU_W, + FPU, + + /* Advance the simulated %eip by some small (< 128) number. */ + INCEIP, + + /* uinstrs which are not needed for mere translation of x86 code, + only for instrumentation of it. */ + LOADV, + STOREV, + GETV, + PUTV, + TESTV, + SETV, + /* Get/set the v-bit (and it is only one bit) for the simulated + %eflags register. */ + GETVF, + PUTVF, + + /* Do a unary or binary tag op. Only for post-instrumented + code. For TAG1, first and only arg is a TempReg, and is both + arg and result reg. For TAG2, first arg is src, second is + dst, in the normal way; both are TempRegs. In both cases, + 3rd arg is a RiCHelper with a Lit16 tag. This indicates + which tag op to do. */ + TAG1, + TAG2 + } + Opcode; + + +/* Condition codes, observing the Intel encoding. CondAlways is an + extra. 
*/ +typedef + enum { + CondO = 0, /* overflow */ + CondNO = 1, /* no overflow */ + CondB = 2, /* below */ + CondNB = 3, /* not below */ + CondZ = 4, /* zero */ + CondNZ = 5, /* not zero */ + CondBE = 6, /* below or equal */ + CondNBE = 7, /* not below or equal */ + CondS = 8, /* negative */ + ConsNS = 9, /* not negative */ + CondP = 10, /* parity even */ + CondNP = 11, /* not parity even */ + CondL = 12, /* jump less */ + CondNL = 13, /* not less */ + CondLE = 14, /* less or equal */ + CondNLE = 15, /* not less or equal */ + CondAlways = 16 /* Jump always */ + } + Condcode; + + +/* Flags. User-level code can only read/write O(verflow), S(ign), + Z(ero), A(ux-carry), C(arry), P(arity), and may also write + D(irection). That's a total of 7 flags. A FlagSet is a bitset, + thusly: + 76543210 + DOSZACP + and bit 7 must always be zero since it is unused. +*/ +typedef UChar FlagSet; + +#define FlagD (1<<6) +#define FlagO (1<<5) +#define FlagS (1<<4) +#define FlagZ (1<<3) +#define FlagA (1<<2) +#define FlagC (1<<1) +#define FlagP (1<<0) + +#define FlagsOSZACP (FlagO | FlagS | FlagZ | FlagA | FlagC | FlagP) +#define FlagsOSZAP (FlagO | FlagS | FlagZ | FlagA | FlagP) +#define FlagsOSZCP (FlagO | FlagS | FlagZ | FlagC | FlagP) +#define FlagsOSACP (FlagO | FlagS | FlagA | FlagC | FlagP) +#define FlagsSZACP ( FlagS | FlagZ | FlagA | FlagC | FlagP) +#define FlagsSZAP ( FlagS | FlagZ | FlagA | FlagP) +#define FlagsOC (FlagO | FlagC ) + +#define FlagsALL (FlagsOSZACP | FlagD) +#define FlagsEmpty (FlagSet)0 + +#define VG_IS_FLAG_SUBSET(set1,set2) \ + (( ((FlagSet)set1) & ((FlagSet)set2) ) == ((FlagSet)set1) ) + +#define VG_UNION_FLAG_SETS(set1,set2) \ + ( ((FlagSet)set1) | ((FlagSet)set2) ) + + + +/* A Micro (u)-instruction. 
*/ +typedef + struct { + /* word 1 */ + UInt lit32; /* 32-bit literal */ + + /* word 2 */ + UShort val1; /* first operand */ + UShort val2; /* second operand */ + + /* word 3 */ + UShort val3; /* third operand */ + UChar opcode; /* opcode */ + UChar size; /* data transfer size */ + + /* word 4 */ + FlagSet flags_r; /* :: FlagSet */ + FlagSet flags_w; /* :: FlagSet */ + UChar tag1:4; /* first operand tag */ + UChar tag2:4; /* second operand tag */ + UChar tag3:4; /* third operand tag */ + UChar extra4b:4; /* Spare field, used by WIDEN for src + -size, and by LEA2 for scale + (1,2,4 or 8) */ + + /* word 5 */ + UChar cond; /* condition, for jumps */ + Bool smc_check:1; /* do a smc test, if writes memory. */ + Bool signed_widen:1; /* signed or unsigned WIDEN ? */ + Bool ret_dispatch:1; /* Is this jump as a result of RET ? */ + Bool call_dispatch:1; /* Is this jump as a result of CALL ? */ + } + UInstr; + + +/* Expandable arrays of uinstrs. */ +typedef + struct { + Int used; + Int size; + UInstr* instrs; + Int nextTemp; + } + UCodeBlock; + +/* Refer to `the last instruction stuffed in', including as an + lvalue. 
*/ +#define LAST_UINSTR(cb) (cb)->instrs[(cb)->used-1] + +/* An invalid temporary number :-) */ +#define INVALID_TEMPREG 999999999 + + +/* --------------------------------------------------------------------- + Exports of vg_demangle.c + ------------------------------------------------------------------ */ + +extern void VG_(demangle) ( Char* orig, Char* result, Int result_size ); + + +/* --------------------------------------------------------------------- + Exports of vg_from_ucode.c + ------------------------------------------------------------------ */ + +extern UChar* VG_(emit_code) ( UCodeBlock* cb, Int* nbytes ); + + +/* --------------------------------------------------------------------- + Exports of vg_to_ucode.c + ------------------------------------------------------------------ */ + +extern Int VG_(disBB) ( UCodeBlock* cb, Addr eip0 ); +extern Char* VG_(nameOfIntReg) ( Int size, Int reg ); +extern Char VG_(nameOfIntSize) ( Int size ); +extern UInt VG_(extend_s_8to32) ( UInt x ); +extern Int VG_(getNewTemp) ( UCodeBlock* cb ); +extern Int VG_(getNewShadow) ( UCodeBlock* cb ); + +#define SHADOW(tempreg) ((tempreg)+1) + + +/* --------------------------------------------------------------------- + Exports of vg_translate.c + ------------------------------------------------------------------ */ + +extern void VG_(translate) ( Addr orig_addr, + UInt* orig_size, + Addr* trans_addr, + UInt* trans_size ); + +extern void VG_(emptyUInstr) ( UInstr* u ); +extern void VG_(newUInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz ); +extern void VG_(newUInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz, + Tag tag1, UInt val1 ); +extern void VG_(newUInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz, + Tag tag1, UInt val1, + Tag tag2, UInt val2 ); +extern void VG_(newUInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz, + Tag tag1, UInt val1, + Tag tag2, UInt val2, + Tag tag3, UInt val3 ); +extern void VG_(setFlagRW) ( UInstr* u, + FlagSet fr, FlagSet fw ); + +extern void 
VG_(setLiteralField) ( UCodeBlock* cb, UInt lit32 ); +extern Bool VG_(anyFlagUse) ( UInstr* u ); + + + +extern void VG_(ppUInstr) ( Int instrNo, UInstr* u ); +extern void VG_(ppUCodeBlock) ( UCodeBlock* cb, Char* title ); + +extern Char* VG_(nameCondcode) ( Condcode cond ); +extern Bool VG_(saneUInstr) ( Bool beforeRA, UInstr* u ); +extern Bool VG_(saneUCodeBlock) ( UCodeBlock* cb ); +extern Char* VG_(nameUOpcode) ( Bool upper, Opcode opc ); +extern Int VG_(rankToRealRegNo) ( Int rank ); + +extern void* VG_(jitmalloc) ( Int nbytes ); +extern void VG_(jitfree) ( void* ptr ); + + +/* --------------------------------------------------------------------- + Exports of vg_execontext.c. + ------------------------------------------------------------------ */ + +/* Records the PC and a bit of the call chain. The first 4 %eip + values are used in comparisons do remove duplicate errors, and for + comparing against suppression specifications. The rest are purely + informational (but often important). */ + +typedef + struct _ExeContextRec { + struct _ExeContextRec * next; + /* The size of this array is VG_(clo_backtrace_size); at least + 2, at most VG_DEEPEST_BACKTRACE. [0] is the current %eip, + [1] is its caller, [2] is the caller of [1], etc. */ + Addr eips[0]; + } + ExeContext; + + +/* Initialise the ExeContext storage mechanism. */ +extern void VG_(init_ExeContext_storage) ( void ); + +/* Print stats (informational only). */ +extern void VG_(show_ExeContext_stats) ( void ); + + +/* Take a snapshot of the client's stack. Search our collection of + ExeContexts to see if we already have it, and if not, allocate a + new one. Either way, return a pointer to the context. */ +extern ExeContext* VG_(get_ExeContext) ( Bool skip_top_frame ); + +/* Print an ExeContext. */ +extern void VG_(pp_ExeContext) ( ExeContext* ); + +/* Compare two ExeContexts, just comparing the top two callers. 
*/ +extern Bool VG_(eq_ExeContext_top2) ( ExeContext* e1, ExeContext* e2 ); + +/* Compare two ExeContexts, just comparing the top four callers. */ +extern Bool VG_(eq_ExeContext_top4) ( ExeContext* e1, ExeContext* e2 ); + +/* Compare two ExeContexts, comparing all callers. */ +extern Bool VG_(eq_ExeContext_all) ( ExeContext* e1, ExeContext* e2 ); + + + +/* --------------------------------------------------------------------- + Exports of vg_errcontext.c. + ------------------------------------------------------------------ */ + +extern void VG_(load_suppressions) ( void ); +extern void VG_(show_all_errors) ( void ); +extern void VG_(record_value_error) ( Int size ); +extern void VG_(record_free_error) ( Addr a ); +extern void VG_(record_freemismatch_error) ( Addr a ); +extern void VG_(record_address_error) ( Addr a, Int size, + Bool isWrite ); +extern void VG_(record_jump_error) ( Addr a ); +extern void VG_(record_param_err) ( Addr a, + Bool isWriteLack, + Char* msg ); +extern void VG_(record_user_err) ( Addr a, Bool isWriteLack ); + + +/* The classification of a faulting address. */ +typedef + enum { Stack, Unknown, Freed, Mallocd, UserG, UserS } + AddrKind; + +/* Records info about a faulting address. 
*/ +typedef + struct { + /* ALL */ + AddrKind akind; + /* Freed, Mallocd */ + Int blksize; + /* Freed, Mallocd */ + Int rwoffset; + /* Freed, Mallocd */ + ExeContext* lastchange; + } + AddrInfo; + + +/* --------------------------------------------------------------------- + Exports of vg_clientperms.c + ------------------------------------------------------------------ */ + +extern Bool VG_(client_perm_maybe_describe)( Addr a, AddrInfo* ai ); + +extern UInt VG_(handle_client_request) ( UInt code, Addr aa, UInt nn ); + +extern void VG_(delete_client_stack_blocks_following_ESP_change) ( void ); + +extern void VG_(show_client_block_stats) ( void ); + + +/* --------------------------------------------------------------------- + Exports of vg_procselfmaps.c + ------------------------------------------------------------------ */ + +extern +void VG_(read_procselfmaps) ( + void (*record_mapping)( Addr, UInt, Char, Char, Char, UInt, UChar* ) +); + + +/* --------------------------------------------------------------------- + Exports of vg_symtab2.c + ------------------------------------------------------------------ */ + +/* We assume the executable is loaded here ... can't really find + out. There is a hacky sanity check in vg_init_memory_audit() + which should trip up most stupidities. +*/ +#define VG_ASSUMED_EXE_BASE (Addr)0x8048000 + +extern void VG_(read_symbols) ( void ); +extern void VG_(mini_stack_dump) ( ExeContext* ec ); +extern void VG_(what_obj_and_fun_is_this) + ( Addr a, + Char* obj_buf, Int n_obj_buf, + Char* fun_buf, Int n_fun_buf ); + +extern void VG_(symtab_notify_munmap) ( Addr start, UInt length ); + + +/* --------------------------------------------------------------------- + Exports of vg_clientmalloc.c + ------------------------------------------------------------------ */ + +/* these numbers are not arbitary. 
if you change them, + adjust vg_dispatch.S as well */ + +typedef + enum { + Vg_AllocMalloc = 0, + Vg_AllocNew = 1, + Vg_AllocNewVec = 2 + } + VgAllocKind; + +/* Description of a malloc'd chunk. */ +typedef + struct _ShadowChunk { + struct _ShadowChunk* next; + ExeContext* where; /* where malloc'd/free'd */ + UInt size : 30; /* size requested. */ + VgAllocKind allockind : 2; /* which wrapper did the allocation */ + Addr data; /* ptr to actual block. */ + } + ShadowChunk; + +extern void VG_(clientmalloc_done) ( void ); +extern void VG_(describe_addr) ( Addr a, AddrInfo* ai ); +extern ShadowChunk** VG_(get_malloc_shadows) ( /*OUT*/ UInt* n_shadows ); + +/* This should never be called; if it is, something's seriously + wrong. */ +extern UInt VG_(trap_here) ( UInt arg1, UInt arg2, UInt what_to_do ); + + +/* --------------------------------------------------------------------- + Exports of vg_main.c + ------------------------------------------------------------------ */ + +/* How big is the saved FPU state? */ +#define VG_SIZE_OF_FPUSTATE 108 +/* ... and in words ... */ +#define VG_SIZE_OF_FPUSTATE_W ((VG_SIZE_OF_FPUSTATE+3)/4) + +/* A structure used as an intermediary when passing the simulated + CPU's state to some assembly fragments, particularly system calls. + Stuff is copied from baseBlock to here, the assembly magic runs, + and then the inverse copy is done. */ + +extern UInt VG_(m_state_static) [8 /* int regs, in Intel order */ + + 1 /* %eflags */ + + 1 /* %eip */ + + VG_SIZE_OF_FPUSTATE_W /* FPU state */ + ]; + +/* Handy fns for doing the copy back and forth. */ +extern void VG_(copy_baseBlock_to_m_state_static) ( void ); +extern void VG_(copy_m_state_static_to_baseBlock) ( void ); + +/* Create, and add to TT/TC, the translation of a client basic + block. */ +extern void VG_(create_translation_for) ( Addr orig_addr ); + +/* Called when some unhandleable client behaviour is detected. + Prints a msg and aborts. 
*/ +extern void VG_(unimplemented) ( Char* msg ); + +/* The stack on which Valgrind runs. We can't use the same stack as the + simulatee -- that's an important design decision. */ +extern UInt VG_(stack)[10000]; + +/* Similarly, we have to ask for signals to be delivered on an + alternative stack, since it is possible, although unlikely, that + we'll have to run client code from inside the Valgrind-installed + signal handler. If this happens it will be done by + vg_deliver_signal_immediately(). */ +extern UInt VG_(sigstack)[10000]; + + +/* vg_oursignalhandler() might longjmp(). Here's the jmp_buf. */ +extern jmp_buf VG_(toploop_jmpbuf); +/* ... and if so, here's the signal which caused it to do so. */ +extern Int VG_(longjmpd_on_signal); + +/* Holds client's %esp at the point we gained control. From this the + client's argc, argv and envp are deduced. */ +extern Addr VG_(esp_at_startup); +extern Int VG_(client_argc); +extern Char** VG_(client_argv); +extern Char** VG_(client_envp); + +/* Remove valgrind.so from a LD_PRELOAD=... string so child processes + don't get traced into. */ +extern void VG_(mash_LD_PRELOAD_string)( Char* ld_preload_str ); + +/* Something of a function looking for a home ... start up GDB. This + is called from VG_(swizzle_esp_then_start_GDB) and so runs on the + *client's* stack. This is necessary to give GDB the illusion that + the client program really was running on the real cpu. */ +extern void VG_(start_GDB_whilst_on_client_stack) ( void ); + +/* Spew out vast amounts of junk during JITting? */ +extern Bool VG_(disassemble); + +/* 64-bit counter for the number of basic blocks done. */ +extern ULong VG_(bbs_done); +/* 64-bit counter for the number of bbs to go before a debug exit. */ +extern ULong VG_(bbs_to_go); + +/* Counts downwards in vg_run_innerloop. */ +extern UInt VG_(dispatch_ctr); + +/* If vg_dispatch_ctr is set to 1 to force a stop, its + previous value is saved here. 
*/ +extern UInt VG_(dispatch_ctr_SAVED); + +/* This is why vg_run_innerloop() exited. */ +extern UInt VG_(interrupt_reason); + +/* Is the client running on the simulated CPU or the real one? */ +extern Bool VG_(running_on_simd_CPU); /* Initially False */ + +/* The current LRU epoch. */ +extern UInt VG_(current_epoch); + + +/* --- Counters, for informational purposes only. --- */ + +/* Number of lookups which miss the fast tt helper. */ +extern UInt VG_(tt_fast_misses); + +/* Counts for LRU informational messages. */ + +/* Number and total o/t size of new translations this epoch. */ +extern UInt VG_(this_epoch_in_count); +extern UInt VG_(this_epoch_in_osize); +extern UInt VG_(this_epoch_in_tsize); +/* Number and total o/t size of discarded translations this epoch. */ +extern UInt VG_(this_epoch_out_count); +extern UInt VG_(this_epoch_out_osize); +extern UInt VG_(this_epoch_out_tsize); +/* Number and total o/t size of translations overall. */ +extern UInt VG_(overall_in_count); +extern UInt VG_(overall_in_osize); +extern UInt VG_(overall_in_tsize); +/* Number and total o/t size of discards overall. */ +extern UInt VG_(overall_out_count); +extern UInt VG_(overall_out_osize); +extern UInt VG_(overall_out_tsize); + +/* The number of LRU-clearings of TT/TC. */ +extern UInt VG_(number_of_lrus); + +/* Counts pertaining to the register allocator. */ + +/* total number of uinstrs input to reg-alloc */ +extern UInt VG_(uinstrs_prealloc); + +/* total number of uinstrs added due to spill code */ +extern UInt VG_(uinstrs_spill); + +/* number of bbs requiring spill code */ +extern UInt VG_(translations_needing_spill); + +/* total of register ranks over all translations */ +extern UInt VG_(total_reg_rank); + +/* Counts pertaining to the self-modifying-code detection machinery. */ + +/* Total number of writes checked. */ +//extern UInt VG_(smc_total_check4s); + +/* Number of writes which the fast smc check couldn't show were + harmless. 
*/ +extern UInt VG_(smc_cache_passed); + +/* Numnber of writes which really did write on original code. */ +extern UInt VG_(smc_fancy_passed); + +/* Number of translations discarded as a result. */ +//extern UInt VG_(smc_discard_count); + +/* Counts pertaining to internal sanity checking. */ +extern UInt VG_(sanity_fast_count); +extern UInt VG_(sanity_slow_count); + + +/* --------------------------------------------------------------------- + Exports of vg_memory.c + ------------------------------------------------------------------ */ + +extern void VGM_(init_memory_audit) ( void ); +extern Addr VGM_(curr_dataseg_end); +extern void VG_(show_reg_tags) ( void ); +extern void VG_(detect_memory_leaks) ( void ); +extern void VG_(done_prof_mem) ( void ); + +/* Set permissions for an address range. Not speed-critical. */ +extern void VGM_(make_noaccess) ( Addr a, UInt len ); +extern void VGM_(make_writable) ( Addr a, UInt len ); +extern void VGM_(make_readable) ( Addr a, UInt len ); +/* Use with care! (read: use for shmat only) */ +extern void VGM_(make_readwritable) ( Addr a, UInt len ); +extern void VGM_(copy_address_range_perms) ( Addr src, Addr dst, + UInt len ); + +/* Check permissions for an address range. Not speed-critical. */ +extern Bool VGM_(check_writable) ( Addr a, UInt len, Addr* bad_addr ); +extern Bool VGM_(check_readable) ( Addr a, UInt len, Addr* bad_addr ); +extern Bool VGM_(check_readable_asciiz) ( Addr a, Addr* bad_addr ); + +/* Sanity checks which may be done at any time. Doing them at + signal-delivery time turns out to be convenient. */ +extern void VG_(do_sanity_checks) ( Bool force_expensive ); +/* Very cheap ... */ +extern Bool VG_(first_and_last_secondaries_look_plausible) ( void ); + +/* These functions are called from generated code. 
*/ +extern void VG_(helperc_STOREV4) ( UInt, Addr ); +extern void VG_(helperc_STOREV2) ( UInt, Addr ); +extern void VG_(helperc_STOREV1) ( UInt, Addr ); + +extern UInt VG_(helperc_LOADV1) ( Addr ); +extern UInt VG_(helperc_LOADV2) ( Addr ); +extern UInt VG_(helperc_LOADV4) ( Addr ); + +extern void VGM_(handle_esp_assignment) ( Addr new_espA ); +extern void VGM_(fpu_write_check) ( Addr addr, Int size ); +extern void VGM_(fpu_read_check) ( Addr addr, Int size ); + +/* Safely (avoiding SIGSEGV / SIGBUS) scan the entire valid address + space and pass the addresses and values of all addressible, + defined, aligned words to notify_word. This is the basis for the + leak detector. Returns the number of calls made to notify_word. */ +UInt VG_(scan_all_valid_memory) ( void (*notify_word)( Addr, UInt ) ); + +/* Is this address within some small distance below %ESP? Used only + for the --workaround-gcc296-bugs kludge. */ +extern Bool VG_(is_just_below_ESP)( Addr aa ); + +/* Nasty kludgery to deal with applications which switch stacks, + like netscape. */ +#define VG_STACK_STARTS_AT 0xC0000000 +#define VG_PLAUSIBLE_STACK_SIZE 8000000 + +extern Bool VG_(is_plausible_stack_addr) ( Addr ); + + +/* --------------------------------------------------------------------- + Exports of vg_syscall_mem.c + ------------------------------------------------------------------ */ + +/* Counts the depth of nested syscalls. Is used in + VG_(deliver_signals) do discover whether or not the client is in a + syscall (presumably _blocked_ in a syscall) when a signal is + delivered. If so, the signal delivery mechanism needs to behave + differently from normal. 
*/ +extern Int VG_(syscall_depth); + +extern void VG_(wrap_syscall) ( void ); + +extern Bool VG_(is_kerror) ( Int res ); + +#define KERNEL_DO_SYSCALL(result_lvalue) \ + VG_(copy_baseBlock_to_m_state_static)(); \ + VG_(do_syscall)(); \ + VG_(copy_m_state_static_to_baseBlock)(); \ + result_lvalue = VG_(baseBlock)[VGOFF_(m_eax)]; + + +/* --------------------------------------------------------------------- + Exports of vg_transtab.c + ------------------------------------------------------------------ */ + +/* An entry in the translation table (TT). */ +typedef + struct { + /* +0 */ Addr orig_addr; + /* +4 */ Addr trans_addr; + /* +8 */ UInt mru_epoch; + /* +12 */ UShort orig_size; + /* +14 */ UShort trans_size; + } + TTEntry; + +/* The number of basic blocks in an epoch (one age-step). */ +#define VG_BBS_PER_EPOCH 20000 + +extern void VG_(get_tt_tc_used) ( UInt* tt_used, UInt* tc_used ); +extern void VG_(maybe_do_lru_pass) ( void ); +extern void VG_(flush_transtab) ( void ); +extern Addr VG_(copy_to_transcache) ( Addr trans_addr, Int trans_size ); +extern void VG_(add_to_trans_tab) ( TTEntry* tte ); + +extern void VG_(smc_mark_original) ( Addr original_addr, + Int original_len ); + +extern void VG_(init_transtab_and_SMC) ( void ); + +extern void VG_(sanity_check_tc_tt) ( void ); +extern Addr VG_(search_transtab) ( Addr original_addr ); + +extern void VG_(invalidate_tt_fast)( void ); + + +/* --------------------------------------------------------------------- + Exports of vg_vtagops.c + ------------------------------------------------------------------ */ + +/* Lists the names of value-tag operations used in instrumented + code. These are the third argument to TAG1 and TAG2 uinsns. */ + +typedef + enum { + /* Unary. 
*/ + VgT_PCast40, VgT_PCast20, VgT_PCast10, + VgT_PCast01, VgT_PCast02, VgT_PCast04, + + VgT_PCast14, VgT_PCast12, VgT_PCast11, + + VgT_Left4, VgT_Left2, VgT_Left1, + + VgT_SWiden14, VgT_SWiden24, VgT_SWiden12, + VgT_ZWiden14, VgT_ZWiden24, VgT_ZWiden12, + + /* Binary; 1st is rd; 2nd is rd+wr */ + VgT_UifU4, VgT_UifU2, VgT_UifU1, VgT_UifU0, + VgT_DifD4, VgT_DifD2, VgT_DifD1, + + VgT_ImproveAND4_TQ, VgT_ImproveAND2_TQ, VgT_ImproveAND1_TQ, + VgT_ImproveOR4_TQ, VgT_ImproveOR2_TQ, VgT_ImproveOR1_TQ, + VgT_DebugFn + } + VgTagOp; + +extern Char* VG_(nameOfTagOp) ( VgTagOp ); +extern UInt VG_(DebugFn) ( UInt a1, UInt a2 ); + + +/* --------------------------------------------------------------------- + Exports of vg_syscall.S + ------------------------------------------------------------------ */ + +extern void VG_(do_syscall) ( void ); + + +/* --------------------------------------------------------------------- + Exports of vg_startup.S + ------------------------------------------------------------------ */ + +extern void VG_(shutdown); +extern void VG_(switch_to_real_CPU) ( void ); + +extern void VG_(swizzle_esp_then_start_GDB) ( void ); + + +/* --------------------------------------------------------------------- + Exports of vg_dispatch.S + ------------------------------------------------------------------ */ + +extern void VG_(dispatch); +extern void VG_(run_innerloop) ( void ); + +/* Returns the next orig_addr to run. */ +extern Addr VG_(run_singleton_translation) ( Addr trans_addr ); + + +/* --------------------------------------------------------------------- + Exports of vg_helpers.S + ------------------------------------------------------------------ */ + +/* For doing exits ... */ +extern void VG_(helper_request_normal_exit); + +/* SMC fast checks. */ +extern void VG_(helper_smc_check4); + +/* Mul, div, etc, -- we don't codegen these directly. 
*/ +extern void VG_(helper_idiv_64_32); +extern void VG_(helper_div_64_32); +extern void VG_(helper_idiv_32_16); +extern void VG_(helper_div_32_16); +extern void VG_(helper_idiv_16_8); +extern void VG_(helper_div_16_8); + +extern void VG_(helper_imul_32_64); +extern void VG_(helper_mul_32_64); +extern void VG_(helper_imul_16_32); +extern void VG_(helper_mul_16_32); +extern void VG_(helper_imul_8_16); +extern void VG_(helper_mul_8_16); + +extern void VG_(helper_CLD); +extern void VG_(helper_STD); +extern void VG_(helper_get_dirflag); + +extern void VG_(helper_shldl); +extern void VG_(helper_shldw); +extern void VG_(helper_shrdl); +extern void VG_(helper_shrdw); + +extern void VG_(helper_RDTSC); +extern void VG_(helper_CPUID); + +extern void VG_(helper_bt); +extern void VG_(helper_bts); +extern void VG_(helper_btr); +extern void VG_(helper_btc); + +extern void VG_(helper_bsf); +extern void VG_(helper_bsr); + +extern void VG_(helper_fstsw_AX); +extern void VG_(helper_SAHF); + +extern void VG_(helper_value_check4_fail); +extern void VG_(helper_value_check2_fail); +extern void VG_(helper_value_check1_fail); +extern void VG_(helper_value_check0_fail); + +extern void VG_(helper_do_syscall); +extern void VG_(helper_do_client_request); + + +/* --------------------------------------------------------------------- + The state of the simulated CPU. + ------------------------------------------------------------------ */ + +/* This is the Intel register encoding. */ +#define R_EAX 0 +#define R_ECX 1 +#define R_EDX 2 +#define R_EBX 3 +#define R_ESP 4 +#define R_EBP 5 +#define R_ESI 6 +#define R_EDI 7 + +#define R_AL (0+R_EAX) +#define R_CL (0+R_ECX) +#define R_DL (0+R_EDX) +#define R_BL (0+R_EBX) +#define R_AH (4+R_EAX) +#define R_CH (4+R_ECX) +#define R_DH (4+R_EDX) +#define R_BH (4+R_EBX) + + +/* --------------------------------------------------------------------- + Offsets into baseBlock for everything which needs to referred to + from generated code. 
The order of these decls does not imply + what the order of the actual offsets is. The latter is important + and is set up in vg_main.c. + ------------------------------------------------------------------ */ + +/* An array of words. In generated code, %ebp always points to the + start of this array. Useful stuff, like the simulated CPU state, + and the addresses of helper functions, can then be found by + indexing off %ebp. The following declares variables which, at + startup time, are given values denoting offsets into baseBlock. + These offsets are in *words* from the start of baseBlock. */ + +#define VG_BASEBLOCK_WORDS 200 + +extern UInt VG_(baseBlock)[VG_BASEBLOCK_WORDS]; + + +/* ----------------------------------------------------- + Read-write parts of baseBlock. + -------------------------------------------------- */ + +/* State of the simulated CPU. */ +extern Int VGOFF_(m_eax); +extern Int VGOFF_(m_ecx); +extern Int VGOFF_(m_edx); +extern Int VGOFF_(m_ebx); +extern Int VGOFF_(m_esp); +extern Int VGOFF_(m_ebp); +extern Int VGOFF_(m_esi); +extern Int VGOFF_(m_edi); +extern Int VGOFF_(m_eflags); +extern Int VGOFF_(m_fpustate); +extern Int VGOFF_(m_eip); + +/* Reg-alloc spill area (VG_MAX_SPILLSLOTS words long). */ +extern Int VGOFF_(spillslots); + +/* Records the valid bits for the 8 integer regs & flags reg. */ +extern Int VGOFF_(sh_eax); +extern Int VGOFF_(sh_ecx); +extern Int VGOFF_(sh_edx); +extern Int VGOFF_(sh_ebx); +extern Int VGOFF_(sh_esp); +extern Int VGOFF_(sh_ebp); +extern Int VGOFF_(sh_esi); +extern Int VGOFF_(sh_edi); +extern Int VGOFF_(sh_eflags); + + +/* ----------------------------------------------------- + Read-only parts of baseBlock. + -------------------------------------------------- */ + +/* Offsets of addresses of helper functions. A "helper" function is + one which is called from generated code. 
*/ + +extern Int VGOFF_(helper_idiv_64_32); +extern Int VGOFF_(helper_div_64_32); +extern Int VGOFF_(helper_idiv_32_16); +extern Int VGOFF_(helper_div_32_16); +extern Int VGOFF_(helper_idiv_16_8); +extern Int VGOFF_(helper_div_16_8); + +extern Int VGOFF_(helper_imul_32_64); +extern Int VGOFF_(helper_mul_32_64); +extern Int VGOFF_(helper_imul_16_32); +extern Int VGOFF_(helper_mul_16_32); +extern Int VGOFF_(helper_imul_8_16); +extern Int VGOFF_(helper_mul_8_16); + +extern Int VGOFF_(helper_CLD); +extern Int VGOFF_(helper_STD); +extern Int VGOFF_(helper_get_dirflag); + +extern Int VGOFF_(helper_shldl); +extern Int VGOFF_(helper_shldw); +extern Int VGOFF_(helper_shrdl); +extern Int VGOFF_(helper_shrdw); + +extern Int VGOFF_(helper_RDTSC); +extern Int VGOFF_(helper_CPUID); + +extern Int VGOFF_(helper_bt); +extern Int VGOFF_(helper_bts); +extern Int VGOFF_(helper_btr); +extern Int VGOFF_(helper_btc); + +extern Int VGOFF_(helper_bsf); +extern Int VGOFF_(helper_bsr); + +extern Int VGOFF_(helper_fstsw_AX); +extern Int VGOFF_(helper_SAHF); + +extern Int VGOFF_(helper_value_check4_fail); +extern Int VGOFF_(helper_value_check2_fail); +extern Int VGOFF_(helper_value_check1_fail); +extern Int VGOFF_(helper_value_check0_fail); + +extern Int VGOFF_(helper_do_syscall); +extern Int VGOFF_(helper_do_client_request); + +extern Int VGOFF_(helperc_STOREV4); /* :: UInt -> Addr -> void */ +extern Int VGOFF_(helperc_STOREV2); /* :: UInt -> Addr -> void */ +extern Int VGOFF_(helperc_STOREV1); /* :: UInt -> Addr -> void */ + +extern Int VGOFF_(helperc_LOADV4); /* :: Addr -> UInt -> void */ +extern Int VGOFF_(helperc_LOADV2); /* :: Addr -> UInt -> void */ +extern Int VGOFF_(helperc_LOADV1); /* :: Addr -> UInt -> void */ + +extern Int VGOFF_(handle_esp_assignment); /* :: Addr -> void */ +extern Int VGOFF_(fpu_write_check); /* :: Addr -> Int -> void */ +extern Int VGOFF_(fpu_read_check); /* :: Addr -> Int -> void */ + +extern Int VGOFF_(helper_request_normal_exit); + + + +#endif /* ndef 
__VG_INCLUDE_H */ + +/*--------------------------------------------------------------------*/ +/*--- end vg_include.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_kerneliface.h b/vg_kerneliface.h new file mode 100644 index 000000000..856a1c4ab --- /dev/null +++ b/vg_kerneliface.h @@ -0,0 +1,165 @@ + +/*--------------------------------------------------------------------*/ +/*--- A header file defining structures and constants which are ---*/ +/*--- important at the kernel boundary for this platform. ---*/ +/*--- vg_kerneliface.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#ifndef __VG_KERNELIFACE_H +#define __VG_KERNELIFACE_H + +/* This file is ONLY to be included into vg_include.h. Do not include + it directly into valgrind source .c files. This file defines types + and constants for the kernel interface, and to make that clear + everything is prefixed VKI. 
*/ + +/*--- All the following stuff is correct for Linux kernels 2.2.X and + 2.4.X. +---*/ + +/* Should really get this from an include file somewhere. */ +#define VKI_BYTES_PER_PAGE_BITS 12 +#define VKI_BYTES_PER_PAGE (1 << VKI_BYTES_PER_PAGE_BITS) + +#define VKI_BYTES_PER_WORD 4 +#define VKI_WORDS_PER_PAGE (VKI_BYTES_PER_PAGE / VKI_BYTES_PER_WORD) + + +/* For system call numbers __NR_... */ +#include + +/* An implementation of signal sets. These are the same as the sigset + implementations in the relevant Linux kernels. Note carefully that + this has nothing to do with glibc's signal sets. We work entirely + at the kernel boundary, so the libc stuff is invisible and + irrelevant. */ + +/* The following is copied from + /usr/src/linux-2.4.9-13/include/asm-i386/signal.h */ +#define VKI_KNSIG 64 /* true for linux 2.2.X and 2.4.X */ +#define VKI_KNSIG_BPW 32 /* since we're using UInts */ +#define VKI_KNSIG_WORDS (VKI_KNSIG / VKI_KNSIG_BPW) + +typedef + struct { + UInt ws[VKI_KNSIG_WORDS]; + } + vki_ksigset_t; + +typedef + struct { + void* ksa_handler; + unsigned long ksa_flags; + void (*ksa_restorer)(void); + vki_ksigset_t ksa_mask; + } + vki_ksigaction; + +typedef + struct { + void* ss_sp; + Int ss_flags; + UInt ss_size; + } + vki_kstack_t; + + +#define VKI_SIG_BLOCK 0 /* for blocking signals */ +#define VKI_SIG_UNBLOCK 1 /* for unblocking signals */ +#define VKI_SIG_SETMASK 2 /* for setting the signal mask */ + +#define VKI_SIG_DFL ((void*)0) /* default signal handling */ +#define VKI_SIG_IGN ((void*)1) /* ignore signal */ +#define VKI_SIG_ERR ((void*)-1) /* error return from signal */ + +#define VKI_SA_ONSTACK 0x08000000 +#define VKI_SA_RESTART 0x10000000 +#if 0 +#define VKI_SA_NOCLDSTOP 0x00000001 +#define VKI_SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define VKI_SA_SIGINFO 0x00000004 +#define VKI_SA_NODEFER 0x40000000 +#define VKI_SA_RESETHAND 0x80000000 +#define VKI_SA_NOMASK SA_NODEFER +#define VKI_SA_ONESHOT SA_RESETHAND +#define VKI_SA_INTERRUPT 
0x20000000 /* dummy -- ignored */ +#define VKI_SA_RESTORER 0x04000000 +#endif + +#define VKI_SIGABRT 6 +#define VKI_SIGSEGV 11 +#define VKI_SIGBUS 7 +#define VKI_SIGILL 4 +#define VKI_SIGFPE 8 +#define VKI_SIGKILL 9 +#define VKI_SIGABRT 6 +#define VKI_SIGSTOP 19 +#define VKI_SIGTERM 15 + +/* The following are copied from /usr/include/bits/mman.h, which in + turn claims to have got them from the kernel headers. */ + +#define VKI_PROT_READ 0x1 /* Page can be read. */ +#define VKI_PROT_WRITE 0x2 /* Page can be written. */ +#define VKI_PROT_EXEC 0x4 /* Page can be executed. */ +#define VKI_MAP_ANONYMOUS 0x20 /* Don't use a file. */ +#define VKI_MAP_PRIVATE 0x02 /* Changes are private. */ + + +/* Gawd ... hack ... */ + +typedef struct vki__user_cap_header_struct { + UInt version; + int pid; +} vki_cap_user_header_t; + +typedef struct vki__user_cap_data_struct { + UInt effective; + UInt permitted; + UInt inheritable; +} vki_cap_user_data_t; + + +/* "Byrial Jensen" says: + [various] ioctls take a pointer to a "struct + termios" but this is another and shorter "struct + termios" than the one defined in and used + by tcgetattr(3) and tcsetattr(3) and other library + functions. GNU libc translate between its library + termios and the kernel termios. +*/ + +#define VKI_SIZEOF_STRUCT_TERMIOS 36 + + +#endif /* ndef __VG_KERNELIFACE_H */ + +/*--------------------------------------------------------------------*/ +/*--- end vg_kerneliface.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_main.c b/vg_main.c new file mode 100644 index 000000000..798d43b0c --- /dev/null +++ b/vg_main.c @@ -0,0 +1,1440 @@ + +/*--------------------------------------------------------------------*/ +/*--- C startup stuff, reached from vg_startup.S. 
---*/ +/*--- vg_main.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" +#include "vg_constants.h" +#include "vg_version.h" + + +/* --------------------------------------------------------------------- + Compute offsets into baseBlock. See comments in vg_include.h. + ------------------------------------------------------------------ */ + +/* The variables storing offsets. 
*/ + +#define INVALID_OFFSET (-1) + +Int VGOFF_(m_eax) = INVALID_OFFSET; +Int VGOFF_(m_ecx) = INVALID_OFFSET; +Int VGOFF_(m_edx) = INVALID_OFFSET; +Int VGOFF_(m_ebx) = INVALID_OFFSET; +Int VGOFF_(m_esp) = INVALID_OFFSET; +Int VGOFF_(m_ebp) = INVALID_OFFSET; +Int VGOFF_(m_esi) = INVALID_OFFSET; +Int VGOFF_(m_edi) = INVALID_OFFSET; +Int VGOFF_(m_eflags) = INVALID_OFFSET; +Int VGOFF_(m_fpustate) = INVALID_OFFSET; +Int VGOFF_(m_eip) = INVALID_OFFSET; +Int VGOFF_(spillslots) = INVALID_OFFSET; +Int VGOFF_(sh_eax) = INVALID_OFFSET; +Int VGOFF_(sh_ecx) = INVALID_OFFSET; +Int VGOFF_(sh_edx) = INVALID_OFFSET; +Int VGOFF_(sh_ebx) = INVALID_OFFSET; +Int VGOFF_(sh_esp) = INVALID_OFFSET; +Int VGOFF_(sh_ebp) = INVALID_OFFSET; +Int VGOFF_(sh_esi) = INVALID_OFFSET; +Int VGOFF_(sh_edi) = INVALID_OFFSET; +Int VGOFF_(sh_eflags) = INVALID_OFFSET; +Int VGOFF_(helper_idiv_64_32) = INVALID_OFFSET; +Int VGOFF_(helper_div_64_32) = INVALID_OFFSET; +Int VGOFF_(helper_idiv_32_16) = INVALID_OFFSET; +Int VGOFF_(helper_div_32_16) = INVALID_OFFSET; +Int VGOFF_(helper_idiv_16_8) = INVALID_OFFSET; +Int VGOFF_(helper_div_16_8) = INVALID_OFFSET; +Int VGOFF_(helper_imul_32_64) = INVALID_OFFSET; +Int VGOFF_(helper_mul_32_64) = INVALID_OFFSET; +Int VGOFF_(helper_imul_16_32) = INVALID_OFFSET; +Int VGOFF_(helper_mul_16_32) = INVALID_OFFSET; +Int VGOFF_(helper_imul_8_16) = INVALID_OFFSET; +Int VGOFF_(helper_mul_8_16) = INVALID_OFFSET; +Int VGOFF_(helper_CLD) = INVALID_OFFSET; +Int VGOFF_(helper_STD) = INVALID_OFFSET; +Int VGOFF_(helper_get_dirflag) = INVALID_OFFSET; +Int VGOFF_(helper_shldl) = INVALID_OFFSET; +Int VGOFF_(helper_shldw) = INVALID_OFFSET; +Int VGOFF_(helper_shrdl) = INVALID_OFFSET; +Int VGOFF_(helper_shrdw) = INVALID_OFFSET; +Int VGOFF_(helper_RDTSC) = INVALID_OFFSET; +Int VGOFF_(helper_CPUID) = INVALID_OFFSET; +Int VGOFF_(helper_BSWAP) = INVALID_OFFSET; +Int VGOFF_(helper_bt) = INVALID_OFFSET; +Int VGOFF_(helper_bts) = INVALID_OFFSET; +Int VGOFF_(helper_btr) = INVALID_OFFSET; +Int 
VGOFF_(helper_btc) = INVALID_OFFSET; +Int VGOFF_(helper_bsf) = INVALID_OFFSET; +Int VGOFF_(helper_bsr) = INVALID_OFFSET; +Int VGOFF_(helper_fstsw_AX) = INVALID_OFFSET; +Int VGOFF_(helper_SAHF) = INVALID_OFFSET; +Int VGOFF_(helper_value_check4_fail) = INVALID_OFFSET; +Int VGOFF_(helper_value_check2_fail) = INVALID_OFFSET; +Int VGOFF_(helper_value_check1_fail) = INVALID_OFFSET; +Int VGOFF_(helper_value_check0_fail) = INVALID_OFFSET; +Int VGOFF_(helper_do_syscall) = INVALID_OFFSET; +Int VGOFF_(helper_do_client_request) = INVALID_OFFSET; +Int VGOFF_(helperc_LOADV4) = INVALID_OFFSET; +Int VGOFF_(helperc_LOADV2) = INVALID_OFFSET; +Int VGOFF_(helperc_LOADV1) = INVALID_OFFSET; +Int VGOFF_(helperc_STOREV4) = INVALID_OFFSET; +Int VGOFF_(helperc_STOREV2) = INVALID_OFFSET; +Int VGOFF_(helperc_STOREV1) = INVALID_OFFSET; +Int VGOFF_(handle_esp_assignment) = INVALID_OFFSET; +Int VGOFF_(fpu_write_check) = INVALID_OFFSET; +Int VGOFF_(fpu_read_check) = INVALID_OFFSET; +Int VGOFF_(helper_request_normal_exit) = INVALID_OFFSET; + + +/* This is the actual defn of baseblock. */ +UInt VG_(baseBlock)[VG_BASEBLOCK_WORDS]; + +/* Words. */ +static Int baB_off = 0; + +/* Returns the offset, in words. */ +static Int alloc_BaB ( Int words ) +{ + Int off = baB_off; + baB_off += words; + if (baB_off >= VG_BASEBLOCK_WORDS) + VG_(panic)( "alloc_BaB: baseBlock is too small"); + + return off; +} + +/* Allocate 1 word in baseBlock and set it to the given value. */ +static Int alloc_BaB_1_set ( Addr a ) +{ + Int off = alloc_BaB(1); + VG_(baseBlock)[off] = (UInt)a; + return off; +} + + +/* Here we assign actual offsets. It's important to get the most + popular referents within 128 bytes of the start, so we can take + advantage of short addressing modes relative to %ebp. Popularity + of offsets was measured on 22 Feb 02 running a KDE application, and + the slots rearranged accordingly, with a 1.5% reduction in total + size of translations. 
*/ + +static void vg_init_baseBlock ( void ) +{ + baB_off = 0; + + /* Those with offsets under 128 are carefully chosen. */ + + /* WORD offsets in this column */ + /* 0 */ VGOFF_(m_eax) = alloc_BaB(1); + /* 1 */ VGOFF_(m_ecx) = alloc_BaB(1); + /* 2 */ VGOFF_(m_edx) = alloc_BaB(1); + /* 3 */ VGOFF_(m_ebx) = alloc_BaB(1); + /* 4 */ VGOFF_(m_esp) = alloc_BaB(1); + /* 5 */ VGOFF_(m_ebp) = alloc_BaB(1); + /* 6 */ VGOFF_(m_esi) = alloc_BaB(1); + /* 7 */ VGOFF_(m_edi) = alloc_BaB(1); + /* 8 */ VGOFF_(m_eflags) = alloc_BaB(1); + + /* 9 */ VGOFF_(sh_eax) = alloc_BaB(1); + /* 10 */ VGOFF_(sh_ecx) = alloc_BaB(1); + /* 11 */ VGOFF_(sh_edx) = alloc_BaB(1); + /* 12 */ VGOFF_(sh_ebx) = alloc_BaB(1); + /* 13 */ VGOFF_(sh_esp) = alloc_BaB(1); + /* 14 */ VGOFF_(sh_ebp) = alloc_BaB(1); + /* 15 */ VGOFF_(sh_esi) = alloc_BaB(1); + /* 16 */ VGOFF_(sh_edi) = alloc_BaB(1); + /* 17 */ VGOFF_(sh_eflags) = alloc_BaB(1); + + /* 18 */ + VGOFF_(helper_value_check4_fail) + = alloc_BaB_1_set( (Addr) & VG_(helper_value_check4_fail) ); + /* 19 */ + VGOFF_(helper_value_check0_fail) + = alloc_BaB_1_set( (Addr) & VG_(helper_value_check0_fail) ); + + /* 20 */ + VGOFF_(helperc_STOREV4) + = alloc_BaB_1_set( (Addr) & VG_(helperc_STOREV4) ); + /* 21 */ + VGOFF_(helperc_STOREV1) + = alloc_BaB_1_set( (Addr) & VG_(helperc_STOREV1) ); + + /* 22 */ + VGOFF_(helperc_LOADV4) + = alloc_BaB_1_set( (Addr) & VG_(helperc_LOADV4) ); + /* 23 */ + VGOFF_(helperc_LOADV1) + = alloc_BaB_1_set( (Addr) & VG_(helperc_LOADV1) ); + + /* 24 */ + VGOFF_(handle_esp_assignment) + = alloc_BaB_1_set( (Addr) & VGM_(handle_esp_assignment) ); + + /* 25 */ + VGOFF_(m_eip) = alloc_BaB(1); + + /* There are currently 24 spill slots */ + /* 26 .. 49 This overlaps the magic boundary at >= 32 words, but + most spills are to low numbered spill slots, so the ones above + the boundary don't see much action. */ + VGOFF_(spillslots) = alloc_BaB(VG_MAX_SPILLSLOTS); + + /* These two pushed beyond the boundary because 2-byte transactions + are rare. 
*/ + /* 50 */ + VGOFF_(helperc_STOREV2) + = alloc_BaB_1_set( (Addr) & VG_(helperc_STOREV2) ); + /* 51 */ + VGOFF_(helperc_LOADV2) + = alloc_BaB_1_set( (Addr) & VG_(helperc_LOADV2) ); + + /* 52 */ + VGOFF_(fpu_write_check) + = alloc_BaB_1_set( (Addr) & VGM_(fpu_write_check) ); + /* 53 */ + VGOFF_(fpu_read_check) + = alloc_BaB_1_set( (Addr) & VGM_(fpu_read_check) ); + + /* Actually I don't think these two are ever used. */ + /* 54 */ + VGOFF_(helper_value_check2_fail) + = alloc_BaB_1_set( (Addr) & VG_(helper_value_check2_fail) ); + /* 55 */ + VGOFF_(helper_value_check1_fail) + = alloc_BaB_1_set( (Addr) & VG_(helper_value_check1_fail) ); + + /* I gave up counting at this point. Since they're way above the + short-amode-boundary, there's no point. */ + + VGOFF_(m_fpustate) = alloc_BaB(VG_SIZE_OF_FPUSTATE_W); + + VGOFF_(helper_idiv_64_32) + = alloc_BaB_1_set( (Addr) & VG_(helper_idiv_64_32) ); + VGOFF_(helper_div_64_32) + = alloc_BaB_1_set( (Addr) & VG_(helper_div_64_32) ); + VGOFF_(helper_idiv_32_16) + = alloc_BaB_1_set( (Addr) & VG_(helper_idiv_32_16) ); + VGOFF_(helper_div_32_16) + = alloc_BaB_1_set( (Addr) & VG_(helper_div_32_16) ); + VGOFF_(helper_idiv_16_8) + = alloc_BaB_1_set( (Addr) & VG_(helper_idiv_16_8) ); + VGOFF_(helper_div_16_8) + = alloc_BaB_1_set( (Addr) & VG_(helper_div_16_8) ); + + VGOFF_(helper_imul_32_64) + = alloc_BaB_1_set( (Addr) & VG_(helper_imul_32_64) ); + VGOFF_(helper_mul_32_64) + = alloc_BaB_1_set( (Addr) & VG_(helper_mul_32_64) ); + VGOFF_(helper_imul_16_32) + = alloc_BaB_1_set( (Addr) & VG_(helper_imul_16_32) ); + VGOFF_(helper_mul_16_32) + = alloc_BaB_1_set( (Addr) & VG_(helper_mul_16_32) ); + VGOFF_(helper_imul_8_16) + = alloc_BaB_1_set( (Addr) & VG_(helper_imul_8_16) ); + VGOFF_(helper_mul_8_16) + = alloc_BaB_1_set( (Addr) & VG_(helper_mul_8_16) ); + + VGOFF_(helper_CLD) + = alloc_BaB_1_set( (Addr) & VG_(helper_CLD) ); + VGOFF_(helper_STD) + = alloc_BaB_1_set( (Addr) & VG_(helper_STD) ); + VGOFF_(helper_get_dirflag) + = alloc_BaB_1_set( 
(Addr) & VG_(helper_get_dirflag) ); + + VGOFF_(helper_shldl) + = alloc_BaB_1_set( (Addr) & VG_(helper_shldl) ); + VGOFF_(helper_shldw) + = alloc_BaB_1_set( (Addr) & VG_(helper_shldw) ); + VGOFF_(helper_shrdl) + = alloc_BaB_1_set( (Addr) & VG_(helper_shrdl) ); + VGOFF_(helper_shrdw) + = alloc_BaB_1_set( (Addr) & VG_(helper_shrdw) ); + + VGOFF_(helper_RDTSC) + = alloc_BaB_1_set( (Addr) & VG_(helper_RDTSC) ); + VGOFF_(helper_CPUID) + = alloc_BaB_1_set( (Addr) & VG_(helper_CPUID) ); + + VGOFF_(helper_bt) + = alloc_BaB_1_set( (Addr) & VG_(helper_bt) ); + VGOFF_(helper_bts) + = alloc_BaB_1_set( (Addr) & VG_(helper_bts) ); + VGOFF_(helper_btr) + = alloc_BaB_1_set( (Addr) & VG_(helper_btr) ); + VGOFF_(helper_btc) + = alloc_BaB_1_set( (Addr) & VG_(helper_btc) ); + + VGOFF_(helper_bsf) + = alloc_BaB_1_set( (Addr) & VG_(helper_bsf) ); + VGOFF_(helper_bsr) + = alloc_BaB_1_set( (Addr) & VG_(helper_bsr) ); + + VGOFF_(helper_fstsw_AX) + = alloc_BaB_1_set( (Addr) & VG_(helper_fstsw_AX) ); + VGOFF_(helper_SAHF) + = alloc_BaB_1_set( (Addr) & VG_(helper_SAHF) ); + + VGOFF_(helper_request_normal_exit) + = alloc_BaB_1_set( (Addr) & VG_(helper_request_normal_exit) ); + + VGOFF_(helper_do_syscall) + = alloc_BaB_1_set( (Addr) & VG_(helper_do_syscall) ); + VGOFF_(helper_do_client_request) + = alloc_BaB_1_set( (Addr) & VG_(helper_do_client_request) ); +} + + +/* --------------------------------------------------------------------- + Global entities which are not referenced from generated code. + ------------------------------------------------------------------ */ + +/* The stack on which Valgrind runs. We can't use the same stack as + the simulatee -- that's an important design decision. */ +UInt VG_(stack)[10000]; + +/* Ditto our signal delivery stack. */ +UInt VG_(sigstack)[10000]; + +/* Saving stuff across system calls. 
*/ +UInt VG_(real_fpu_state_saved_over_syscall_d1)[VG_SIZE_OF_FPUSTATE_W]; +UInt VG_(real_fpu_state_saved_over_syscall_d2)[VG_SIZE_OF_FPUSTATE_W]; +Addr VG_(esp_saved_over_syscall_d1); +Addr VG_(esp_saved_over_syscall_d2); + +/* Counts downwards in vg_run_innerloop. */ +UInt VG_(dispatch_ctr); + +/* If vg_dispatch_ctr is set to 1 to force a stop, its + previous value is saved here. */ +UInt VG_(dispatch_ctr_SAVED); + +/* This is why vg_run_innerloop() exited. */ +UInt VG_(interrupt_reason); + +/* vg_oursignalhandler() might longjmp(). Here's the jmp_buf. */ +jmp_buf VG_(toploop_jmpbuf); +/* ... and if so, here's the signal which caused it to do so. */ +Int VG_(longjmpd_on_signal); + +/* 64-bit counter for the number of basic blocks done. */ +ULong VG_(bbs_done); +/* 64-bit counter for the number of bbs to go before a debug exit. */ +ULong VG_(bbs_to_go); + +/* Produce debugging output? */ +Bool VG_(disassemble) = False; + +/* The current LRU epoch. */ +UInt VG_(current_epoch) = 0; + + +/* --------------------------------------------------------------------- + Counters, for informational purposes only. + ------------------------------------------------------------------ */ + +/* Number of lookups which miss the fast tt helper. */ +UInt VG_(tt_fast_misses) = 0; + + +/* Counts for LRU informational messages. */ + +/* Number and total o/t size of new translations this epoch. */ +UInt VG_(this_epoch_in_count) = 0; +UInt VG_(this_epoch_in_osize) = 0; +UInt VG_(this_epoch_in_tsize) = 0; +/* Number and total o/t size of discarded translations this epoch. */ +UInt VG_(this_epoch_out_count) = 0; +UInt VG_(this_epoch_out_osize) = 0; +UInt VG_(this_epoch_out_tsize) = 0; +/* Number and total o/t size of translations overall. */ +UInt VG_(overall_in_count) = 0; +UInt VG_(overall_in_osize) = 0; +UInt VG_(overall_in_tsize) = 0; +/* Number and total o/t size of discards overall. 
*/ +UInt VG_(overall_out_count) = 0; +UInt VG_(overall_out_osize) = 0; +UInt VG_(overall_out_tsize) = 0; + +/* The number of LRU-clearings of TT/TC. */ +UInt VG_(number_of_lrus) = 0; + + +/* Counts pertaining to the register allocator. */ + +/* total number of uinstrs input to reg-alloc */ +UInt VG_(uinstrs_prealloc) = 0; + +/* total number of uinstrs added due to spill code */ +UInt VG_(uinstrs_spill) = 0; + +/* number of bbs requiring spill code */ +UInt VG_(translations_needing_spill) = 0; + +/* total of register ranks over all translations */ +UInt VG_(total_reg_rank) = 0; + + +/* Counts pertaining to the self-modifying-code detection machinery. */ + +/* Total number of writes checked. */ +UInt VG_(smc_total_check4s) = 0; + +/* Number of writes which the fast smc check couldn't show were + harmless. */ +UInt VG_(smc_cache_passed) = 0; + +/* Numnber of writes which really did write on original code. */ +UInt VG_(smc_fancy_passed) = 0; + +/* Number of translations discarded as a result. */ +UInt VG_(smc_discard_count) = 0; + + +/* Counts pertaining to internal sanity checking. */ + +UInt VG_(sanity_fast_count) = 0; +UInt VG_(sanity_slow_count) = 0; + + + +/* --------------------------------------------------------------------- + Values derived from command-line options. 
+ ------------------------------------------------------------------ */ + +Bool VG_(clo_GDB_attach); +Int VG_(sanity_level); +Int VG_(clo_verbosity); +Bool VG_(clo_demangle); +Bool VG_(clo_leak_check); +Bool VG_(clo_show_reachable); +Int VG_(clo_leak_resolution); +Bool VG_(clo_sloppy_malloc); +Bool VG_(clo_partial_loads_ok); +Bool VG_(clo_trace_children); +Int VG_(clo_logfile_fd); +Int VG_(clo_freelist_vol); +Bool VG_(clo_workaround_gcc296_bugs); +Int VG_(clo_n_suppressions); +Char* VG_(clo_suppressions)[VG_CLO_MAX_SFILES]; +Bool VG_(clo_single_step); +Bool VG_(clo_optimise); +Bool VG_(clo_instrument); +Bool VG_(clo_cleanup); +Bool VG_(clo_client_perms); +Int VG_(clo_smc_check); +Bool VG_(clo_trace_syscalls); +Bool VG_(clo_trace_signals); +Bool VG_(clo_trace_symtab); +Bool VG_(clo_trace_malloc); +ULong VG_(clo_stop_after); +Int VG_(clo_dump_error); +Int VG_(clo_backtrace_size); + +/* This Bool is needed by wrappers in vg_clientmalloc.c to decide how + to behave. Initially we say False. */ +Bool VG_(running_on_simd_CPU) = False; + +/* Holds client's %esp at the point we gained control. */ +Addr VG_(esp_at_startup); + +/* As deduced from VG_(esp_at_startup), the client's argc, argv[] and + envp[] as extracted from the client's stack at startup-time. */ +Int VG_(client_argc); +Char** VG_(client_argv); +Char** VG_(client_envp); + +/* A place into which to copy the value of env var VG_ARGS, so we + don't have to modify the original. */ +static Char vg_cmdline_copy[M_VG_CMDLINE_STRLEN]; + + +/* --------------------------------------------------------------------- + Top level simulation loop. + ------------------------------------------------------------------ */ + +/* Create a translation of the client basic block beginning at + orig_addr, and add it to the translation cache & translation table. + This probably doesn't really belong here, but, hey ... 
 */
void VG_(create_translation_for) ( Addr orig_addr )
{
   Addr    trans_addr;
   TTEntry tte;
   Int     orig_size, trans_size;
   /* Ensure there is space to hold a translation. */
   VG_(maybe_do_lru_pass)();
   VG_(translate)( orig_addr, &orig_size, &trans_addr, &trans_size );
   /* Copy data at trans_addr into the translation cache.
      Returned pointer is to the code, not to the 4-byte
      header. */
   /* Since the .orig_size and .trans_size fields are
      UShort, be paranoid. */
   vg_assert(orig_size > 0 && orig_size < 65536);
   vg_assert(trans_size > 0 && trans_size < 65536);
   tte.orig_size  = orig_size;
   tte.orig_addr  = orig_addr;
   tte.trans_size = trans_size;
   tte.trans_addr = VG_(copy_to_transcache)
                       ( trans_addr, trans_size );
   tte.mru_epoch  = VG_(current_epoch);
   /* Free the intermediary -- was allocated by VG_(emit_code). */
   VG_(jitfree)( (void*)trans_addr );
   /* Add to trans tab and set back pointer. */
   VG_(add_to_trans_tab) ( &tte );
   /* Update stats. */
   VG_(this_epoch_in_count) ++;
   VG_(this_epoch_in_osize) += orig_size;
   VG_(this_epoch_in_tsize) += trans_size;
   VG_(overall_in_count) ++;
   VG_(overall_in_osize) += orig_size;
   VG_(overall_in_tsize) += trans_size;
   /* Record translated area for SMC detection.
      NOTE(review): this marks the block at the current %eip held in
      baseBlock rather than at orig_addr -- presumably the two are the
      same at every call site; confirm. */
   VG_(smc_mark_original) (
      VG_(baseBlock)[VGOFF_(m_eip)], orig_size );
}


/* Runs the client program from %EIP (baseBlock[off_eip]) until it
   asks to exit, or until vg_bbs_to_go jumps have happened (the latter
   case is for debugging). */

void VG_(toploop) ( void )
{
   /* What we asked vg_run_innerloop to do on this iteration.  NB: a
      distinct *local* shadowing the role of the global
      VG_(dispatch_ctr_SAVED); see the VG_Y_EXIT/VG_Y_SMC cases below
      for where both are used together. */
   volatile UInt dispatch_ctr_SAVED;
   /* Number of basic blocks actually completed by the last run. */
   volatile Int  done_this_time;

   /* For the LRU structures, records when the epoch began. */
   volatile ULong epoch_started_at = 0;

   while (True) {
      next_outer_loop:

      /* Age the LRU structures if an epoch has been completed. */
      if (VG_(bbs_done) - epoch_started_at >= VG_BBS_PER_EPOCH) {
         VG_(current_epoch)++;
         epoch_started_at = VG_(bbs_done);
         if (VG_(clo_verbosity) > 2) {
            UInt tt_used, tc_used;
            VG_(get_tt_tc_used) ( &tt_used, &tc_used );
            VG_(message)(Vg_UserMsg,
               "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
               VG_(bbs_done),
               VG_(this_epoch_in_count),
               VG_(this_epoch_in_osize),
               VG_(this_epoch_in_tsize),
               VG_(this_epoch_out_count),
               VG_(this_epoch_out_osize),
               VG_(this_epoch_out_tsize),
               tt_used, tc_used
            );
         }
         /* Reset the per-epoch counters for the new epoch. */
         VG_(this_epoch_in_count) = 0;
         VG_(this_epoch_in_osize) = 0;
         VG_(this_epoch_in_tsize) = 0;
         VG_(this_epoch_out_count) = 0;
         VG_(this_epoch_out_osize) = 0;
         VG_(this_epoch_out_tsize) = 0;
      }

      /* Figure out how many bbs to ask vg_run_innerloop to do.  The
         counter counts *down* and the innerloop stops at 1, hence the
         "1 +". */
      if (VG_(bbs_to_go) >= VG_SIGCHECK_INTERVAL)
         VG_(dispatch_ctr) = 1 + VG_SIGCHECK_INTERVAL;
      else
         VG_(dispatch_ctr) = 1 + (UInt)VG_(bbs_to_go);

      /* ... and remember what we asked for. */
      dispatch_ctr_SAVED = VG_(dispatch_ctr);

      /* Now have a go at doing them. */
      VG_(interrupt_reason) = VG_Y_SIGCHECK;
      if (__builtin_setjmp(VG_(toploop_jmpbuf)) == 0) {
         /* try this ... */
         VG_(run_innerloop)();
         /* We get here if the client didn't take a fault. */
         switch (VG_(interrupt_reason)) {
            case VG_Y_SIGCHECK:
               /* The counter fell to zero and no other situation has
                  been detected. */
               vg_assert(VG_(dispatch_ctr) == 0);
               done_this_time = dispatch_ctr_SAVED - 1;
               VG_(bbs_to_go)  -= (ULong)done_this_time;
               VG_(bbs_done)   += (ULong)done_this_time;
               /* Exit if the debug run has ended. */
               if (VG_(bbs_to_go) == 0) goto debug_stop;
               VG_(deliver_signals)();
               VG_(do_sanity_checks)(False);
               goto next_outer_loop;
            case VG_Y_EXIT:
               /* The target program tried to exit.  NOTE(review): the
                  subtrahend here is the *global*
                  VG_(dispatch_ctr_SAVED) -- the counter value captured
                  when the counter was forced to 1 (per its declaration
                  comment) -- not the local of the same name; this is
                  deliberate, not a typo. */
               done_this_time = dispatch_ctr_SAVED - VG_(dispatch_ctr_SAVED);
               done_this_time --;
               VG_(bbs_to_go)  -= (ULong)done_this_time;
               VG_(bbs_done)   += (ULong)done_this_time;
               return;
            case VG_Y_SMC:
               /* A write to original code was detected.  Same
                  local-minus-global subtraction as VG_Y_EXIT above. */
               done_this_time = dispatch_ctr_SAVED - VG_(dispatch_ctr_SAVED);
               VG_(bbs_to_go)  -= (ULong)done_this_time;
               VG_(bbs_done)   += (ULong)done_this_time;
               VG_(flush_transtab)();
               goto next_outer_loop;
            case VG_Y_TRANSLATE: {
               /* Need to provide a translation of code at vg_m_eip. */
               done_this_time = dispatch_ctr_SAVED - VG_(dispatch_ctr);
               vg_assert(done_this_time > 0);
               /* The bb needing translation has not itself run yet. */
               done_this_time --;
               VG_(bbs_to_go)  -= (ULong)done_this_time;
               VG_(bbs_done)   += (ULong)done_this_time;
               VG_(create_translation_for)(VG_(baseBlock)[VGOFF_(m_eip)]);
               goto next_outer_loop;
            }
            default:
               VG_(panic)("vg_toploop: invalid interrupt reason");
         }
      } else {
         /* We get here if the client took a fault, which caused our
            signal handler to longjmp. */
         done_this_time = dispatch_ctr_SAVED - VG_(dispatch_ctr);
         VG_(bbs_to_go)  -= (ULong)done_this_time;
         VG_(bbs_done)   += (ULong)done_this_time;
         if (VG_(interrupt_reason) == VG_Y_EXIT) return;
         VG_(deliver_signals)();
         VG_(do_sanity_checks)(False);
         VG_(unblock_host_signal)(VG_(longjmpd_on_signal));
      }
   }

   /* NOTREACHED */

   debug_stop:
   /* If we exited because of a debug stop, print the translation
      of the last block executed -- by translating it again, and
      throwing away the result. */
   VG_(printf)(
      "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
   VG_(translate)( VG_(baseBlock)[VGOFF_(m_eip)], NULL, NULL, NULL );
   VG_(printf)("\n");
   VG_(printf)(
      "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");
}


/* ---------------------------------------------------------------------
   Processing of command-line options.
+ ------------------------------------------------------------------ */ + +static void bad_option ( Char* opt ) +{ + VG_(shutdown_logging)(); + VG_(clo_logfile_fd) = 2; /* stderr */ + VG_(printf)("valgrind.so: Bad option `%s'; aborting.\n", opt); + VG_(exit)(1); +} + +static void config_error ( Char* msg ) +{ + VG_(shutdown_logging)(); + VG_(clo_logfile_fd) = 2; /* stderr */ + VG_(printf)("valgrind.so: Startup or configuration error:\n\t%s\n", msg); + VG_(printf)("valgrind.so: Unable to start up properly. Giving up.\n"); + VG_(exit)(1); +} + + +static void process_cmd_line_options ( void ) +{ + UChar* argv[M_VG_CMDLINE_OPTS]; + UInt argc; + UChar* p; + UChar* str; + Int i, eventually_logfile_fd; + +# define ISSPACE(cc) ((cc) == ' ' || (cc) == '\t' || (cc) == '\n') +# define STREQ(s1,s2) (0==VG_(strcmp_ws)((s1),(s2))) +# define STREQN(nn,s1,s2) (0==VG_(strncmp_ws)((s1),(s2),(nn))) + + /* Set defaults. */ + VG_(clo_GDB_attach) = False; + VG_(sanity_level) = 1; + VG_(clo_verbosity) = 1; + VG_(clo_demangle) = True; + VG_(clo_leak_check) = False; + VG_(clo_show_reachable) = False; + VG_(clo_leak_resolution) = 2; + VG_(clo_sloppy_malloc) = False; + VG_(clo_partial_loads_ok) = True; + VG_(clo_trace_children) = False; + VG_(clo_logfile_fd) = 2; /* stderr */ + VG_(clo_freelist_vol) = 1000000; + VG_(clo_workaround_gcc296_bugs) = False; + VG_(clo_n_suppressions) = 0; + VG_(clo_single_step) = False; + VG_(clo_optimise) = True; + VG_(clo_instrument) = True; + VG_(clo_cleanup) = True; + VG_(clo_client_perms) = False; + VG_(clo_smc_check) = /* VG_CLO_SMC_SOME */ VG_CLO_SMC_NONE; + VG_(clo_trace_syscalls) = False; + VG_(clo_trace_signals) = False; + VG_(clo_trace_symtab) = False; + VG_(clo_trace_malloc) = False; + VG_(clo_stop_after) = 1000000000000LL; + VG_(clo_dump_error) = 0; + VG_(clo_backtrace_size) = 4; + + eventually_logfile_fd = VG_(clo_logfile_fd); + + /* Once logging is started, we can safely send messages pertaining + to failures in initialisation. 
*/ + VG_(startup_logging)(); + + /* Magically find the client's argc/argv/envp. This kludge is + entirely dependent on the stack layout imposed by libc at + startup. Hence the magic offsets. Then check (heuristically) + that the results are plausible. There must be a better way to + do this ... */ + +# if 0 + /* Use this to search for the correct offsets if the tests below + barf. */ + { Int i; + VG_(printf)("startup %%esp is %p\n", VG_(esp_at_startup) ); + for (i = 0; i < 10; i++) { + Char* p = ((Char**)VG_(esp_at_startup))[i]; + VG_(printf)("%d: %p\n", i, p); + } + } +# endif + + /* These offsets (5,6,7) are right for my RedHat 7.2 (glibc-2.2.4) + box. */ + + VG_(client_argc) = (Int) ( ((void**)VG_(esp_at_startup)) [5] ); + VG_(client_argv) = (Char**)( ((void**)VG_(esp_at_startup)) [6] ); + VG_(client_envp) = (Char**)( ((void**)VG_(esp_at_startup)) [7] ); + + if ( ((UInt)VG_(client_argc)) > 0 && + ((UInt)VG_(client_argc)) < 10000 && + (Addr)VG_(client_argv) >= 0x8000000 && + (Addr)VG_(client_envp) >= 0x8000000) + goto argc_argv_envp_OK; + + /* If that's no good, try some other offsets discovered by KDE + folks on 8 Feb 02: + For glibc > 2.2.4 the offset 9/10/11 did the trick. Coolo found + out those, on I think a Caldera 3.1 with glibc 2.2.4 -- the same + offsets worked for on a debian sid with glibc 2.2.5. */ + + VG_(client_argc) = (Int) ( ((void**)VG_(esp_at_startup)) [9] ); + VG_(client_argv) = (Char**)( ((void**)VG_(esp_at_startup)) [10] ); + VG_(client_envp) = (Char**)( ((void**)VG_(esp_at_startup)) [11] ); + + if ( ((UInt)VG_(client_argc)) > 0 && + ((UInt)VG_(client_argc)) < 10000 && + (Addr)VG_(client_argv) >= 0x8000000 && + (Addr)VG_(client_envp) >= 0x8000000) + goto argc_argv_envp_OK; + + /* Doesn't look promising. Try offsets for RedHat 6.2 + (glibc-2.1.3) instead. In this case, the argv and envp vectors + are actually on the stack (bizarrely). 
*/ + + VG_(client_argc) = (Int) ( ((void**)VG_(esp_at_startup)) [4] ); + VG_(client_argv) = (Char**) & ( ((void**)VG_(esp_at_startup)) [5] ); + VG_(client_envp) + = (Char**) & ( ((void**)VG_(esp_at_startup)) [6 + VG_(client_argc)] ); + + if ( ((UInt)VG_(client_argc)) > 0 && + ((UInt)VG_(client_argc)) < 10000 && + (Addr)VG_(client_argv) >= 0x8000000 && + (Addr)VG_(client_envp) >= 0x8000000) + goto argc_argv_envp_OK; + + /* Here's yet another variant, from (irc.kde.org). */ + + VG_(client_argc) = (Int) ( ((void**)VG_(esp_at_startup)) [9] ); + VG_(client_argv) = (Char**) & ( ((void**)VG_(esp_at_startup)) [10] ); + VG_(client_envp) + = (Char**) & ( ((void**)VG_(esp_at_startup)) [11 + VG_(client_argc)] ); + + if ( ((UInt)VG_(client_argc)) > 0 && + ((UInt)VG_(client_argc)) < 10000 && + (Addr)VG_(client_argv) >= 0x8000000 && + (Addr)VG_(client_envp) >= 0x8000000) + goto argc_argv_envp_OK; + + /* VG_(printf)("%d %p %p\n", VG_(client_argc), VG_(client_argv), + VG_(client_envp)); + */ + /* We're hosed. Give up :-( */ + config_error( + "Can't get plausible values for client's argc/argv/envp.\n\t" + "You may be able to fix this; see process_cmd_line_options()\n\t" + "in vg_main.c" + ); + /* NOTREACHED */ + + argc_argv_envp_OK: + + /* Now that VG_(client_envp) has been set, we can extract the args + for Valgrind itself. Copy into global var so that we don't have to + write zeroes to the getenv'd value itself. 
*/ + str = VG_(getenv)("VG_ARGS"); + argc = 0; + + if (!str) { + config_error("Can't read options from env var VG_ARGS."); + } + + if (VG_(strlen)(str) >= M_VG_CMDLINE_STRLEN-1) { + config_error("Command line length exceeds M_CMDLINE_STRLEN."); + } + VG_(strcpy)(vg_cmdline_copy, str); + str = NULL; + + p = &vg_cmdline_copy[0]; + while (True) { + while (ISSPACE(*p)) { *p = 0; p++; } + if (*p == 0) break; + if (argc < M_VG_CMDLINE_OPTS-1) { + argv[argc] = p; argc++; + } else { + config_error( + "Found more than M_CMDLINE_OPTS command-line opts."); + } + while (*p != 0 && !ISSPACE(*p)) p++; + } + + for (i = 0; i < argc; i++) { + + if (STREQ(argv[i], "-v") || STREQ(argv[i], "--verbose")) + VG_(clo_verbosity)++; + else if (STREQ(argv[i], "-q") || STREQ(argv[i], "--quiet")) + VG_(clo_verbosity)--; + + else if (STREQ(argv[i], "--gdb-attach=yes")) + VG_(clo_GDB_attach) = True; + else if (STREQ(argv[i], "--gdb-attach=no")) + VG_(clo_GDB_attach) = False; + + else if (STREQ(argv[i], "--demangle=yes")) + VG_(clo_demangle) = True; + else if (STREQ(argv[i], "--demangle=no")) + VG_(clo_demangle) = False; + + else if (STREQ(argv[i], "--partial-loads-ok=yes")) + VG_(clo_partial_loads_ok) = True; + else if (STREQ(argv[i], "--partial-loads-ok=no")) + VG_(clo_partial_loads_ok) = False; + + else if (STREQ(argv[i], "--leak-check=yes")) + VG_(clo_leak_check) = True; + else if (STREQ(argv[i], "--leak-check=no")) + VG_(clo_leak_check) = False; + + else if (STREQ(argv[i], "--show-reachable=yes")) + VG_(clo_show_reachable) = True; + else if (STREQ(argv[i], "--show-reachable=no")) + VG_(clo_show_reachable) = False; + + else if (STREQ(argv[i], "--leak-resolution=low")) + VG_(clo_leak_resolution) = 2; + else if (STREQ(argv[i], "--leak-resolution=med")) + VG_(clo_leak_resolution) = 4; + else if (STREQ(argv[i], "--leak-resolution=high")) + VG_(clo_leak_resolution) = VG_DEEPEST_BACKTRACE; + + else if (STREQ(argv[i], "--sloppy-malloc=yes")) + VG_(clo_sloppy_malloc) = True; + else if (STREQ(argv[i], 
"--sloppy-malloc=no")) + VG_(clo_sloppy_malloc) = False; + + else if (STREQ(argv[i], "--trace-children=yes")) + VG_(clo_trace_children) = True; + else if (STREQ(argv[i], "--trace-children=no")) + VG_(clo_trace_children) = False; + + else if (STREQ(argv[i], "--workaround-gcc296-bugs=yes")) + VG_(clo_workaround_gcc296_bugs) = True; + else if (STREQ(argv[i], "--workaround-gcc296-bugs=no")) + VG_(clo_workaround_gcc296_bugs) = False; + + else if (STREQN(15, argv[i], "--sanity-level=")) + VG_(sanity_level) = (Int)VG_(atoll)(&argv[i][15]); + + else if (STREQN(13, argv[i], "--logfile-fd=")) + eventually_logfile_fd = (Int)VG_(atoll)(&argv[i][13]); + + else if (STREQN(15, argv[i], "--freelist-vol=")) { + VG_(clo_freelist_vol) = (Int)VG_(atoll)(&argv[i][15]); + if (VG_(clo_freelist_vol) < 0) VG_(clo_freelist_vol) = 2; + } + + else if (STREQN(15, argv[i], "--suppressions=")) { + if (VG_(clo_n_suppressions) >= VG_CLO_MAX_SFILES) { + VG_(message)(Vg_UserMsg, "Too many logfiles specified."); + VG_(message)(Vg_UserMsg, + "Increase VG_CLO_MAX_SFILES and recompile."); + bad_option(argv[i]); + } + VG_(clo_suppressions)[VG_(clo_n_suppressions)] = &argv[i][15]; + VG_(clo_n_suppressions)++; + } + else if (STREQ(argv[i], "--single-step=yes")) + VG_(clo_single_step) = True; + else if (STREQ(argv[i], "--single-step=no")) + VG_(clo_single_step) = False; + + else if (STREQ(argv[i], "--optimise=yes")) + VG_(clo_optimise) = True; + else if (STREQ(argv[i], "--optimise=no")) + VG_(clo_optimise) = False; + + else if (STREQ(argv[i], "--instrument=yes")) + VG_(clo_instrument) = True; + else if (STREQ(argv[i], "--instrument=no")) + VG_(clo_instrument) = False; + + else if (STREQ(argv[i], "--cleanup=yes")) + VG_(clo_cleanup) = True; + else if (STREQ(argv[i], "--cleanup=no")) + VG_(clo_cleanup) = False; + + else if (STREQ(argv[i], "--client-perms=yes")) + VG_(clo_client_perms) = True; + else if (STREQ(argv[i], "--client-perms=no")) + VG_(clo_client_perms) = False; + + else if (STREQ(argv[i], 
"--smc-check=none")) + VG_(clo_smc_check) = VG_CLO_SMC_NONE; + else if (STREQ(argv[i], "--smc-check=some")) + VG_(clo_smc_check) = VG_CLO_SMC_SOME; + else if (STREQ(argv[i], "--smc-check=all")) + VG_(clo_smc_check) = VG_CLO_SMC_ALL; + + else if (STREQ(argv[i], "--trace-syscalls=yes")) + VG_(clo_trace_syscalls) = True; + else if (STREQ(argv[i], "--trace-syscalls=no")) + VG_(clo_trace_syscalls) = False; + + else if (STREQ(argv[i], "--trace-signals=yes")) + VG_(clo_trace_signals) = True; + else if (STREQ(argv[i], "--trace-signals=no")) + VG_(clo_trace_signals) = False; + + else if (STREQ(argv[i], "--trace-symtab=yes")) + VG_(clo_trace_symtab) = True; + else if (STREQ(argv[i], "--trace-symtab=no")) + VG_(clo_trace_symtab) = False; + + else if (STREQ(argv[i], "--trace-malloc=yes")) + VG_(clo_trace_malloc) = True; + else if (STREQ(argv[i], "--trace-malloc=no")) + VG_(clo_trace_malloc) = False; + + else if (STREQN(13, argv[i], "--stop-after=")) + VG_(clo_stop_after) = VG_(atoll)(&argv[i][13]); + + else if (STREQN(13, argv[i], "--dump-error=")) + VG_(clo_dump_error) = (Int)VG_(atoll)(&argv[i][13]); + + else if (STREQN(14, argv[i], "--num-callers=")) { + /* Make sure it's sane. 
*/ + VG_(clo_backtrace_size) = (Int)VG_(atoll)(&argv[i][14]); + if (VG_(clo_backtrace_size) < 2) + VG_(clo_backtrace_size) = 2; + if (VG_(clo_backtrace_size) >= VG_DEEPEST_BACKTRACE) + VG_(clo_backtrace_size) = VG_DEEPEST_BACKTRACE; + } + + else + bad_option(argv[i]); + } + +# undef ISSPACE +# undef STREQ +# undef STREQN + + if (VG_(clo_verbosity < 0)) + VG_(clo_verbosity) = 0; + + if (VG_(clo_GDB_attach) && VG_(clo_trace_children)) { + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, + "--gdb-attach=yes conflicts with --trace-children=yes"); + VG_(message)(Vg_UserMsg, + "Please choose one or the other, but not both."); + bad_option("--gdb-attach=yes and --trace-children=yes"); + } + + if (VG_(clo_client_perms) && !VG_(clo_instrument)) { + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, + "--client-perms=yes requires --instrument=yes"); + bad_option("--client-perms=yes without --instrument=yes"); + } + + if (VG_(clo_client_perms)) + vg_assert(VG_(clo_instrument)); + + VG_(clo_logfile_fd) = eventually_logfile_fd; + +# define STRINGIFY(xx) __STRING(xx) + if (VG_(clo_verbosity > 0)) + VG_(message)(Vg_UserMsg, + "valgrind-%s, a memory error detector for x86 GNU/Linux.", + STRINGIFY(VG_VERSION)); +# undef STRINGIFY + if (VG_(clo_verbosity > 0)) + VG_(message)(Vg_UserMsg, + "Copyright (C) 2000-2002, and GNU GPL'd, by Julian Seward."); + if (VG_(clo_verbosity) > 1) { + VG_(message)(Vg_UserMsg, "Startup, with flags:"); + for (i = 0; i < argc; i++) { + VG_(message)(Vg_UserMsg, " %s", argv[i]); + } + } + + if (VG_(clo_n_suppressions) == 0) { + config_error("No error-suppression files were specified."); + } +} + + +/* --------------------------------------------------------------------- + Copying to/from m_state_static. 
+ ------------------------------------------------------------------ */ + +UInt VG_(m_state_static) [8 /* int regs, in Intel order */ + + 1 /* %eflags */ + + 1 /* %eip */ + + VG_SIZE_OF_FPUSTATE_W /* FPU state */ + ]; + +void VG_(copy_baseBlock_to_m_state_static) ( void ) +{ + Int i; + VG_(m_state_static)[ 0/4] = VG_(baseBlock)[VGOFF_(m_eax)]; + VG_(m_state_static)[ 4/4] = VG_(baseBlock)[VGOFF_(m_ecx)]; + VG_(m_state_static)[ 8/4] = VG_(baseBlock)[VGOFF_(m_edx)]; + VG_(m_state_static)[12/4] = VG_(baseBlock)[VGOFF_(m_ebx)]; + VG_(m_state_static)[16/4] = VG_(baseBlock)[VGOFF_(m_esp)]; + VG_(m_state_static)[20/4] = VG_(baseBlock)[VGOFF_(m_ebp)]; + VG_(m_state_static)[24/4] = VG_(baseBlock)[VGOFF_(m_esi)]; + VG_(m_state_static)[28/4] = VG_(baseBlock)[VGOFF_(m_edi)]; + + VG_(m_state_static)[32/4] = VG_(baseBlock)[VGOFF_(m_eflags)]; + VG_(m_state_static)[36/4] = VG_(baseBlock)[VGOFF_(m_eip)]; + + for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) + VG_(m_state_static)[40/4 + i] + = VG_(baseBlock)[VGOFF_(m_fpustate) + i]; +} + + +void VG_(copy_m_state_static_to_baseBlock) ( void ) +{ + Int i; + VG_(baseBlock)[VGOFF_(m_eax)] = VG_(m_state_static)[ 0/4]; + VG_(baseBlock)[VGOFF_(m_ecx)] = VG_(m_state_static)[ 4/4]; + VG_(baseBlock)[VGOFF_(m_edx)] = VG_(m_state_static)[ 8/4]; + VG_(baseBlock)[VGOFF_(m_ebx)] = VG_(m_state_static)[12/4]; + VG_(baseBlock)[VGOFF_(m_esp)] = VG_(m_state_static)[16/4]; + VG_(baseBlock)[VGOFF_(m_ebp)] = VG_(m_state_static)[20/4]; + VG_(baseBlock)[VGOFF_(m_esi)] = VG_(m_state_static)[24/4]; + VG_(baseBlock)[VGOFF_(m_edi)] = VG_(m_state_static)[28/4]; + + VG_(baseBlock)[VGOFF_(m_eflags)] = VG_(m_state_static)[32/4]; + VG_(baseBlock)[VGOFF_(m_eip)] = VG_(m_state_static)[36/4]; + + for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) + VG_(baseBlock)[VGOFF_(m_fpustate) + i] + = VG_(m_state_static)[40/4 + i]; +} + + +/* --------------------------------------------------------------------- + Show accumulated counts. 
   ------------------------------------------------------------------ */

/* Dump the informational counters accumulated above, at -v -v. */
static void vg_show_counts ( void )
{
   VG_(message)(Vg_DebugMsg,
                " dispatch: %lu basic blocks, %d tt_fast misses.",
                VG_(bbs_done), VG_(tt_fast_misses));
   VG_(message)(Vg_DebugMsg,
                "translate: new %d (%d -> %d), discard %d (%d -> %d).",
                VG_(overall_in_count),
                VG_(overall_in_osize),
                VG_(overall_in_tsize),
                VG_(overall_out_count),
                VG_(overall_out_osize),
                VG_(overall_out_tsize) );
   VG_(message)(Vg_DebugMsg,
                " lru: %d epochs, %d clearings.",
                VG_(current_epoch),
                VG_(number_of_lrus) );
   VG_(message)(Vg_DebugMsg,
                "reg-alloc: %d t-req-spill, "
                "%d+%d orig+spill uis, %d total-reg-r.",
                VG_(translations_needing_spill),
                VG_(uinstrs_prealloc),
                VG_(uinstrs_spill),
                VG_(total_reg_rank) );
   VG_(message)(Vg_DebugMsg,
                "smc-check: %d checks, %d fast pass, "
                "%d slow pass, %d discards.",
                VG_(smc_total_check4s),
                VG_(smc_cache_passed),
                VG_(smc_fancy_passed),
                VG_(smc_discard_count) );
   VG_(message)(Vg_DebugMsg,
                " sanity: %d cheap, %d expensive checks.",
                VG_(sanity_fast_count),
                VG_(sanity_slow_count) );
}


/* ---------------------------------------------------------------------
   Main!
   ------------------------------------------------------------------ */

/* Where we jump to once Valgrind has got control, and the real
   machine's state has been copied to the m_state_static. */

void VG_(main) ( void )
{
   Int i;

   /* Set up our stack sanity-check words, XOR-tagged with their own
      addresses, at both ends of VG_(stack) (10000 words; the
      do_sanity_checks machinery presumably verifies them -- TODO
      confirm). */
   for (i = 0; i < 10; i++) {
      VG_(stack)[i] = (UInt)(&VG_(stack)[i]) ^ 0xA4B3C2D1;
      VG_(stack)[10000-1-i] = (UInt)(&VG_(stack)[10000-i-1]) ^ 0xABCD4321;
   }

   /* Set up baseBlock offsets and copy the saved machine's state into
      it. */
   vg_init_baseBlock();
   VG_(copy_m_state_static_to_baseBlock)();

   /* Process Valgrind's command-line opts (from env var VG_OPTS). */
   process_cmd_line_options();

   /* Initialise the signal handling subsystem. */
   VG_(sigstartup_actions)();

#  ifdef VG_PROFILE
   VGP_(init_profiling)();
#  endif

   if (VG_(clo_instrument)) {
      VGP_PUSHCC(VgpInitAudit);
      VGM_(init_memory_audit)();
      VGP_POPCC;
      VGP_PUSHCC(VgpReadSyms);
      VG_(read_symbols)();
      VGP_POPCC;
   }

   /* This should come after init_memory_audit; otherwise the latter
      carefully sets up the permissions maps to cover the anonymous
      mmaps for the translation table and translation cache, which
      wastes > 20M of virtual address space. */
   VG_(init_transtab_and_SMC)();

   if (VG_(clo_verbosity) == 1) {
      VG_(message)(Vg_UserMsg,
                   "For more details, rerun with: -v");
   }

   /* Now it is safe for malloc et al in vg_clientmalloc.c to act
      instrumented-ly. */
   VG_(running_on_simd_CPU) = True;
   if (VG_(clo_instrument)) {
      /* Mark the flags the client-malloc wrappers read as readable,
         so instrumented reads of them don't fault. */
      VGM_(make_readable) ( (Addr)&VG_(running_on_simd_CPU), 1 );
      VGM_(make_readable) ( (Addr)&VG_(clo_instrument), 1 );
      VGM_(make_readable) ( (Addr)&VG_(clo_trace_malloc), 1 );
      VGM_(make_readable) ( (Addr)&VG_(clo_sloppy_malloc), 1 );
   }

   if (VG_(clo_verbosity) > 0)
      VG_(message)(Vg_UserMsg, "");

   /* Run the client until exit or until the debug bb-budget is
      exhausted. */
   VG_(bbs_to_go) = VG_(clo_stop_after);
   VG_(toploop)();

   if (VG_(clo_verbosity) > 0)
      VG_(message)(Vg_UserMsg, "");

   if (VG_(clo_instrument)) {
      VG_(show_all_errors)();
      VG_(clientmalloc_done)();
      if (VG_(clo_verbosity) == 1) {
         VG_(message)(Vg_UserMsg,
                      "For counts of detected errors, rerun with: -v");
      }
      if (VG_(clo_leak_check)) VG_(detect_memory_leaks)();
   }
   VG_(running_on_simd_CPU) = False;

   VG_(do_sanity_checks)(True /*include expensive checks*/ );

   if (VG_(clo_verbosity) > 1)
      vg_show_counts();

   /* Dead code, kept for manual enabling when debugging Valgrind's
      own memory use. */
   if (0) {
      VG_(message)(Vg_DebugMsg, "");
      VG_(message)(Vg_DebugMsg,
         "------ Valgrind's internal memory use stats follow ------" );
      VG_(mallocSanityCheckAll)();
      VG_(show_all_arena_stats)();
      VG_(message)(Vg_DebugMsg,
         "------ Valgrind's ExeContext management stats follow ------" );
      VG_(show_ExeContext_stats)();
      VG_(message)(Vg_DebugMsg,
         "------ Valgrind's client block stats follow ---------------" );
      VG_(show_client_block_stats)();
   }

#  ifdef VG_PROFILE
   VGP_(done_profiling)();
#  endif

   VG_(done_prof_mem)();

   VG_(shutdown_logging)();

   /* In LD_PRELOAD, convert "valgrind.so" into "valgrinq.so", so that
      child processes don't get traced into.  Also done on simulated
      execve system call. */
   if (!VG_(clo_trace_children)) {
      VG_(mash_LD_PRELOAD_string)(VG_(getenv)("LD_PRELOAD"));
   }

   /* Prepare to restore state to the real CPU. */
   VG_(copy_baseBlock_to_m_state_static)();

   /* This pushes a return address on the simulator's stack, which
      is abandoned.  We call vg_sigshutdown_actions() at the end
      of vg_switch_to_real_CPU(), so as to ensure that the original
      stack and machine state is restored before the real signal
      mechanism is restored.
   */
   VG_(switch_to_real_CPU)();
}


/* Debugging thing .. can be called from assembly with OYNK macro. */
void VG_(oynk) ( Int n )
{
   OINK(n);
}


/* Find "valgrind.so" in a LD_PRELOAD=... string, and convert it to
   "valgrinq.so", which doesn't do anything.  This is used to avoid
   tracing into child processes.  To make this work the build system
   also supplies a dummy file, "valgrinq.so".
*/
void VG_(mash_LD_PRELOAD_string)( Char* ld_preload_str )
{
   Char* p;
   if (ld_preload_str == NULL)
      return;
   p = VG_(strstr)(ld_preload_str, "valgrind.so");
   if (p == NULL)
      return;
   /* p[7] is the 'd' of "valgrind.so"; overwrite in place so the
      string length is unchanged. */
   p[7] = 'q';
}

/* RUNS ON THE CLIENT'S STACK, but on the real CPU.  Start GDB and get
   it to attach to this process.  Called if the user requests this
   service after an error has been shown, so she can poke around and
   look at parameters, memory, etc.  You can't meaningfully get GDB to
   continue the program, though; to continue, quit GDB.
*/ +extern void VG_(start_GDB_whilst_on_client_stack) ( void ) +{ + UChar buf[100]; + VG_(sprintf)(buf, + "/usr/bin/gdb -nw /proc/%d/exe %d", + VG_(getpid)(), VG_(getpid)()); + VG_(printf)("starting GDB with cmd: %s\n", buf); + VG_(mash_LD_PRELOAD_string)(VG_(getenv)("LD_PRELOAD")); + { /* HACK ALERT */ + extern int system ( const char * ); + system(buf); + /* end of HACK ALERT */ + } + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, + "GDB has detached. Valgrind regains control. We continue."); +} + + +/* Print some helpful-ish text about unimplemented things, and give + up. */ +extern void VG_(unimplemented) ( Char* msg ) +{ + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, + "Valgrind detected that your program requires"); + VG_(message)(Vg_UserMsg, + "the following unimplemented functionality:"); + VG_(message)(Vg_UserMsg, " %s", msg); + VG_(message)(Vg_UserMsg, + "This may be because the functionality is hard to implement,"); + VG_(message)(Vg_UserMsg, + "or because no reasonable program would behave this way,"); + VG_(message)(Vg_UserMsg, + "or because nobody has yet needed it. In any case, let me know"); + VG_(message)(Vg_UserMsg, + "(jseward@acm.org) and/or try to work around the problem, if you can."); + VG_(message)(Vg_UserMsg, + ""); + VG_(message)(Vg_UserMsg, + "Valgrind has to exit now. Sorry. Bye!"); + VG_(message)(Vg_UserMsg, + ""); + VG_(exit)(1); +} + + +/*-------------------------------------------------------------*/ +/*--- Replace some C lib things with equivs which don't get ---*/ +/*--- spurious value warnings. THEY RUN ON SIMD CPU! 
---*/ +/*-------------------------------------------------------------*/ + +char* strrchr ( const char* s, int c ) +{ + UChar ch = (UChar)((UInt)c); + UChar* p = (UChar*)s; + UChar* last = NULL; + while (True) { + if (*p == ch) last = p; + if (*p == 0) return last; + p++; + } +} + +char* strchr ( const char* s, int c ) +{ + UChar ch = (UChar)((UInt)c); + UChar* p = (UChar*)s; + while (True) { + if (*p == ch) return p; + if (*p == 0) return NULL; + p++; + } +} + +char* strcat ( char* dest, const char* src ) +{ + Char* dest_orig = dest; + while (*dest) dest++; + while (*src) *dest++ = *src++; + *dest = 0; + return dest_orig; +} + +unsigned int strlen ( const char* str ) +{ + UInt i = 0; + while (str[i] != 0) i++; + return i; +} + +char* strcpy ( char* dest, const char* src ) +{ + Char* dest_orig = dest; + while (*src) *dest++ = *src++; + *dest = 0; + return dest_orig; +} + +int strncmp ( const char* s1, const char* s2, unsigned int nmax ) +{ + unsigned int n = 0; + while (True) { + if (n >= nmax) return 0; + if (*s1 == 0 && *s2 == 0) return 0; + if (*s1 == 0) return -1; + if (*s2 == 0) return 1; + + if (*(UChar*)s1 < *(UChar*)s2) return -1; + if (*(UChar*)s1 > *(UChar*)s2) return 1; + + s1++; s2++; n++; + } +} + +int strcmp ( const char* s1, const char* s2 ) +{ + while (True) { + if (*s1 == 0 && *s2 == 0) return 0; + if (*s1 == 0) return -1; + if (*s2 == 0) return 1; + + if (*(char*)s1 < *(char*)s2) return -1; + if (*(char*)s1 > *(char*)s2) return 1; + + s1++; s2++; + } +} + +void* memchr(const void *s, int c, unsigned int n) +{ + unsigned int i; + UChar c0 = (UChar)c; + UChar* p = (UChar*)s; + for (i = 0; i < n; i++) + if (p[i] == c0) return (void*)(&p[i]); + return NULL; +} + +void* memcpy( void *dst, const void *src, unsigned int len ) +{ + register char *d; + register char *s; + if ( dst > src ) { + d = (char *)dst + len - 1; + s = (char *)src + len - 1; + while ( len-- ) + *d-- = *s--; + } else if ( dst < src ) { + d = (char *)dst; + s = (char *)src; + while ( 
len-- ) + *d++ = *s++; + } + return dst; +} + +/*--------------------------------------------------------------------*/ +/*--- end vg_main.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_malloc2.c b/vg_malloc2.c new file mode 100644 index 000000000..1ad35be1c --- /dev/null +++ b/vg_malloc2.c @@ -0,0 +1,1298 @@ + +/*--------------------------------------------------------------------*/ +/*--- An implementation of malloc/free which doesn't use sbrk. ---*/ +/*--- vg_malloc2.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + + +#include "vg_include.h" + +/* Define to turn on (heavyweight) debugging machinery. 
*/
/* #define DEBUG_MALLOC */


/*------------------------------------------------------------*/
/*--- Structs n stuff                                       ---*/
/*------------------------------------------------------------*/

/* Patterns XORed with a block's address to fill its red-zone words;
   a changed word found at free time indicates an over/underrun
   (see blockSane and mkInuseBlock below). */
#define VG_REDZONE_LO_MASK 0x31415927
#define VG_REDZONE_HI_MASK 0x14141356

#define VG_N_MALLOC_LISTS 16 /* do not change this */


/* All sizes and addresses in this allocator are handled in machine
   words.  WordF/WordL are aliases used to indicate a pointer to the
   First or Last word of a block. */
typedef UInt Word;
typedef Word WordF;
typedef Word WordL;


/* A superblock: one mmap'd region from which blocks are carved. */
typedef
   struct _Superblock {
      struct _Superblock* next;
      /* number of payload words in this superblock. */
      Int   n_payload_words;
      /* zero-length trailing array: the payload immediately follows
         the two header words in the same mapping. */
      Word  payload_words[0];
   }
   Superblock;


/* An arena. */
typedef
   struct {
      Char*        name;                        /* for stats/debug output only */
      Int          rz_szW;                      /* Red zone size in words */
      Bool         rz_check;                    /* Check red-zone on free? */
      Int          min_sblockW;                 /* Minimum superblock size */
      WordF*       freelist[VG_N_MALLOC_LISTS]; /* circular doubly-linked lists
                                                   of free blocks, by size class */
      Superblock*  sblocks;                     /* all superblocks, singly linked */
      /* Stats only. */
      UInt bytes_on_loan;
      UInt bytes_mmaped;
      UInt bytes_on_loan_max;
   }
   Arena;


/* Block layout:

      this block total sizeW   (1 word)
      freelist previous ptr    (1 word)
      freelist next ptr        (1 word)
      red zone words (depends on .rz_szW field of Arena)
      (payload words)
      red zone words (depends on .rz_szW field of Arena)
      this block total sizeW   (1 word)

   Total size in words (bszW) and payload size in words (pszW)
   are related by
      bszW == pszW + 4 + 2 * a->rz_szW

   Furthermore, both size fields in the block are negative if it is
   not in use, and positive if it is in use.  A block size of zero
   is not possible, because a block always has at least four words
   of overhead.
*/
/* NOTE(review): this struct mirrors the layout described above; it
   does not appear to be referenced by the code visible here. */
typedef
   struct {
      Int   bszW_lo;
      Word* prev;
      Word* next;
      Word  redzone[0];
   }
   BlockHeader;


/*------------------------------------------------------------*/
/*--- Forwardses ... and misc ...                           ---*/
/*------------------------------------------------------------*/

static Bool blockSane ( Arena* a, Word* b );

/* Align ptr p upwards to an align-sized boundary.
*/
static
void* align_upwards ( void* p, Int align )
{
   /* Round p up to the next multiple of align.  Note the modulus
      arithmetic works for any positive align, not just powers of
      two. */
   Addr a = (Addr)p;
   if ((a % align) == 0) return (void*)a;
   return (void*)(a - (a % align) + align);
}


/*------------------------------------------------------------*/
/*--- Arena management stuff                                ---*/
/*------------------------------------------------------------*/

/* The arena structures themselves. */
static Arena vg_arena[VG_N_ARENAS];

/* Functions external to this module identify arenas using ArenaIds,
   not Arena*s.  This fn converts the former to the latter. */
static Arena* arenaId_to_ArenaP ( ArenaId arena )
{
   vg_assert(arena >= 0 && arena < VG_N_ARENAS);
   return & vg_arena[arena];
}


/* Initialise an arena.  name is used only in stats/debug output;
   rz_szW is the red-zone size in words each side of a payload;
   rz_check selects checking of red-zone contents on free;
   min_sblockW is the minimum superblock size in words, and must be
   a whole number of pages. */
static
void arena_init ( Arena* a, Char* name,
                  Int rz_szW, Bool rz_check, Int min_sblockW )
{
   Int i;
   vg_assert((min_sblockW % VKI_WORDS_PER_PAGE) == 0);
   a->name = name;
   a->rz_szW = rz_szW;
   a->rz_check = rz_check;
   a->min_sblockW = min_sblockW;
   for (i = 0; i < VG_N_MALLOC_LISTS; i++) a->freelist[i] = NULL;
   a->sblocks = NULL;
   /* Statistics. */
   a->bytes_on_loan = 0;
   a->bytes_mmaped = 0;
   a->bytes_on_loan_max = 0;
}


/* Print vital stats for an arena. */
void VG_(show_all_arena_stats) ( void )
{
   Int i;
   for (i = 0; i < VG_N_ARENAS; i++) {
      VG_(message)(Vg_DebugMsg,
         "Arena `%s': %7d max useful, %7d mmap'd, %7d current useful",
         vg_arena[i].name,
         vg_arena[i].bytes_on_loan_max,
         vg_arena[i].bytes_mmaped,
         vg_arena[i].bytes_on_loan
      );
   }
}


/* It is important that this library is self-initialising, because it
   may get called very early on -- as a result of C++ static
   constructor initialisations -- before Valgrind itself is
   initialised.  Hence vg_malloc() and vg_free() below always call
   ensure_mm_init() to ensure things are correctly initialised.
*/

/* Lazily initialise all arenas exactly once, guarded by a static
   flag; cheap to call on every allocator entry point. */
static
void ensure_mm_init ( void )
{
   static Bool init_done = False;
   if (init_done) return;

   /* Use a checked red zone size of 1 word for our internal stuff,
      and an unchecked zone of arbitrary size for the client.  Of
      course the client's red zone is checked really, but using the
      addressibility maps, not by the mechanism implemented here,
      which merely checks at the time of freeing that the red zone
      words are unchanged. */

   arena_init ( &vg_arena[VG_AR_PRIVATE], "private ",
                1, True, 262144 );

   arena_init ( &vg_arena[VG_AR_SYMTAB], "symtab ",
                1, True, 262144 );

   arena_init ( &vg_arena[VG_AR_CLIENT], "client ",
                VG_AR_CLIENT_REDZONE_SZW, False, 262144 );

   arena_init ( &vg_arena[VG_AR_DEMANGLE], "demangle",
                4 /*paranoid*/, True, 16384 );

   arena_init ( &vg_arena[VG_AR_EXECTXT], "exectxt ",
                1, True, 16384 );

   arena_init ( &vg_arena[VG_AR_ERRCTXT], "errctxt ",
                1, True, 16384 );

   arena_init ( &vg_arena[VG_AR_TRANSIENT], "transien",
                2, True, 16384 );

   init_done = True;
#  ifdef DEBUG_MALLOC
   VG_(mallocSanityCheckAll)();
#  endif
}


/*------------------------------------------------------------*/
/*--- Superblock management stuff                           ---*/
/*------------------------------------------------------------*/

/* Allocate (via mmap) a superblock with room for at least cszW
   payload words, rounded up to the arena minimum and then to a
   whole number of pages.  Updates a->bytes_mmaped. */
static
Superblock* newSuperblock ( Arena* a, Int cszW )
{
   Superblock* sb;
   cszW += 2; /* Take into account sb->next and sb->n_words fields */
   if (cszW < a->min_sblockW) cszW = a->min_sblockW;
   while ((cszW % VKI_WORDS_PER_PAGE) > 0) cszW++;
   sb = VG_(get_memory_from_mmap) ( cszW * sizeof(Word) );
   sb->n_payload_words = cszW - 2;
   a->bytes_mmaped += cszW * sizeof(Word);
   if (0)
      VG_(message)(Vg_DebugMsg, "newSuperblock, %d payload words",
                   sb->n_payload_words);
   return sb;
}


/* Find the superblock containing the given chunk.
*/
static
Superblock* findSb ( Arena* a, UInt* ch )
{
   /* Linear search of the arena's superblock list.  Panics rather
      than returning if ch lies in no superblock -- which indicates
      either a free in the wrong arena or a bogus pointer. */
   Superblock* sb;
   for (sb = a->sblocks; sb; sb = sb->next)
      if (&sb->payload_words[0] <= ch
          && ch < &sb->payload_words[sb->n_payload_words])
         return sb;
   VG_(printf)("findSb: can't find pointer %p in arena `%s'\n",
               ch, a->name );
   VG_(panic)("findSb: vg_free() in wrong arena?");
   return NULL; /*NOTREACHED*/
}


/*------------------------------------------------------------*/
/*--- Low-level functions for working with blocks.          ---*/
/*------------------------------------------------------------*/

/* A bszW carries both a block's size in words and its in-use state
   in one Int: negative means free, positive means in use (see the
   block-layout comment above).  Zero is impossible, since every
   block has at least four words of overhead. */

/* Add the not-in-use attribute to a bszW. */
static __inline__
Int mk_free_bszW ( Int bszW )
{
   vg_assert(bszW != 0);
   return (bszW < 0) ? bszW : -bszW;
}

/* Add the in-use attribute to a bszW. */
static __inline__
Int mk_inuse_bszW ( Int bszW )
{
   vg_assert(bszW != 0);
   return (bszW < 0) ? -bszW : bszW;
}

/* Remove the in-use/not-in-use attribute from a bszW, leaving just
   the size. */
static __inline__
Int mk_plain_bszW ( Int bszW )
{
   vg_assert(bszW != 0);
   return (bszW < 0) ? -bszW : bszW;
}

/* Does this bszW have the in-use attribute ? */
static __inline__
Bool is_inuse_bszW ( Int bszW )
{
   vg_assert(bszW != 0);
   return (bszW < 0) ? False : True;
}


/* Given the addr of the first word of a block, return the addr of the
   last word.  Relies on fw[0] holding the (signed) bszW. */
static __inline__
WordL* first_to_last ( WordF* fw )
{
   return fw + mk_plain_bszW(fw[0]) - 1;
}

/* Given the addr of the last word of a block, return the addr of the
   first word.  Relies on the duplicate size field in lw[0]. */
static __inline__
WordF* last_to_first ( WordL* lw )
{
   return lw - mk_plain_bszW(lw[0]) + 1;
}


/* Given the addr of the first word of a block, return the addr of the
   first word of its payload: skip the size word, the two freelist
   links, and the lower red zone. */
static __inline__
Word* first_to_payload ( Arena* a, WordF* fw )
{
   return & fw[3 + a->rz_szW];
}

/* Given the addr of the first word of the payload of a block,
   return the addr of the first word of the block.
*/ +static __inline__ +Word* payload_to_first ( Arena* a, WordF* payload ) +{ + return & payload[- 3 - a->rz_szW]; +} + +/* Set and get the lower size field of a block. */ +static __inline__ +void set_bszW_lo ( WordF* fw, Int bszW ) { + fw[0] = bszW; +} +static __inline__ +Int get_bszW_lo ( WordF* fw ) +{ + return fw[0]; +} + + +/* Set and get the next and previous link fields of a block. */ +static __inline__ +void set_prev_p ( WordF* fw, Word* prev_p ) { + fw[1] = (Word)prev_p; +} +static __inline__ +void set_next_p ( WordF* fw, Word* next_p ) { + fw[2] = (Word)next_p; +} +static __inline__ +Word* get_prev_p ( WordF* fw ) { + return (Word*)(fw[1]); +} +static __inline__ +Word* get_next_p ( WordF* fw ) { + return (Word*)(fw[2]); +} + + +/* Set and get the upper size field of a block. */ +static __inline__ +void set_bszW_hi ( WordF* fw, Int bszW ) { + WordL* lw = first_to_last(fw); + vg_assert(lw == fw + mk_plain_bszW(bszW) - 1); + lw[0] = bszW; +} +static __inline__ +Int get_bszW_hi ( WordF* fw ) { + WordL* lw = first_to_last(fw); + return lw[0]; +} + +/* Get the upper size field of a block, given a pointer to the last + word of it. */ +static __inline__ +Int get_bszW_hi_from_last_word ( WordL* lw ) { + WordF* fw = last_to_first(lw); + return get_bszW_lo(fw); +} + + +/* Read and write the lower and upper red-zone words of a block. */ +static __inline__ +void set_rz_lo_word ( Arena* a, WordF* fw, Int rz_wordno, Word w ) +{ + fw[3 + rz_wordno] = w; +} +static __inline__ +void set_rz_hi_word ( Arena* a, WordF* fw, Int rz_wordno, Word w ) +{ + WordL* lw = first_to_last(fw); + lw[-1-rz_wordno] = w; +} +static __inline__ +Word get_rz_lo_word ( Arena* a, WordF* fw, Int rz_wordno ) +{ + return fw[3 + rz_wordno]; +} +static __inline__ +Word get_rz_hi_word ( Arena* a, WordF* fw, Int rz_wordno ) +{ + WordL* lw = first_to_last(fw); + return lw[-1-rz_wordno]; +} + + +/* Return the lower, upper and total overhead in words for a block. 
These are determined purely by which arena the block lives in. */
static __inline__
Int overhead_szW_lo ( Arena* a )
{
   /* Size word + prev link + next link + lower red zone. */
   return 3 + a->rz_szW;
}
static __inline__
Int overhead_szW_hi ( Arena* a )
{
   /* Upper red zone + size word. */
   return 1 + a->rz_szW;
}
static __inline__
Int overhead_szW ( Arena* a )
{
   return overhead_szW_lo(a) + overhead_szW_hi(a);
}


/* Convert payload size in words to block size in words, and back. */
static __inline__
Int pszW_to_bszW ( Arena* a, Int pszW )
{
   vg_assert(pszW >= 0);
   return pszW + overhead_szW(a);
}
static __inline__
Int bszW_to_pszW ( Arena* a, Int bszW )
{
   Int pszW = bszW - overhead_szW(a);
   vg_assert(pszW >= 0);
   return pszW;
}

/*------------------------------------------------------------*/
/*--- Functions for working with freelists.                 ---*/
/*------------------------------------------------------------*/

/* Determination of which freelist a block lives on is based on the
   payload size, not block size, in words. */

/* Convert a payload size in words to a freelist number.  List 0
   covers sizes up to 3 words; lists 1..9 each hold one exact size
   (4..12 words); lists 10..14 hold doubling ranges up to 256 words;
   list 15 holds everything larger. */
static
Int pszW_to_listNo ( Int pszW )
{
   vg_assert(pszW >= 0);
   if (pszW <= 3) return 0;
   if (pszW <= 4) return 1;
   if (pszW <= 5) return 2;
   if (pszW <= 6) return 3;
   if (pszW <= 7) return 4;
   if (pszW <= 8) return 5;
   if (pszW <= 9) return 6;
   if (pszW <= 10) return 7;
   if (pszW <= 11) return 8;
   if (pszW <= 12) return 9;
   if (pszW <= 16) return 10;
   if (pszW <= 32) return 11;
   if (pszW <= 64) return 12;
   if (pszW <= 128) return 13;
   if (pszW <= 256) return 14;
   return 15;
}


/* What are the minimum and maximum payload sizes for a given list?
*/ + +static +Int listNo_to_pszW_min ( Int listNo ) +{ + Int pszW = 0; + vg_assert(listNo >= 0 && listNo <= VG_N_MALLOC_LISTS); + while (pszW_to_listNo(pszW) < listNo) pszW++; + return pszW; +} + +static +Int listNo_to_pszW_max ( Int listNo ) +{ + vg_assert(listNo >= 0 && listNo <= VG_N_MALLOC_LISTS); + if (listNo == VG_N_MALLOC_LISTS-1) { + return 999999999; + } else { + return listNo_to_pszW_min(listNo+1) - 1; + } +} + + +/* A nasty hack to try and reduce fragmentation. Try and replace + a->freelist[lno] with another block on the same list but with a + lower address, with the idea of attempting to recycle the same + blocks rather than cruise through the address space. */ + +static +void swizzle ( Arena* a, Int lno ) +{ + UInt* p_best; + UInt* pp; + UInt* pn; + Int i; + + p_best = a->freelist[lno]; + if (p_best == NULL) return; + + pn = pp = p_best; + for (i = 0; i < 20; i++) { + pn = get_next_p(pn); + pp = get_prev_p(pp); + if (pn < p_best) p_best = pn; + if (pp < p_best) p_best = pp; + } + if (p_best < a->freelist[lno]) { +# ifdef DEBUG_MALLOC + VG_(printf)("retreat by %d\n", + ((Char*)(a->freelist[lno])) - ((Char*)p_best)); +# endif + a->freelist[lno] = p_best; + } +} + + +/*------------------------------------------------------------*/ +/*--- Creating and deleting blocks. ---*/ +/*------------------------------------------------------------*/ + +/* Mark the words at b .. b+bszW-1 as not in use, and add them to the + relevant free list. */ + +static +void mkFreeBlock ( Arena* a, Word* b, Int bszW, Int b_lno ) +{ + Int pszW = bszW_to_pszW(a, bszW); + vg_assert(pszW >= 0); + vg_assert(b_lno == pszW_to_listNo(pszW)); + /* Set the size fields and indicate not-in-use. */ + set_bszW_lo(b, mk_free_bszW(bszW)); + set_bszW_hi(b, mk_free_bszW(bszW)); + + /* Add to the relevant list. 
*/ + if (a->freelist[b_lno] == NULL) { + set_prev_p(b, b); + set_next_p(b, b); + a->freelist[b_lno] = b; + } else { + Word* b_prev = get_prev_p(a->freelist[b_lno]); + Word* b_next = a->freelist[b_lno]; + set_next_p(b_prev, b); + set_prev_p(b_next, b); + set_next_p(b, b_next); + set_prev_p(b, b_prev); + } +# ifdef DEBUG_MALLOC + (void)blockSane(a,b); +# endif +} + + +/* Mark the words at b .. b+bszW-1 as in use, and set up the block + appropriately. */ +static +void mkInuseBlock ( Arena* a, UInt* b, UInt bszW ) +{ + Int i; + set_bszW_lo(b, mk_inuse_bszW(bszW)); + set_bszW_hi(b, mk_inuse_bszW(bszW)); + set_prev_p(b, NULL); + set_next_p(b, NULL); + if (a->rz_check) { + for (i = 0; i < a->rz_szW; i++) { + set_rz_lo_word(a, b, i, (UInt)b ^ VG_REDZONE_LO_MASK); + set_rz_hi_word(a, b, i, (UInt)b ^ VG_REDZONE_HI_MASK); + } + } +# ifdef DEBUG_MALLOC + (void)blockSane(a,b); +# endif +} + + +/* Remove a block from a given list. Does no sanity checking. */ +static +void unlinkBlock ( Arena* a, UInt* b, Int listno ) +{ + vg_assert(listno >= 0 && listno < VG_N_MALLOC_LISTS); + if (get_prev_p(b) == b) { + /* Only one element in the list; treat it specially. */ + vg_assert(get_next_p(b) == b); + a->freelist[listno] = NULL; + } else { + UInt* b_prev = get_prev_p(b); + UInt* b_next = get_next_p(b); + a->freelist[listno] = b_prev; + set_next_p(b_prev, b_next); + set_prev_p(b_next, b_prev); + swizzle ( a, listno ); + } + set_prev_p(b, NULL); + set_next_p(b, NULL); +} + + +/* Split an existing free block into two pieces, and put the fragment + (the second one along in memory) onto the relevant free list. + req_bszW is the required size of the block which isn't the + fragment. 
*/ +static +void splitChunk ( Arena* a, UInt* b, Int b_listno, UInt req_bszW ) +{ + Int b_bszW, frag_bszW; + b_bszW = mk_plain_bszW(get_bszW_lo(b)); + vg_assert(req_bszW < b_bszW); + frag_bszW = b_bszW - req_bszW; + vg_assert(frag_bszW >= overhead_szW(a)); + /* + printf( "split %d into %d and %d\n", + b_bszW,req_bszW,frag_bszW ); + */ + vg_assert(bszW_to_pszW(a, frag_bszW) > 0); + unlinkBlock(a, b, b_listno); + mkInuseBlock(a, b, req_bszW); + mkFreeBlock(a, &b[req_bszW], frag_bszW, + pszW_to_listNo(bszW_to_pszW(a, frag_bszW))); +} + + +/*------------------------------------------------------------*/ +/*--- Sanity-check/debugging machinery. ---*/ +/*------------------------------------------------------------*/ + +/* Do some crude sanity checks on a chunk. */ +static +Bool blockSane ( Arena* a, Word* b ) +{ +# define BLEAT(str) VG_(printf)("blockSane: fail -- %s\n",str) + Int i; + if (get_bszW_lo(b) != get_bszW_hi(b)) + {BLEAT("sizes");return False;} + if (a->rz_check && is_inuse_bszW(get_bszW_lo(b))) { + for (i = 0; i < a->rz_szW; i++) { + if (get_rz_lo_word(a, b, i) != ((Word)b ^ VG_REDZONE_LO_MASK)) + {BLEAT("redzone-lo");return False;} + if (get_rz_hi_word(a, b, i) != ((Word)b ^ VG_REDZONE_HI_MASK)) + {BLEAT("redzone-hi");return False;} + } + } + return True; +# undef BLEAT +} + + +/* Print superblocks (only for debugging). */ +static +void ppSuperblocks ( Arena* a ) +{ + Int i, ch_bszW, blockno; + UInt* ch; + Superblock* sb = a->sblocks; + blockno = 1; + + while (sb) { + VG_(printf)( "\n" ); + VG_(printf)( "superblock %d at %p, sb->n_pl_ws = %d, next = %p\n", + blockno++, sb, sb->n_payload_words, sb->next ); + i = 0; + while (True) { + if (i >= sb->n_payload_words) break; + ch = &sb->payload_words[i]; + ch_bszW = get_bszW_lo(ch); + VG_(printf)( " block at %d, bszW %d: ", i, mk_plain_bszW(ch_bszW) ); + VG_(printf)( "%s, ", is_inuse_bszW(ch_bszW) ? "inuse" : "free" ); + VG_(printf)( "%s\n", blockSane(a,ch) ? 
"ok" : "BAD" ); + i += mk_plain_bszW(ch_bszW); + } + if (i > sb->n_payload_words) + VG_(printf)( " last block overshoots end of SB\n"); + sb = sb->next; + } + VG_(printf)( "end of superblocks\n\n" ); +} + + +/* Sanity check both the superblocks and the chains. */ +void VG_(mallocSanityCheckArena) ( ArenaId aid ) +{ + Int i, superblockctr, b_bszW, b_pszW, blockctr_sb, blockctr_li; + Int blockctr_sb_free, listno, list_min_pszW, list_max_pszW; + Superblock* sb; + Bool thisFree, lastWasFree; + Word* b; + Word* b_prev; + UInt arena_bytes_on_loan; + Arena* a; + +# define BOMB VG_(panic)("vg_mallocSanityCheckArena") + + a = arenaId_to_ArenaP(aid); + + /* First, traverse all the superblocks, inspecting the chunks in + each. */ + superblockctr = blockctr_sb = blockctr_sb_free = 0; + arena_bytes_on_loan = 0; + sb = a->sblocks; + while (sb) { + lastWasFree = False; + superblockctr++; + i = 0; + while (True) { + if (i >= sb->n_payload_words) break; + blockctr_sb++; + b = &sb->payload_words[i]; + b_bszW = get_bszW_lo(b); + if (!blockSane(a, b)) { + VG_(printf)( "mallocSanityCheck: sb %p, block %d (bszW %d): " + "BAD\n", + sb, i, b_bszW ); + BOMB; + } + thisFree = !is_inuse_bszW(b_bszW); + if (thisFree && lastWasFree) { + VG_(printf)( "mallocSanityCheck: sb %p, block %d (bszW %d): " + "UNMERGED FREES\n", + sb, i, b_bszW ); + BOMB; + } + lastWasFree = thisFree; + if (thisFree) blockctr_sb_free++; + if (!thisFree) + arena_bytes_on_loan += sizeof(Word) * bszW_to_pszW(a, b_bszW); + i += mk_plain_bszW(b_bszW); + } + if (i > sb->n_payload_words) { + VG_(printf)( "mallocSanityCheck: sb %p: last block " + "overshoots end\n", sb); + BOMB; + } + sb = sb->next; + } + + if (arena_bytes_on_loan != a->bytes_on_loan) { + VG_(printf)( + "mallocSanityCheck: a->bytes_on_loan %d, " + "arena_bytes_on_loan %d: " + "MISMATCH\n", a->bytes_on_loan, arena_bytes_on_loan); + ppSuperblocks(a); + BOMB; + } + + /* Second, traverse each list, checking that the back pointers make + sense, counting blocks 
encountered, and checking that each block + is an appropriate size for this list. */ + blockctr_li = 0; + for (listno = 0; listno < VG_N_MALLOC_LISTS; listno++) { + list_min_pszW = listNo_to_pszW_min(listno); + list_max_pszW = listNo_to_pszW_max(listno); + b = a->freelist[listno]; + if (b == NULL) continue; + while (True) { + b_prev = b; + b = get_next_p(b); + if (get_prev_p(b) != b_prev) { + VG_(printf)( "mallocSanityCheck: list %d at %p: " + "BAD LINKAGE\n", + listno, b ); + BOMB; + } + b_pszW = bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo(b))); + if (b_pszW < list_min_pszW || b_pszW > list_max_pszW) { + VG_(printf)( + "mallocSanityCheck: list %d at %p: " + "WRONG CHAIN SIZE %d (%d, %d)\n", + listno, b, b_pszW, list_min_pszW, list_max_pszW ); + BOMB; + } + blockctr_li++; + if (b == a->freelist[listno]) break; + } + } + + if (blockctr_sb_free != blockctr_li) { + VG_(printf)( + "mallocSanityCheck: BLOCK COUNT MISMATCH " + "(via sbs %d, via lists %d)\n", + blockctr_sb_free, blockctr_li ); + ppSuperblocks(a); + BOMB; + } + + VG_(message)(Vg_DebugMsg, + "mSC [%s]: %2d sbs, %5d tot bs, %4d/%-4d free bs, " + "%2d lists, %7d mmap, %7d loan", + a->name, + superblockctr, + blockctr_sb, blockctr_sb_free, blockctr_li, + VG_N_MALLOC_LISTS, + a->bytes_mmaped, a->bytes_on_loan); +# undef BOMB +} + + +void VG_(mallocSanityCheckAll) ( void ) +{ + Int i; + for (i = 0; i < VG_N_ARENAS; i++) + VG_(mallocSanityCheckArena) ( i ); +} + + +/* Really, this isn't the right place for this. Nevertheless: find + out if an arena is empty -- currently has no bytes on loan. This + is useful for checking for memory leaks (of valgrind, not the + client.) +*/ +Bool VG_(is_empty_arena) ( ArenaId aid ) +{ + Arena* a; + Superblock* sb; + WordF* b; + Int b_bszW; + ensure_mm_init(); + a = arenaId_to_ArenaP(aid); + for (sb = a->sblocks; sb != NULL; sb = sb->next) { + /* If the superblock is empty, it should contain a single free + block, of the right size. 
*/ + b = &(sb->payload_words[0]); + b_bszW = get_bszW_lo(b); + if (is_inuse_bszW(b_bszW)) return False; + if (mk_plain_bszW(b_bszW) != sb->n_payload_words) return False; + /* So this block is not in use and is of the right size. Keep + going. */ + } + return True; +} + + +/*------------------------------------------------------------*/ +/*--- Externally-visible functions. ---*/ +/*------------------------------------------------------------*/ + +void* VG_(malloc) ( ArenaId aid, Int req_pszB ) +{ + Int req_pszW, req_bszW, frag_bszW, b_bszW, lno; + Superblock* new_sb; + Word* b; + Arena* a; + + VGP_PUSHCC(VgpMalloc); + + ensure_mm_init(); + a = arenaId_to_ArenaP(aid); + + vg_assert(req_pszB >= 0); + vg_assert(req_pszB < 0x7FFFFFF0); + + req_pszW = (req_pszB + VKI_BYTES_PER_WORD - 1) / VKI_BYTES_PER_WORD; + + /* Keep gcc -O happy: */ + b = NULL; + + /* Start searching at this list. */ + lno = pszW_to_listNo(req_pszW); + + /* This loop finds a list which has a block big enough, or sets + req_listno to N_LISTS if no such block exists. */ + while (True) { + if (lno == VG_N_MALLOC_LISTS) break; + /* If this list is empty, try the next one. */ + if (a->freelist[lno] == NULL) { + lno++; + continue; + } + /* Scan a->list[lno] to find a big-enough chunk. */ + b = a->freelist[lno]; + b_bszW = mk_plain_bszW(get_bszW_lo(b)); + while (True) { + if (bszW_to_pszW(a, b_bszW) >= req_pszW) break; + b = get_next_p(b); + b_bszW = mk_plain_bszW(get_bszW_lo(b)); + if (b == a->freelist[lno]) break; + } + if (bszW_to_pszW(a, b_bszW) >= req_pszW) break; + /* No luck? Try a larger list. */ + lno++; + } + + /* Either lno < VG_N_MALLOC_LISTS and b points to the selected + block, or lno == VG_N_MALLOC_LISTS, and we have to allocate a + new superblock. 
*/ + + if (lno == VG_N_MALLOC_LISTS) { + req_bszW = pszW_to_bszW(a, req_pszW); + new_sb = newSuperblock(a, req_bszW); + vg_assert(new_sb != NULL); + new_sb->next = a->sblocks; + a->sblocks = new_sb; + b = &(new_sb->payload_words[0]); + lno = pszW_to_listNo(bszW_to_pszW(a, new_sb->n_payload_words)); + mkFreeBlock ( a, b, new_sb->n_payload_words, lno); + } + + /* Ok, we can allocate from b, which lives in list req_listno. */ + vg_assert(b != NULL); + vg_assert(lno >= 0 && lno < VG_N_MALLOC_LISTS); + vg_assert(a->freelist[lno] != NULL); + b_bszW = mk_plain_bszW(get_bszW_lo(b)); + req_bszW = pszW_to_bszW(a, req_pszW); + /* req_bszW is the size of the block we are after. b_bszW is the + size of what we've actually got. */ + vg_assert(b_bszW >= req_bszW); + + /* Could we split this block and still get a useful fragment? + Where "useful" means that the payload size of the frag is at + least one word. */ + frag_bszW = b_bszW - req_bszW; + if (frag_bszW > overhead_szW(a)) { + splitChunk(a, b, lno, req_bszW); + } else { + /* No, mark as in use and use as-is. 
*/ + unlinkBlock(a, b, lno); + /* + set_bszW_lo(b, mk_inuse_bszW(b_bszW)); + set_bszW_hi(b, mk_inuse_bszW(b_bszW)); + */ + mkInuseBlock(a, b, b_bszW); + } + vg_assert(req_bszW <= mk_plain_bszW(get_bszW_lo(b))); + + a->bytes_on_loan + += sizeof(Word) + * bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo(b))); + if (a->bytes_on_loan > a->bytes_on_loan_max) + a->bytes_on_loan_max = a->bytes_on_loan; + +# ifdef DEBUG_MALLOC + VG_(mallocSanityCheckArena)(aid); +# endif + + VGP_POPCC; + return first_to_payload(a, b); +} + + +void VG_(free) ( ArenaId aid, void* ptr ) +{ + Superblock* sb; + UInt* sb_payl_firstw; + UInt* sb_payl_lastw; + UInt* other; + UInt* ch; + Int ch_bszW, ch_pszW, other_bszW, ch_listno; + Arena* a; + + VGP_PUSHCC(VgpMalloc); + + ensure_mm_init(); + a = arenaId_to_ArenaP(aid); + + if (ptr == NULL) return; + + ch = payload_to_first(a, ptr); + +# ifdef DEBUG_MALLOC + vg_assert(blockSane(a,ch)); +# endif + + a->bytes_on_loan + -= sizeof(Word) + * bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo(ch))); + + sb = findSb( a, ch ); + sb_payl_firstw = &(sb->payload_words[0]); + sb_payl_lastw = &(sb->payload_words[sb->n_payload_words-1]); + + /* Put this chunk back on a list somewhere. */ + ch_bszW = get_bszW_lo(ch); + ch_pszW = bszW_to_pszW(a, ch_bszW); + ch_listno = pszW_to_listNo(ch_pszW); + mkFreeBlock( a, ch, ch_bszW, ch_listno ); + + /* See if this block can be merged with the following one. */ + other = ch + ch_bszW; + /* overhead_szW(a) is the smallest possible bszW for this arena. + So the nearest possible end to the block beginning at other is + other+overhead_szW(a)-1. Hence the test below. 
*/ + if (other+overhead_szW(a)-1 <= sb_payl_lastw) { + other_bszW = get_bszW_lo(other); + if (!is_inuse_bszW(other_bszW)) { + /* VG_(printf)( "merge-successor\n"); */ + other_bszW = mk_plain_bszW(other_bszW); +# ifdef DEBUG_MALLOC + vg_assert(blockSane(a, other)); +# endif + unlinkBlock( a, ch, ch_listno ); + unlinkBlock( a, other, pszW_to_listNo(bszW_to_pszW(a,other_bszW)) ); + ch_bszW += other_bszW; + ch_listno = pszW_to_listNo(bszW_to_pszW(a, ch_bszW)); + mkFreeBlock( a, ch, ch_bszW, ch_listno ); + } + } + + /* See if this block can be merged with its predecessor. */ + if (ch-overhead_szW(a) >= sb_payl_firstw) { + other_bszW = get_bszW_hi_from_last_word( ch-1 ); + if (!is_inuse_bszW(other_bszW)) { + /* VG_(printf)( "merge-predecessor\n"); */ + other = last_to_first( ch-1 ); + other_bszW = mk_plain_bszW(other_bszW); + unlinkBlock( a, ch, ch_listno ); + unlinkBlock( a, other, pszW_to_listNo(bszW_to_pszW(a, other_bszW)) ); + ch = other; + ch_bszW += other_bszW; + ch_listno = pszW_to_listNo(bszW_to_pszW(a, ch_bszW)); + mkFreeBlock( a, ch, ch_bszW, ch_listno ); + } + } + +# ifdef DEBUG_MALLOC + VG_(mallocSanityCheckArena)(aid); +# endif + + VGP_POPCC; +} + + +/* + The idea for malloc_aligned() is to allocate a big block, base, and + then split it into two parts: frag, which is returned to the the + free pool, and align, which is the bit we're really after. Here's + a picture. L and H denote the block lower and upper overheads, in + words. The details are gruesome. Note it is slightly complicated + because the initial request to generate base may return a bigger + block than we asked for, so it is important to distinguish the base + request size and the base actual size. + + frag_b align_b + | | + | frag_p | align_p + | | | | + v v v v + + +---+ +---+---+ +---+ + | L |----------------| H | L |---------------| H | + +---+ +---+---+ +---+ + + ^ ^ ^ + | | : + | base_p this addr must be aligned + | + base_b + + . . . . . . . + <------ frag_bszW -------> . . . + . 
<------------- base_pszW_act -----------> . + . . . . . . . + +*/ +void* VG_(malloc_aligned) ( ArenaId aid, Int req_alignB, Int req_pszB ) +{ + Int req_alignW, req_pszW, base_pszW_req, base_pszW_act, frag_bszW; + Word *base_b, *base_p, *align_p; + UInt saved_bytes_on_loan; + Arena* a; + + ensure_mm_init(); + a = arenaId_to_ArenaP(aid); + + vg_assert(req_pszB >= 0); + vg_assert(req_pszB < 0x7FFFFFF0); + + /* Check that the requested alignment seems reasonable; that is, is + a power of 2. There must be a better way to do this. What is + it? */ + switch (req_alignB) { + case 8: case 16: case 32: case 64: case 128: case 256: + case 512: case 1024: case 2048: case 4096: case 8192: + case 16384: case 32768: case 65536: case 131072: + case 1048576: + /* can't be bothered to calculate larger ones */ + break; + default: + VG_(printf)("vg_malloc_aligned(%p, %d, %d)\nbad alignment request", + a, req_pszB, req_alignB ); + VG_(panic)("vg_malloc_aligned"); + /*NOTREACHED*/ + } + + /* Required alignment, in words. Since it's constrained to be a + power of 2 >= word size, no need to align the alignment. Still, + we check. */ + req_alignW = req_alignB / VKI_BYTES_PER_WORD; + vg_assert(req_alignB == req_alignW * VKI_BYTES_PER_WORD); + + /* Required payload size for the aligned chunk. */ + req_pszW = (req_pszB + VKI_BYTES_PER_WORD - 1) / VKI_BYTES_PER_WORD; + + /* Payload size to request for the big block that we will split + up. */ + base_pszW_req = req_pszW + overhead_szW(a) + req_alignW; + + /* Payload ptr for the block we are going to split. Note this + changes a->bytes_on_loan; we save and restore it ourselves. */ + saved_bytes_on_loan = a->bytes_on_loan; + base_p = VG_(malloc) ( aid, base_pszW_req * VKI_BYTES_PER_WORD ); + a->bytes_on_loan = saved_bytes_on_loan; + + /* Block ptr for the block we are going to split. */ + base_b = payload_to_first ( a, base_p ); + + /* Pointer to the payload of the aligned block we are going to + return. This has to be suitably aligned. 
*/ + align_p = align_upwards ( base_b + 2 * overhead_szW_lo(a) + + overhead_szW_hi(a), + req_alignB ); + + /* The block size of the fragment we will create. This must be big + enough to actually create a fragment. */ + frag_bszW = align_p - overhead_szW_lo(a) - base_b; + vg_assert(frag_bszW >= overhead_szW(a)); + + /* The actual payload size of the block we are going to split. */ + base_pszW_act = bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo(base_b))); + + /* Create the fragment block, and put it back on the relevant free + list. */ + mkFreeBlock ( a, base_b, frag_bszW, + pszW_to_listNo(bszW_to_pszW(a, frag_bszW)) ); + + /* Create the aligned block. */ + mkInuseBlock ( a, + align_p - overhead_szW_lo(a), + base_p + base_pszW_act + + overhead_szW_hi(a) + - (align_p - overhead_szW_lo(a)) ); + + /* Final sanity checks. */ + vg_assert(( (UInt)align_p % req_alignB) == 0); + + vg_assert(is_inuse_bszW(get_bszW_lo(payload_to_first(a, align_p)))); + + vg_assert(req_pszW + <= + bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo( + payload_to_first(a, align_p)))) + ); + + a->bytes_on_loan + += sizeof(Word) + * bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo( + payload_to_first(a, align_p)))); + if (a->bytes_on_loan > a->bytes_on_loan_max) + a->bytes_on_loan_max = a->bytes_on_loan; + +# ifdef DEBUG_MALLOC + VG_(mallocSanityCheckArena)(aid); +# endif + + return align_p; +} + + +/*------------------------------------------------------------*/ +/*--- Services layered on top of malloc/free. 
---*/ +/*------------------------------------------------------------*/ + +void* VG_(calloc) ( ArenaId aid, Int nmemb, Int nbytes ) +{ + Int i, size; + UChar* p; + size = nmemb * nbytes; + vg_assert(size > 0); + p = VG_(malloc) ( aid, size ); + for (i = 0; i < size; i++) p[i] = 0; + return p; +} + + +void* VG_(realloc) ( ArenaId aid, void* ptr, Int req_pszB ) +{ + Arena* a; + Int old_bszW, old_pszW, old_pszB, i; + UChar *p_old, *p_new; + UInt* ch; + + ensure_mm_init(); + a = arenaId_to_ArenaP(aid); + + vg_assert(req_pszB >= 0); + vg_assert(req_pszB < 0x7FFFFFF0); + + ch = payload_to_first(a, ptr); + vg_assert(blockSane(a, ch)); + + old_bszW = get_bszW_lo(ch); + vg_assert(is_inuse_bszW(old_bszW)); + old_bszW = mk_plain_bszW(old_bszW); + old_pszW = bszW_to_pszW(a, old_bszW); + old_pszB = old_pszW * VKI_BYTES_PER_WORD; + + if (req_pszB <= old_pszB) return ptr; + + p_new = VG_(malloc) ( aid, req_pszB ); + p_old = (UChar*)ptr; + for (i = 0; i < old_pszB; i++) + p_new[i] = p_old[i]; + + VG_(free)(aid, p_old); + return p_new; +} + + +/*------------------------------------------------------------*/ +/*--- The original test driver machinery. 
---*/ +/*------------------------------------------------------------*/ + +#if 0 + +#if 1 +#define N_TEST_TRANSACTIONS 100000000 +#define N_TEST_ARR 200000 +#define M_TEST_MALLOC 1000 +#else +#define N_TEST_TRANSACTIONS 500000 +#define N_TEST_ARR 30000 +#define M_TEST_MALLOC 500 +#endif + + +void* test_arr[N_TEST_ARR]; + +int main ( int argc, char** argv ) +{ + Int i, j, k, nbytes, qq; + unsigned char* chp; + Arena* a = &arena[VG_AR_PRIVATE]; + srandom(1); + for (i = 0; i < N_TEST_ARR; i++) + test_arr[i] = NULL; + + for (i = 0; i < N_TEST_TRANSACTIONS; i++) { + if (i % 50000 == 0) mallocSanityCheck(a); + j = random() % N_TEST_ARR; + if (test_arr[j]) { + vg_free(a, test_arr[j]); + test_arr[j] = NULL; + } else { + nbytes = 1 + random() % M_TEST_MALLOC; + qq = random()%64; + if (qq == 32) + nbytes *= 17; + else if (qq == 33) + nbytes = 0; + test_arr[j] + = (i % 17) == 0 + ? vg_memalign(a, nbytes, 1<< (3+(random()%10))) + : vg_malloc( a, nbytes ); + chp = test_arr[j]; + for (k = 0; k < nbytes; k++) + chp[k] = (unsigned char)(k + 99); + } + } + + + for (i = 0; i < N_TEST_ARR; i++) { + if (test_arr[i]) { + vg_free(a, test_arr[i]); + test_arr[i] = NULL; + } + } + mallocSanityCheck(a); + + fprintf(stderr, "ALL DONE\n"); + + show_arena_stats(a); + fprintf(stderr, "%d max useful, %d bytes mmap'd (%4.1f%%), %d useful\n", + a->bytes_on_loan_max, + a->bytes_mmaped, + 100.0 * (double)a->bytes_on_loan_max / (double)a->bytes_mmaped, + a->bytes_on_loan ); + + return 0; +} +#endif /* 0 */ + + +/*--------------------------------------------------------------------*/ +/*--- end vg_malloc2.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_memory.c b/vg_memory.c new file mode 100644 index 000000000..13ae15795 --- /dev/null +++ b/vg_memory.c @@ -0,0 +1,2300 @@ + +/*--------------------------------------------------------------------*/ +/*--- Maintain bitmaps of memory, tracking the accessibility (A) ---*/ +/*--- and validity (V) status of 
each byte. ---*/ +/*--- vg_memory.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" + +/* Define to debug the mem audit system. */ +/* #define VG_DEBUG_MEMORY */ + +/* Define to debug the memory-leak-detector. */ +/* #define VG_DEBUG_LEAKCHECK */ + +/* Define to collect detailed performance info. */ +/* #define VG_PROFILE_MEMORY */ + + +/*------------------------------------------------------------*/ +/*--- Low-level support for memory checking. ---*/ +/*------------------------------------------------------------*/ + +/* + All reads and writes are checked against a memory map, which + records the state of all memory in the process. The memory map is + organised like this: + + The top 16 bits of an address are used to index into a top-level + map table, containing 65536 entries. 
Each entry is a pointer to a + second-level map, which records the accesibililty and validity + permissions for the 65536 bytes indexed by the lower 16 bits of the + address. Each byte is represented by nine bits, one indicating + accessibility, the other eight validity. So each second-level map + contains 73728 bytes. This two-level arrangement conveniently + divides the 4G address space into 64k lumps, each size 64k bytes. + + All entries in the primary (top-level) map must point to a valid + secondary (second-level) map. Since most of the 4G of address + space will not be in use -- ie, not mapped at all -- there is a + distinguished secondary map, which indicates `not addressible and + not valid' writeable for all bytes. Entries in the primary map for + which the entire 64k is not in use at all point at this + distinguished map. + + [...] lots of stuff deleted due to out of date-ness + + As a final optimisation, the alignment and address checks for + 4-byte loads and stores are combined in a neat way. The primary + map is extended to have 262144 entries (2^18), rather than 2^16. + The top 3/4 of these entries are permanently set to the + distinguished secondary map. For a 4-byte load/store, the + top-level map is indexed not with (addr >> 16) but instead f(addr), + where + + f( XXXX XXXX XXXX XXXX ____ ____ ____ __YZ ) + = ____ ____ ____ __YZ XXXX XXXX XXXX XXXX or + = ____ ____ ____ __ZY XXXX XXXX XXXX XXXX + + ie the lowest two bits are placed above the 16 high address bits. + If either of these two bits are nonzero, the address is misaligned; + this will select a secondary map from the upper 3/4 of the primary + map. Because this is always the distinguished secondary map, a + (bogus) address check failure will result. The failure handling + code can then figure out whether this is a genuine addr check + failure or whether it is a possibly-legitimate access at a + misaligned address. 
+*/ + + +/*------------------------------------------------------------*/ +/*--- Crude profiling machinery. ---*/ +/*------------------------------------------------------------*/ + +#ifdef VG_PROFILE_MEMORY + +#define N_PROF_EVENTS 120 + +static UInt event_ctr[N_PROF_EVENTS]; + +static void init_prof_mem ( void ) +{ + Int i; + for (i = 0; i < N_PROF_EVENTS; i++) + event_ctr[i] = 0; +} + +void VG_(done_prof_mem) ( void ) +{ + Int i; + for (i = 0; i < N_PROF_EVENTS; i++) { + if ((i % 10) == 0) + VG_(printf)("\n"); + if (event_ctr[i] > 0) + VG_(printf)( "prof mem event %2d: %d\n", i, event_ctr[i] ); + } + VG_(printf)("\n"); +} + +#define PROF_EVENT(ev) \ + do { vg_assert((ev) >= 0 && (ev) < N_PROF_EVENTS); \ + event_ctr[ev]++; \ + } while (False); + +#else + +static void init_prof_mem ( void ) { } + void VG_(done_prof_mem) ( void ) { } + +#define PROF_EVENT(ev) /* */ + +#endif + +/* Event index. If just the name of the fn is given, this means the + number of calls to the fn. Otherwise it is the specified event. 
+ + 10 alloc_secondary_map + + 20 get_abit + 21 get_vbyte + 22 set_abit + 23 set_vbyte + 24 get_abits4_ALIGNED + 25 get_vbytes4_ALIGNED + + 30 set_address_range_perms + 31 set_address_range_perms(lower byte loop) + 32 set_address_range_perms(quadword loop) + 33 set_address_range_perms(upper byte loop) + + 35 make_noaccess + 36 make_writable + 37 make_readable + + 40 copy_address_range_perms + 41 copy_address_range_perms(byte loop) + 42 check_writable + 43 check_writable(byte loop) + 44 check_readable + 45 check_readable(byte loop) + 46 check_readable_asciiz + 47 check_readable_asciiz(byte loop) + + 50 make_aligned_word_NOACCESS + 51 make_aligned_word_WRITABLE + + 60 helperc_LOADV4 + 61 helperc_STOREV4 + 62 helperc_LOADV2 + 63 helperc_STOREV2 + 64 helperc_LOADV1 + 65 helperc_STOREV1 + + 70 rim_rd_V4_SLOWLY + 71 rim_wr_V4_SLOWLY + 72 rim_rd_V2_SLOWLY + 73 rim_wr_V2_SLOWLY + 74 rim_rd_V1_SLOWLY + 75 rim_wr_V1_SLOWLY + + 80 fpu_read + 81 fpu_read aligned 4 + 82 fpu_read aligned 8 + 83 fpu_read 2 + 84 fpu_read 10 + + 85 fpu_write + 86 fpu_write aligned 4 + 87 fpu_write aligned 8 + 88 fpu_write 2 + 89 fpu_write 10 + + 90 fpu_read_check_SLOWLY + 91 fpu_read_check_SLOWLY(byte loop) + 92 fpu_write_check_SLOWLY + 93 fpu_write_check_SLOWLY(byte loop) + + 100 is_plausible_stack_addr + 101 handle_esp_assignment + 102 handle_esp_assignment(-4) + 103 handle_esp_assignment(+4) + 104 handle_esp_assignment(+16) + 105 handle_esp_assignment(-12) + 106 handle_esp_assignment(+8) + 107 handle_esp_assignment(-8) + + 110 vg_handle_esp_assignment_SLOWLY + 111 vg_handle_esp_assignment_SLOWLY(normal; move down) + 112 vg_handle_esp_assignment_SLOWLY(normal; move up) + 113 vg_handle_esp_assignment_SLOWLY(normal) + 114 vg_handle_esp_assignment_SLOWLY(>= HUGE_DELTA) +*/ + +/*------------------------------------------------------------*/ +/*--- Function declarations. ---*/ +/*------------------------------------------------------------*/ + +/* Set permissions for an address range. 
Not speed-critical. */ +void VGM_(make_noaccess) ( Addr a, UInt len ); +void VGM_(make_writable) ( Addr a, UInt len ); +void VGM_(make_readable) ( Addr a, UInt len ); + +/* Check permissions for an address range. Not speed-critical. */ +Bool VGM_(check_writable) ( Addr a, UInt len, Addr* bad_addr ); +Bool VGM_(check_readable) ( Addr a, UInt len, Addr* bad_addr ); +Bool VGM_(check_readable_asciiz) ( Addr a, Addr* bad_addr ); + +static UInt vgm_rd_V4_SLOWLY ( Addr a ); +static UInt vgm_rd_V2_SLOWLY ( Addr a ); +static UInt vgm_rd_V1_SLOWLY ( Addr a ); +static void vgm_wr_V4_SLOWLY ( Addr a, UInt vbytes ); +static void vgm_wr_V2_SLOWLY ( Addr a, UInt vbytes ); +static void vgm_wr_V1_SLOWLY ( Addr a, UInt vbytes ); +static void fpu_read_check_SLOWLY ( Addr addr, Int size ); +static void fpu_write_check_SLOWLY ( Addr addr, Int size ); + + +/*------------------------------------------------------------*/ +/*--- Data defns. ---*/ +/*------------------------------------------------------------*/ + +typedef + struct { + UChar abits[8192]; + UChar vbyte[65536]; + } + SecMap; + +/* These two are statically allocated. Should they be non-public? 
*/ +SecMap* VG_(primary_map)[ /*65536*/ 262144 ]; +static SecMap vg_distinguished_secondary_map; + +#define IS_DISTINGUISHED_SM(smap) \ + ((smap) == &vg_distinguished_secondary_map) + +#define ENSURE_MAPPABLE(addr,caller) \ + do { \ + if (IS_DISTINGUISHED_SM(VG_(primary_map)[(addr) >> 16])) { \ + VG_(primary_map)[(addr) >> 16] = alloc_secondary_map(caller); \ + /* VG_(printf)("new 2map because of %p\n", addr); */ \ + } \ + } while(0) + +#define BITARR_SET(aaa_p,iii_p) \ + do { \ + UInt iii = (UInt)iii_p; \ + UChar* aaa = (UChar*)aaa_p; \ + aaa[iii >> 3] |= (1 << (iii & 7)); \ + } while (0) + +#define BITARR_CLEAR(aaa_p,iii_p) \ + do { \ + UInt iii = (UInt)iii_p; \ + UChar* aaa = (UChar*)aaa_p; \ + aaa[iii >> 3] &= ~(1 << (iii & 7)); \ + } while (0) + +#define BITARR_TEST(aaa_p,iii_p) \ + (0 != (((UChar*)aaa_p)[ ((UInt)iii_p) >> 3 ] \ + & (1 << (((UInt)iii_p) & 7)))) \ + + +#define VGM_BIT_VALID 0 +#define VGM_BIT_INVALID 1 + +#define VGM_NIBBLE_VALID 0 +#define VGM_NIBBLE_INVALID 0xF + +#define VGM_BYTE_VALID 0 +#define VGM_BYTE_INVALID 0xFF + +#define VGM_WORD_VALID 0 +#define VGM_WORD_INVALID 0xFFFFFFFF + +#define VGM_EFLAGS_VALID 0xFFFFFFFE +#define VGM_EFLAGS_INVALID 0xFFFFFFFF + + +#define IS_ALIGNED4_ADDR(aaa_p) (0 == (((UInt)(aaa_p)) & 3)) + + +/*------------------------------------------------------------*/ +/*--- Basic bitmap management, reading and writing. ---*/ +/*------------------------------------------------------------*/ + +/* Allocate and initialise a secondary map. */ + +static SecMap* alloc_secondary_map ( __attribute__ ((unused)) + Char* caller ) +{ + SecMap* map; + UInt i; + PROF_EVENT(10); + + /* Mark all bytes as invalid access and invalid value. */ + + /* It just happens that a SecMap occupies exactly 18 pages -- + although this isn't important, so the following assert is + spurious. 
*/ + vg_assert(0 == (sizeof(SecMap) % VKI_BYTES_PER_PAGE)); + map = VG_(get_memory_from_mmap)( sizeof(SecMap) ); + + for (i = 0; i < 8192; i++) + map->abits[i] = VGM_BYTE_INVALID; /* Invalid address */ + for (i = 0; i < 65536; i++) + map->vbyte[i] = VGM_BYTE_INVALID; /* Invalid Value */ + + /* VG_(printf)("ALLOC_2MAP(%s)\n", caller ); */ + return map; +} + + +/* Basic reading/writing of the bitmaps, for byte-sized accesses. */ + +static __inline__ UChar get_abit ( Addr a ) +{ + SecMap* sm = VG_(primary_map)[a >> 16]; + UInt sm_off = a & 0xFFFF; + PROF_EVENT(20); + return BITARR_TEST(sm->abits, sm_off) + ? VGM_BIT_INVALID : VGM_BIT_VALID; +} + +static __inline__ UChar get_vbyte ( Addr a ) +{ + SecMap* sm = VG_(primary_map)[a >> 16]; + UInt sm_off = a & 0xFFFF; + PROF_EVENT(21); + return sm->vbyte[sm_off]; +} + +static __inline__ void set_abit ( Addr a, UChar abit ) +{ + SecMap* sm; + UInt sm_off; + PROF_EVENT(22); + ENSURE_MAPPABLE(a, "set_abit"); + sm = VG_(primary_map)[a >> 16]; + sm_off = a & 0xFFFF; + if (abit) + BITARR_SET(sm->abits, sm_off); + else + BITARR_CLEAR(sm->abits, sm_off); +} + +static __inline__ void set_vbyte ( Addr a, UChar vbyte ) +{ + SecMap* sm; + UInt sm_off; + PROF_EVENT(23); + ENSURE_MAPPABLE(a, "set_vbyte"); + sm = VG_(primary_map)[a >> 16]; + sm_off = a & 0xFFFF; + sm->vbyte[sm_off] = vbyte; +} + + +/* Reading/writing of the bitmaps, for aligned word-sized accesses. 
*/ + +static __inline__ UChar get_abits4_ALIGNED ( Addr a ) +{ + SecMap* sm; + UInt sm_off; + UChar abits8; + PROF_EVENT(24); +# ifdef VG_DEBUG_MEMORY + vg_assert(IS_ALIGNED4_ADDR(a)); +# endif + sm = VG_(primary_map)[a >> 16]; + sm_off = a & 0xFFFF; + abits8 = sm->abits[sm_off >> 3]; + abits8 >>= (a & 4 /* 100b */); /* a & 4 is either 0 or 4 */ + abits8 &= 0x0F; + return abits8; +} + +static UInt __inline__ get_vbytes4_ALIGNED ( Addr a ) +{ + SecMap* sm = VG_(primary_map)[a >> 16]; + UInt sm_off = a & 0xFFFF; + PROF_EVENT(25); +# ifdef VG_DEBUG_MEMORY + vg_assert(IS_ALIGNED4_ADDR(a)); +# endif + return ((UInt*)(sm->vbyte))[sm_off >> 2]; +} + + +/*------------------------------------------------------------*/ +/*--- Setting permissions over address ranges. ---*/ +/*------------------------------------------------------------*/ + +static void set_address_range_perms ( Addr a, UInt len, + UInt example_a_bit, + UInt example_v_bit ) +{ + UChar vbyte, abyte8; + UInt vword4, sm_off; + SecMap* sm; + + PROF_EVENT(30); + + if (len == 0) + return; + + if (len > 100 * 1000 * 1000) + VG_(message)(Vg_UserMsg, + "Warning: set address range perms: " + "large range %d, a %d, v %d", + len, example_a_bit, example_v_bit ); + + VGP_PUSHCC(VgpSARP); + + /* Requests to change permissions of huge address ranges may + indicate bugs in our machinery. 30,000,000 is arbitrary, but so + far all legitimate requests have fallen beneath that size. */ + /* 4 Mar 02: this is just stupid; get rid of it. */ + /* vg_assert(len < 30000000); */ + + /* Check the permissions make sense. */ + vg_assert(example_a_bit == VGM_BIT_VALID + || example_a_bit == VGM_BIT_INVALID); + vg_assert(example_v_bit == VGM_BIT_VALID + || example_v_bit == VGM_BIT_INVALID); + if (example_a_bit == VGM_BIT_INVALID) + vg_assert(example_v_bit == VGM_BIT_INVALID); + + /* The validity bits to write. */ + vbyte = example_v_bit==VGM_BIT_VALID + ? 
VGM_BYTE_VALID : VGM_BYTE_INVALID; + + /* In order that we can charge through the address space at 8 + bytes/main-loop iteration, make up some perms. */ + abyte8 = (example_a_bit << 7) + | (example_a_bit << 6) + | (example_a_bit << 5) + | (example_a_bit << 4) + | (example_a_bit << 3) + | (example_a_bit << 2) + | (example_a_bit << 1) + | (example_a_bit << 0); + vword4 = (vbyte << 24) | (vbyte << 16) | (vbyte << 8) | vbyte; + +# ifdef VG_DEBUG_MEMORY + /* Do it ... */ + while (True) { + PROF_EVENT(31); + if (len == 0) break; + set_abit ( a, example_a_bit ); + set_vbyte ( a, vbyte ); + a++; + len--; + } + +# else + /* Slowly do parts preceding 8-byte alignment. */ + while (True) { + PROF_EVENT(31); + if (len == 0) break; + if ((a % 8) == 0) break; + set_abit ( a, example_a_bit ); + set_vbyte ( a, vbyte ); + a++; + len--; + } + + if (len == 0) { + VGP_POPCC; + return; + } + vg_assert((a % 8) == 0 && len > 0); + + /* Once aligned, go fast. */ + while (True) { + PROF_EVENT(32); + if (len < 8) break; + ENSURE_MAPPABLE(a, "set_address_range_perms(fast)"); + sm = VG_(primary_map)[a >> 16]; + sm_off = a & 0xFFFF; + sm->abits[sm_off >> 3] = abyte8; + ((UInt*)(sm->vbyte))[(sm_off >> 2) + 0] = vword4; + ((UInt*)(sm->vbyte))[(sm_off >> 2) + 1] = vword4; + a += 8; + len -= 8; + } + + if (len == 0) { + VGP_POPCC; + return; + } + vg_assert((a % 8) == 0 && len > 0 && len < 8); + + /* Finish the upper fragment. */ + while (True) { + PROF_EVENT(33); + if (len == 0) break; + set_abit ( a, example_a_bit ); + set_vbyte ( a, vbyte ); + a++; + len--; + } +# endif + + /* Check that zero page and highest page have not been written to + -- this could happen with buggy syscall wrappers. Today + (2001-04-26) had precisely such a problem with + __NR_setitimer. */ + vg_assert(VG_(first_and_last_secondaries_look_plausible)); + VGP_POPCC; +} + + +/* Set permissions for address ranges ... 
*/ + +void VGM_(make_noaccess) ( Addr a, UInt len ) +{ + PROF_EVENT(35); + set_address_range_perms ( a, len, VGM_BIT_INVALID, VGM_BIT_INVALID ); +} + +void VGM_(make_writable) ( Addr a, UInt len ) +{ + PROF_EVENT(36); + set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_INVALID ); +} + +void VGM_(make_readable) ( Addr a, UInt len ) +{ + PROF_EVENT(37); + set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_VALID ); +} + +void VGM_(make_readwritable) ( Addr a, UInt len ) +{ + PROF_EVENT(38); + set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_VALID ); +} + +/* Block-copy permissions (needed for implementing realloc()). */ + +void VGM_(copy_address_range_perms) ( Addr src, Addr dst, UInt len ) +{ + UInt i; + PROF_EVENT(40); + for (i = 0; i < len; i++) { + UChar abit = get_abit ( src+i ); + UChar vbyte = get_vbyte ( src+i ); + PROF_EVENT(41); + set_abit ( dst+i, abit ); + set_vbyte ( dst+i, vbyte ); + } +} + + +/* Check permissions for address range. If inadequate permissions + exist, *bad_addr is set to the offending address, so the caller can + know what it is. */ + +Bool VGM_(check_writable) ( Addr a, UInt len, Addr* bad_addr ) +{ + UInt i; + UChar abit; + PROF_EVENT(42); + for (i = 0; i < len; i++) { + PROF_EVENT(43); + abit = get_abit(a); + if (abit == VGM_BIT_INVALID) { + if (bad_addr != NULL) *bad_addr = a; + return False; + } + a++; + } + return True; +} + +Bool VGM_(check_readable) ( Addr a, UInt len, Addr* bad_addr ) +{ + UInt i; + UChar abit; + UChar vbyte; + PROF_EVENT(44); + for (i = 0; i < len; i++) { + abit = get_abit(a); + vbyte = get_vbyte(a); + PROF_EVENT(45); + if (abit != VGM_BIT_VALID || vbyte != VGM_BYTE_VALID) { + if (bad_addr != NULL) *bad_addr = a; + return False; + } + a++; + } + return True; +} + + +/* Check a zero-terminated ascii string. Tricky -- don't want to + examine the actual bytes, to find the end, until we're sure it is + safe to do so. 
*/ + +Bool VGM_(check_readable_asciiz) ( Addr a, Addr* bad_addr ) +{ + UChar abit; + UChar vbyte; + PROF_EVENT(46); + while (True) { + PROF_EVENT(47); + abit = get_abit(a); + vbyte = get_vbyte(a); + if (abit != VGM_BIT_VALID || vbyte != VGM_BYTE_VALID) { + if (bad_addr != NULL) *bad_addr = a; + return False; + } + /* Ok, a is safe to read. */ + if (* ((UChar*)a) == 0) return True; + a++; + } +} + + +/* Setting permissions for aligned words. This supports fast stack + operations. */ + +static __inline__ void make_aligned_word_NOACCESS ( Addr a ) +{ + SecMap* sm; + UInt sm_off; + UChar mask; + PROF_EVENT(50); +# ifdef VG_DEBUG_MEMORY + vg_assert(IS_ALIGNED4_ADDR(a)); +# endif + ENSURE_MAPPABLE(a, "make_aligned_word_NOACCESS"); + sm = VG_(primary_map)[a >> 16]; + sm_off = a & 0xFFFF; + ((UInt*)(sm->vbyte))[sm_off >> 2] = VGM_WORD_INVALID; + mask = 0x0F; + mask <<= (a & 4 /* 100b */); /* a & 4 is either 0 or 4 */ + /* mask now contains 1s where we wish to make address bits + invalid (1s). */ + sm->abits[sm_off >> 3] |= mask; +} + +static __inline__ void make_aligned_word_WRITABLE ( Addr a ) +{ + SecMap* sm; + UInt sm_off; + UChar mask; + PROF_EVENT(51); +# ifdef VG_DEBUG_MEMORY + vg_assert(IS_ALIGNED4_ADDR(a)); +# endif + ENSURE_MAPPABLE(a, "make_aligned_word_WRITABLE"); + sm = VG_(primary_map)[a >> 16]; + sm_off = a & 0xFFFF; + ((UInt*)(sm->vbyte))[sm_off >> 2] = VGM_WORD_INVALID; + mask = 0x0F; + mask <<= (a & 4 /* 100b */); /* a & 4 is either 0 or 4 */ + /* mask now contains 1s where we wish to make address bits + invalid (0s). */ + sm->abits[sm_off >> 3] &= ~mask; +} + + +/*------------------------------------------------------------*/ +/*--- Functions called directly from generated code. ---*/ +/*------------------------------------------------------------*/ + +static __inline__ UInt rotateRight16 ( UInt x ) +{ + /* Amazingly, gcc turns this into a single rotate insn. 
*/ + return (x >> 16) | (x << 16); +} + + +static __inline__ UInt shiftRight16 ( UInt x ) +{ + return x >> 16; +} + + +/* Read/write 1/2/4 sized V bytes, and emit an address error if + needed. */ + +/* VG_(helperc_{LD,ST}V{1,2,4}) handle the common case fast. + Under all other circumstances, it defers to the relevant _SLOWLY + function, which can handle all situations. +*/ + +UInt VG_(helperc_LOADV4) ( Addr a ) +{ +# ifdef VG_DEBUG_MEMORY + return vgm_rd_V4_SLOWLY(a); +# else + UInt sec_no = rotateRight16(a) & 0x3FFFF; + SecMap* sm = VG_(primary_map)[sec_no]; + UInt a_off = (a & 0xFFFF) >> 3; + UChar abits = sm->abits[a_off]; + abits >>= (a & 4); + abits &= 15; + PROF_EVENT(60); + if (abits == VGM_NIBBLE_VALID) { + /* Handle common case quickly: a is suitably aligned, is mapped, + and is addressible. */ + UInt v_off = a & 0xFFFF; + return ((UInt*)(sm->vbyte))[ v_off >> 2 ]; + } else { + /* Slow but general case. */ + return vgm_rd_V4_SLOWLY(a); + } +# endif +} + +void VG_(helperc_STOREV4) ( Addr a, UInt vbytes ) +{ +# ifdef VG_DEBUG_MEMORY + vgm_wr_V4_SLOWLY(a, vbytes); +# else + UInt sec_no = rotateRight16(a) & 0x3FFFF; + SecMap* sm = VG_(primary_map)[sec_no]; + UInt a_off = (a & 0xFFFF) >> 3; + UChar abits = sm->abits[a_off]; + abits >>= (a & 4); + abits &= 15; + PROF_EVENT(61); + if (abits == VGM_NIBBLE_VALID) { + /* Handle common case quickly: a is suitably aligned, is mapped, + and is addressible. */ + UInt v_off = a & 0xFFFF; + ((UInt*)(sm->vbyte))[ v_off >> 2 ] = vbytes; + } else { + /* Slow but general case. */ + vgm_wr_V4_SLOWLY(a, vbytes); + } +# endif +} + +UInt VG_(helperc_LOADV2) ( Addr a ) +{ +# ifdef VG_DEBUG_MEMORY + return vgm_rd_V2_SLOWLY(a); +# else + UInt sec_no = rotateRight16(a) & 0x1FFFF; + SecMap* sm = VG_(primary_map)[sec_no]; + UInt a_off = (a & 0xFFFF) >> 3; + PROF_EVENT(62); + if (sm->abits[a_off] == VGM_BYTE_VALID) { + /* Handle common case quickly. 
*/ + UInt v_off = a & 0xFFFF; + return 0xFFFF0000 + | + (UInt)( ((UShort*)(sm->vbyte))[ v_off >> 1 ] ); + } else { + /* Slow but general case. */ + return vgm_rd_V2_SLOWLY(a); + } +# endif +} + +void VG_(helperc_STOREV2) ( Addr a, UInt vbytes ) +{ +# ifdef VG_DEBUG_MEMORY + vgm_wr_V2_SLOWLY(a, vbytes); +# else + UInt sec_no = rotateRight16(a) & 0x1FFFF; + SecMap* sm = VG_(primary_map)[sec_no]; + UInt a_off = (a & 0xFFFF) >> 3; + PROF_EVENT(63); + if (sm->abits[a_off] == VGM_BYTE_VALID) { + /* Handle common case quickly. */ + UInt v_off = a & 0xFFFF; + ((UShort*)(sm->vbyte))[ v_off >> 1 ] = vbytes & 0x0000FFFF; + } else { + /* Slow but general case. */ + vgm_wr_V2_SLOWLY(a, vbytes); + } +# endif +} + +UInt VG_(helperc_LOADV1) ( Addr a ) +{ +# ifdef VG_DEBUG_MEMORY + return vgm_rd_V1_SLOWLY(a); +# else + UInt sec_no = shiftRight16(a); + SecMap* sm = VG_(primary_map)[sec_no]; + UInt a_off = (a & 0xFFFF) >> 3; + PROF_EVENT(64); + if (sm->abits[a_off] == VGM_BYTE_VALID) { + /* Handle common case quickly. */ + UInt v_off = a & 0xFFFF; + return 0xFFFFFF00 + | + (UInt)( ((UChar*)(sm->vbyte))[ v_off ] ); + } else { + /* Slow but general case. */ + return vgm_rd_V1_SLOWLY(a); + } +# endif +} + +void VG_(helperc_STOREV1) ( Addr a, UInt vbytes ) +{ +# ifdef VG_DEBUG_MEMORY + vgm_wr_V1_SLOWLY(a, vbytes); +# else + UInt sec_no = shiftRight16(a); + SecMap* sm = VG_(primary_map)[sec_no]; + UInt a_off = (a & 0xFFFF) >> 3; + PROF_EVENT(65); + if (sm->abits[a_off] == VGM_BYTE_VALID) { + /* Handle common case quickly. */ + UInt v_off = a & 0xFFFF; + ((UChar*)(sm->vbyte))[ v_off ] = vbytes & 0x000000FF; + } else { + /* Slow but general case. */ + vgm_wr_V1_SLOWLY(a, vbytes); + } +# endif +} + + +/*------------------------------------------------------------*/ +/*--- Fallback functions to handle cases that the above ---*/ +/*--- VG_(helperc_{LD,ST}V{1,2,4}) can't manage. 
---*/ +/*------------------------------------------------------------*/ + +static UInt vgm_rd_V4_SLOWLY ( Addr a ) +{ + Bool a0ok, a1ok, a2ok, a3ok; + UInt vb0, vb1, vb2, vb3; + + PROF_EVENT(70); + + /* First establish independently the addressibility of the 4 bytes + involved. */ + a0ok = get_abit(a+0) == VGM_BIT_VALID; + a1ok = get_abit(a+1) == VGM_BIT_VALID; + a2ok = get_abit(a+2) == VGM_BIT_VALID; + a3ok = get_abit(a+3) == VGM_BIT_VALID; + + /* Also get the validity bytes for the address. */ + vb0 = (UInt)get_vbyte(a+0); + vb1 = (UInt)get_vbyte(a+1); + vb2 = (UInt)get_vbyte(a+2); + vb3 = (UInt)get_vbyte(a+3); + + /* Now distinguish 3 cases */ + + /* Case 1: the address is completely valid, so: + - no addressing error + - return V bytes as read from memory + */ + if (a0ok && a1ok && a2ok && a3ok) { + UInt vw = VGM_WORD_INVALID; + vw <<= 8; vw |= vb3; + vw <<= 8; vw |= vb2; + vw <<= 8; vw |= vb1; + vw <<= 8; vw |= vb0; + return vw; + } + + /* Case 2: the address is completely invalid. + - emit addressing error + - return V word indicating validity. + This sounds strange, but if we make loads from invalid addresses + give invalid data, we also risk producing a number of confusing + undefined-value errors later, which confuses the fact that the + error arose in the first place from an invalid address. + */ + /* VG_(printf)("%p (%d %d %d %d)\n", a, a0ok, a1ok, a2ok, a3ok); */ + if (!VG_(clo_partial_loads_ok) + || ((a & 3) != 0) + || (!a0ok && !a1ok && !a2ok && !a3ok)) { + VG_(record_address_error)( a, 4, False ); + return (VGM_BYTE_VALID << 24) | (VGM_BYTE_VALID << 16) + | (VGM_BYTE_VALID << 8) | VGM_BYTE_VALID; + } + + /* Case 3: the address is partially valid. + - no addressing error + - returned V word is invalid where the address is invalid, + and contains V bytes from memory otherwise. + Case 3 is only allowed if VG_(clo_partial_loads_ok) is True + (which is the default), and the address is 4-aligned. + If not, Case 2 will have applied. 
+ */ + vg_assert(VG_(clo_partial_loads_ok)); + { + UInt vw = VGM_WORD_INVALID; + vw <<= 8; vw |= (a3ok ? vb3 : VGM_BYTE_INVALID); + vw <<= 8; vw |= (a2ok ? vb2 : VGM_BYTE_INVALID); + vw <<= 8; vw |= (a1ok ? vb1 : VGM_BYTE_INVALID); + vw <<= 8; vw |= (a0ok ? vb0 : VGM_BYTE_INVALID); + return vw; + } +} + +static void vgm_wr_V4_SLOWLY ( Addr a, UInt vbytes ) +{ + /* Check the address for validity. */ + Bool aerr = False; + PROF_EVENT(71); + + if (get_abit(a+0) != VGM_BIT_VALID) aerr = True; + if (get_abit(a+1) != VGM_BIT_VALID) aerr = True; + if (get_abit(a+2) != VGM_BIT_VALID) aerr = True; + if (get_abit(a+3) != VGM_BIT_VALID) aerr = True; + + /* Store the V bytes, remembering to do it little-endian-ly. */ + set_vbyte( a+0, vbytes & 0x000000FF ); vbytes >>= 8; + set_vbyte( a+1, vbytes & 0x000000FF ); vbytes >>= 8; + set_vbyte( a+2, vbytes & 0x000000FF ); vbytes >>= 8; + set_vbyte( a+3, vbytes & 0x000000FF ); + + /* If an address error has happened, report it. */ + if (aerr) + VG_(record_address_error)( a, 4, True ); +} + +static UInt vgm_rd_V2_SLOWLY ( Addr a ) +{ + /* Check the address for validity. */ + UInt vw = VGM_WORD_INVALID; + Bool aerr = False; + PROF_EVENT(72); + + if (get_abit(a+0) != VGM_BIT_VALID) aerr = True; + if (get_abit(a+1) != VGM_BIT_VALID) aerr = True; + + /* Fetch the V bytes, remembering to do it little-endian-ly. */ + vw <<= 8; vw |= (UInt)get_vbyte(a+1); + vw <<= 8; vw |= (UInt)get_vbyte(a+0); + + /* If an address error has happened, report it. */ + if (aerr) { + VG_(record_address_error)( a, 2, False ); + vw = (VGM_BYTE_INVALID << 24) | (VGM_BYTE_INVALID << 16) + | (VGM_BYTE_VALID << 8) | (VGM_BYTE_VALID); + } + return vw; +} + +static void vgm_wr_V2_SLOWLY ( Addr a, UInt vbytes ) +{ + /* Check the address for validity. */ + Bool aerr = False; + PROF_EVENT(73); + + if (get_abit(a+0) != VGM_BIT_VALID) aerr = True; + if (get_abit(a+1) != VGM_BIT_VALID) aerr = True; + + /* Store the V bytes, remembering to do it little-endian-ly. 
*/ + set_vbyte( a+0, vbytes & 0x000000FF ); vbytes >>= 8; + set_vbyte( a+1, vbytes & 0x000000FF ); + + /* If an address error has happened, report it. */ + if (aerr) + VG_(record_address_error)( a, 2, True ); +} + +static UInt vgm_rd_V1_SLOWLY ( Addr a ) +{ + /* Check the address for validity. */ + UInt vw = VGM_WORD_INVALID; + Bool aerr = False; + PROF_EVENT(74); + + if (get_abit(a+0) != VGM_BIT_VALID) aerr = True; + + /* Fetch the V byte. */ + vw <<= 8; vw |= (UInt)get_vbyte(a+0); + + /* If an address error has happened, report it. */ + if (aerr) { + VG_(record_address_error)( a, 1, False ); + vw = (VGM_BYTE_INVALID << 24) | (VGM_BYTE_INVALID << 16) + | (VGM_BYTE_INVALID << 8) | (VGM_BYTE_VALID); + } + return vw; +} + +static void vgm_wr_V1_SLOWLY ( Addr a, UInt vbytes ) +{ + /* Check the address for validity. */ + Bool aerr = False; + PROF_EVENT(75); + if (get_abit(a+0) != VGM_BIT_VALID) aerr = True; + + /* Store the V bytes, remembering to do it little-endian-ly. */ + set_vbyte( a+0, vbytes & 0x000000FF ); + + /* If an address error has happened, report it. */ + if (aerr) + VG_(record_address_error)( a, 1, True ); +} + + +/* --------------------------------------------------------------------- + Called from generated code, or from the assembly helpers. + Handlers for value check failures. + ------------------------------------------------------------------ */ + +void VG_(helperc_value_check0_fail) ( void ) +{ + VG_(record_value_error) ( 0 ); +} + +void VG_(helperc_value_check1_fail) ( void ) +{ + VG_(record_value_error) ( 1 ); +} + +void VG_(helperc_value_check2_fail) ( void ) +{ + VG_(record_value_error) ( 2 ); +} + +void VG_(helperc_value_check4_fail) ( void ) +{ + VG_(record_value_error) ( 4 ); +} + + +/* --------------------------------------------------------------------- + FPU load and store checks, called from generated code. 
+ ------------------------------------------------------------------ */ + +void VGM_(fpu_read_check) ( Addr addr, Int size ) +{ + /* Ensure the read area is both addressible and valid (ie, + readable). If there's an address error, don't report a value + error too; but if there isn't an address error, check for a + value error. + + Try to be reasonably fast on the common case; wimp out and defer + to fpu_read_check_SLOWLY for everything else. */ + + SecMap* sm; + UInt sm_off, v_off, a_off; + Addr addr4; + + PROF_EVENT(80); + +# ifdef VG_DEBUG_MEMORY + fpu_read_check_SLOWLY ( addr, size ); +# else + + if (size == 4) { + if (!IS_ALIGNED4_ADDR(addr)) goto slow4; + PROF_EVENT(81); + /* Properly aligned. */ + sm = VG_(primary_map)[addr >> 16]; + sm_off = addr & 0xFFFF; + a_off = sm_off >> 3; + if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow4; + /* Properly aligned and addressible. */ + v_off = addr & 0xFFFF; + if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) + goto slow4; + /* Properly aligned, addressible and with valid data. */ + return; + slow4: + fpu_read_check_SLOWLY ( addr, 4 ); + return; + } + + if (size == 8) { + if (!IS_ALIGNED4_ADDR(addr)) goto slow8; + PROF_EVENT(82); + /* Properly aligned. Do it in two halves. */ + addr4 = addr + 4; + /* First half. */ + sm = VG_(primary_map)[addr >> 16]; + sm_off = addr & 0xFFFF; + a_off = sm_off >> 3; + if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8; + /* First half properly aligned and addressible. */ + v_off = addr & 0xFFFF; + if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) + goto slow8; + /* Second half. */ + sm = VG_(primary_map)[addr4 >> 16]; + sm_off = addr4 & 0xFFFF; + a_off = sm_off >> 3; + if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8; + /* Second half properly aligned and addressible. */ + v_off = addr4 & 0xFFFF; + if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) + goto slow8; + /* Both halves properly aligned, addressible and with valid + data. 
*/ + return; + slow8: + fpu_read_check_SLOWLY ( addr, 8 ); + return; + } + + /* Can't be bothered to huff'n'puff to make these (allegedly) rare + cases go quickly. */ + if (size == 2) { + PROF_EVENT(83); + fpu_read_check_SLOWLY ( addr, 2 ); + return; + } + + if (size == 10) { + PROF_EVENT(84); + fpu_read_check_SLOWLY ( addr, 10 ); + return; + } + + VG_(printf)("size is %d\n", size); + VG_(panic)("vgm_fpu_read_check: unhandled size"); +# endif +} + + +void VGM_(fpu_write_check) ( Addr addr, Int size ) +{ + /* Ensure the written area is addressible, and moan if otherwise. + If it is addressible, make it valid, otherwise invalid. + */ + + SecMap* sm; + UInt sm_off, v_off, a_off; + Addr addr4; + + PROF_EVENT(85); + +# ifdef VG_DEBUG_MEMORY + fpu_write_check_SLOWLY ( addr, size ); +# else + + if (size == 4) { + if (!IS_ALIGNED4_ADDR(addr)) goto slow4; + PROF_EVENT(86); + /* Properly aligned. */ + sm = VG_(primary_map)[addr >> 16]; + sm_off = addr & 0xFFFF; + a_off = sm_off >> 3; + if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow4; + /* Properly aligned and addressible. Make valid. */ + v_off = addr & 0xFFFF; + ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID; + return; + slow4: + fpu_write_check_SLOWLY ( addr, 4 ); + return; + } + + if (size == 8) { + if (!IS_ALIGNED4_ADDR(addr)) goto slow8; + PROF_EVENT(87); + /* Properly aligned. Do it in two halves. */ + addr4 = addr + 4; + /* First half. */ + sm = VG_(primary_map)[addr >> 16]; + sm_off = addr & 0xFFFF; + a_off = sm_off >> 3; + if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8; + /* First half properly aligned and addressible. Make valid. */ + v_off = addr & 0xFFFF; + ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID; + /* Second half. */ + sm = VG_(primary_map)[addr4 >> 16]; + sm_off = addr4 & 0xFFFF; + a_off = sm_off >> 3; + if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8; + /* Second half properly aligned and addressible. 
*/ + v_off = addr4 & 0xFFFF; + ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID; + /* Properly aligned, addressible and with valid data. */ + return; + slow8: + fpu_write_check_SLOWLY ( addr, 8 ); + return; + } + + /* Can't be bothered to huff'n'puff to make these (allegedly) rare + cases go quickly. */ + if (size == 2) { + PROF_EVENT(88); + fpu_write_check_SLOWLY ( addr, 2 ); + return; + } + + if (size == 10) { + PROF_EVENT(89); + fpu_write_check_SLOWLY ( addr, 10 ); + return; + } + + VG_(printf)("size is %d\n", size); + VG_(panic)("vgm_fpu_write_check: unhandled size"); +# endif +} + + +/* --------------------------------------------------------------------- + Slow, general cases for FPU load and store checks. + ------------------------------------------------------------------ */ + +/* Generic version. Test for both addr and value errors, but if + there's an addr error, don't report a value error even if it + exists. */ + +void fpu_read_check_SLOWLY ( Addr addr, Int size ) +{ + Int i; + Bool aerr = False; + Bool verr = False; + PROF_EVENT(90); + for (i = 0; i < size; i++) { + PROF_EVENT(91); + if (get_abit(addr+i) != VGM_BIT_VALID) + aerr = True; + if (get_vbyte(addr+i) != VGM_BYTE_VALID) + verr = True; + } + + if (aerr) { + VG_(record_address_error)( addr, size, False ); + } else { + if (verr) + VG_(record_value_error)( size ); + } +} + + +/* Generic version. Test for addr errors. Valid addresses are + given valid values, and invalid addresses invalid values. 
*/ + +void fpu_write_check_SLOWLY ( Addr addr, Int size ) +{ + Int i; + Addr a_here; + Bool a_ok; + Bool aerr = False; + PROF_EVENT(92); + for (i = 0; i < size; i++) { + PROF_EVENT(93); + a_here = addr+i; + a_ok = get_abit(a_here) == VGM_BIT_VALID; + if (a_ok) { + set_vbyte(a_here, VGM_BYTE_VALID); + } else { + set_vbyte(a_here, VGM_BYTE_INVALID); + aerr = True; + } + } + if (aerr) { + VG_(record_address_error)( addr, size, True ); + } +} + + +/*------------------------------------------------------------*/ +/*--- Tracking permissions around %esp changes. ---*/ +/*------------------------------------------------------------*/ + +/* + The stack + ~~~~~~~~~ + The stack's segment seems to be dynamically extended downwards + by the kernel as the stack pointer moves down. Initially, a + 1-page (4k) stack is allocated. When %esp moves below that for + the first time, presumably a page fault occurs. The kernel + detects that the faulting address is in the range from %esp upwards + to the current valid stack. It then extends the stack segment + downwards for enough to cover the faulting address, and resumes + the process (invisibly). The process is unaware of any of this. + + That means that Valgrind can't spot when the stack segment is + being extended. Fortunately, we want to precisely and continuously + update stack permissions around %esp, so we need to spot all + writes to %esp anyway. + + The deal is: when %esp is assigned a lower value, the stack is + being extended. Create a secondary maps to fill in any holes + between the old stack ptr and this one, if necessary. Then + mark all bytes in the area just "uncovered" by this %esp change + as write-only. + + When %esp goes back up, mark the area receded over as unreadable + and unwritable. + + Just to record the %esp boundary conditions somewhere convenient: + %esp always points to the lowest live byte in the stack. All + addresses below %esp are not live; those at and above it are. 
+*/ + +/* Does this address look like something in the program's main + stack ? */ +Bool VG_(is_plausible_stack_addr) ( Addr aa ) +{ + UInt a = (UInt)aa; + PROF_EVENT(100); + if (a < VG_STACK_STARTS_AT && + a > VG_STACK_STARTS_AT - VG_PLAUSIBLE_STACK_SIZE) + return True; + else + return False; +} + + +/* Is this address within some small distance below %ESP? Used only + for the --workaround-gcc296-bugs kludge. */ +Bool VG_(is_just_below_ESP)( Addr aa ) +{ + UInt esp = VG_(baseBlock)[VGOFF_(m_esp)]; + if (esp > (UInt)aa + && (esp - (UInt)aa) <= VG_GCC296_BUG_STACK_SLOP) + return True; + else + return False; +} + + +/* Kludgey ... how much does %esp have to change before we reckon that + the application is switching stacks ? */ +#define VG_HUGE_DELTA (VG_PLAUSIBLE_STACK_SIZE / 4) + +static Addr get_page_base ( Addr a ) +{ + return a & ~(VKI_BYTES_PER_PAGE-1); +} + + +static void vg_handle_esp_assignment_SLOWLY ( Addr ); + +void VGM_(handle_esp_assignment) ( Addr new_espA ) +{ + UInt old_esp = VG_(baseBlock)[VGOFF_(m_esp)]; + UInt new_esp = (UInt)new_espA; + Int delta = ((Int)new_esp) - ((Int)old_esp); + + PROF_EVENT(101); + +# ifndef VG_DEBUG_MEMORY + + if (IS_ALIGNED4_ADDR(old_esp)) { + + /* Deal with the most common cases fast. These are ordered in + the sequence most common first. */ + + if (delta == -4) { + /* Moving down by 4 and properly aligned.. */ + PROF_EVENT(102); + make_aligned_word_WRITABLE(new_esp); + return; + } + + if (delta == 4) { + /* Moving up by 4 and properly aligned. */ + PROF_EVENT(103); + make_aligned_word_NOACCESS(old_esp); + return; + } + + if (delta == 16) { + /* Also surprisingly common. 
*/ + PROF_EVENT(104); + make_aligned_word_NOACCESS(old_esp); + make_aligned_word_NOACCESS(old_esp+4); + make_aligned_word_NOACCESS(old_esp+8); + make_aligned_word_NOACCESS(old_esp+12); + return; + } + + if (delta == -12) { + PROF_EVENT(105); + make_aligned_word_WRITABLE(new_esp); + make_aligned_word_WRITABLE(new_esp+4); + make_aligned_word_WRITABLE(new_esp+8); + return; + } + + if (delta == 8) { + PROF_EVENT(106); + make_aligned_word_NOACCESS(old_esp); + make_aligned_word_NOACCESS(old_esp+4); + return; + } + + if (delta == -8) { + PROF_EVENT(107); + make_aligned_word_WRITABLE(new_esp); + make_aligned_word_WRITABLE(new_esp+4); + return; + } + } + +# endif + + /* The above special cases handle 90% to 95% of all the stack + adjustments. The rest we give to the slow-but-general + mechanism. */ + vg_handle_esp_assignment_SLOWLY ( new_espA ); +} + + +static void vg_handle_esp_assignment_SLOWLY ( Addr new_espA ) +{ + UInt old_esp = VG_(baseBlock)[VGOFF_(m_esp)]; + UInt new_esp = (UInt)new_espA; + Int delta = ((Int)new_esp) - ((Int)old_esp); + + PROF_EVENT(110); + if (-(VG_HUGE_DELTA) < delta && delta < VG_HUGE_DELTA) { + /* "Ordinary" stack change. */ + if (new_esp < old_esp) { + /* Moving down; the stack is growing. */ + PROF_EVENT(111); + VGM_(make_writable) ( new_esp, old_esp - new_esp ); + return; + } + if (new_esp > old_esp) { + /* Moving up; the stack is shrinking. */ + PROF_EVENT(112); + VGM_(make_noaccess) ( old_esp, new_esp - old_esp ); + return; + } + PROF_EVENT(113); + return; /* when old_esp == new_esp */ + } + + /* %esp has changed by more than HUGE_DELTA. We take this to mean + that the application is switching to a new stack, for whatever + reason, and we attempt to initialise the permissions around the + new stack in some plausible way. All pretty kludgey; needed to + make netscape-4.07 run without generating thousands of error + contexts. 
+ + If we appear to be switching back to the main stack, don't mess + with the permissions in the area at and above the stack ptr. + Otherwise, we're switching to an alternative stack; make the + area above %esp readable -- this doesn't seem right -- the right + thing to do would be to make it writable -- but is needed to + avoid huge numbers of errs in netscape. To be investigated. */ + + { Addr invalid_down_to = get_page_base(new_esp) + - 0 * VKI_BYTES_PER_PAGE; + Addr valid_up_to = get_page_base(new_esp) + VKI_BYTES_PER_PAGE + + 0 * VKI_BYTES_PER_PAGE; + PROF_EVENT(114); + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg, "Warning: client switching stacks? " + "%%esp: %p --> %p", + old_esp, new_esp); + /* VG_(printf)("na %p, %%esp %p, wr %p\n", + invalid_down_to, new_esp, valid_up_to ); */ + VGM_(make_noaccess) ( invalid_down_to, new_esp - invalid_down_to ); + if (!VG_(is_plausible_stack_addr)(new_esp)) { + VGM_(make_readable) ( new_esp, valid_up_to - new_esp ); + } + } +} + + +/*--------------------------------------------------------------*/ +/*--- Initialise the memory audit system on program startup. ---*/ +/*--------------------------------------------------------------*/ + +/* Handle one entry derived from /proc/self/maps. */ + +static +void init_memory_audit_callback ( + Addr start, UInt size, + Char rr, Char ww, Char xx, + UInt foffset, UChar* filename ) +{ + UChar example_a_bit; + UChar example_v_bit; + UInt r_esp; + Bool is_stack_segment; + + /* Sanity check ... if this is the executable's text segment, + ensure it is loaded where we think it ought to be. Any file + name which doesn't contain ".so" is assumed to be the + executable. */ + if (filename != NULL + && xx == 'x' + && VG_(strstr(filename, ".so")) == NULL + ) { + /* We assume this is the executable. 
*/ + if (start != VG_ASSUMED_EXE_BASE) { + VG_(message)(Vg_UserMsg, + "FATAL: executable base addr not as assumed."); + VG_(message)(Vg_UserMsg, "name %s, actual %p, assumed %p.", + filename, start, VG_ASSUMED_EXE_BASE); + VG_(panic)("VG_ASSUMED_EXE_BASE doesn't match reality"); + } + } + + if (0) + VG_(message)(Vg_DebugMsg, + "initial map %8x-%8x %c%c%c? %8x (%d) (%s)", + start,start+size,rr,ww,xx,foffset, + size, filename?filename:(UChar*)"NULL"); + + r_esp = VG_(baseBlock)[VGOFF_(m_esp)]; + is_stack_segment = start <= r_esp && r_esp < start+size; + + /* Figure out the segment's permissions. + + All segments are addressible -- since a process can read its + own text segment. + + A read-but-not-write segment presumably contains initialised + data, so is all valid. Read-write segments presumably contains + uninitialised data, so is all invalid. */ + + /* ToDo: make this less bogus. */ + if (rr != 'r' && xx != 'x' && ww != 'w') { + /* Very bogus; this path never gets taken. */ + /* A no, V no */ + example_a_bit = VGM_BIT_INVALID; + example_v_bit = VGM_BIT_INVALID; + } else { + /* A yes, V yes */ + example_a_bit = VGM_BIT_VALID; + example_v_bit = VGM_BIT_VALID; + /* Causes a lot of errs for unknown reasons. + if (filename is valgrind.so + [careful about end conditions on filename]) { + example_a_bit = VGM_BIT_INVALID; + example_v_bit = VGM_BIT_INVALID; + } + */ + } + + set_address_range_perms ( start, size, + example_a_bit, example_v_bit ); + + if (is_stack_segment) { + /* This is the stack segment. Mark all below %esp as + noaccess. */ + if (0) + VG_(message)(Vg_DebugMsg, + "invalidating stack area: %x .. %x", + start,r_esp); + VGM_(make_noaccess)( start, r_esp-start ); + } +} + + + +/* ONLY HERE for sbrk() */ +#include + +/* Initialise the memory audit system. 
*/ +void VGM_(init_memory_audit) ( void ) +{ + Int i; + + init_prof_mem(); + + for (i = 0; i < 8192; i++) + vg_distinguished_secondary_map.abits[i] + = VGM_BYTE_INVALID; /* Invalid address */ + for (i = 0; i < 65536; i++) + vg_distinguished_secondary_map.vbyte[i] + = VGM_BYTE_INVALID; /* Invalid Value */ + + /* These entries gradually get overwritten as the used address + space expands. */ + for (i = 0; i < 65536; i++) + VG_(primary_map)[i] = &vg_distinguished_secondary_map; + /* These ones should never change; it's a bug in Valgrind if they + do. */ + for (i = 65536; i < 262144; i++) + VG_(primary_map)[i] = &vg_distinguished_secondary_map; + + /* Read the initial memory mapping from the /proc filesystem, and + set up our own maps accordingly. */ + VG_(read_procselfmaps) ( init_memory_audit_callback ); + + /* Last but not least, set up the shadow regs with reasonable (sic) + values. All regs are claimed to have valid values. + */ + VG_(baseBlock)[VGOFF_(sh_esp)] = VGM_WORD_VALID; + VG_(baseBlock)[VGOFF_(sh_ebp)] = VGM_WORD_VALID; + VG_(baseBlock)[VGOFF_(sh_eax)] = VGM_WORD_VALID; + VG_(baseBlock)[VGOFF_(sh_ecx)] = VGM_WORD_VALID; + VG_(baseBlock)[VGOFF_(sh_edx)] = VGM_WORD_VALID; + VG_(baseBlock)[VGOFF_(sh_ebx)] = VGM_WORD_VALID; + VG_(baseBlock)[VGOFF_(sh_esi)] = VGM_WORD_VALID; + VG_(baseBlock)[VGOFF_(sh_edi)] = VGM_WORD_VALID; + VG_(baseBlock)[VGOFF_(sh_eflags)] = VGM_EFLAGS_VALID; + + /* Record the end of the data segment, so that vg_syscall_mem.c + can make sense of calls to brk(). + */ + VGM_(curr_dataseg_end) = (Addr)sbrk(0); + if (VGM_(curr_dataseg_end) == (Addr)(-1)) + VG_(panic)("vgm_init_memory_audit: can't determine data-seg end"); + + if (0) + VG_(printf)("DS END is %p\n", VGM_(curr_dataseg_end)); + + /* Read the list of errors to suppress. This should be found in + the file specified by vg_clo_suppressions. 
*/ + VG_(load_suppressions)(); +} + + +/*------------------------------------------------------------*/ +/*--- Low-level address-space scanning, for the leak ---*/ +/*--- detector. ---*/ +/*------------------------------------------------------------*/ + +static +jmp_buf memscan_jmpbuf; + +static +void vg_scan_all_valid_memory_sighandler ( Int sigNo ) +{ + __builtin_longjmp(memscan_jmpbuf, 1); +} + +UInt VG_(scan_all_valid_memory) ( void (*notify_word)( Addr, UInt ) ) +{ + /* All volatile, because some gccs seem paranoid about longjmp(). */ + volatile UInt res, numPages, page, vbytes, primaryMapNo, nWordsNotified; + volatile Addr pageBase, addr; + volatile SecMap* sm; + volatile UChar abits; + volatile UInt page_first_word; + + vki_ksigaction sigbus_saved; + vki_ksigaction sigbus_new; + vki_ksigaction sigsegv_saved; + vki_ksigaction sigsegv_new; + vki_ksigset_t blockmask_saved; + vki_ksigset_t unblockmask_new; + + /* Temporarily install a new sigsegv and sigbus handler, and make + sure SIGBUS, SIGSEGV and SIGTERM are unblocked. (Perhaps the + first two can never be blocked anyway?) 
*/ + + sigbus_new.ksa_handler = vg_scan_all_valid_memory_sighandler; + sigbus_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART; + sigbus_new.ksa_restorer = NULL; + res = VG_(ksigemptyset)( &sigbus_new.ksa_mask ); + vg_assert(res == 0); + + sigsegv_new.ksa_handler = vg_scan_all_valid_memory_sighandler; + sigsegv_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART; + sigsegv_new.ksa_restorer = NULL; + res = VG_(ksigemptyset)( &sigsegv_new.ksa_mask ); + vg_assert(res == 0+0); + + res = VG_(ksigemptyset)( &unblockmask_new ); + res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGBUS ); + res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGSEGV ); + res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGTERM ); + vg_assert(res == 0+0+0); + + res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_new, &sigbus_saved ); + vg_assert(res == 0+0+0+0); + + res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_new, &sigsegv_saved ); + vg_assert(res == 0+0+0+0+0); + + res = VG_(ksigprocmask)( VKI_SIG_UNBLOCK, &unblockmask_new, &blockmask_saved ); + vg_assert(res == 0+0+0+0+0+0); + + /* The signal handlers are installed. Actually do the memory scan. */ + numPages = 1 << (32-VKI_BYTES_PER_PAGE_BITS); + vg_assert(numPages == 1048576); + vg_assert(4096 == (1 << VKI_BYTES_PER_PAGE_BITS)); + + nWordsNotified = 0; + + for (page = 0; page < numPages; page++) { + pageBase = page << VKI_BYTES_PER_PAGE_BITS; + primaryMapNo = pageBase >> 16; + sm = VG_(primary_map)[primaryMapNo]; + if (IS_DISTINGUISHED_SM(sm)) continue; + if (__builtin_setjmp(memscan_jmpbuf) == 0) { + /* try this ... 
*/ + page_first_word = * (volatile UInt*)pageBase; + /* we get here if we didn't get a fault */ + /* Scan the page */ + for (addr = pageBase; addr < pageBase+VKI_BYTES_PER_PAGE; addr += 4) { + abits = get_abits4_ALIGNED(addr); + vbytes = get_vbytes4_ALIGNED(addr); + if (abits == VGM_NIBBLE_VALID + && vbytes == VGM_WORD_VALID) { + nWordsNotified++; + notify_word ( addr, *(UInt*)addr ); + } + } + } else { + /* We get here if reading the first word of the page caused a + fault, which in turn caused the signal handler to longjmp. + Ignore this page. */ + if (0) + VG_(printf)( + "vg_scan_all_valid_memory_sighandler: ignoring page at %p\n", + pageBase + ); + } + } + + /* Restore signal state to whatever it was before. */ + res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_saved, NULL ); + vg_assert(res == 0 +0); + + res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_saved, NULL ); + vg_assert(res == 0 +0 +0); + + res = VG_(ksigprocmask)( VKI_SIG_SETMASK, &blockmask_saved, NULL ); + vg_assert(res == 0 +0 +0 +0); + + return nWordsNotified; +} + + +/*------------------------------------------------------------*/ +/*--- Detecting leaked (unreachable) malloc'd blocks. ---*/ +/*------------------------------------------------------------*/ + +/* A block is either + -- Proper-ly reached; a pointer to its start has been found + -- Interior-ly reached; only an interior pointer to it has been found + -- Unreached; so far, no pointers to any part of it have been found. +*/ +typedef + enum { Unreached, Interior, Proper } + Reachedness; + +/* A block record, used for generating err msgs. */ +typedef + struct _LossRecord { + struct _LossRecord* next; + /* Where these lost blocks were allocated. */ + ExeContext* allocated_at; + /* Their reachability. */ + Reachedness loss_mode; + /* Number of blocks and total # bytes involved. */ + UInt total_bytes; + UInt num_blocks; + } + LossRecord; + + +/* Find the i such that ptr points at or inside the block described by + shadows[i]. Return -1 if none found. 
This assumes that shadows[] + has been sorted on the ->data field. */ + +#ifdef VG_DEBUG_LEAKCHECK +/* Used to sanity-check the fast binary-search mechanism. */ +static Int find_shadow_for_OLD ( Addr ptr, + ShadowChunk** shadows, + Int n_shadows ) + +{ + Int i; + Addr a_lo, a_hi; + PROF_EVENT(70); + for (i = 0; i < n_shadows; i++) { + PROF_EVENT(71); + a_lo = shadows[i]->data; + a_hi = ((Addr)shadows[i]->data) + shadows[i]->size - 1; + if (a_lo <= ptr && ptr <= a_hi) + return i; + } + return -1; +} +#endif + + +static Int find_shadow_for ( Addr ptr, + ShadowChunk** shadows, + Int n_shadows ) +{ + Addr a_mid_lo, a_mid_hi; + Int lo, mid, hi, retVal; + PROF_EVENT(70); + /* VG_(printf)("find shadow for %p = ", ptr); */ + retVal = -1; + lo = 0; + hi = n_shadows-1; + while (True) { + PROF_EVENT(71); + + /* invariant: current unsearched space is from lo to hi, + inclusive. */ + if (lo > hi) break; /* not found */ + + mid = (lo + hi) / 2; + a_mid_lo = shadows[mid]->data; + a_mid_hi = ((Addr)shadows[mid]->data) + shadows[mid]->size - 1; + + if (ptr < a_mid_lo) { + hi = mid-1; + continue; + } + if (ptr > a_mid_hi) { + lo = mid+1; + continue; + } + vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi); + retVal = mid; + break; + } + +# ifdef VG_DEBUG_LEAKCHECK + vg_assert(retVal == find_shadow_for_OLD ( ptr, shadows, n_shadows )); +# endif + /* VG_(printf)("%d\n", retVal); */ + return retVal; +} + + + +static void sort_malloc_shadows ( ShadowChunk** shadows, UInt n_shadows ) +{ + Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, + 797161, 2391484 }; + Int lo = 0; + Int hi = n_shadows-1; + Int i, j, h, bigN, hp; + ShadowChunk* v; + + PROF_EVENT(72); + bigN = hi - lo + 1; if (bigN < 2) return; + hp = 0; while (incs[hp] < bigN) hp++; hp--; + + for (; hp >= 0; hp--) { + PROF_EVENT(73); + h = incs[hp]; + i = lo + h; + while (1) { + PROF_EVENT(74); + if (i > hi) break; + v = shadows[i]; + j = i; + while (shadows[j-h]->data > v->data) { + PROF_EVENT(75); + 
shadows[j] = shadows[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + shadows[j] = v; + i++; + } + } +} + +/* Globals, for the callback used by VG_(detect_memory_leaks). */ + +static ShadowChunk** vglc_shadows; +static Int vglc_n_shadows; +static Reachedness* vglc_reachedness; +static Addr vglc_min_mallocd_addr; +static Addr vglc_max_mallocd_addr; + +static +void vg_detect_memory_leaks_notify_addr ( Addr a, UInt word_at_a ) +{ + Int sh_no; + Addr ptr = (Addr)word_at_a; + if (ptr >= vglc_min_mallocd_addr && ptr <= vglc_max_mallocd_addr) { + /* Might be legitimate; we'll have to investigate further. */ + sh_no = find_shadow_for ( ptr, vglc_shadows, vglc_n_shadows ); + if (sh_no != -1) { + /* Found a block at/into which ptr points. */ + vg_assert(sh_no >= 0 && sh_no < vglc_n_shadows); + vg_assert(ptr < vglc_shadows[sh_no]->data + + vglc_shadows[sh_no]->size); + /* Decide whether Proper-ly or Interior-ly reached. */ + if (ptr == vglc_shadows[sh_no]->data) { + vglc_reachedness[sh_no] = Proper; + } else { + if (vglc_reachedness[sh_no] == Unreached) + vglc_reachedness[sh_no] = Interior; + } + } + } +} + + +void VG_(detect_memory_leaks) ( void ) +{ + Int i; + Int blocks_leaked, bytes_leaked; + Int blocks_dubious, bytes_dubious; + Int blocks_reachable, bytes_reachable; + Int n_lossrecords; + UInt bytes_notified; + + LossRecord* errlist; + LossRecord* p; + + Bool (*ec_comparer_fn) ( ExeContext*, ExeContext* ); + PROF_EVENT(76); + vg_assert(VG_(clo_instrument)); + + /* Decide how closely we want to match ExeContexts in leak + records. 
*/ + switch (VG_(clo_leak_resolution)) { + case 2: + ec_comparer_fn = VG_(eq_ExeContext_top2); + break; + case 4: + ec_comparer_fn = VG_(eq_ExeContext_top4); + break; + case VG_DEEPEST_BACKTRACE: + ec_comparer_fn = VG_(eq_ExeContext_all); + break; + default: + VG_(panic)("VG_(detect_memory_leaks): " + "bad VG_(clo_leak_resolution)"); + break; + } + + /* vg_get_malloc_shadows allocates storage for shadows */ + vglc_shadows = VG_(get_malloc_shadows)( &vglc_n_shadows ); + if (vglc_n_shadows == 0) { + vg_assert(vglc_shadows == NULL); + VG_(message)(Vg_UserMsg, + "No malloc'd blocks -- no leaks are possible.\n"); + return; + } + + VG_(message)(Vg_UserMsg, + "searching for pointers to %d not-freed blocks.", + vglc_n_shadows ); + sort_malloc_shadows ( vglc_shadows, vglc_n_shadows ); + + /* Sanity check; assert that the blocks are now in order and that + they don't overlap. */ + for (i = 0; i < vglc_n_shadows-1; i++) { + vg_assert( ((Addr)vglc_shadows[i]->data) + < ((Addr)vglc_shadows[i+1]->data) ); + vg_assert( ((Addr)vglc_shadows[i]->data) + vglc_shadows[i]->size + < ((Addr)vglc_shadows[i+1]->data) ); + } + + vglc_min_mallocd_addr = ((Addr)vglc_shadows[0]->data); + vglc_max_mallocd_addr = ((Addr)vglc_shadows[vglc_n_shadows-1]->data) + + vglc_shadows[vglc_n_shadows-1]->size - 1; + + vglc_reachedness + = VG_(malloc)( VG_AR_PRIVATE, vglc_n_shadows * sizeof(Reachedness) ); + for (i = 0; i < vglc_n_shadows; i++) + vglc_reachedness[i] = Unreached; + + /* Do the scan of memory. 
*/ + bytes_notified + = VG_(scan_all_valid_memory)( &vg_detect_memory_leaks_notify_addr ) + * VKI_BYTES_PER_WORD; + + VG_(message)(Vg_UserMsg, "checked %d bytes.", bytes_notified); + + blocks_leaked = bytes_leaked = 0; + blocks_dubious = bytes_dubious = 0; + blocks_reachable = bytes_reachable = 0; + + for (i = 0; i < vglc_n_shadows; i++) { + if (vglc_reachedness[i] == Unreached) { + blocks_leaked++; + bytes_leaked += vglc_shadows[i]->size; + } + else if (vglc_reachedness[i] == Interior) { + blocks_dubious++; + bytes_dubious += vglc_shadows[i]->size; + } + else if (vglc_reachedness[i] == Proper) { + blocks_reachable++; + bytes_reachable += vglc_shadows[i]->size; + } + } + + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, "definitely lost: %d bytes in %d blocks.", + bytes_leaked, blocks_leaked ); + VG_(message)(Vg_UserMsg, "possibly lost: %d bytes in %d blocks.", + bytes_dubious, blocks_dubious ); + VG_(message)(Vg_UserMsg, "still reachable: %d bytes in %d blocks.", + bytes_reachable, blocks_reachable ); + + + /* Common up the lost blocks so we can print sensible error + messages. 
*/ + + n_lossrecords = 0; + errlist = NULL; + for (i = 0; i < vglc_n_shadows; i++) { + for (p = errlist; p != NULL; p = p->next) { + if (p->loss_mode == vglc_reachedness[i] + && ec_comparer_fn ( + p->allocated_at, + vglc_shadows[i]->where) ) { + break; + } + } + if (p != NULL) { + p->num_blocks ++; + p->total_bytes += vglc_shadows[i]->size; + } else { + n_lossrecords ++; + p = VG_(malloc)(VG_AR_PRIVATE, sizeof(LossRecord)); + p->loss_mode = vglc_reachedness[i]; + p->allocated_at = vglc_shadows[i]->where; + p->total_bytes = vglc_shadows[i]->size; + p->num_blocks = 1; + p->next = errlist; + errlist = p; + } + } + + for (i = 0; i < n_lossrecords; i++) { + LossRecord* p_min = NULL; + UInt n_min = 0xFFFFFFFF; + for (p = errlist; p != NULL; p = p->next) { + if (p->num_blocks > 0 && p->total_bytes < n_min) { + n_min = p->total_bytes; + p_min = p; + } + } + vg_assert(p_min != NULL); + + if ( (!VG_(clo_show_reachable)) && p_min->loss_mode == Proper) { + p_min->num_blocks = 0; + continue; + } + + VG_(message)(Vg_UserMsg, ""); + VG_(message)( + Vg_UserMsg, + "%d bytes in %d blocks are %s in loss record %d of %d", + p_min->total_bytes, p_min->num_blocks, + p_min->loss_mode==Unreached ? "definitely lost" : + (p_min->loss_mode==Interior ? 
"possibly lost" + : "still reachable"), + i+1, n_lossrecords + ); + VG_(pp_ExeContext)(p_min->allocated_at); + p_min->num_blocks = 0; + } + + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, "LEAK SUMMARY:"); + VG_(message)(Vg_UserMsg, " possibly lost: %d bytes in %d blocks.", + bytes_dubious, blocks_dubious ); + VG_(message)(Vg_UserMsg, " definitely lost: %d bytes in %d blocks.", + bytes_leaked, blocks_leaked ); + VG_(message)(Vg_UserMsg, " still reachable: %d bytes in %d blocks.", + bytes_reachable, blocks_reachable ); + if (!VG_(clo_show_reachable)) { + VG_(message)(Vg_UserMsg, + "Reachable blocks (those to which a pointer was found) are not shown."); + VG_(message)(Vg_UserMsg, + "To see them, rerun with: --show-reachable=yes"); + } + VG_(message)(Vg_UserMsg, ""); + + VG_(free) ( VG_AR_PRIVATE, vglc_shadows ); + VG_(free) ( VG_AR_PRIVATE, vglc_reachedness ); +} + + +/* --------------------------------------------------------------------- + Sanity check machinery (permanently engaged). + ------------------------------------------------------------------ */ + +/* Check that nobody has spuriously claimed that the first or last 16 + pages (64 KB) of address space have become accessible. Failure of + the following do not per se indicate an internal consistency + problem, but they are so likely to that we really want to know + about it if so. */ + +Bool VG_(first_and_last_secondaries_look_plausible) ( void ) +{ + if (IS_DISTINGUISHED_SM(VG_(primary_map)[0]) + && IS_DISTINGUISHED_SM(VG_(primary_map)[65535])) { + return True; + } else { + return False; + } +} + + +/* A fast sanity check -- suitable for calling circa once per + millisecond. */ + +void VG_(do_sanity_checks) ( Bool force_expensive ) +{ + Int i; + Bool do_expensive_checks; + + if (VG_(sanity_level) < 1) return; + + /* --- First do all the tests that we can do quickly. ---*/ + + VG_(sanity_fast_count)++; + + /* Check that we haven't overrun our private stack. 
*/ + for (i = 0; i < 10; i++) { + vg_assert(VG_(stack)[i] + == ((UInt)(&VG_(stack)[i]) ^ 0xA4B3C2D1)); + vg_assert(VG_(stack)[10000-1-i] + == ((UInt)(&VG_(stack)[10000-i-1]) ^ 0xABCD4321)); + } + + /* Check stuff pertaining to the memory check system. */ + + if (VG_(clo_instrument)) { + + /* Check that the eflags tag is as expected. */ + UInt vv = VG_(baseBlock)[VGOFF_(sh_eflags)]; + vg_assert(vv == VGM_EFLAGS_VALID || VGM_EFLAGS_INVALID); + + /* Check that nobody has spuriously claimed that the first or + last 16 pages of memory have become accessible [...] */ + vg_assert(VG_(first_and_last_secondaries_look_plausible)); + } + +# if 0 + if ( (VG_(baseBlock)[VGOFF_(sh_eflags)] & 1) == 1) + VG_(printf)("UNDEF\n") ; else + VG_(printf)("def\n") ; +# endif + + /* --- Now some more expensive checks. ---*/ + + /* Once every 25 times, check some more expensive stuff. */ + + do_expensive_checks = False; + if (force_expensive) + do_expensive_checks = True; + if (VG_(sanity_level) > 1) + do_expensive_checks = True; + if (VG_(sanity_level) == 1 + && (VG_(sanity_fast_count) % 25) == 0) + do_expensive_checks = True; + + if (do_expensive_checks) { + VG_(sanity_slow_count)++; + +# if 0 + { void zzzmemscan(void); zzzmemscan(); } +# endif + + if ((VG_(sanity_fast_count) % 250) == 0) + VG_(sanity_check_tc_tt)(); + + if (VG_(clo_instrument)) { + /* Make sure nobody changed the distinguished secondary. */ + for (i = 0; i < 8192; i++) + vg_assert(vg_distinguished_secondary_map.abits[i] + == VGM_BYTE_INVALID); + for (i = 0; i < 65536; i++) + vg_assert(vg_distinguished_secondary_map.vbyte[i] + == VGM_BYTE_INVALID); + + /* Make sure that the upper 3/4 of the primary map hasn't + been messed with. */ + for (i = 65536; i < 262144; i++) + vg_assert(VG_(primary_map)[i] + == & vg_distinguished_secondary_map); + } + /* + if ((VG_(sanity_fast_count) % 500) == 0) VG_(mallocSanityCheckAll)(); + */ + } + + if (VG_(sanity_level) > 1) { + /* Check sanity of the low-level memory manager. 
Note that bugs + in the client's code can cause this to fail, so we don't do + this check unless specially asked for. And because it's + potentially very expensive. */ + VG_(mallocSanityCheckAll)(); + } +} + + +/* --------------------------------------------------------------------- + Debugging machinery (turn on to debug). Something of a mess. + ------------------------------------------------------------------ */ + +/* Print the value tags on the 8 integer registers & flag reg. */ + +static void uint_to_bits ( UInt x, Char* str ) +{ + Int i; + Int w = 0; + /* str must point to a space of at least 36 bytes. */ + for (i = 31; i >= 0; i--) { + str[w++] = (x & ( ((UInt)1) << i)) ? '1' : '0'; + if (i == 24 || i == 16 || i == 8) + str[w++] = ' '; + } + str[w++] = 0; + vg_assert(w == 36); +} + +void VG_(show_reg_tags) ( void ) +{ + Char buf1[36]; + Char buf2[36]; + UInt z_eax, z_ebx, z_ecx, z_edx, + z_esi, z_edi, z_ebp, z_esp, z_eflags; + + z_eax = VG_(baseBlock)[VGOFF_(sh_eax)]; + z_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)]; + z_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)]; + z_edx = VG_(baseBlock)[VGOFF_(sh_edx)]; + z_esi = VG_(baseBlock)[VGOFF_(sh_esi)]; + z_edi = VG_(baseBlock)[VGOFF_(sh_edi)]; + z_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)]; + z_esp = VG_(baseBlock)[VGOFF_(sh_esp)]; + z_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)]; + + uint_to_bits(z_eflags, buf1); + VG_(message)(Vg_DebugMsg, "efl %\n", buf1); + + uint_to_bits(z_eax, buf1); + uint_to_bits(z_ebx, buf2); + VG_(message)(Vg_DebugMsg, "eax %s ebx %s\n", buf1, buf2); + + uint_to_bits(z_ecx, buf1); + uint_to_bits(z_edx, buf2); + VG_(message)(Vg_DebugMsg, "ecx %s edx %s\n", buf1, buf2); + + uint_to_bits(z_esi, buf1); + uint_to_bits(z_edi, buf2); + VG_(message)(Vg_DebugMsg, "esi %s edi %s\n", buf1, buf2); + + uint_to_bits(z_ebp, buf1); + uint_to_bits(z_esp, buf2); + VG_(message)(Vg_DebugMsg, "ebp %s esp %s\n", buf1, buf2); +} + + +#if 0 +/* For debugging only. Scan the address space and touch all allegedly + addressible words. 
Useful for establishing where Valgrind's idea of + addressibility has diverged from what the kernel believes. */ + +static +void zzzmemscan_notify_word ( Addr a, UInt w ) +{ +} + +void zzzmemscan ( void ) +{ + Int n_notifies + = VG_(scan_all_valid_memory)( zzzmemscan_notify_word ); + VG_(printf)("zzzmemscan: n_bytes = %d\n", 4 * n_notifies ); +} +#endif + + + + +#if 0 +static Int zzz = 0; + +void show_bb ( Addr eip_next ) +{ + VG_(printf)("[%4d] ", zzz); + VG_(show_reg_tags)( &VG_(m_shadow ); + VG_(translate) ( eip_next, NULL, NULL, NULL ); +} +#endif /* 0 */ + +/*--------------------------------------------------------------------*/ +/*--- end vg_memory.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_messages.c b/vg_messages.c new file mode 100644 index 000000000..343a85962 --- /dev/null +++ b/vg_messages.c @@ -0,0 +1,105 @@ + +/*--------------------------------------------------------------------*/ +/*--- For sending error/informative messages. ---*/ +/*--- vg_message.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + + +#include "vg_include.h" + + +static char vg_mbuf[M_VG_MSGBUF]; +static int vg_n_mbuf; + +static void add_to_buf ( Char c ) +{ + if (vg_n_mbuf >= (M_VG_MSGBUF-1)) return; + vg_mbuf[vg_n_mbuf++] = c; + vg_mbuf[vg_n_mbuf] = 0; +} + + +/* Publically visible from here onwards. */ + +void +VG_(add_to_msg) ( Char *format, ... ) +{ + va_list vargs; + va_start(vargs,format); + VG_(vprintf) ( add_to_buf, format, vargs ); + va_end(vargs); +} + +/* Send a simple single-part message. */ +void VG_(message) ( VgMsgKind kind, Char* format, ... ) +{ + va_list vargs; + va_start(vargs,format); + VG_(start_msg) ( kind ); + VG_(vprintf) ( add_to_buf, format, vargs ); + va_end(vargs); + VG_(end_msg)(); +} + +void VG_(start_msg) ( VgMsgKind kind ) +{ + Char c; + vg_n_mbuf = 0; + vg_mbuf[vg_n_mbuf] = 0; + switch (kind) { + case Vg_UserMsg: c = '='; break; + case Vg_DebugMsg: c = '-'; break; + case Vg_DebugExtraMsg: c = '+'; break; + default: c = '?'; break; + } + VG_(add_to_msg)( "%c%c%d%c%c ", + c,c, VG_(getpid)(), c,c ); +} + + +void VG_(end_msg) ( void ) +{ + if (VG_(clo_logfile_fd) >= 0) { + add_to_buf('\n'); + VG_(write)(VG_(clo_logfile_fd), vg_mbuf, VG_(strlen)(vg_mbuf)); + } +} + + +void VG_(startup_logging) ( void ) +{ +} + +void VG_(shutdown_logging) ( void ) +{ +} + +/*--------------------------------------------------------------------*/ +/*--- end vg_message.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_mylibc.c b/vg_mylibc.c new file mode 100644 index 000000000..2ba0753d3 --- /dev/null +++ b/vg_mylibc.c @@ -0,0 +1,929 @@ + +/*--------------------------------------------------------------------*/ +/*--- Reimplementation of some C library 
stuff, to avoid depending ---*/ +/*--- on libc.so. ---*/ +/*--- vg_mylibc.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" + + + +/* --------------------------------------------------------------------- + Really Actually DO system calls. + ------------------------------------------------------------------ */ + +/* Ripped off from /usr/include/asm/unistd.h. 
*/ + +static +UInt vg_do_syscall0 ( UInt syscallno ) +{ + UInt __res; + __asm__ volatile ("int $0x80" + : "=a" (__res) + : "0" (syscallno) ); + return __res; +} + + +static +UInt vg_do_syscall1 ( UInt syscallno, UInt arg1 ) +{ + UInt __res; + __asm__ volatile ("int $0x80" + : "=a" (__res) + : "0" (syscallno), + "b" (arg1) ); + return __res; +} + + +static +UInt vg_do_syscall2 ( UInt syscallno, + UInt arg1, UInt arg2 ) +{ + UInt __res; + __asm__ volatile ("int $0x80" + : "=a" (__res) + : "0" (syscallno), + "b" (arg1), + "c" (arg2) ); + return __res; +} + + +static +UInt vg_do_syscall3 ( UInt syscallno, + UInt arg1, UInt arg2, UInt arg3 ) +{ + UInt __res; + __asm__ volatile ("int $0x80" + : "=a" (__res) + : "0" (syscallno), + "b" (arg1), + "c" (arg2), + "d" (arg3) ); + return __res; +} + + +static +UInt vg_do_syscall4 ( UInt syscallno, + UInt arg1, UInt arg2, UInt arg3, UInt arg4 ) +{ + UInt __res; + __asm__ volatile ("int $0x80" + : "=a" (__res) + : "0" (syscallno), + "b" (arg1), + "c" (arg2), + "d" (arg3), + "S" (arg4) ); + return __res; +} + + +#if 0 +static +UInt vg_do_syscall5 ( UInt syscallno, + UInt arg1, UInt arg2, UInt arg3, UInt arg4, + UInt arg5 ) +{ + UInt __res; + __asm__ volatile ("int $0x80" + : "=a" (__res) + : "0" (syscallno), + "b" (arg1), + "c" (arg2), + "d" (arg3), + "S" (arg4), + "D" (arg5) ); + return __res; +} +#endif + +/* --------------------------------------------------------------------- + Wrappers around system calls, and other stuff, to do with signals. + ------------------------------------------------------------------ */ + +/* sigemptyset, sigfullset, sigaddset and sigdelset return 0 on + success and -1 on error. 
+*/ +Int VG_(ksigfillset)( vki_ksigset_t* set ) +{ + Int i; + if (set == NULL) + return -1; + for (i = 0; i < VKI_KNSIG_WORDS; i++) + set->ws[i] = 0xFFFFFFFF; + return 0; +} + +Int VG_(ksigemptyset)( vki_ksigset_t* set ) +{ + Int i; + if (set == NULL) + return -1; + for (i = 0; i < VKI_KNSIG_WORDS; i++) + set->ws[i] = 0x0; + return 0; +} + +Int VG_(ksigaddset)( vki_ksigset_t* set, Int signum ) +{ + if (set == NULL) + return -1; + if (signum < 1 && signum > VKI_KNSIG) + return -1; + signum--; + set->ws[signum / VKI_KNSIG_BPW] |= (1 << (signum % VKI_KNSIG_BPW)); + return 0; +} + +Int VG_(ksigismember) ( vki_ksigset_t* set, Int signum ) +{ + if (set == NULL) + return -1; + if (signum < 1 && signum > VKI_KNSIG) + return -1; + signum--; + if (1 & ((set->ws[signum / VKI_KNSIG_BPW]) >> (signum % VKI_KNSIG_BPW))) + return 1; + else + return 0; +} + + +/* The functions sigaction, sigprocmask, sigpending and sigsuspend + return 0 on success and -1 on error. +*/ +Int VG_(ksigprocmask)( Int how, + const vki_ksigset_t* set, + vki_ksigset_t* oldset) +{ + Int res + = vg_do_syscall4(__NR_rt_sigprocmask, + how, (UInt)set, (UInt)oldset, + VKI_KNSIG_WORDS * VKI_BYTES_PER_WORD); + return VG_(is_kerror)(res) ? -1 : 0; +} + + +Int VG_(ksigaction) ( Int signum, + const vki_ksigaction* act, + vki_ksigaction* oldact) +{ + Int res + = vg_do_syscall4(__NR_rt_sigaction, + signum, (UInt)act, (UInt)oldact, + VKI_KNSIG_WORDS * VKI_BYTES_PER_WORD); + return VG_(is_kerror)(res) ? -1 : 0; +} + + +Int VG_(ksigaltstack)( const vki_kstack_t* ss, vki_kstack_t* oss ) +{ + Int res + = vg_do_syscall2(__NR_sigaltstack, (UInt)ss, (UInt)oss); + return VG_(is_kerror)(res) ? 
-1 : 0; +} + + +Int VG_(ksignal)(Int signum, void (*sighandler)(Int)) +{ + Int res; + vki_ksigaction sa; + sa.ksa_handler = sighandler; + sa.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART; + sa.ksa_restorer = NULL; + res = VG_(ksigemptyset)( &sa.ksa_mask ); + vg_assert(res == 0); + res = vg_do_syscall4(__NR_rt_sigaction, + signum, (UInt)(&sa), (UInt)NULL, + VKI_KNSIG_WORDS * VKI_BYTES_PER_WORD); + return VG_(is_kerror)(res) ? -1 : 0; +} + + +/* --------------------------------------------------------------------- + mmap/munmap, exit + ------------------------------------------------------------------ */ + +/* Returns -1 on failure. */ +void* VG_(mmap)( void* start, UInt length, + UInt prot, UInt flags, UInt fd, UInt offset) +{ + Int res; + UInt args[6]; + args[0] = (UInt)start; + args[1] = length; + args[2] = prot; + args[3] = flags; + args[4] = fd; + args[5] = offset; + res = vg_do_syscall1(__NR_mmap, (UInt)(&(args[0])) ); + return VG_(is_kerror)(res) ? ((void*)(-1)) : (void*)res; +} + +/* Returns -1 on failure. */ +Int VG_(munmap)( void* start, Int length ) +{ + Int res = vg_do_syscall2(__NR_munmap, (UInt)start, (UInt)length ); + return VG_(is_kerror)(res) ? -1 : 0; +} + +void VG_(exit)( Int status ) +{ + (void)vg_do_syscall1(__NR_exit, (UInt)status ); + /* Why are we still alive here? */ + /*NOTREACHED*/ + vg_assert(2+2 == 5); +} + +/* --------------------------------------------------------------------- + printf implementation. The key function, vg_vprintf(), emits chars + into a caller-supplied function. Distantly derived from: + + vprintf replacement for Checker. + Copyright 1993, 1994, 1995 Tristan Gingold + Written September 1993 Tristan Gingold + Tristan Gingold, 8 rue Parmentier, F-91120 PALAISEAU, FRANCE + + (Checker itself was GPL'd.) + ------------------------------------------------------------------ */ + + +/* Some flags. */ +#define VG_MSG_SIGNED 1 /* The value is signed. */ +#define VG_MSG_ZJUSTIFY 2 /* Must justify with '0'. 
*/ +#define VG_MSG_LJUSTIFY 4 /* Must justify on the left. */ + + +/* Copy a string into the buffer. */ +static void +myvprintf_str ( void(*send)(Char), Int flags, Int width, Char* str, + Bool capitalise ) +{ +# define MAYBE_TOUPPER(ch) (capitalise ? VG_(toupper)(ch) : (ch)) + + Int i, extra; + Int len = VG_(strlen)(str); + + if (width == 0) { + for (i = 0; i < len; i++) + send(MAYBE_TOUPPER(str[i])); + return; + } + + if (len > width) { + for (i = 0; i < width; i++) + send(MAYBE_TOUPPER(str[i])); + return; + } + + extra = width - len; + if (flags & VG_MSG_LJUSTIFY) { + for (i = 0; i < extra; i++) + send(' '); + } + for (i = 0; i < len; i++) + send(MAYBE_TOUPPER(str[i])); + if (!(flags & VG_MSG_LJUSTIFY)) { + for (i = 0; i < extra; i++) + send(' '); + } + +# undef MAYBE_TOUPPER +} + +/* Write P into the buffer according to these args: + * If SIGN is true, p is a signed. + * BASE is the base. + * If WITH_ZERO is true, '0' must be added. + * WIDTH is the width of the field. + */ +static void +myvprintf_int64 ( void(*send)(Char), Int flags, Int base, Int width, ULong p) +{ + Char buf[40]; + Int ind = 0; + Int i; + Bool neg = False; + Char *digits = "0123456789ABCDEF"; + + if (base < 2 || base > 16) + return; + + if ((flags & VG_MSG_SIGNED) && (Long)p < 0) { + p = - (Long)p; + neg = True; + } + + if (p == 0) + buf[ind++] = '0'; + else { + while (p > 0) { + buf[ind++] = digits[p % base]; + p /= base; + } + } + + if (neg) + buf[ind++] = '-'; + + if (width > 0 && !(flags & VG_MSG_LJUSTIFY)) { + for(; ind < width; ind++) { + vg_assert(ind < 39); + buf[ind] = (flags & VG_MSG_ZJUSTIFY) ? '0': ' '; + } + } + + /* Reverse copy to buffer. */ + for (i = ind -1; i >= 0; i--) + send(buf[i]); + + if (width > 0 && (flags & VG_MSG_LJUSTIFY)) { + for(; ind < width; ind++) + send((flags & VG_MSG_ZJUSTIFY) ? '0': ' '); + } +} + + +/* A simple vprintf(). 
*/ +void +VG_(vprintf) ( void(*send)(Char), const Char *format, va_list vargs ) +{ + int i; + int flags; + int width; + Bool is_long; + + /* We assume that vargs has already been initialised by the + caller, using va_start, and that the caller will similarly + clean up with va_end. + */ + + for (i = 0; format[i] != 0; i++) { + if (format[i] != '%') { + send(format[i]); + continue; + } + i++; + /* A '%' has been found. Ignore a trailing %. */ + if (format[i] == 0) + break; + if (format[i] == '%') { + /* `%%' is replaced by `%'. */ + send('%'); + continue; + } + flags = 0; + is_long = False; + width = 0; /* length of the field. */ + /* If '-' follows '%', justify on the left. */ + if (format[i] == '-') { + flags |= VG_MSG_LJUSTIFY; + i++; + } + /* If '0' follows '%', pads will be inserted. */ + if (format[i] == '0') { + flags |= VG_MSG_ZJUSTIFY; + i++; + } + /* Compute the field length. */ + while (format[i] >= '0' && format[i] <= '9') { + width *= 10; + width += format[i++] - '0'; + } + while (format[i] == 'l') { + i++; + is_long = True; + } + + switch (format[i]) { + case 'd': /* %d */ + flags |= VG_MSG_SIGNED; + if (is_long) + myvprintf_int64(send, flags, 10, width, + (ULong)(va_arg (vargs, Long))); + else + myvprintf_int64(send, flags, 10, width, + (ULong)(va_arg (vargs, Int))); + break; + case 'u': /* %u */ + if (is_long) + myvprintf_int64(send, flags, 10, width, + (ULong)(va_arg (vargs, ULong))); + else + myvprintf_int64(send, flags, 10, width, + (ULong)(va_arg (vargs, UInt))); + break; + case 'p': /* %p */ + send('0'); + send('x'); + myvprintf_int64(send, flags, 16, width, + (ULong)((UInt)va_arg (vargs, void *))); + break; + case 'x': /* %x */ + if (is_long) + myvprintf_int64(send, flags, 16, width, + (ULong)(va_arg (vargs, ULong))); + else + myvprintf_int64(send, flags, 16, width, + (ULong)(va_arg (vargs, UInt))); + break; + case 'c': /* %c */ + send(va_arg (vargs, int)); + break; + case 's': case 'S': { /* %s */ + char *str = va_arg (vargs, char *); + if 
(str == (char*) 0) str = "(null)"; + myvprintf_str(send, flags, width, str, format[i]=='S'); + break; + } + default: + break; + } + } +} + + +/* A general replacement for printf(). Note that only low-level + debugging info should be sent via here. The official route is to + to use vg_message(). This interface is deprecated. +*/ +static char myprintf_buf[100]; +static int n_myprintf_buf; + +static void add_to_myprintf_buf ( Char c ) +{ + if (n_myprintf_buf >= 100-10 /*paranoia*/ ) { + if (VG_(clo_logfile_fd) >= 0) + VG_(write) + (VG_(clo_logfile_fd), myprintf_buf, VG_(strlen)(myprintf_buf)); + n_myprintf_buf = 0; + myprintf_buf[n_myprintf_buf] = 0; + } + myprintf_buf[n_myprintf_buf++] = c; + myprintf_buf[n_myprintf_buf] = 0; +} + +void VG_(printf) ( const char *format, ... ) +{ + va_list vargs; + va_start(vargs,format); + + n_myprintf_buf = 0; + myprintf_buf[n_myprintf_buf] = 0; + VG_(vprintf) ( add_to_myprintf_buf, format, vargs ); + + if (n_myprintf_buf > 0 && VG_(clo_logfile_fd) >= 0) + VG_(write) + ( VG_(clo_logfile_fd), myprintf_buf, VG_(strlen)(myprintf_buf)); + + va_end(vargs); +} + + +/* A general replacement for sprintf(). */ +static Char* vg_sprintf_ptr; + +static void add_to_vg_sprintf_buf ( Char c ) +{ + *vg_sprintf_ptr++ = c; +} + +void VG_(sprintf) ( Char* buf, Char *format, ... ) +{ + va_list vargs; + va_start(vargs,format); + + vg_sprintf_ptr = buf; + VG_(vprintf) ( add_to_vg_sprintf_buf, format, vargs ); + add_to_vg_sprintf_buf(0); + + va_end(vargs); +} + + +/* --------------------------------------------------------------------- + Misc str* functions. 
+ ------------------------------------------------------------------ */ + +Bool VG_(isspace) ( Char c ) +{ + return (c == ' ' || c == '\n' || c == '\t' || c == 0); +} + + +Int VG_(strlen) ( const Char* str ) +{ + Int i = 0; + while (str[i] != 0) i++; + return i; +} + + +Long VG_(atoll) ( Char* str ) +{ + Bool neg = False; + Long n = 0; + if (*str == '-') { str++; neg = True; }; + while (*str >= '0' && *str <= '9') { + n = 10*n + (Long)(*str - '0'); + str++; + } + if (neg) n = -n; + return n; +} + + +Char* VG_(strcat) ( Char* dest, const Char* src ) +{ + Char* dest_orig = dest; + while (*dest) dest++; + while (*src) *dest++ = *src++; + *dest = 0; + return dest_orig; +} + + +Char* VG_(strncat) ( Char* dest, const Char* src, Int n ) +{ + Char* dest_orig = dest; + while (*dest) dest++; + while (*src && n > 0) { *dest++ = *src++; n--; } + *dest = 0; + return dest_orig; +} + + +Char* VG_(strpbrk) ( const Char* s, const Char* accept ) +{ + const Char* a; + while (*s) { + a = accept; + while (*a) + if (*a++ == *s) + return (Char *) s; + s++; + } + return NULL; +} + + +Char* VG_(strcpy) ( Char* dest, const Char* src ) +{ + Char* dest_orig = dest; + while (*src) *dest++ = *src++; + *dest = 0; + return dest_orig; +} + + +/* Copy bytes, not overrunning the end of dest and always ensuring + zero termination. 
*/ +void VG_(strncpy_safely) ( Char* dest, const Char* src, Int ndest ) +{ + Int i; + vg_assert(ndest > 0); + i = 0; + dest[i] = 0; + while (True) { + if (src[i] == 0) return; + if (i >= ndest-1) return; + dest[i] = src[i]; + i++; + dest[i] = 0; + } +} + + +void VG_(strncpy) ( Char* dest, const Char* src, Int ndest ) +{ + VG_(strncpy_safely)( dest, src, ndest+1 ); +} + + +Int VG_(strcmp) ( const Char* s1, const Char* s2 ) +{ + while (True) { + if (*s1 == 0 && *s2 == 0) return 0; + if (*s1 == 0) return -1; + if (*s2 == 0) return 1; + + if (*(UChar*)s1 < *(UChar*)s2) return -1; + if (*(UChar*)s1 > *(UChar*)s2) return 1; + + s1++; s2++; + } +} + + +Int VG_(strcmp_ws) ( const Char* s1, const Char* s2 ) +{ + while (True) { + if (VG_(isspace)(*s1) && VG_(isspace)(*s2)) return 0; + if (VG_(isspace)(*s1)) return -1; + if (VG_(isspace)(*s2)) return 1; + + if (*(UChar*)s1 < *(UChar*)s2) return -1; + if (*(UChar*)s1 > *(UChar*)s2) return 1; + + s1++; s2++; + } +} + + +Int VG_(strncmp) ( const Char* s1, const Char* s2, Int nmax ) +{ + Int n = 0; + while (True) { + if (n >= nmax) return 0; + if (*s1 == 0 && *s2 == 0) return 0; + if (*s1 == 0) return -1; + if (*s2 == 0) return 1; + + if (*(UChar*)s1 < *(UChar*)s2) return -1; + if (*(UChar*)s1 > *(UChar*)s2) return 1; + + s1++; s2++; n++; + } +} + + +Int VG_(strncmp_ws) ( const Char* s1, const Char* s2, Int nmax ) +{ + Int n = 0; + while (True) { + if (n >= nmax) return 0; + if (VG_(isspace)(*s1) && VG_(isspace)(*s2)) return 0; + if (VG_(isspace)(*s1)) return -1; + if (VG_(isspace)(*s2)) return 1; + + if (*(UChar*)s1 < *(UChar*)s2) return -1; + if (*(UChar*)s1 > *(UChar*)s2) return 1; + + s1++; s2++; n++; + } +} + + +Char* VG_(strstr) ( const Char* haystack, Char* needle ) +{ + Int n = VG_(strlen)(needle); + while (True) { + if (haystack[0] == 0) + return NULL; + if (VG_(strncmp)(haystack, needle, n) == 0) + return (Char*)haystack; + haystack++; + } +} + + +Char* VG_(strchr) ( const Char* s, Char c ) +{ + while (True) { + if (*s 
== c) return (Char*)s; + if (*s == 0) return NULL; + s++; + } +} + + +Char VG_(toupper) ( Char c ) +{ + if (c >= 'a' && c <= 'z') + return c + ('A' - 'a'); + else + return c; +} + + +Char* VG_(strdup) ( ArenaId aid, const Char* s ) +{ + Int i; + Int len = VG_(strlen)(s) + 1; + Char* res = VG_(malloc) (aid, len); + for (i = 0; i < len; i++) + res[i] = s[i]; + return res; +} + + +/* --------------------------------------------------------------------- + A simple string matching routine, purloined from Hugs98. + `*' matches any sequence of zero or more characters + `?' matches any single character exactly + `\c' matches the character c only (ignoring special chars) + c matches the character c only + ------------------------------------------------------------------ */ + +/* Keep track of recursion depth. */ +static Int recDepth; + +static Bool stringMatch_wrk ( Char* pat, Char* str ) +{ + vg_assert(recDepth >= 0 && recDepth < 500); + recDepth++; + for (;;) { + switch (*pat) { + case '\0' : return (*str=='\0'); + case '*' : do { + if (stringMatch_wrk(pat+1,str)) { + recDepth--; + return True; + } + } while (*str++); + recDepth--; + return False; + case '?' : if (*str++=='\0') { + recDepth--; + return False; + } + pat++; + break; + case '\\' : if (*++pat == '\0') { + recDepth--; + return False; /* spurious trailing \ in pattern */ + } + /* falls through to ... */ + default : if (*pat++ != *str++) { + recDepth--; + return False; + } + break; + } + } +} + +Bool VG_(stringMatch) ( Char* pat, Char* str ) +{ + Bool b; + recDepth = 0; + b = stringMatch_wrk ( pat, str ); + /* + VG_(printf)("%s %s %s\n", + b?"TRUE ":"FALSE", pat, str); + */ + return b; +} + + +/* --------------------------------------------------------------------- + Assertery. 
+ ------------------------------------------------------------------ */ + +#define EMAIL_ADDR "jseward@acm.org" + +void VG_(assert_fail) ( Char* expr, Char* file, Int line, Char* fn ) +{ + VG_(printf)("\n%s: %s:%d (%s): Assertion `%s' failed.\n", + "valgrind", file, line, fn, expr ); + VG_(printf)("Please report this bug to me at: %s\n\n", EMAIL_ADDR); + VG_(shutdown_logging)(); + /* vg_restore_SIGABRT(); */ + VG_(exit)(1); +} + +void VG_(panic) ( Char* str ) +{ + VG_(printf)("\nvalgrind: the `impossible' happened:\n %s\n", str); + VG_(printf)("Basic block ctr is approximately %llu\n", VG_(bbs_done) ); + VG_(printf)("Please report this bug to me at: %s\n\n", EMAIL_ADDR); + VG_(shutdown_logging)(); + /* vg_restore_SIGABRT(); */ + VG_(exit)(1); +} + +#undef EMAIL_ADDR + + +/* --------------------------------------------------------------------- + Primitive support for reading files. + ------------------------------------------------------------------ */ + +/* Returns -1 on failure. */ +Int VG_(open_read) ( Char* pathname ) +{ + Int fd; + /* VG_(printf)("vg_open_read %s\n", pathname ); */ + + /* This gets a segmentation fault if pathname isn't a valid file. + I don't know why. It seems like the call to open is getting + intercepted and messed with by glibc ... */ + /* fd = open( pathname, O_RDONLY ); */ + /* ... 
so we go direct to the horse's mouth, which seems to work + ok: */ + const int O_RDONLY = 0; /* See /usr/include/bits/fcntl.h */ + fd = vg_do_syscall3(__NR_open, (UInt)pathname, O_RDONLY, 0); + /* VG_(printf)("result = %d\n", fd); */ + if (VG_(is_kerror)(fd)) fd = -1; + return fd; +} + + +void VG_(close) ( Int fd ) +{ + vg_do_syscall1(__NR_close, fd); +} + + +Int VG_(read) ( Int fd, void* buf, Int count) +{ + Int res; + /* res = read( fd, buf, count ); */ + res = vg_do_syscall3(__NR_read, fd, (UInt)buf, count); + if (VG_(is_kerror)(res)) res = -1; + return res; +} + +Int VG_(write) ( Int fd, void* buf, Int count) +{ + Int res; + /* res = write( fd, buf, count ); */ + res = vg_do_syscall3(__NR_write, fd, (UInt)buf, count); + if (VG_(is_kerror)(res)) res = -1; + return res; +} + +/* Misc functions looking for a proper home. */ + +/* We do getenv without libc's help by snooping around in + VG_(client_env) as determined at startup time. */ +Char* VG_(getenv) ( Char* varname ) +{ + Int i, n; + n = VG_(strlen)(varname); + for (i = 0; VG_(client_envp)[i] != NULL; i++) { + Char* s = VG_(client_envp)[i]; + if (VG_(strncmp)(varname, s, n) == 0 && s[n] == '=') { + return & s[n+1]; + } + } + return NULL; +} + +/* You'd be amazed how many places need to know the current pid. */ +Int VG_(getpid) ( void ) +{ + Int res; + /* res = getpid(); */ + res = vg_do_syscall0(__NR_getpid); + return res; +} + + +/* --------------------------------------------------------------------- + Primitive support for bagging memory via mmap. 
+ ------------------------------------------------------------------ */ + +void* VG_(get_memory_from_mmap) ( Int nBytes ) +{ + static UInt tot_alloc = 0; + void* p = VG_(mmap)( 0, nBytes, + VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC, + VKI_MAP_PRIVATE|VKI_MAP_ANONYMOUS, -1, 0 ); + if (p != ((void*)(-1))) { + tot_alloc += (UInt)nBytes; + if (0) + VG_(printf)("get_memory_from_mmap: %d tot, %d req\n", + tot_alloc, nBytes); + return p; + } + VG_(printf)("vg_get_memory_from_mmap failed on request of %d\n", + nBytes); + VG_(panic)("vg_get_memory_from_mmap: out of memory! Fatal! Bye!\n"); +} + + +/*--------------------------------------------------------------------*/ +/*--- end vg_mylibc.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_procselfmaps.c b/vg_procselfmaps.c new file mode 100644 index 000000000..b0733539f --- /dev/null +++ b/vg_procselfmaps.c @@ -0,0 +1,201 @@ + +/*--------------------------------------------------------------------*/ +/*--- A simple parser for /proc/self/maps on Linux 2.4.X ---*/ +/*--- vg_procselfmaps.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + + +#include "vg_include.h" + + +/* static ... to keep it out of the stack frame. */ + +static Char procmap_buf[M_PROCMAP_BUF]; + + +/* Helper fns. */ + +static Int hexdigit ( Char c ) +{ + if (c >= '0' && c <= '9') return (Int)(c - '0'); + if (c >= 'a' && c <= 'f') return 10 + (Int)(c - 'a'); + if (c >= 'A' && c <= 'F') return 10 + (Int)(c - 'A'); + return -1; +} + +static Int readchar ( Char* buf, Char* ch ) +{ + if (*buf == 0) return 0; + *ch = *buf; + return 1; +} + +static Int readhex ( Char* buf, UInt* val ) +{ + Int n = 0; + *val = 0; + while (hexdigit(*buf) >= 0) { + *val = (*val << 4) + hexdigit(*buf); + n++; buf++; + } + return n; +} + + + +/* Read /proc/self/maps. For each map entry, call + record_mapping, passing it, in this order: + + start address in memory + length + r permissions char; either - or r + w permissions char; either - or w + x permissions char; either - or x + offset in file, or zero if no file + filename, zero terminated, or NULL if no file + + So the sig of the called fn might be + + void (*record_mapping)( Addr start, UInt size, + Char r, Char w, Char x, + UInt foffset, UChar* filename ) + + Note that the supplied filename is transiently stored; record_mapping + should make a copy if it wants to keep it. + + If there's a syntax error or other failure, just abort. +*/ + +void VG_(read_procselfmaps) ( + void (*record_mapping)( Addr, UInt, Char, Char, Char, UInt, UChar* ) +) +{ + Int i, j, n_tot, n_chunk, fd, i_eol; + Addr start, endPlusOne; + UChar* filename; + UInt foffset; + UChar rr, ww, xx, pp, ch; + + /* Read the initial memory mapping from the /proc filesystem. 
*/ + fd = VG_(open_read) ( "/proc/self/maps" ); + if (fd == -1) { + VG_(message)(Vg_UserMsg, "FATAL: can't open /proc/self/maps"); + VG_(exit)(1); + } + n_tot = 0; + do { + n_chunk = VG_(read) ( fd, &procmap_buf[n_tot], M_PROCMAP_BUF - n_tot ); + n_tot += n_chunk; + } while ( n_chunk > 0 && n_tot < M_PROCMAP_BUF ); + VG_(close)(fd); + if (n_tot >= M_PROCMAP_BUF-5) { + VG_(message)(Vg_UserMsg, "FATAL: M_PROCMAP_BUF is too small; " + "increase it and recompile"); + VG_(exit)(1); + } + if (n_tot == 0) { + VG_(message)(Vg_UserMsg, "FATAL: I/O error on /proc/self/maps" ); + VG_(exit)(1); + } + procmap_buf[n_tot] = 0; + if (0) + VG_(message)(Vg_DebugMsg, "raw:\n%s", procmap_buf ); + + /* Ok, it's safely aboard. Parse the entries. */ + + i = 0; + while (True) { + if (i >= n_tot) break; + + /* Read (without fscanf :) the pattern %8x-%8x %c%c%c%c %8x */ + j = readhex(&procmap_buf[i], &start); + if (j > 0) i += j; else goto syntaxerror; + j = readchar(&procmap_buf[i], &ch); + if (j == 1 && ch == '-') i += j; else goto syntaxerror; + j = readhex(&procmap_buf[i], &endPlusOne); + if (j > 0) i += j; else goto syntaxerror; + + j = readchar(&procmap_buf[i], &ch); + if (j == 1 && ch == ' ') i += j; else goto syntaxerror; + + j = readchar(&procmap_buf[i], &rr); + if (j == 1 && (rr == 'r' || rr == '-')) i += j; else goto syntaxerror; + j = readchar(&procmap_buf[i], &ww); + if (j == 1 && (ww == 'w' || ww == '-')) i += j; else goto syntaxerror; + j = readchar(&procmap_buf[i], &xx); + if (j == 1 && (xx == 'x' || xx == '-')) i += j; else goto syntaxerror; + /* I haven't a clue what this last field means. 
*/ + j = readchar(&procmap_buf[i], &pp); + if (j == 1 && (pp == 'p' || pp == '-' || pp == 's')) + i += j; else goto syntaxerror; + + j = readchar(&procmap_buf[i], &ch); + if (j == 1 && ch == ' ') i += j; else goto syntaxerror; + + j = readhex(&procmap_buf[i], &foffset); + if (j > 0) i += j; else goto syntaxerror; + + goto read_line_ok; + + syntaxerror: + VG_(message)(Vg_UserMsg, "FATAL: syntax error reading /proc/self/maps"); + { Int k; + VG_(printf)("last 50 chars: `"); + for (k = i-50; k <= i; k++) VG_(printf)("%c", procmap_buf[k]); + VG_(printf)("'\n"); + } + VG_(exit)(1); + + read_line_ok: + /* Try and find the name of the file mapped to this segment, if + it exists. */ + while (procmap_buf[i] != '\n' && i < M_PROCMAP_BUF-1) i++; + i_eol = i; + i--; + while (!VG_(isspace)(procmap_buf[i]) && i >= 0) i--; + i++; + if (i < i_eol-1 && procmap_buf[i] == '/') { + filename = &procmap_buf[i]; + filename[i_eol - i] = '\0'; + } else { + filename = NULL; + foffset = 0; + } + + (*record_mapping) ( start, endPlusOne-start, + rr, ww, xx, + foffset, filename ); + + i = i_eol + 1; + } +} + +/*--------------------------------------------------------------------*/ +/*--- end vg_procselfmaps.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_profile.c b/vg_profile.c new file mode 100644 index 000000000..ed10eded2 --- /dev/null +++ b/vg_profile.c @@ -0,0 +1,112 @@ + +/*--------------------------------------------------------------------*/ +/*--- Profiling machinery -- not for release builds! ---*/ +/*--- vg_profile.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. 
+ + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" + +#ifdef VG_PROFILE + +/* get rid of these, if possible */ +#include +#include + +#define VGP_PAIR(enumname,str) str +static const Char* vgp_names[VGP_M_CCS] = { VGP_LIST }; +#undef VGP_PAIR + +static Int vgp_nticks; +static Int vgp_counts[VGP_M_CCS]; +static Int vgp_entries[VGP_M_CCS]; + +static Int vgp_sp; +static VgpCC vgp_stack[VGP_M_STACK]; + +void VGP_(tick) ( int sigNo ) +{ + Int cc; + vgp_nticks++; + cc = vgp_stack[vgp_sp]; + vg_assert(cc >= 0 && cc < VGP_M_CCS); + vgp_counts[ cc ]++; +} + +void VGP_(init_profiling) ( void ) +{ + struct itimerval value; + Int i, ret; + + for (i = 0; i < VGP_M_CCS; i++) + vgp_counts[i] = vgp_entries[i] = 0; + + vgp_nticks = 0; + vgp_sp = -1; + VGP_(pushcc) ( VgpRun ); + + value.it_interval.tv_sec = 0; + value.it_interval.tv_usec = 10 * 1000; + value.it_value = value.it_interval; + + signal(SIGPROF, VGP_(tick) ); + ret = setitimer(ITIMER_PROF, &value, NULL); + if (ret != 0) VG_(panic)("vgp_init_profiling"); +} + +void VGP_(done_profiling) ( void ) +{ + Int i; + VG_(printf)("Profiling done, %d ticks\n", vgp_nticks); + for (i = 0; i < VGP_M_CCS; 
i++) + VG_(printf)("%2d: %4d (%3d %%%%) ticks, %8d entries for %s\n", + i, vgp_counts[i], + (Int)(1000.0 * (double)vgp_counts[i] / (double)vgp_nticks), + vgp_entries[i], + vgp_names[i] ); +} + +void VGP_(pushcc) ( VgpCC cc ) +{ + if (vgp_sp >= VGP_M_STACK-1) VG_(panic)("vgp_pushcc"); + vgp_sp++; + vgp_stack[vgp_sp] = cc; + vgp_entries[ cc ] ++; +} + +void VGP_(popcc) ( void ) +{ + if (vgp_sp <= 0) VG_(panic)("vgp_popcc"); + vgp_sp--; +} + +#endif /* VG_PROFILE */ + +/*--------------------------------------------------------------------*/ +/*--- end vg_profile.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_signals.c b/vg_signals.c new file mode 100644 index 000000000..ed7ef67cb --- /dev/null +++ b/vg_signals.c @@ -0,0 +1,823 @@ + +/*--------------------------------------------------------------------*/ +/*--- Implementation of POSIX signals. ---*/ +/*--- vg_signals.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. 
+ + The GNU General Public License is contained in the file LICENSE. +*/ + + +#include "vg_include.h" +#include "vg_constants.h" +#include "vg_unsafe.h" + + +/* --------------------------------------------------------------------- + An implementation of signal sets and other grunge, identical to + that in the target kernels (Linux 2.2.X and 2.4.X). + ------------------------------------------------------------------ */ + + + +/* --------------------------------------------------------------------- + Signal state for this process. + ------------------------------------------------------------------ */ + +/* For each signal, the current action. Is NULL if the client hasn't + asked to handle the signal. Consequently, we expect never to + receive a signal for which the corresponding handler is NULL. */ +void* VG_(sighandler)[VKI_KNSIG]; + +/* For each signal, either: + -- VG_SIGIDLE if not pending and not running + -- Handler address if pending + -- VG_SIGRUNNING if the handler is running and hasn't (returned or + unblocked the signal using sigprocmask following a longjmp out + of the handler). + */ +#define VG_SIGIDLE ((void*)0) +#define VG_SIGRUNNING ((void*)1) + +void* VG_(sigpending)[VKI_KNSIG]; + +/* See decl in vg_include.h for explanation. */ +Int VG_(syscall_depth) = 0; + + +/* --------------------------------------------------------------------- + The signal simulation proper. A simplified version of what the + Linux kernel does. + ------------------------------------------------------------------ */ + +/* A structure in which to save the application's registers + during the execution of signal handlers. */ + +typedef + struct { + UInt retaddr; /* Sig handler's (bogus) return address */ + Int sigNo; /* The arg to the sig handler. 
*/ + UInt magicPI; + UInt fpustate[VG_SIZE_OF_FPUSTATE_W]; + UInt eax; + UInt ecx; + UInt edx; + UInt ebx; + UInt ebp; + UInt esp; + UInt esi; + UInt edi; + Addr eip; + UInt eflags; + UInt magicE; + } + VgSigContext; + + + +/* This is the bogus return address which the implementation + of RET in vg_cpu.c checks for. If it spots a return to + here, it calls vg_signal_returns(). We should never actually + enter this procedure, neither on the real nor simulated CPU. +*/ +void VG_(signalreturn_bogusRA) ( void ) +{ + VG_(panic) ( "vg_signalreturn_bogusRA -- something is badly wrong" ); +} + + +/* Set up a stack frame (VgSigContext) for the client's signal + handler. This includes the signal number and a bogus return + address. */ +static +void vg_push_signal_frame ( int sigNo ) +{ + Int i; + UInt esp; + VgSigContext sigctx; + for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) + sigctx.fpustate[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i]; + + sigctx.magicPI = 0x31415927; + sigctx.magicE = 0x27182818; + sigctx.eax = VG_(baseBlock)[VGOFF_(m_eax)]; + sigctx.ecx = VG_(baseBlock)[VGOFF_(m_ecx)]; + sigctx.edx = VG_(baseBlock)[VGOFF_(m_edx)]; + sigctx.ebx = VG_(baseBlock)[VGOFF_(m_ebx)]; + sigctx.ebp = VG_(baseBlock)[VGOFF_(m_ebp)]; + sigctx.esp = VG_(baseBlock)[VGOFF_(m_esp)]; + sigctx.esi = VG_(baseBlock)[VGOFF_(m_esi)]; + sigctx.edi = VG_(baseBlock)[VGOFF_(m_edi)]; + sigctx.eflags = VG_(baseBlock)[VGOFF_(m_eflags)]; + sigctx.eip = VG_(baseBlock)[VGOFF_(m_eip)]; + sigctx.retaddr = (UInt)(&VG_(signalreturn_bogusRA)); + sigctx.sigNo = sigNo; + + esp = VG_(baseBlock)[VGOFF_(m_esp)]; + vg_assert((sizeof(VgSigContext) & 0x3) == 0); + + esp -= sizeof(VgSigContext); + for (i = 0; i < sizeof(VgSigContext)/4; i++) + ((UInt*)esp)[i] = ((UInt*)(&sigctx))[i]; + + /* Make sigNo and retaddr fields readable -- at 0(%ESP) and 4(%ESP) */ + if (VG_(clo_instrument)) { + VGM_(make_readable) ( ((Addr)esp)+0 ,4 ); + VGM_(make_readable) ( ((Addr)esp)+4 ,4 ); + } + + VG_(baseBlock)[VGOFF_(m_esp)] = esp; + 
VG_(baseBlock)[VGOFF_(m_eip)] = (Addr)VG_(sigpending)[sigNo]; + /* + VG_(printf)("pushed signal frame; %%ESP now = %p, next %%EBP = %p\n", + esp, VG_(baseBlock)[VGOFF_(m_eip)]); + */ +} + + +/* Clear the signal frame created by vg_push_signal_frame, restore the + simulated machine state, and return the signal number that the + frame was for. */ +static +Int vg_pop_signal_frame ( void ) +{ + UInt esp; + Int sigNo, i; + VgSigContext* sigctx; + /* esp is now pointing at the magicPI word on the stack, viz, + eight bytes above the bottom of the vg_sigcontext. + */ + esp = VG_(baseBlock)[VGOFF_(m_esp)]; + sigctx = (VgSigContext*)(esp-4); + + vg_assert(sigctx->magicPI == 0x31415927); + vg_assert(sigctx->magicE == 0x27182818); + if (VG_(clo_trace_signals)) + VG_(message)(Vg_DebugMsg, "vg_pop_signal_frame: valid magic"); + + /* restore machine state */ + for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) + VG_(baseBlock)[VGOFF_(m_fpustate) + i] = sigctx->fpustate[i]; + + /* Mark the sigctx structure as nonaccessible. Has to happen + _before_ vg_m_state.m_esp is given a new value.*/ + if (VG_(clo_instrument)) + VGM_(handle_esp_assignment) ( sigctx->esp ); + + /* Restore machine state from the saved context. */ + VG_(baseBlock)[VGOFF_(m_eax)] = sigctx->eax; + VG_(baseBlock)[VGOFF_(m_ecx)] = sigctx->ecx; + VG_(baseBlock)[VGOFF_(m_edx)] = sigctx->edx; + VG_(baseBlock)[VGOFF_(m_ebx)] = sigctx->ebx; + VG_(baseBlock)[VGOFF_(m_ebp)] = sigctx->ebp; + VG_(baseBlock)[VGOFF_(m_esp)] = sigctx->esp; + VG_(baseBlock)[VGOFF_(m_esi)] = sigctx->esi; + VG_(baseBlock)[VGOFF_(m_edi)] = sigctx->edi; + VG_(baseBlock)[VGOFF_(m_eflags)] = sigctx->eflags; + VG_(baseBlock)[VGOFF_(m_eip)] = sigctx->eip; + sigNo = sigctx->sigNo; + return sigNo; +} + + +/* A handler is returning. Restore the machine state from the stacked + VgSigContext and continue with whatever was going on before the + handler ran. 
*/ + +void VG_(signal_returns) ( void ) +{ + Int sigNo, ret; + vki_ksigset_t block_procmask; + vki_ksigset_t saved_procmask; + + /* Block host signals ... */ + VG_(ksigfillset)(&block_procmask); + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &block_procmask, &saved_procmask); + vg_assert(ret == 0); + + sigNo = vg_pop_signal_frame(); + + /* You would have thought that the following assertion made sense + here: + + vg_assert(vg_sigpending[sigNo] == VG_SIGRUNNING); + + Alas, you would be wrong. If a sigprocmask has been intercepted + and it unblocks this signal, then vg_sigpending[sigNo] will + either be VG_SIGIDLE, or (worse) another instance of it will + already have arrived, so that the stored value is that of the + handler. + + Note that these anomalies can only occur when a signal handler + unblocks its own signal inside itself AND THEN RETURNS anyway + (which seems a bizarre thing to do). + + Ho Hum. This seems like a race condition which surely isn't + handled correctly. */ + + vg_assert(sigNo >= 1 && sigNo < VKI_KNSIG); + VG_(sigpending)[sigNo] = VG_SIGIDLE; + + /* Unlock and return. */ + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &saved_procmask, NULL); + vg_assert(ret == 0); + + /* The main dispatch loop now continues at vg_m_eip. */ +} + + +/* Restore the default host behaviour of SIGABRT, and unblock it, + so we can exit the simulator cleanly by doing exit/abort/assert fail. +*/ +void VG_(restore_SIGABRT) ( void ) +{ + vki_ksigset_t set; + vki_ksigaction act; + act.ksa_flags = VKI_SA_RESTART; + act.ksa_handler = VKI_SIG_DFL; + VG_(ksigemptyset)(&act.ksa_mask); + + VG_(ksigemptyset)(&set); + VG_(ksigaddset)(&set,VKI_SIGABRT); + + /* If this doesn't work, tough. Don't check return code. */ + VG_(ksigaction)(VKI_SIGABRT, &act, NULL); + VG_(ksigprocmask)(VKI_SIG_UNBLOCK, &set, NULL); +} + + +/* Deliver all pending signals, by building stack frames for their + handlers. 
*/ +void VG_(deliver_signals) ( void ) +{ + vki_ksigset_t block_procmask; + vki_ksigset_t saved_procmask; + Int ret, sigNo; + Bool found; + + /* A cheap check. We don't need to have exclusive access + to the queue, because in the worst case, vg_oursignalhandler + will add signals, causing us to return, thinking there + are no signals to deliver, when in fact there are some. + A subsequent call here will handle the signal(s) we missed. + */ + found = False; + for (sigNo = 1; sigNo < VKI_KNSIG; sigNo++) + if (VG_(sigpending)[sigNo] != VG_SIGIDLE && + VG_(sigpending)[sigNo] != VG_SIGRUNNING) found = True; + + if (!found) return; + + /* Now we have to do it properly. Get exclusive access by + blocking all the host's signals. That means vg_oursignalhandler + can't run whilst we are messing with stuff. + */ + VG_(ksigfillset)(&block_procmask); + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &block_procmask, &saved_procmask); + vg_assert(ret == 0); + + for (sigNo = 1; sigNo < VKI_KNSIG; sigNo++) { + if (VG_(sigpending)[sigNo] == VG_SIGIDLE || + VG_(sigpending)[sigNo] == VG_SIGRUNNING) continue; + + if (VG_(clo_trace_signals)) + VG_(message)(Vg_DebugMsg,"delivering signal %d", sigNo ); + + /* Create a signal delivery frame, and set the client's %ESP and + %EIP so that when execution continues, we will enter the + signal handler with the frame on top of the client's stack, + as it expects. */ + vg_push_signal_frame ( sigNo ); + + /* Signify that the signal has been delivered. */ + VG_(sigpending)[sigNo] = VG_SIGRUNNING; + } + + /* Unlock and return. */ + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &saved_procmask, NULL); + vg_assert(ret == 0); + return; +} + + +/* ----------- HACK ALERT ----------- */ +/* Note carefully that this runs with all host signals disabled! 
*/ +static +void vg_deliver_signal_immediately ( Int sigNo ) +{ + Int n_bbs_done; + Int sigNo2; + Addr next_orig_addr; + Addr next_trans_addr; + + if (VG_(clo_verbosity) > 0 + && (True || VG_(clo_trace_signals))) + VG_(message)(Vg_DebugExtraMsg, + "deliver signal %d immediately: BEGIN", sigNo ); + /* VG_(printf)("resumption addr is %p\n", + VG_(baseBlock)[VGOFF_(m_eip)]); */ + + vg_push_signal_frame ( sigNo ); + n_bbs_done = 0; + + /* Single-step the client (ie, run the handler) until it jumps to + VG_(signalreturn_bogusRA) */ + + while (True) { + + if (n_bbs_done >= VG_MAX_BBS_IN_IMMEDIATE_SIGNAL) + VG_(unimplemented)( + "handling signal whilst client blocked in syscall: " + "handler runs too long" + ); + + next_orig_addr = VG_(baseBlock)[VGOFF_(m_eip)]; + + if (next_orig_addr == (Addr)(&VG_(trap_here))) + VG_(unimplemented)( + "handling signal whilst client blocked in syscall: " + "handler calls malloc (et al)" + ); + + /* VG_(printf)("next orig addr = %p\n", next_orig_addr); */ + if (next_orig_addr == (Addr)(&VG_(signalreturn_bogusRA))) + break; + + next_trans_addr = VG_(search_transtab) ( next_orig_addr ); + if (next_trans_addr == (Addr)NULL) { + VG_(create_translation_for) ( next_orig_addr ); + next_trans_addr = VG_(search_transtab) ( next_orig_addr ); + } + + vg_assert(next_trans_addr != (Addr)NULL); + next_orig_addr = VG_(run_singleton_translation)(next_trans_addr); + VG_(baseBlock)[VGOFF_(m_eip)] = next_orig_addr; + n_bbs_done++; + } + + sigNo2 = vg_pop_signal_frame(); + vg_assert(sigNo2 == sigNo); + + if (VG_(clo_verbosity) > 0 + && (True || VG_(clo_trace_signals))) + VG_(message)(Vg_DebugExtraMsg, + "deliver signal %d immediately: END, %d bbs done", + sigNo, n_bbs_done ); + + /* Invalidate the tt_fast cache. We've been (potentially) adding + translations and even possibly doing LRUs without keeping it up + to date, so we'd better nuke it before going any further, to + avoid inconsistencies with the main TT/TC structure. 
*/ + VG_(invalidate_tt_fast)(); +} + + +/* ----------- end of HACK ALERT ----------- */ + + +/* Receive a signal from the host, and either discard it or park it in + the queue of pending signals. All other signals will be blocked + when this handler runs. Runs with all host signals blocked, so as + to have mutual exclusion when adding stuff to the queue. */ + +static void VG_(oursignalhandler) ( Int sigNo ) +{ + Int ret; + vki_ksigset_t block_procmask; + vki_ksigset_t saved_procmask; + + if (VG_(clo_trace_signals)) { + VG_(start_msg)(Vg_DebugMsg); + VG_(add_to_msg)("signal %d arrived ... ", sigNo ); + } + vg_assert(sigNo >= 1 && sigNo < VKI_KNSIG); + + /* Sanity check. Ensure we're really running on the signal stack + we asked for. */ + if ( !( + ((Char*)(&(VG_(sigstack)[0])) <= (Char*)(&ret)) + && + ((Char*)(&ret) < (Char*)(&(VG_(sigstack)[10000]))) + ) + ) { + VG_(message)(Vg_DebugMsg, "FATAL: signal delivered on the wrong stack?!"); + VG_(message)(Vg_DebugMsg, "A possible workaround follows. Please tell me"); + VG_(message)(Vg_DebugMsg, "(jseward@acm.org) if the suggested workaround doesn't help."); + VG_(unimplemented) + ("support for progs compiled with -p/-pg; rebuild your prog without -p/-pg"); + } + + vg_assert((Char*)(&(VG_(sigstack)[0])) <= (Char*)(&ret)); + vg_assert((Char*)(&ret) < (Char*)(&(VG_(sigstack)[10000]))); + + if (sigNo == VKI_SIGABRT && VG_(sighandler)[sigNo] == NULL) { + /* We get here if SIGABRT is delivered and the client hasn't + asked to catch it. The aim is to exit in a controlled + manner. */ + if (VG_(clo_trace_signals)) { + VG_(add_to_msg)("catching SIGABRT"); + VG_(end_msg)(); + } + VG_(ksignal)(VKI_SIGABRT, VKI_SIG_DFL); + VG_(interrupt_reason) = VG_Y_EXIT; + VG_(longjmpd_on_signal) = VKI_SIGABRT; + __builtin_longjmp(VG_(toploop_jmpbuf),1); + } + + /* Block all host signals. 
*/ + VG_(ksigfillset)(&block_procmask); + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &block_procmask, &saved_procmask); + vg_assert(ret == 0); + + if (VG_(sighandler)[sigNo] == NULL) { + if (VG_(clo_trace_signals)) { + VG_(add_to_msg)("unexpected!"); + VG_(end_msg)(); + } + VG_(panic)("vg_oursignalhandler: unexpected signal"); + } + + /* Decide what to do with it. */ + if (VG_(sigpending)[sigNo] == VG_SIGRUNNING) { + /* Already running; ignore it. */ + if (VG_(clo_trace_signals)) { + VG_(add_to_msg)("already running; discarded" ); + VG_(end_msg)(); + } + } + else + if (VG_(sigpending)[sigNo] != VG_SIGRUNNING && + VG_(sigpending)[sigNo] != VG_SIGIDLE) { + /* Not running and not idle == pending; ignore it. */ + if (VG_(clo_trace_signals)) { + VG_(add_to_msg)("already pending; discarded" ); + VG_(end_msg)(); + } + } + else { + /* Ok, we'd better deliver it to the client, one way or another. */ + vg_assert(VG_(sigpending)[sigNo] == VG_SIGIDLE); + + if (VG_(syscall_depth) == 0) { + /* The usual case; delivering a signal to the client, and the + client is not currently in a syscall. Queue it up for + delivery at some point in the future. */ + VG_(sigpending)[sigNo] = VG_(sighandler)[sigNo]; + if (VG_(clo_trace_signals)) { + VG_(add_to_msg)("queued" ); + VG_(end_msg)(); + } + } else { + /* The nasty case, which was causing kmail to freeze up: the + client is (presumably blocked) in a syscall. We have to + deliver the signal right now, because it may be that + running the sighandler is the only way that the syscall + will be able to return. In which case, if we don't do + that, the client will deadlock. */ + if (VG_(clo_trace_signals)) { + VG_(add_to_msg)("delivering immediately" ); + VG_(end_msg)(); + } + /* Note that this runs with all host signals blocked. 
*/ + VG_(sigpending)[sigNo] = VG_(sighandler)[sigNo]; + vg_deliver_signal_immediately(sigNo); + VG_(sigpending)[sigNo] = VG_SIGIDLE; + /* VG_(printf)("resuming at %p\n", VG_(baseBlock)[VGOFF_(m_eip)]); */ + } + } + + /* We've finished messing with the queue, so re-enable host signals. */ + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &saved_procmask, NULL); + + vg_assert(ret == 0); + if (sigNo == VKI_SIGSEGV || sigNo == VKI_SIGBUS + || sigNo == VKI_SIGFPE || sigNo == VKI_SIGILL) { + /* Can't continue; must longjmp and thus enter the sighandler + immediately. */ + VG_(longjmpd_on_signal) = sigNo; + __builtin_longjmp(VG_(toploop_jmpbuf),1); + } +} + + +/* The outer insn loop calls here to reenable a host signal if + vg_oursighandler longjmp'd. +*/ +void VG_(unblock_host_signal) ( Int sigNo ) +{ + Int ret; + vki_ksigset_t set; + VG_(ksigemptyset)(&set); + ret = VG_(ksigaddset)(&set,sigNo); + vg_assert(ret == 0); + ret = VG_(ksigprocmask)(VKI_SIG_UNBLOCK,&set,NULL); + vg_assert(ret == 0); +} + + +static __attribute((unused)) +void pp_vg_ksigaction ( vki_ksigaction* sa ) +{ + Int i; + VG_(printf)("vg_ksigaction: handler %p, flags 0x%x, restorer %p\n", + sa->ksa_handler, sa->ksa_flags, sa->ksa_restorer); + VG_(printf)("vg_ksigaction: { "); + for (i = 1; i < VKI_KNSIG; i++) + if (VG_(ksigismember(&(sa->ksa_mask),i))) + VG_(printf)("%d ", i); + VG_(printf)("}\n"); +} + + +/* Copy the process' real signal state to the sim state. Whilst + doing this, block all real signals. +*/ +void VG_(sigstartup_actions) ( void ) +{ + Int i, ret; + + vki_ksigset_t block_procmask; + vki_ksigset_t saved_procmask; + vki_kstack_t altstack_info; + vki_ksigaction sa; + + /* VG_(printf)("SIGSTARTUP\n"); */ + /* Block all signals. + saved_procmask remembers the previous mask. */ + VG_(ksigfillset)(&block_procmask); + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &block_procmask, &saved_procmask); + vg_assert(ret == 0); + + /* Register an alternative stack for our own signal handler to run + on. 
*/ + altstack_info.ss_sp = &(VG_(sigstack)[0]); + altstack_info.ss_size = 10000 * sizeof(UInt); + altstack_info.ss_flags = 0; + ret = VG_(ksigaltstack)(&altstack_info, NULL); + if (ret != 0) { + VG_(panic)( + "vg_sigstartup_actions: couldn't install alternative sigstack"); + } + if (VG_(clo_trace_signals)) { + VG_(message)(Vg_DebugExtraMsg, + "vg_sigstartup_actions: sigstack installed ok"); + } + + /* Set initial state for the signal simulation. */ + for (i = 1; i < VKI_KNSIG; i++) + VG_(sighandler[i]) = VG_(sigpending[i]) = NULL; + + for (i = 1; i < VKI_KNSIG; i++) { + + /* Get the old host action */ + ret = VG_(ksigaction)(i, NULL, &sa); + vg_assert(ret == 0); + + /* If there's already a handler set, record it, then route the + signal through to our handler. */ + if (sa.ksa_handler != VKI_SIG_IGN && sa.ksa_handler != VKI_SIG_DFL) { + if (VG_(clo_trace_signals)) + VG_(printf)("snaffling handler 0x%x for signal %d\n", + (Addr)(sa.ksa_handler), i ); + if ((sa.ksa_flags & VKI_SA_ONSTACK) != 0) + VG_(unimplemented) + ("signals on an alternative stack (SA_ONSTACK)"); + VG_(sighandler[i]) = sa.ksa_handler; + sa.ksa_handler = &VG_(oursignalhandler); + ret = VG_(ksigaction)(i, &sa, NULL); + vg_assert(ret == 0); + } + } + + VG_(ksignal)(VKI_SIGABRT, &VG_(oursignalhandler)); + + /* Finally, restore the blocking mask. */ + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &saved_procmask, NULL); + vg_assert(ret == 0); +} + + +/* Copy the process' sim signal state to the real state, + for when we transfer from the simulated to real CPU. + PROBLEM: what if we're running a signal handler when we + get here? Hmm. + I guess we wind up in vg_signalreturn_bogusRA, *or* the + handler has done/will do a longjmp, in which case we're ok. + + It is important (see vg_startup.S) that this proc does not + change the state of the real FPU, since it is called when + running the program on the real CPU. 
+*/ +void VG_(sigshutdown_actions) ( void ) +{ + Int i, ret; + + vki_ksigset_t block_procmask; + vki_ksigset_t saved_procmask; + vki_ksigaction sa; + + /* Block all signals. */ + VG_(ksigfillset)(&block_procmask); + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &block_procmask, &saved_procmask); + vg_assert(ret == 0); + + /* copy the sim signal actions to the real ones. */ + for (i = 1; i < VKI_KNSIG; i++) { + if (i == VKI_SIGKILL || i == VKI_SIGSTOP) continue; + if (VG_(sighandler)[i] == NULL) continue; + ret = VG_(ksigaction)(i, NULL, &sa); + vg_assert(ret == 0); + sa.ksa_handler = VG_(sighandler)[i]; + ret = VG_(ksigaction)(i, &sa, NULL); + } + + /* Finally, copy the simulated process mask to the real one. */ + ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, &saved_procmask, NULL); + vg_assert(ret == 0); +} + + +/* --------------------------------------------------------------------- + Handle signal-related syscalls from the simulatee. + ------------------------------------------------------------------ */ + +/* Do more error checking? */ +void VG_(do__NR_sigaction) ( void ) +{ + UInt res; + void* our_old_handler; + vki_ksigaction* new_action; + vki_ksigaction* old_action; + UInt param1 + = VG_(baseBlock)[VGOFF_(m_ebx)]; /* int sigNo */ + UInt param2 + = VG_(baseBlock)[VGOFF_(m_ecx)]; /* k_sigaction* new_action */ + UInt param3 + = VG_(baseBlock)[VGOFF_(m_edx)]; /* k_sigaction* old_action */ + new_action = (vki_ksigaction*)param2; + old_action = (vki_ksigaction*)param3; + + if (VG_(clo_trace_signals)) + VG_(message)(Vg_DebugExtraMsg, + "__NR_sigaction: sigNo %d, " + "new 0x%x, old 0x%x, new flags 0x%x", + param1,(UInt)new_action,(UInt)old_action, + (UInt)(new_action ? new_action->ksa_flags : 0) ); + /* VG_(ppSigProcMask)(); */ + + if (param1 < 1 || param1 >= VKI_KNSIG) goto bad; + + our_old_handler = VG_(sighandler)[param1]; + /* VG_(printf)("old handler = 0x%x\n", our_old_handler); */ + /* If a new handler has been specified, mess with its handler. 
*/ + if (new_action) { + if (new_action->ksa_handler == VKI_SIG_IGN || + new_action->ksa_handler == VKI_SIG_DFL) { + VG_(sighandler)[param1] = NULL; + VG_(sigpending)[param1] = NULL; + /* Dangerous! Could lose signals like this. */ + } else { + /* VG_(printf)("new handler = 0x%x\n", new_action->ksa_handler); */ + /* The client isn't allowed to use an alternative signal + stack. We, however, must. */ + if ((new_action->ksa_flags & VKI_SA_ONSTACK) != 0) + VG_(unimplemented) + ("signals on an alternative stack (SA_ONSTACK)"); + new_action->ksa_flags |= VKI_SA_ONSTACK; + VG_(sighandler)[param1] = new_action->ksa_handler; + new_action->ksa_handler = &VG_(oursignalhandler); + } + } + + KERNEL_DO_SYSCALL(res); + /* VG_(printf)("RES = %d\n", res); */ + /* If the client asks for the old handler, maintain our fiction + by stuffing in the handler it thought it asked for ... */ + if (old_action) { + if (old_action->ksa_handler == VKI_SIG_IGN || + old_action->ksa_handler == VKI_SIG_DFL) { + /* No old action; we should have a NULL handler. */ + vg_assert(our_old_handler == NULL); + } else { + /* There's a handler. */ + if (param1 != VKI_SIGKILL && param1 != VKI_SIGABRT) { + vg_assert(old_action->ksa_handler == &VG_(oursignalhandler)); + vg_assert((old_action->ksa_flags & VKI_SA_ONSTACK) != 0); + } + old_action->ksa_handler = our_old_handler; + /* Since the client is not allowed to ask for an alternative + sig stack, unset the bit for anything we pass back to + it. */ + old_action->ksa_flags &= ~VKI_SA_ONSTACK; + } + } + + VG_(ksignal)(VKI_SIGABRT, &VG_(oursignalhandler)); + goto good; + + good: + VG_(baseBlock)[VGOFF_(m_eax)] = (UInt)0; + return; + + bad: + VG_(message)(Vg_UserMsg, + "Warning: bad signal number %d in __NR_sigaction.", + param1); + VG_(baseBlock)[VGOFF_(m_eax)] = (UInt)(-1); + return; +} + + +/* The kernel handles sigprocmask in the usual way, but we also need + to inspect it, so as to spot requests to unblock signals. 
We then + inspect vg_sigpending, which records the current state of signal + delivery to the client. The problematic case is when a signal is + delivered to the client, in which case the relevant vg_sigpending + slot is set to VG_SIGRUNNING. This inhibits further signal + deliveries. This mechanism implements the POSIX requirement that a + signal is blocked in its own handler. + + If the handler returns normally, the slot is changed back to + VG_SIGIDLE, so that further instances of the signal can be + delivered. The problem occurs when the handler never returns, but + longjmps. POSIX mandates that you then have to do an explicit + setprocmask to re-enable the signal. That is what we try and spot + here. Although the call is passed to the kernel, we also need to + spot unblocked signals whose state is VG_SIGRUNNING, and change it + back to VG_SIGIDLE. +*/ +void VG_(do__NR_sigprocmask) ( Int how, vki_ksigset_t* set ) +{ + Int i; + if (VG_(clo_trace_signals)) + VG_(message)(Vg_DebugMsg, + "vg_do__NR_sigprocmask: how = %d (%s), set = %p", + how, + how==VKI_SIG_BLOCK ? "SIG_BLOCK" : ( + how==VKI_SIG_UNBLOCK ? "SIG_UNBLOCK" : ( + how==VKI_SIG_SETMASK ? "SIG_SETMASK" : "???")), + set + ); + + /* Sometimes this happens. I don't know what it signifies. */ + if (set == NULL) + return; + + /* Not interested in blocking of signals. */ + if (how == VKI_SIG_BLOCK) + return; + + /* Detect and ignore unknown action. 
*/ + if (how != VKI_SIG_UNBLOCK && how != VKI_SIG_SETMASK) { + VG_(message)(Vg_DebugMsg, + "sigprocmask: unknown `how' field %d", how); + return; + } + + for (i = 1; i < VKI_KNSIG; i++) { + Bool unblock_me = False; + if (how == VKI_SIG_SETMASK) { + if (!VG_(ksigismember)(set,i)) + unblock_me = True; + } else { /* how == SIG_UNBLOCK */ + if (VG_(ksigismember)(set,i)) + unblock_me = True; + } + if (unblock_me && VG_(sigpending)[i] == VG_SIGRUNNING) { + VG_(sigpending)[i] = VG_SIGIDLE; + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg, + "Warning: unblocking signal %d " + "due to sigprocmask", i ); + } + } +} + + + +/*--------------------------------------------------------------------*/ +/*--- end vg_signals.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_startup.S b/vg_startup.S new file mode 100644 index 000000000..3fa965cc8 --- /dev/null +++ b/vg_startup.S @@ -0,0 +1,221 @@ + +##--------------------------------------------------------------------## +##--- Startup and shutdown code for Valgrind. ---## +##--- vg_startup.S ---## +##--------------------------------------------------------------------## + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_constants.h" + + +#--------------------------------------------------------------------- +# +# Startup and shutdown code for Valgrind. Particularly hairy. +# +# The dynamic linker, ld.so, will run the contents of the .init +# section, once it has located, mmap-d and and linked the shared +# libraries needed by the program. Valgrind is itself a shared +# library. ld.so then runs code in the .init sections of each +# library in turn, in order to give them a chance to initialise +# themselves. We hijack this mechanism. Our startup routine +# does return -- and execution continues -- except on the +# synthetic CPU, not the real one. But ld.so, and the program +# it is starting, cant tell the difference. +# +# The management apologise for the lack of apostrophes in these +# comments. GNU as seems to object to them, for some reason. + + +.section .init + call VG_(startup) +.section .fini + call VG_(shutdown) +.section .text + + + +VG_(startup): + # Record %esp as it was when we got here. This is because argv/c + # and envp[] are passed as args to this function, and we need to see + # envp so we can get at the env var VG_ARGS without help from libc. + # The stack layout at this point depends on the version of glibc in + # use. See process_cmd_line_options() in vg_main.c for details. + movl %esp, VG_(esp_at_startup) + + # We have control! Save the state of the machine in + # the simulators state, and switch stacks. + # Except ... we cant copy the machines registers into their + # final places in vg_baseBlock, because the offsets to them + # have not yet been set up. Instead, they are copied to a + # temporary place (m_state_static). 
In vg_main.c, once the + # baseBlock offsets are set up, values are copied into baseBlock. + movl %eax, VG_(m_state_static)+0 + movl %ecx, VG_(m_state_static)+4 + movl %edx, VG_(m_state_static)+8 + movl %ebx, VG_(m_state_static)+12 + movl %esp, VG_(m_state_static)+16 + movl %ebp, VG_(m_state_static)+20 + movl %esi, VG_(m_state_static)+24 + movl %edi, VG_(m_state_static)+28 + pushfl + popl %eax + movl %eax, VG_(m_state_static)+32 + fwait + fnsave VG_(m_state_static)+40 + frstor VG_(m_state_static)+40 + + # keep the first and last 10 words free to check for overruns + movl $VG_(stack)+39996 -40, %esp + + # Now some real magic. We need this procedure to return, + # since thats what ld.so expects, but running on the + # simulator. So vg_main starts the simulator running at + # the insn labelled first_insn_to_simulate. + + movl $first_insn_to_simulate, VG_(m_state_static)+36 + jmp VG_(main) +first_insn_to_simulate: + # Nothing else to do -- just return in the "normal" way. + ret + + + +.global VG_(shutdown) +VG_(shutdown): + # ld.so will call here after execution of the program proper + # is complete, to allow libraries to close down cleanly. + # Note that we will enter here on the synthetic CPU, not + # the real one! So the interpreter must notice when this + # procedure is called, and use that as its cue to switch + # back to the real CPU. That means the code placed here is + # utterly irrelevant, since it will never get run, but I + # place a RET here anyway, since it is the traditional way + # to return from a subroutine :-) + ret + + + +.global VG_(switch_to_real_CPU) +VG_(switch_to_real_CPU): + # Once Valgrind has decided it needs to exit, either + # because it has detected a call to vg_shutdown, or + # because the specified number of insns have been completed + # during a debugging run, it jumps here, which copies the + # simulators state into the real machine state. 
Execution + # of the rest of the program continues on the real CPU, + # and there is no way for the simulator to regain control + # after this point. + frstor VG_(m_state_static)+40 + movl VG_(m_state_static)+32, %eax + pushl %eax + popfl + movl VG_(m_state_static)+0, %eax + movl VG_(m_state_static)+4, %ecx + movl VG_(m_state_static)+8, %edx + movl VG_(m_state_static)+12, %ebx + movl VG_(m_state_static)+16, %esp + movl VG_(m_state_static)+20, %ebp + movl VG_(m_state_static)+24, %esi + movl VG_(m_state_static)+28, %edi + + pushal + pushfl + # We hope that vg_sigshutdown_actions does not alter + # the FPU state. + call VG_(sigshutdown_actions) + popfl + popal + # re-restore the FPU state anyway ... + frstor VG_(m_state_static)+40 + jmp *VG_(m_state_static)+36 + + + +/*------------------------------------------------------------*/ +/*--- A function to temporarily copy %ESP/%EBP into ---*/ +/*--- %esp/%ebp and then start up GDB. ---*/ +/*------------------------------------------------------------*/ + +/*--- This is clearly not re-entrant! ---*/ +.data +vg_ebp_saved_over_GDB_start: + .word 0 +vg_esp_saved_over_GDB_start: + .word 0 +.text + +.global VG_(swizzle_esp_then_start_GDB) +VG_(swizzle_esp_then_start_GDB): + pushal + + # remember the simulators current stack/frame pointers + movl %ebp, vg_ebp_saved_over_GDB_start + movl %esp, vg_esp_saved_over_GDB_start + + movl $VG_(baseBlock), %ebx + + # fetch %ESP into %esp + movl VGOFF_(m_esp), %esi + movl (%ebx, %esi, 4), %esp + + ### %esp now refers to clients stack + ### mess with the clients stack to make it look as if it + ### called this procedure, since otherwise it will look to gdb + ### as if the top (currently executing) stack frame of the + ### client is missing. + + # push %EIP, via %eax. This is a faked-up return address. + movl VGOFF_(m_eip), %esi + movl (%ebx, %esi, 4), %eax + pushl %eax + + # push %EBP, via %eax. This is a faked %ebp-chain pointer. 
+ movl VGOFF_(m_ebp), %esi + movl (%ebx, %esi, 4), %eax + pushl %eax + + movl %esp, %ebp + + call VG_(start_GDB_whilst_on_client_stack) + + # restore the simulators stack/frame pointer + movl vg_ebp_saved_over_GDB_start, %ebp + movl vg_esp_saved_over_GDB_start, %esp + + popal + ret + +# gcc puts this construction at the end of every function. I think it +# allows the linker to figure out the size of the function. So we do +# the same, in the vague hope that it might help GDBs navigation. +.Lend_of_swizzle: + .size VG_(swizzle_esp_then_start_GDB), .Lend_of_swizzle-VG_(swizzle_esp_then_start_GDB) + +##--------------------------------------------------------------------## +##--- end vg_startup.S ---## +##--------------------------------------------------------------------## diff --git a/vg_symtab2.c b/vg_symtab2.c new file mode 100644 index 000000000..cfb6a58bc --- /dev/null +++ b/vg_symtab2.c @@ -0,0 +1,1435 @@ + +/*--------------------------------------------------------------------*/ +/*--- Management of symbols and debugging information. ---*/ +/*--- vg_symtab2.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" +#include "vg_unsafe.h" + +#include /* ELF defns */ +#include /* stabs defns */ + +/* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from + dlopen()ed libraries, which is something that KDE3 does a lot. + Still kludgey, though less than before: + + * we don't check whether we should throw away some symbol tables + when munmap() happens + + * symbol table reading code for ELF binaries is a shambles. + Use GHC's fptools/ghc/rts/Linker.c as the basis for something better. +*/ + +/*------------------------------------------------------------*/ +/*--- Structs n stuff ---*/ +/*------------------------------------------------------------*/ + +/* A structure to hold an ELF symbol (very crudely). */ +typedef + struct { + Addr addr; /* lowest address of entity */ + UInt size; /* size in bytes */ + Int nmoff; /* offset of name in this SegInfo's str tab */ + } + RiSym; + + +/* A structure to hold addr-to-source info for a single line. */ +typedef + struct { + Addr addr; /* lowest address for this line */ + Int fnmoff; /* source filename; offset in this SegInfo's str tab */ + UShort lineno; /* source line number, or zero */ + UShort size; /* size in bytes; we go to a bit of trouble to + catch overflows of this */ + } + RiLoc; + + +/* A structure which contains information pertaining to one mapped + text segment. */ +typedef + struct _SegInfo { + struct _SegInfo* next; + /* Description of the mapped segment. */ + Addr start; + UInt size; + UChar* filename; /* in mallocville */ + UInt foffset; + /* An expandable array of symbols. */ + RiSym* symtab; + UInt symtab_used; + UInt symtab_size; + /* An expandable array of locations. 
*/ + RiLoc* loctab; + UInt loctab_used; + UInt loctab_size; + /* An expandable array of characters -- the string table. */ + Char* strtab; + UInt strtab_used; + UInt strtab_size; + /* offset is what we need to add to symbol table entries + to get the real location of that symbol in memory. + For executables, offset is zero. + For .so's, offset == base_addr. + This seems like a giant kludge to me. + */ + UInt offset; + } + SegInfo; + + +/* -- debug helper -- */ +static void ppSegInfo ( SegInfo* si ) +{ + VG_(printf)("name: %s\n" + "start %p, size %d, foffset %d\n", + si->filename?si->filename : (UChar*)"NULL", + si->start, si->size, si->foffset ); +} + +static void freeSegInfo ( SegInfo* si ) +{ + vg_assert(si != NULL); + if (si->filename) VG_(free)(VG_AR_SYMTAB, si->filename); + if (si->symtab) VG_(free)(VG_AR_SYMTAB, si->symtab); + if (si->loctab) VG_(free)(VG_AR_SYMTAB, si->loctab); + if (si->strtab) VG_(free)(VG_AR_SYMTAB, si->strtab); + VG_(free)(VG_AR_SYMTAB, si); +} + + +/*------------------------------------------------------------*/ +/*--- Adding stuff ---*/ +/*------------------------------------------------------------*/ + +/* Add a str to the string table, including terminating zero, and + return offset of the string in vg_strtab. 
*/ + +static __inline__ +Int addStr ( SegInfo* si, Char* str ) +{ + Char* new_tab; + Int new_sz, i, space_needed; + + space_needed = 1 + VG_(strlen)(str); + if (si->strtab_used + space_needed > si->strtab_size) { + new_sz = 2 * si->strtab_size; + if (new_sz == 0) new_sz = 5000; + new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz); + if (si->strtab != NULL) { + for (i = 0; i < si->strtab_used; i++) + new_tab[i] = si->strtab[i]; + VG_(free)(VG_AR_SYMTAB, si->strtab); + } + si->strtab = new_tab; + si->strtab_size = new_sz; + } + + for (i = 0; i < space_needed; i++) + si->strtab[si->strtab_used+i] = str[i]; + + si->strtab_used += space_needed; + vg_assert(si->strtab_used <= si->strtab_size); + return si->strtab_used - space_needed; +} + +/* Add a symbol to the symbol table. */ + +static __inline__ +void addSym ( SegInfo* si, RiSym* sym ) +{ + Int new_sz, i; + RiSym* new_tab; + + /* Ignore zero-sized syms. */ + if (sym->size == 0) return; + + if (si->symtab_used == si->symtab_size) { + new_sz = 2 * si->symtab_size; + if (new_sz == 0) new_sz = 500; + new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiSym) ); + if (si->symtab != NULL) { + for (i = 0; i < si->symtab_used; i++) + new_tab[i] = si->symtab[i]; + VG_(free)(VG_AR_SYMTAB, si->symtab); + } + si->symtab = new_tab; + si->symtab_size = new_sz; + } + + si->symtab[si->symtab_used] = *sym; + si->symtab_used++; + vg_assert(si->symtab_used <= si->symtab_size); +} + +/* Add a location to the location table. */ + +static __inline__ +void addLoc ( SegInfo* si, RiLoc* loc ) +{ + Int new_sz, i; + RiLoc* new_tab; + + /* Ignore zero-sized locs. 
*/ + if (loc->size == 0) return; + + if (si->loctab_used == si->loctab_size) { + new_sz = 2 * si->loctab_size; + if (new_sz == 0) new_sz = 500; + new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiLoc) ); + if (si->loctab != NULL) { + for (i = 0; i < si->loctab_used; i++) + new_tab[i] = si->loctab[i]; + VG_(free)(VG_AR_SYMTAB, si->loctab); + } + si->loctab = new_tab; + si->loctab_size = new_sz; + } + + si->loctab[si->loctab_used] = *loc; + si->loctab_used++; + vg_assert(si->loctab_used <= si->loctab_size); +} + + + +/*------------------------------------------------------------*/ +/*--- Helpers ---*/ +/*------------------------------------------------------------*/ + +/* Non-fatal -- use vg_panic if terminal. */ +static +void vg_symerr ( Char* msg ) +{ + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg,"%s", msg ); +} + + +/* Print a symbol. */ +static +void printSym ( SegInfo* si, Int i ) +{ + VG_(printf)( "%5d: %8p .. %8p (%d) %s\n", + i, + si->symtab[i].addr, + si->symtab[i].addr + si->symtab[i].size - 1, si->symtab[i].size, + &si->strtab[si->symtab[i].nmoff] ); +} + + +#if 0 +/* Print the entire sym tab. */ +static __attribute__ ((unused)) +void printSymtab ( void ) +{ + Int i; + VG_(printf)("\n------ BEGIN vg_symtab ------\n"); + for (i = 0; i < vg_symtab_used; i++) + printSym(i); + VG_(printf)("------ BEGIN vg_symtab ------\n"); +} +#endif + +#if 0 +/* Paranoid strcat. */ +static +void safeCopy ( UChar* dst, UInt maxlen, UChar* src ) +{ + UInt i = 0, j = 0; + while (True) { + if (i >= maxlen) return; + if (dst[i] == 0) break; + i++; + } + while (True) { + if (i >= maxlen) return; + dst[i] = src[j]; + if (src[j] == 0) return; + i++; j++; + } +} +#endif + +/*------------------------------------------------------------*/ +/*--- Canonicalisers ---*/ +/*------------------------------------------------------------*/ + +/* Sort the symtab by starting address, and emit warnings if any + symbols have overlapping address ranges. 
We use that old chestnut, + shellsort. Mash the table around so as to establish the property + that addresses are in order and the ranges to not overlap. This + facilitates using binary search to map addresses to symbols when we + come to query the table. +*/ +static +void canonicaliseSymtab ( SegInfo* si ) +{ + /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */ + Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968, + 4592, 13776, 33936, 86961, 198768, + 463792, 1391376 }; + Int lo = 0; + Int hi = si->symtab_used-1; + Int i, j, h, bigN, hp, n_merged, n_truncated; + RiSym v; + Addr s1, s2, e1, e2; + +# define SWAP(ty,aa,bb) \ + do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0) + + bigN = hi - lo + 1; if (bigN < 2) return; + hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--; + vg_assert(0 <= hp && hp < 16); + + for (; hp >= 0; hp--) { + h = incs[hp]; + i = lo + h; + while (1) { + if (i > hi) break; + v = si->symtab[i]; + j = i; + while (si->symtab[j-h].addr > v.addr) { + si->symtab[j] = si->symtab[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + si->symtab[j] = v; + i++; + } + } + + cleanup_more: + + /* If two symbols have identical address ranges, favour the + one with the longer name. + */ + do { + n_merged = 0; + j = si->symtab_used; + si->symtab_used = 0; + for (i = 0; i < j; i++) { + if (i < j-1 + && si->symtab[i].addr == si->symtab[i+1].addr + && si->symtab[i].size == si->symtab[i+1].size) { + n_merged++; + /* merge the two into one */ + if (VG_(strlen)(&si->strtab[si->symtab[i].nmoff]) + > VG_(strlen)(&si->strtab[si->symtab[i+1].nmoff])) { + si->symtab[si->symtab_used++] = si->symtab[i]; + } else { + si->symtab[si->symtab_used++] = si->symtab[i+1]; + } + i++; + } else { + si->symtab[si->symtab_used++] = si->symtab[i]; + } + } + if (VG_(clo_trace_symtab)) + VG_(printf)( "%d merged\n", n_merged); + } + while (n_merged > 0); + + /* Detect and "fix" overlapping address ranges. 
*/ + n_truncated = 0; + + for (i = 0; i < si->symtab_used-1; i++) { + + vg_assert(si->symtab[i].addr <= si->symtab[i+1].addr); + + /* Check for common (no overlap) case. */ + if (si->symtab[i].addr + si->symtab[i].size + <= si->symtab[i+1].addr) + continue; + + /* There's an overlap. Truncate one or the other. */ + if (VG_(clo_trace_symtab)) { + VG_(printf)("overlapping address ranges in symbol table\n\t"); + printSym(si,i); + VG_(printf)("\t"); + printSym(si,i+1); + VG_(printf)("\n"); + } + + /* Truncate one or the other. */ + s1 = si->symtab[i].addr; + s2 = si->symtab[i+1].addr; + e1 = s1 + si->symtab[i].size - 1; + e2 = s2 + si->symtab[i+1].size - 1; + if (s1 < s2) { + e1 = s2-1; + } else { + vg_assert(s1 == s2); + if (e1 > e2) { + s1 = e2+1; SWAP(Addr,s1,s2); SWAP(Addr,e1,e2); + } else + if (e1 < e2) { + s2 = e1+1; + } else { + /* e1 == e2. Identical addr ranges. We'll eventually wind + up back at cleanup_more, which will take care of it. */ + } + } + si->symtab[i].addr = s1; + si->symtab[i+1].addr = s2; + si->symtab[i].size = e1 - s1 + 1; + si->symtab[i+1].size = e2 - s2 + 1; + vg_assert(s1 <= s2); + vg_assert(si->symtab[i].size > 0); + vg_assert(si->symtab[i+1].size > 0); + /* It may be that the i+1 entry now needs to be moved further + along to maintain the address order requirement. */ + j = i+1; + while (j < si->symtab_used-1 + && si->symtab[j].addr > si->symtab[j+1].addr) { + SWAP(RiSym,si->symtab[j],si->symtab[j+1]); + j++; + } + n_truncated++; + } + + if (n_truncated > 0) goto cleanup_more; + + /* Ensure relevant postconditions hold. */ + for (i = 0; i < si->symtab_used-1; i++) { + /* No zero-sized symbols. */ + vg_assert(si->symtab[i].size > 0); + /* In order. */ + vg_assert(si->symtab[i].addr < si->symtab[i+1].addr); + /* No overlaps. */ + vg_assert(si->symtab[i].addr + si->symtab[i].size - 1 + < si->symtab[i+1].addr); + } +# undef SWAP +} + + + +/* Sort the location table by starting address. 
Mash the table around + so as to establish the property that addresses are in order and the + ranges do not overlap. This facilitates using binary search to map + addresses to locations when we come to query the table. */ +static +void canonicaliseLoctab ( SegInfo* si ) +{ + /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */ + Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968, + 4592, 13776, 33936, 86961, 198768, + 463792, 1391376 }; + Int lo = 0; + Int hi = si->loctab_used-1; + Int i, j, h, bigN, hp; + RiLoc v; + +# define SWAP(ty,aa,bb) \ + do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0); + + /* Sort by start address. */ + + bigN = hi - lo + 1; if (bigN < 2) return; + hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--; + vg_assert(0 <= hp && hp < 16); + + for (; hp >= 0; hp--) { + h = incs[hp]; + i = lo + h; + while (1) { + if (i > hi) break; + v = si->loctab[i]; + j = i; + while (si->loctab[j-h].addr > v.addr) { + si->loctab[j] = si->loctab[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + si->loctab[j] = v; + i++; + } + } + + /* If two adjacent entries overlap, truncate the first. */ + for (i = 0; i < si->loctab_used-1; i++) { + vg_assert(si->loctab[i].size < 10000); + if (si->loctab[i].addr + si->loctab[i].size > si->loctab[i+1].addr) { + /* Do this in signed int32 because the actual .size fields + are unsigned 16s. */ + Int new_size = si->loctab[i+1].addr - si->loctab[i].addr; + if (new_size < 0) { + si->loctab[i].size = 0; + } else + if (new_size >= 65536) { + si->loctab[i].size = 65535; + } else { + si->loctab[i].size = (UShort)new_size; + } + } + } + + /* Zap any zero-sized entries resulting from the truncation + process. */ + j = 0; + for (i = 0; i < si->loctab_used; i++) { + if (si->loctab[i].size > 0) { + si->loctab[j] = si->loctab[i]; + j++; + } + } + si->loctab_used = j; + + /* Ensure relevant postconditions hold. 
*/ + for (i = 0; i < si->loctab_used-1; i++) { + /* + VG_(printf)("%d (%d) %d 0x%x\n", + i, si->loctab[i+1].confident, + si->loctab[i+1].size, si->loctab[i+1].addr ); + */ + /* No zero-sized symbols. */ + vg_assert(si->loctab[i].size > 0); + /* In order. */ + vg_assert(si->loctab[i].addr < si->loctab[i+1].addr); + /* No overlaps. */ + vg_assert(si->loctab[i].addr + si->loctab[i].size - 1 + < si->loctab[i+1].addr); + } +# undef SWAP +} + + +/*------------------------------------------------------------*/ +/*--- Read info from a .so/exe file. ---*/ +/*------------------------------------------------------------*/ + +static __inline__ +void addLineInfo ( SegInfo* si, + Int fnmoff, + Addr start, + Addr end, + UInt lineno ) +{ + RiLoc loc; + UInt size = end - start + 1; +# if 0 + if (size > 10000) + VG_(printf)( "line %4d: %p .. %p, in %s\n", + lineno, start, end, + &si->strtab[fnmoff] ); +# endif + /* Sanity ... */ + if (size > 10000) return; + + if (start >= si->start+si->size + || end < si->start) return; + + loc.addr = start; + loc.size = (UShort)size; + loc.lineno = lineno; + loc.fnmoff = fnmoff; + addLoc ( si, &loc ); +} + + +/* Read the symbols from the object/exe specified by the SegInfo into + the tables within the supplied SegInfo. 
*/ +static +void vg_read_lib_symbols ( SegInfo* si ) +{ + Elf32_Ehdr* ehdr; /* The ELF header */ + Elf32_Shdr* shdr; /* The section table */ + UChar* sh_strtab; /* The section table's string table */ + struct nlist* stab; /* The .stab table */ + UChar* stabstr; /* The .stab string table */ + Int stab_sz; /* Size in bytes of the .stab table */ + Int stabstr_sz; /* Size in bytes of the .stab string table */ + Int fd; + Int i; + Bool ok; + Addr oimage; + Int n_oimage; + struct stat stat_buf; + + /* for the .stabs reader */ + Int curr_filenmoff; + Addr curr_fnbaseaddr; + Addr range_startAddr; + Int range_lineno; + + oimage = (Addr)NULL; + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg, "Reading syms from %s", + si->filename ); + + /* mmap the object image aboard, so that we can read symbols and + line number info out of it. It will be munmapped immediately + thereafter; it is only aboard transiently. */ + + i = stat(si->filename, &stat_buf); + if (i != 0) { + vg_symerr("Can't stat .so/.exe (to determine its size)?!"); + return; + } + n_oimage = stat_buf.st_size; + + fd = VG_(open_read)(si->filename); + if (fd == -1) { + vg_symerr("Can't open .so/.exe to read symbols?!"); + return; + } + + oimage = (Addr)VG_(mmap)( NULL, n_oimage, PROT_READ, MAP_PRIVATE, fd, 0 ); + if (oimage == ((Addr)(-1))) { + VG_(message)(Vg_UserMsg, + "mmap failed on %s", si->filename ); + VG_(close)(fd); + return; + } + + VG_(close)(fd); + + /* Ok, the object image is safely in oimage[0 .. n_oimage-1]. + Now verify that it is a valid ELF .so or executable image. 
+ */ + ok = (n_oimage >= sizeof(Elf32_Ehdr)); + ehdr = (Elf32_Ehdr*)oimage; + + if (ok) { + ok &= (ehdr->e_ident[EI_MAG0] == 0x7F + && ehdr->e_ident[EI_MAG1] == 'E' + && ehdr->e_ident[EI_MAG2] == 'L' + && ehdr->e_ident[EI_MAG3] == 'F'); + ok &= (ehdr->e_ident[EI_CLASS] == ELFCLASS32 + && ehdr->e_ident[EI_DATA] == ELFDATA2LSB + && ehdr->e_ident[EI_VERSION] == EV_CURRENT); + ok &= (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN); + ok &= (ehdr->e_machine == EM_386); + ok &= (ehdr->e_version == EV_CURRENT); + ok &= (ehdr->e_shstrndx != SHN_UNDEF); + ok &= (ehdr->e_shoff != 0 && ehdr->e_shnum != 0); + } + + if (!ok) { + vg_symerr("Invalid ELF header, or missing stringtab/sectiontab."); + VG_(munmap) ( (void*)oimage, n_oimage ); + return; + } + + if (VG_(clo_trace_symtab)) + VG_(printf)( + "shoff = %d, shnum = %d, size = %d, n_vg_oimage = %d\n", + ehdr->e_shoff, ehdr->e_shnum, sizeof(Elf32_Shdr), n_oimage ); + + if (ehdr->e_shoff + ehdr->e_shnum*sizeof(Elf32_Shdr) > n_oimage) { + vg_symerr("ELF section header is beyond image end?!"); + VG_(munmap) ( (void*)oimage, n_oimage ); + return; + } + + shdr = (Elf32_Shdr*)(oimage + ehdr->e_shoff); + sh_strtab = (UChar*)(oimage + shdr[ehdr->e_shstrndx].sh_offset); + + /* try and read the object's symbol table */ + { + UChar* o_strtab = NULL; + Elf32_Sym* o_symtab = NULL; + UInt o_strtab_sz = 0; + UInt o_symtab_sz = 0; + + UChar* o_got = NULL; + UChar* o_plt = NULL; + UInt o_got_sz = 0; + UInt o_plt_sz = 0; + + Bool snaffle_it; + Addr sym_addr; + + /* find the .stabstr and .stab sections */ + for (i = 0; i < ehdr->e_shnum; i++) { + if (0 == VG_(strcmp)(".symtab",sh_strtab + shdr[i].sh_name)) { + o_symtab = (Elf32_Sym*)(oimage + shdr[i].sh_offset); + o_symtab_sz = shdr[i].sh_size; + vg_assert((o_symtab_sz % sizeof(Elf32_Sym)) == 0); + /* check image overrun here */ + } + if (0 == VG_(strcmp)(".strtab",sh_strtab + shdr[i].sh_name)) { + o_strtab = (UChar*)(oimage + shdr[i].sh_offset); + o_strtab_sz = shdr[i].sh_size; + /* check 
image overrun here */ + } + + /* find out where the .got and .plt sections will be in the + executable image, not in the object image transiently loaded. + */ + if (0 == VG_(strcmp)(".got",sh_strtab + shdr[i].sh_name)) { + o_got = (UChar*)(si->offset + + shdr[i].sh_offset); + o_got_sz = shdr[i].sh_size; + /* check image overrun here */ + } + if (0 == VG_(strcmp)(".plt",sh_strtab + shdr[i].sh_name)) { + o_plt = (UChar*)(si->offset + + shdr[i].sh_offset); + o_plt_sz = shdr[i].sh_size; + /* check image overrun here */ + } + + } + + if (VG_(clo_trace_symtab)) { + if (o_plt) VG_(printf)( "PLT: %p .. %p\n", + o_plt, o_plt + o_plt_sz - 1 ); + if (o_got) VG_(printf)( "GOT: %p .. %p\n", + o_got, o_got + o_got_sz - 1 ); + } + + if (o_strtab == NULL || o_symtab == NULL) { + vg_symerr(" object doesn't have a symbol table"); + } else { + /* Perhaps should start at i = 1; ELF docs suggest that entry + 0 always denotes `unknown symbol'. */ + for (i = 1; i < o_symtab_sz/sizeof(Elf32_Sym); i++){ +# if 0 + VG_(printf)("raw symbol: "); + switch (ELF32_ST_BIND(o_symtab[i].st_info)) { + case STB_LOCAL: VG_(printf)("LOC "); break; + case STB_GLOBAL: VG_(printf)("GLO "); break; + case STB_WEAK: VG_(printf)("WEA "); break; + case STB_LOPROC: VG_(printf)("lop "); break; + case STB_HIPROC: VG_(printf)("hip "); break; + default: VG_(printf)("??? "); break; + } + switch (ELF32_ST_TYPE(o_symtab[i].st_info)) { + case STT_NOTYPE: VG_(printf)("NOT "); break; + case STT_OBJECT: VG_(printf)("OBJ "); break; + case STT_FUNC: VG_(printf)("FUN "); break; + case STT_SECTION: VG_(printf)("SEC "); break; + case STT_FILE: VG_(printf)("FIL "); break; + case STT_LOPROC: VG_(printf)("lop "); break; + case STT_HIPROC: VG_(printf)("hip "); break; + default: VG_(printf)("??? "); break; + } + VG_(printf)( + ": value %p, size %d, name %s\n", + si->offset+(UChar*)o_symtab[i].st_value, + o_symtab[i].st_size, + o_symtab[i].st_name + ? 
((Char*)o_strtab+o_symtab[i].st_name) + : (Char*)"NONAME"); +# endif + + /* Figure out if we're interested in the symbol. + Firstly, is it of the right flavour? + */ + snaffle_it + = ( (ELF32_ST_BIND(o_symtab[i].st_info) == STB_GLOBAL || + ELF32_ST_BIND(o_symtab[i].st_info) == STB_LOCAL /* || + ELF32_ST_BIND(o_symtab[i].st_info) == STB_WEAK */) + && + (ELF32_ST_TYPE(o_symtab[i].st_info) == STT_FUNC /*|| + ELF32_ST_TYPE(o_symtab[i].st_info) == STT_OBJECT*/) + ); + + /* Secondly, if it's apparently in a GOT or PLT, it's really + a reference to a symbol defined elsewhere, so ignore it. + */ + sym_addr = si->offset + + (UInt)o_symtab[i].st_value; + if (o_got != NULL + && sym_addr >= (Addr)o_got + && sym_addr < (Addr)(o_got+o_got_sz)) { + snaffle_it = False; + if (VG_(clo_trace_symtab)) { + VG_(printf)( "in GOT: %s\n", + o_strtab+o_symtab[i].st_name); + } + } + if (o_plt != NULL + && sym_addr >= (Addr)o_plt + && sym_addr < (Addr)(o_plt+o_plt_sz)) { + snaffle_it = False; + if (VG_(clo_trace_symtab)) { + VG_(printf)( "in PLT: %s\n", + o_strtab+o_symtab[i].st_name); + } + } + + /* Don't bother if nameless, or zero-sized. */ + if (snaffle_it + && (o_symtab[i].st_name == (Elf32_Word)NULL + || /* VG_(strlen)(o_strtab+o_symtab[i].st_name) == 0 */ + /* equivalent but cheaper ... */ + * ((UChar*)(o_strtab+o_symtab[i].st_name)) == 0 + || o_symtab[i].st_size == 0)) { + snaffle_it = False; + if (VG_(clo_trace_symtab)) { + VG_(printf)( "size=0: %s\n", + o_strtab+o_symtab[i].st_name); + } + } + +# if 0 + /* Avoid _dl_ junk. (Why?) */ + /* 01-02-24: disabled until I find out if it really helps. 
*/ + if (snaffle_it + && (VG_(strncmp)("_dl_", o_strtab+o_symtab[i].st_name, 4) == 0 + || VG_(strncmp)("_r_debug", + o_strtab+o_symtab[i].st_name, 8) == 0)) { + snaffle_it = False; + if (VG_(clo_trace_symtab)) { + VG_(printf)( "_dl_ junk: %s\n", + o_strtab+o_symtab[i].st_name); + } + } +# endif + + /* This seems to significantly reduce the number of junk + symbols, and particularly reduces the number of + overlapping address ranges. Don't ask me why ... */ + if (snaffle_it && (Int)o_symtab[i].st_value == 0) { + snaffle_it = False; + if (VG_(clo_trace_symtab)) { + VG_(printf)( "valu=0: %s\n", + o_strtab+o_symtab[i].st_name); + } + } + + /* If no part of the symbol falls within the mapped range, + ignore it. */ + if (sym_addr+o_symtab[i].st_size <= si->start + || sym_addr >= si->start+si->size) { + snaffle_it = False; + } + + if (snaffle_it) { + /* it's an interesting symbol; record ("snaffle") it. */ + RiSym sym; + Char* t0 = o_symtab[i].st_name + ? (Char*)(o_strtab+o_symtab[i].st_name) + : (Char*)"NONAME"; + Int nmoff = addStr ( si, t0 ); + vg_assert(nmoff >= 0 + /* && 0==VG_(strcmp)(t0,&vg_strtab[nmoff]) */ ); + vg_assert( (Int)o_symtab[i].st_value >= 0); + /* VG_(printf)("%p + %d: %s\n", si->addr, + (Int)o_symtab[i].st_value, t0 ); */ + sym.addr = sym_addr; + sym.size = o_symtab[i].st_size; + sym.nmoff = nmoff; + addSym ( si, &sym ); + } + } + } + } + + /* Reading of the "stabs" debug format information, if any. 
*/ + stabstr = NULL; + stab = NULL; + stabstr_sz = 0; + stab_sz = 0; + /* find the .stabstr and .stab sections */ + for (i = 0; i < ehdr->e_shnum; i++) { + if (0 == VG_(strcmp)(".stab",sh_strtab + shdr[i].sh_name)) { + stab = (struct nlist *)(oimage + shdr[i].sh_offset); + stab_sz = shdr[i].sh_size; + } + if (0 == VG_(strcmp)(".stabstr",sh_strtab + shdr[i].sh_name)) { + stabstr = (UChar*)(oimage + shdr[i].sh_offset); + stabstr_sz = shdr[i].sh_size; + } + } + + if (stab == NULL || stabstr == NULL) { + vg_symerr(" object doesn't have any debug info"); + VG_(munmap) ( (void*)oimage, n_oimage ); + return; + } + + if ( stab_sz + (UChar*)stab > n_oimage + (UChar*)oimage + || stabstr_sz + (UChar*)stabstr + > n_oimage + (UChar*)oimage ) { + vg_symerr(" ELF debug data is beyond image end?!"); + VG_(munmap) ( (void*)oimage, n_oimage ); + return; + } + + /* Ok. It all looks plausible. Go on and read debug data. + stab kinds: 100 N_SO a source file name + 68 N_SLINE a source line number + 36 N_FUN ? start of a function + + In this loop, we maintain a current file name, updated + as N_SOs appear, and a current function base address, + updated as N_FUNs appear. Based on that, address ranges + for N_SLINEs are calculated, and stuffed into the + line info table. + + N_SLINE indicates the start of a source line. Functions are + delimited by N_FUNS, at the start with a non-empty string and at + the end with an empty string. The latter facilitates detecting + where to close the last N_SLINE for a function. 
+ */ + curr_filenmoff = addStr(si,"???"); + curr_fnbaseaddr = (Addr)NULL; + range_startAddr = 0; + range_lineno = 0; + + for (i = 0; i < stab_sz/(int)sizeof(struct nlist); i++) { +# if 0 + VG_(printf) ( " %2d ", i ); + VG_(printf) ( "type=0x%x othr=%d desc=%d value=0x%x strx=%d %s", + stab[i].n_type, stab[i].n_other, stab[i].n_desc, + (int)stab[i].n_value, + (int)stab[i].n_un.n_strx, + stabstr + stab[i].n_un.n_strx ); + VG_(printf)("\n"); +# endif + + switch (stab[i].n_type) { + + case 68: { /* N_SLINE */ + /* flush the current line, if any, and start a new one */ + Addr range_endAddr + = curr_fnbaseaddr + + (UInt)stab[i].n_value - 1; + if (range_startAddr != 0) { + addLineInfo ( si, + curr_filenmoff, + range_startAddr, + range_endAddr, + range_lineno ); + } + range_startAddr = range_endAddr + 1; + range_lineno = stab[i].n_desc; + break; + } + + case 36: { /* N_FUN */ + if ('\0' == * (stabstr + stab[i].n_un.n_strx) ) { + /* N_FUN with no name -- indicates the end of a fn. + Flush the current line, if any, but don't start a + new one. */ + Addr range_endAddr + = curr_fnbaseaddr + + (UInt)stab[i].n_value - 1; + if (range_startAddr != 0) { + addLineInfo ( si, + curr_filenmoff, + range_startAddr, + range_endAddr, + range_lineno ); + } + range_startAddr = 0; + } else { + /* N_FUN with a name -- indicates the start of a fn. 
*/ + curr_fnbaseaddr = si->offset + + (Addr)stab[i].n_value; + range_startAddr = curr_fnbaseaddr; + } + break; + } + + case 100: /* N_SO */ + case 132: /* N_SOL */ + /* seems to give lots of locations in header files */ + /* case 130: */ /* BINCL */ + { + UChar* nm = stabstr + stab[i].n_un.n_strx; + UInt len = VG_(strlen)(nm); + if (len > 0 && nm[len-1] != '/') + curr_filenmoff = addStr ( si, nm ); + else + if (len == 0) + curr_filenmoff = addStr ( si, "?1\0" ); + break; + } + +# if 0 + case 162: /* EINCL */ + curr_filenmoff = addStr ( si, "?2\0" ); + break; +# endif + + default: + break; + } + } /* for (i = 0; i < stab_sz/(int)sizeof(struct nlist); i++) */ + + /* Last, but not least, heave the oimage back overboard. */ + VG_(munmap) ( (void*)oimage, n_oimage ); +} + + +/*------------------------------------------------------------*/ +/*--- Main entry point for symbols table reading. ---*/ +/*------------------------------------------------------------*/ + +/* The root structure for the entire symbol table system. It is a + linked list of SegInfos. Note that this entire mechanism assumes + that what we read from /proc/self/maps doesn't contain overlapping + address ranges, and as a result the SegInfos in this list describe + disjoint address ranges. +*/ +static SegInfo* segInfo = NULL; + + +static +void read_symtab_callback ( + Addr start, UInt size, + Char rr, Char ww, Char xx, + UInt foffset, UChar* filename ) +{ + SegInfo* si; + + /* Stay sane ... */ + if (size == 0) + return; + + /* We're only interested in collecting symbols in executable + segments which are associated with a real file. Hence: */ + if (filename == NULL || xx != 'x') + return; + if (0 == VG_(strcmp)(filename, "/dev/zero")) + return; + + /* Perhaps we already have this one? If so, skip. 
*/ + for (si = segInfo; si != NULL; si = si->next) { + /* + if (0==VG_(strcmp)(si->filename, filename)) + VG_(printf)("same fnames: %c%c%c (%p, %d) (%p, %d) %s\n", + rr,ww,xx,si->start,si->size,start,size,filename); + */ + /* For some reason the observed size of a mapping can change, so + we don't use that to determine uniqueness. */ + if (si->start == start + /* && si->size == size */ + && 0==VG_(strcmp)(si->filename, filename)) { + return; + } + } + + /* Get the record initialised right. */ + si = VG_(malloc)(VG_AR_SYMTAB, sizeof(SegInfo)); + si->next = segInfo; + segInfo = si; + + si->start = start; + si->size = size; + si->foffset = foffset; + si->filename = VG_(malloc)(VG_AR_SYMTAB, 1 + VG_(strlen)(filename)); + VG_(strcpy)(si->filename, filename); + + si->symtab = NULL; + si->symtab_size = si->symtab_used = 0; + si->loctab = NULL; + si->loctab_size = si->loctab_used = 0; + si->strtab = NULL; + si->strtab_size = si->strtab_used = 0; + + /* Kludge ... */ + si->offset + = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start; + + /* And actually fill it up. */ + vg_read_lib_symbols ( si ); + canonicaliseSymtab ( si ); + canonicaliseLoctab ( si ); +} + + +/* This one really is the Head Honcho. Update the symbol tables to + reflect the current state of /proc/self/maps. Rather than re-read + everything, just read the entries which are not already in segInfo. + So we can call here repeatedly, after every mmap of a non-anonymous + segment with execute permissions, for example, to pick up new + libraries as they are dlopen'd. Conversely, when the client does + munmap(), vg_symtab_notify_munmap() throws away any symbol tables + which happen to correspond to the munmap()d area. */ +void VG_(read_symbols) ( void ) +{ + if (! VG_(clo_instrument)) + return; + + VG_(read_procselfmaps) ( read_symtab_callback ); + + /* Do a sanity check on the symbol tables: ensure that the address + space pieces they cover do not overlap (otherwise we are severely + hosed). 
This is a quadratic algorithm, but there shouldn't be + many of them. + */ + { SegInfo *si, *si2; + for (si = segInfo; si != NULL; si = si->next) { + /* Check no overlap between *si and those in the rest of the + list. */ + for (si2 = si->next; si2 != NULL; si2 = si2->next) { + Addr lo = si->start; + Addr hi = si->start + si->size - 1; + Addr lo2 = si2->start; + Addr hi2 = si2->start + si2->size - 1; + Bool overlap; + vg_assert(lo < hi); + vg_assert(lo2 < hi2); + /* the main assertion */ + overlap = (lo <= lo2 && lo2 <= hi) + || (lo <= hi2 && hi2 <= hi); + //vg_assert(!overlap); + if (overlap) { + VG_(printf)("\n\nOVERLAPPING SEGMENTS\n" ); + ppSegInfo ( si ); + ppSegInfo ( si2 ); + VG_(printf)("\n\n"); + vg_assert(! overlap); + } + } + } + } +} + + +/* When an munmap() call happens, check to see whether it corresponds + to a segment for a .so, and if so discard the relevant SegInfo. + This might not be a very clever idea from the point of view of + accuracy of error messages, but we need to do it in order to + maintain the no-overlapping invariant. +*/ +void VG_(symtab_notify_munmap) ( Addr start, UInt length ) +{ + SegInfo *prev, *curr; + + if (! VG_(clo_instrument)) + return; + + prev = NULL; + curr = segInfo; + while (True) { + if (curr == NULL) break; + if (start == curr->start) break; + prev = curr; + curr = curr->next; + } + if (curr == NULL) return; + + VG_(message)(Vg_UserMsg, + "discard syms in %s due to munmap()", + curr->filename ? curr->filename : (UChar*)"???"); + + vg_assert(prev == NULL || prev->next == curr); + + if (prev == NULL) { + segInfo = curr->next; + } else { + prev->next = curr->next; + } + + freeSegInfo(curr); +} + + +/*------------------------------------------------------------*/ +/*--- Use of symbol table & location info to create ---*/ +/*--- plausible-looking stack dumps. ---*/ +/*------------------------------------------------------------*/ + +/* Find a symbol-table index containing the specified pointer, or -1 + if not found. 
Binary search. */ + +static Int search_one_symtab ( SegInfo* si, Addr ptr ) +{ + Addr a_mid_lo, a_mid_hi; + Int mid, + lo = 0, + hi = si->symtab_used-1; + while (True) { + /* current unsearched space is from lo to hi, inclusive. */ + if (lo > hi) return -1; /* not found */ + mid = (lo + hi) / 2; + a_mid_lo = si->symtab[mid].addr; + a_mid_hi = ((Addr)si->symtab[mid].addr) + si->symtab[mid].size - 1; + + if (ptr < a_mid_lo) { hi = mid-1; continue; } + if (ptr > a_mid_hi) { lo = mid+1; continue; } + vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi); + return mid; + } +} + + +/* Search all symtabs that we know about to locate ptr. If found, set + *psi to the relevant SegInfo, and *symno to the symtab entry number + within that. If not found, *psi is set to NULL. */ + +static void search_all_symtabs ( Addr ptr, SegInfo** psi, Int* symno ) +{ + Int sno; + SegInfo* si; + for (si = segInfo; si != NULL; si = si->next) { + if (si->start <= ptr && ptr < si->start+si->size) { + sno = search_one_symtab ( si, ptr ); + if (sno == -1) goto not_found; + *symno = sno; + *psi = si; + return; + } + } + not_found: + *psi = NULL; +} + + +/* Find a location-table index containing the specified pointer, or -1 + if not found. Binary search. */ + +static Int search_one_loctab ( SegInfo* si, Addr ptr ) +{ + Addr a_mid_lo, a_mid_hi; + Int mid, + lo = 0, + hi = si->loctab_used-1; + while (True) { + /* current unsearched space is from lo to hi, inclusive. */ + if (lo > hi) return -1; /* not found */ + mid = (lo + hi) / 2; + a_mid_lo = si->loctab[mid].addr; + a_mid_hi = ((Addr)si->loctab[mid].addr) + si->loctab[mid].size - 1; + + if (ptr < a_mid_lo) { hi = mid-1; continue; } + if (ptr > a_mid_hi) { lo = mid+1; continue; } + vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi); + return mid; + } +} + + +/* Search all loctabs that we know about to locate ptr. If found, set + *psi to the relevant SegInfo, and *locno to the loctab entry number + within that. If not found, *psi is set to NULL. 
+*/ +static void search_all_loctabs ( Addr ptr, SegInfo** psi, Int* locno ) +{ + Int lno; + SegInfo* si; + for (si = segInfo; si != NULL; si = si->next) { + if (si->start <= ptr && ptr < si->start+si->size) { + lno = search_one_loctab ( si, ptr ); + if (lno == -1) goto not_found; + *locno = lno; + *psi = si; + return; + } + } + not_found: + *psi = NULL; +} + + +/* The whole point of this whole big deal: map a code address to a + plausible symbol name. Returns False if no idea; otherwise True. + Caller supplies buf and nbuf. If no_demangle is True, don't do + demangling, regardless of vg_clo_demangle -- probably because the + call has come from vg_what_fn_or_object_is_this. */ +static +Bool vg_what_fn_is_this ( Bool no_demangle, Addr a, + Char* buf, Int nbuf ) +{ + SegInfo* si; + Int sno; + search_all_symtabs ( a, &si, &sno ); + if (si == NULL) + return False; + if (no_demangle) { + VG_(strncpy_safely) + ( buf, & si->strtab[si->symtab[sno].nmoff], nbuf ); + } else { + VG_(demangle) ( & si->strtab[si->symtab[sno].nmoff], buf, nbuf ); + } + return True; +} + + +/* Map a code address to the name of a shared object file. Returns + False if no idea; otherwise True. Caller supplies buf and + nbuf. */ +static +Bool vg_what_object_is_this ( Addr a, Char* buf, Int nbuf ) +{ + SegInfo* si; + for (si = segInfo; si != NULL; si = si->next) { + if (si->start <= a && a < si->start+si->size) { + VG_(strncpy_safely)(buf, si->filename, nbuf); + return True; + } + } + return False; +} + +/* Return the name of an erring fn in a way which is useful + for comparing against the contents of a suppressions file. + Always writes something to buf. Also, doesn't demangle the + name, because we want to refer to mangled names in the + suppressions file. 
+*/ +void VG_(what_obj_and_fun_is_this) ( Addr a, + Char* obj_buf, Int n_obj_buf, + Char* fun_buf, Int n_fun_buf ) +{ + (void)vg_what_object_is_this ( a, obj_buf, n_obj_buf ); + (void)vg_what_fn_is_this ( True, a, fun_buf, n_fun_buf ); +} + + +/* Map a code address to a (filename, line number) pair. + Returns True if successful. +*/ +static +Bool vg_what_line_is_this ( Addr a, + UChar* filename, Int n_filename, + UInt* lineno ) +{ + SegInfo* si; + Int locno; + search_all_loctabs ( a, &si, &locno ); + if (si == NULL) + return False; + VG_(strncpy_safely)(filename, & si->strtab[si->loctab[locno].fnmoff], + n_filename); + *lineno = si->loctab[locno].lineno; + return True; +} + + +/* Print a mini stack dump, showing the current location. */ +void VG_(mini_stack_dump) ( ExeContext* ec ) +{ + +#define APPEND(str) \ + { UChar* sss; \ + for (sss = str; n < M_VG_ERRTXT-1 && *sss != 0; n++,sss++) \ + buf[n] = *sss; \ + buf[n] = 0; \ + } + + Bool know_fnname; + Bool know_objname; + Bool know_srcloc; + UInt lineno; + UChar ibuf[20]; + UInt i, n, clueless; + + UChar buf[M_VG_ERRTXT]; + UChar buf_fn[M_VG_ERRTXT]; + UChar buf_obj[M_VG_ERRTXT]; + UChar buf_srcloc[M_VG_ERRTXT]; + + Int stop_at = VG_(clo_backtrace_size); + + n = 0; + + know_fnname = vg_what_fn_is_this(False,ec->eips[0], buf_fn, M_VG_ERRTXT); + know_objname = vg_what_object_is_this(ec->eips[0], buf_obj, M_VG_ERRTXT); + know_srcloc = vg_what_line_is_this(ec->eips[0], + buf_srcloc, M_VG_ERRTXT, + &lineno); + + APPEND(" at "); + VG_(sprintf)(ibuf,"0x%x: ", ec->eips[0]); + APPEND(ibuf); + if (know_fnname) { + APPEND(buf_fn); + if (!know_srcloc && know_objname) { + APPEND(" (in "); + APPEND(buf_obj); + APPEND(")"); + } + } else if (know_objname && !know_srcloc) { + APPEND("(within "); + APPEND(buf_obj); + APPEND(")"); + } else { + APPEND("???"); + } + if (know_srcloc) { + APPEND(" ("); + APPEND(buf_srcloc); + APPEND(":"); + VG_(sprintf)(ibuf,"%d",lineno); + APPEND(ibuf); + APPEND(")"); + } + VG_(message)(Vg_UserMsg, "%s", 
buf); + + clueless = 0; + for (i = 1; i < stop_at; i++) { + know_fnname = vg_what_fn_is_this(False,ec->eips[i], buf_fn, M_VG_ERRTXT); + know_objname = vg_what_object_is_this(ec->eips[i],buf_obj, M_VG_ERRTXT); + know_srcloc = vg_what_line_is_this(ec->eips[i], + buf_srcloc, M_VG_ERRTXT, + &lineno); + n = 0; + APPEND(" by "); + if (ec->eips[i] == 0) { + APPEND(" "); + } else { + VG_(sprintf)(ibuf,"0x%x: ",ec->eips[i]); + APPEND(ibuf); + } + if (know_fnname) { + APPEND(buf_fn) + if (!know_srcloc && know_objname) { + APPEND(" (in "); + APPEND(buf_obj); + APPEND(")"); + } + } else { + if (know_objname && !know_srcloc) { + APPEND("(within "); + APPEND(buf_obj); + APPEND(")"); + } else { + APPEND("???"); + } + if (!know_srcloc) clueless++; + if (clueless == 2) + i = stop_at; /* force exit after this iteration */ + }; + if (know_srcloc) { + APPEND(" ("); + APPEND(buf_srcloc); + APPEND(":"); + VG_(sprintf)(ibuf,"%d",lineno); + APPEND(ibuf); + APPEND(")"); + } + VG_(message)(Vg_UserMsg, "%s", buf); + } +} + +#undef APPEND + +/*--------------------------------------------------------------------*/ +/*--- end vg_symtab2.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_syscall.S b/vg_syscall.S new file mode 100644 index 000000000..210328a69 --- /dev/null +++ b/vg_syscall.S @@ -0,0 +1,179 @@ + +##--------------------------------------------------------------------## +##--- Support for doing system calls. ---## +##--- vg_syscall.S ---## +##--------------------------------------------------------------------## + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. 
+ + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_constants.h" + + +.globl VG_(do_syscall) + +# NOTE that this routine expects the simulated machines state +# to be in m_state_static. Therefore it needs to be wrapped by +# code which copies from baseBlock before the call, into +# m_state_static, and back afterwards. + +VG_(do_syscall): + cmpl $2, VG_(syscall_depth) + jz do_syscall_DEPTH_2 + + # depth 1 copy follows ... + # Save all the int registers of the real machines state on the + # simulators stack. 
+ pushal + + # and save the real FPU state too + fwait + fnsave VG_(real_fpu_state_saved_over_syscall_d1) + frstor VG_(real_fpu_state_saved_over_syscall_d1) + + # remember what the simulators stack pointer is + movl %esp, VG_(esp_saved_over_syscall_d1) + + # Now copy the simulated machines state into the real one + # esp still refers to the simulators stack + frstor VG_(m_state_static)+40 + movl VG_(m_state_static)+32, %eax + pushl %eax + popfl + movl VG_(m_state_static)+0, %eax + movl VG_(m_state_static)+4, %ecx + movl VG_(m_state_static)+8, %edx + movl VG_(m_state_static)+12, %ebx + movl VG_(m_state_static)+16, %esp + movl VG_(m_state_static)+20, %ebp + movl VG_(m_state_static)+24, %esi + movl VG_(m_state_static)+28, %edi + + # esp now refers to the simulatees stack + # Do the actual system call + int $0x80 + + # restore stack as soon as possible + # esp refers to simulatees stack + movl %esp, VG_(m_state_static)+16 + movl VG_(esp_saved_over_syscall_d1), %esp + # esp refers to simulators stack + + # ... and undo everything else. + # Copy real state back to simulated state. + movl %eax, VG_(m_state_static)+0 + movl %ecx, VG_(m_state_static)+4 + movl %edx, VG_(m_state_static)+8 + movl %ebx, VG_(m_state_static)+12 + movl %ebp, VG_(m_state_static)+20 + movl %esi, VG_(m_state_static)+24 + movl %edi, VG_(m_state_static)+28 + pushfl + popl %eax + movl %eax, VG_(m_state_static)+32 + fwait + fnsave VG_(m_state_static)+40 + frstor VG_(m_state_static)+40 + + # Restore the state of the simulator + frstor VG_(real_fpu_state_saved_over_syscall_d1) + popal + + ret + + + + + + + + +do_syscall_DEPTH_2: + + # depth 2 copy follows ... + # Save all the int registers of the real machines state on the + # simulators stack. 
+ pushal + + # and save the real FPU state too + fwait + fnsave VG_(real_fpu_state_saved_over_syscall_d2) + frstor VG_(real_fpu_state_saved_over_syscall_d2) + + # remember what the simulators stack pointer is + movl %esp, VG_(esp_saved_over_syscall_d2) + + # Now copy the simulated machines state into the real one + # esp still refers to the simulators stack + frstor VG_(m_state_static)+40 + movl VG_(m_state_static)+32, %eax + pushl %eax + popfl + movl VG_(m_state_static)+0, %eax + movl VG_(m_state_static)+4, %ecx + movl VG_(m_state_static)+8, %edx + movl VG_(m_state_static)+12, %ebx + movl VG_(m_state_static)+16, %esp + movl VG_(m_state_static)+20, %ebp + movl VG_(m_state_static)+24, %esi + movl VG_(m_state_static)+28, %edi + + # esp now refers to the simulatees stack + # Do the actual system call + int $0x80 + + # restore stack as soon as possible + # esp refers to simulatees stack + movl %esp, VG_(m_state_static)+16 + movl VG_(esp_saved_over_syscall_d2), %esp + # esp refers to simulators stack + + # ... and undo everything else. + # Copy real state back to simulated state. 
+ movl %eax, VG_(m_state_static)+0 + movl %ecx, VG_(m_state_static)+4 + movl %edx, VG_(m_state_static)+8 + movl %ebx, VG_(m_state_static)+12 + movl %ebp, VG_(m_state_static)+20 + movl %esi, VG_(m_state_static)+24 + movl %edi, VG_(m_state_static)+28 + pushfl + popl %eax + movl %eax, VG_(m_state_static)+32 + fwait + fnsave VG_(m_state_static)+40 + frstor VG_(m_state_static)+40 + + # Restore the state of the simulator + frstor VG_(real_fpu_state_saved_over_syscall_d2) + popal + + ret + + +##--------------------------------------------------------------------## +##--- end vg_syscall.S ---## +##--------------------------------------------------------------------## diff --git a/vg_syscall_mem.c b/vg_syscall_mem.c new file mode 100644 index 000000000..3cea05b4a --- /dev/null +++ b/vg_syscall_mem.c @@ -0,0 +1,2560 @@ + +/*--------------------------------------------------------------------*/ +/*--- Update the byte permission maps following a system call. ---*/ +/*--- vg_syscall_mem.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" + +/* vg_unsafe.h should NOT be included into any file except this + one. */ +#include "vg_unsafe.h" + + +/* All system calls are channelled through vg_wrap_syscall. It does + three things: + + * optionally, checks the permissions for the args to the call + + * perform the syscall, usually by passing it along to the kernel + unmodified. However, because we simulate signals ourselves, + signal-related syscalls are routed to vg_signal.c, and are not + delivered to the kernel. + + * Update the permission maps following the syscall. + + A magical piece of assembly code, vg_do_syscall(), in vg_syscall.S + does the tricky bit of passing a syscall to the kernel, whilst + having the simulator retain control. +*/ + +static void make_noaccess ( Addr a, UInt len ) +{ + if (VG_(clo_instrument)) + VGM_(make_noaccess) ( a, len ); +} + +static void make_writable ( Addr a, UInt len ) +{ + if (VG_(clo_instrument)) + VGM_(make_writable) ( a, len ); +} + +static void make_readable ( Addr a, UInt len ) +{ + if (VG_(clo_instrument)) + VGM_(make_readable) ( a, len ); +} + +static void make_readwritable ( Addr a, UInt len ) +{ + if (VG_(clo_instrument)) + VGM_(make_readwritable) ( a, len ); +} + +static +void must_be_writable ( Char* syscall_name, UInt base, UInt size ) +{ + Bool ok; + Addr bad_addr; + /* VG_(message)(Vg_DebugMsg,"must be writable: %x .. 
%x", + base,base+size-1); */ + if (!VG_(clo_instrument)) + return; + ok = VGM_(check_writable) ( base, size, &bad_addr ); + if (!ok) + VG_(record_param_err) ( bad_addr, True, syscall_name ); +} + +static +void must_be_readable ( Char* syscall_name, UInt base, UInt size ) +{ + Bool ok; + Addr bad_addr; + /* VG_(message)(Vg_DebugMsg,"must be readable: %x .. %x", + base,base+size-1); */ + if (!VG_(clo_instrument)) + return; + ok = VGM_(check_readable) ( base, size, &bad_addr ); + if (!ok) + VG_(record_param_err) ( bad_addr, False, syscall_name ); +} + +static +void must_be_readable_asciiz ( Char* syscall_name, UInt str ) +{ + Bool ok = True; + Addr bad_addr; + /* VG_(message)(Vg_DebugMsg,"must be readable asciiz: 0x%x",str); */ + if (!VG_(clo_instrument)) + return; + ok = VGM_(check_readable_asciiz) ( (Addr)str, &bad_addr ); + if (!ok) + VG_(record_param_err) ( bad_addr, False, syscall_name ); +} + + +/* Set memory permissions, based on PROT_* values for mmap/mprotect, + into the permissions our scheme understands. Dunno if this is + really correct. */ + +static void approximate_mmap_permissions ( Addr a, UInt len, UInt prot ) +{ + /* PROT_READ and PROT_WRITE --> readable + PROT_READ only --> readable + PROT_WRITE only --> writable + NEITHER --> noaccess + */ + if (prot & PROT_READ) + make_readable(a,len); + else + if (prot & PROT_WRITE) + make_writable(a,len); + else + make_noaccess(a,len); +} + + +/* Dereference a pointer, but only after checking that it's + safe to do so. If not, return the default. +*/ +static +UInt safe_dereference ( Addr aa, UInt defawlt ) +{ + if (!VG_(clo_instrument)) + return * (UInt*)aa; + if (VGM_(check_readable)(aa,4,NULL)) + return * (UInt*)aa; + else + return defawlt; +} + + +/* Is this a Linux kernel error return value? */ +/* From: + http://sources.redhat.com/cgi-bin/cvsweb.cgi/libc/sysdeps/unix/sysv/ + linux/i386/sysdep.h? 
+ rev=1.28&content-type=text/x-cvsweb-markup&cvsroot=glibc + + QUOTE: + + Linux uses a negative return value to indicate syscall errors, + unlike most Unices, which use the condition codes' carry flag. + + Since version 2.1 the return value of a system call might be + negative even if the call succeeded. E.g., the `lseek' system call + might return a large offset. Therefore we must not anymore test + for < 0, but test for a real error by making sure the value in %eax + is a real error number. Linus said he will make sure the no syscall + returns a value in -1 .. -4095 as a valid result so we can savely + test with -4095. + + END QUOTE +*/ +Bool VG_(is_kerror) ( Int res ) +{ + if (res >= -4095 && res <= -1) + return True; + else + return False; +} + +static +UInt get_shm_size ( Int shmid ) +{ + struct shmid_ds buf; + long __res; + __asm__ volatile ( "int $0x80" + : "=a" (__res) + : "0" (__NR_ipc), + "b" ((long)(24) /*IPCOP_shmctl*/), + "c" ((long)(shmid)), + "d" ((long)(IPC_STAT)), + "S" ((long)(0)), + "D" ((long)(&buf)) ); + if ( VG_(is_kerror) ( __res ) ) + return 0; + + return buf.shm_segsz; +} + +static +Char *strdupcat( const Char *s1, const Char *s2, ArenaId aid ) +{ + UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1; + Char *result = VG_(malloc) ( aid, len ); + VG_(strcpy) ( result, s1 ); + VG_(strcat) ( result, s2 ); + return result; +} + +static +void must_be_readable_sendmsg( Char *msg, UInt base, UInt size ) +{ + Char *outmsg = strdupcat ( "socketcall.sendmsg", msg, VG_AR_TRANSIENT ); + must_be_readable ( outmsg, base, size ); + VG_(free) ( VG_AR_TRANSIENT, outmsg ); +} + +static +void must_be_writable_recvmsg( Char *msg, UInt base, UInt size ) +{ + Char *outmsg = strdupcat ( "socketcall.recvmsg", msg, VG_AR_TRANSIENT ); + must_be_writable ( outmsg, base, size ); + VG_(free) ( VG_AR_TRANSIENT, outmsg ); +} + +static +void make_readable_recvmsg( Char *fieldName, UInt base, UInt size ) +{ + make_readable( base, size ); +} + +static +void 
msghdr_foreachfield ( struct msghdr *msg, + void (*foreach_func)( Char *, UInt, UInt ) ) +{ + if ( !msg ) + return; + + foreach_func ( "(msg)", (Addr)msg, sizeof( struct msghdr ) ); + + if ( msg->msg_name ) + foreach_func ( "(msg.msg_name)", + (Addr)msg->msg_name, msg->msg_namelen ); + + if ( msg->msg_iov ) { + struct iovec *iov = msg->msg_iov; + UInt i; + + foreach_func ( "(msg.msg_iov)", + (Addr)iov, msg->msg_iovlen * sizeof( struct iovec ) ); + + for ( i = 0; i < msg->msg_iovlen; ++i, ++iov ) + foreach_func ( "(msg.msg_iov[i]", + (Addr)iov->iov_base, iov->iov_len ); + } + + if ( msg->msg_control ) + foreach_func ( "(msg.msg_control)", + (Addr)msg->msg_control, msg->msg_controllen ); +} + + +/* Records the current end of the data segment so we can make sense of + calls to brk(). Initial value set by hdm_init_memory_audit(). */ +Addr VGM_(curr_dataseg_end); + + +/* The Main Entertainment ... */ + +void VG_(wrap_syscall) ( void ) +{ + Bool sane_before_call = True; + Bool sane_after_call = True; + + UInt syscallno = VG_(baseBlock)[VGOFF_(m_eax)]; + UInt arg1 = VG_(baseBlock)[VGOFF_(m_ebx)]; + UInt arg2 = VG_(baseBlock)[VGOFF_(m_ecx)]; + UInt arg3 = VG_(baseBlock)[VGOFF_(m_edx)]; + UInt arg4 = VG_(baseBlock)[VGOFF_(m_esi)]; + UInt arg5 = VG_(baseBlock)[VGOFF_(m_edi)]; + + /* Do not make this unsigned! */ + Int res; + + /* Keep track of nested syscalls, and do some sanity checks. */ + Int syscall_depth_saved = VG_(syscall_depth); + if (VG_(syscall_depth) > 1) + VG_(unimplemented) + ("recursion between blocked syscalls and signal handlers"); + vg_assert( VG_(syscall_depth) == 0 || VG_(syscall_depth) == 1 ); + VG_(syscall_depth) ++; + + VGP_PUSHCC(VgpSyscall); + + /* Since buggy syscall wrappers sometimes break this, we may as well + check ourselves. */ + if (! VG_(first_and_last_secondaries_look_plausible)) + sane_before_call = False; + + /* the syscall no is in %eax. For syscalls with <= 5 args, + args 1 .. 5 to the syscall are in %ebx %ecx %edx %esi %edi. 
+ For calls with > 5 args, %ebx points to a lump of memory + containing the args. + + The result is returned in %eax. If this value >= 0, the call + succeeded, and this is the return value. If < 0, it failed, and + the negation of this value is errno. To be more specific, + if res is in the range -EMEDIUMTYPE (-124) .. -EPERM (-1) + (kernel 2.4.9 sources, include/asm-i386/errno.h) + then it indicates an error. Otherwise it doesn't. + + Dirk Mueller (mueller@kde.org) says that values -4095 .. -1 + (inclusive?) indicate error returns. Not sure where the -4095 + comes from. + */ + + if (VG_(clo_trace_syscalls)) + VG_(printf)("SYSCALL[%d, %d](%3d): ", + VG_(syscall_depth), VG_(getpid)(), syscallno); + + switch (syscallno) { + + case __NR_sigaltstack: + VG_(unimplemented) + ("client signals on alternative stack (SA_ONSTACK)"); + break; + + case __NR_clone: + VG_(unimplemented) + ("clone(): Valgrind doesn't support threads; sorry."); + break; + +# if defined(__NR_modify_ldt) + case __NR_modify_ldt: + VG_(unimplemented) + ("modify_ldt(): I (JRS) haven't investigated this yet; sorry."); + break; +# endif + + /* !!!!!!!!!! New, untested syscalls, 14 Mar 02 !!!!!!!!!! 
*/ + +# if defined(__NR_setfsuid32) + case __NR_setfsuid32: /* syscall 215 */ + /* int setfsuid(uid_t fsuid); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("setfsuid ( %d )\n", arg1); + KERNEL_DO_SYSCALL(res); + break; +# endif + +# if defined(__NR__sysctl) + case __NR__sysctl: + /* int _sysctl(struct __sysctl_args *args); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("_sysctl ( %p )\n", arg1 ); + must_be_writable ( "_sysctl(args)", arg1, + sizeof(struct __sysctl_args) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res)) + make_readable ( arg1, sizeof(struct __sysctl_args) ); + break; +# endif + +# if defined(__NR_sched_getscheduler) + case __NR_sched_getscheduler: + /* int sched_getscheduler(pid_t pid); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("sched_getscheduler ( %d )\n", arg1); + KERNEL_DO_SYSCALL(res); + break; +# endif + +# if defined(__NR_sched_setscheduler) + case __NR_sched_setscheduler: + /* int sched_setscheduler(pid_t pid, int policy, + const struct sched_param *p); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("sched_setscheduler ( %d, %d, %p )\n",arg1,arg2,arg3); + if (arg3 != (UInt)NULL) + must_be_readable( "sched_setscheduler(struct sched_param *p)", + arg3, sizeof(struct sched_param)); + KERNEL_DO_SYSCALL(res); + break; +# endif + +# if defined(__NR_mlockall) + case __NR_mlockall: + /* int mlockall(int flags); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("mlockall ( %x )\n", arg1); + KERNEL_DO_SYSCALL(res); + break; +# endif + +# if defined(__NR_munlockall) + case __NR_munlockall: + /* int munlockall(void); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("munlockall ( )\n"); + KERNEL_DO_SYSCALL(res); + break; +# endif + +#if defined(__NR_sched_get_priority_max) + case __NR_sched_get_priority_max: + /* int sched_get_priority_max(int policy); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("sched_get_priority_max ( %d )\n", arg1); + KERNEL_DO_SYSCALL(res); + break; +# endif + +# if defined(__NR_setfsgid) + case __NR_setfsgid: /* 
syscall 139 */ + /* int setfsgid(gid_t gid); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("setfsgid ( %d )\n", arg1); + KERNEL_DO_SYSCALL(res); + break; +# endif + +# if defined(__NR_setregid) + case __NR_setregid: /* syscall 71 */ + /* int setregid(gid_t rgid, gid_t egid); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("setregid ( %d, %d )\n", arg1, arg2); + KERNEL_DO_SYSCALL(res); + break; +# endif + +# if defined(__NR_setresuid) + case __NR_setresuid: /* syscall 164 */ + /* int setresuid(uid_t ruid, uid_t euid, uid_t suid); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("setresuid ( %d, %d, %d )\n", arg1, arg2, arg3); + KERNEL_DO_SYSCALL(res); + break; +# endif + +# if defined(__NR_setfsuid) + case __NR_setfsuid: /* syscall 138 */ + /* int setfsuid(uid_t uid); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("setfsuid ( %d )\n", arg1); + KERNEL_DO_SYSCALL(res); + break; +# endif + + /* !!!!!!!!!! New, untested syscalls, 8 Mar 02 !!!!!!!!!!! */ + +# if defined(__NR_sendfile) + case __NR_sendfile: /* syscall 187 */ + /* ssize_t sendfile(int out_fd, int in_fd, off_t *offset, + size_t count) */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("sendfile ( %d, %d, %p, %d )\n",arg1,arg2,arg3,arg4); + must_be_writable( "sendfile(offset)", arg3, sizeof(off_t) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res)) { + make_readable( arg3, sizeof( off_t ) ); + } + break; +# endif + + /* !!!!!!!!!! New, untested syscalls, 7 Mar 02 !!!!!!!!!!! */ + +# if defined(__NR_pwrite) + case __NR_pwrite: /* syscall 181 */ + /* ssize_t pwrite (int fd, const void *buf, size_t nbytes, + off_t offset); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("pwrite ( %d, %p, %d, %d )\n", arg1, arg2, arg3, arg4); + must_be_readable( "pwrite(buf)", arg2, arg3 ); + KERNEL_DO_SYSCALL(res); + break; +# endif + + /* !!!!!!!!!! New, untested syscalls, 6 Mar 02 !!!!!!!!!!! 
*/ + + case __NR_sync: /* syscall 36 */ + /* int sync(); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("sync ( )\n"); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_fstatfs: /* syscall 100 */ + /* int fstatfs(int fd, struct statfs *buf); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("fstatfs ( %d, %p )\n",arg1,arg2); + must_be_writable( "stat(buf)", arg2, sizeof(struct statfs) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res)) + make_readable( arg2, sizeof(struct statfs) ); + break; + + /* !!!!!!!!!! New, untested syscalls, 4 Mar 02 !!!!!!!!!!! */ + + case __NR_pause: /* syscall 29 */ + /* int pause(void); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("pause ( )\n"); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_getsid: /* syscall 147 */ + /* pid_t getsid(pid_t pid); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getsid ( %d )\n", arg1); + KERNEL_DO_SYSCALL(res); + break; + +# if defined(__NR_pread) + case __NR_pread: /* syscall 180 */ + /* ssize_t pread(int fd, void *buf, size_t count, off_t offset); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("pread ( %d, %p, %d, %d ) ...\n",arg1,arg2,arg3,arg4); + must_be_writable( "pread(buf)", arg2, arg3 ); + KERNEL_DO_SYSCALL(res); + if (VG_(clo_trace_syscalls)) + VG_(printf)("SYSCALL[%d, %d] pread ( %d, %p, %d, %d ) --> %d\n", + VG_(syscall_depth), VG_(getpid)(), + arg1, arg2, arg3, arg4, res); + if (!VG_(is_kerror)(res) && res > 0) { + make_readable( arg2, res ); + } + break; +# endif + + /* !!!!!!!!!! New, untested syscalls, 27 Feb 02 !!!!!!!!!! 
*/ + + case __NR_mknod: /* syscall 14 */ + /* int mknod(const char *pathname, mode_t mode, dev_t dev); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("mknod ( %p, 0x%x, 0x%x )\n", arg1, arg2, arg3 ); + must_be_readable_asciiz( "mknod(pathname)", arg1 ); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_flock: /* syscall 143 */ + /* int flock(int fd, int operation); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("flock ( %d, %d )\n", arg1, arg2 ); + KERNEL_DO_SYSCALL(res); + break; + +# if defined(__NR_rt_sigsuspend) + /* Viewed with great suspicion by me, but, hey, let's do it + anyway ... */ + case __NR_rt_sigsuspend: /* syscall 179 */ + /* int sigsuspend(const sigset_t *mask); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("sigsuspend ( %p )\n", arg1 ); + if (arg1 != (Addr)NULL) { + /* above NULL test is paranoia */ + must_be_readable( "sigsuspend(mask)", arg1, + sizeof(vki_ksigset_t) ); + } + KERNEL_DO_SYSCALL(res); + break; +# endif + + case __NR_init_module: /* syscall 128 */ + /* int init_module(const char *name, struct module *image); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("init_module ( %p, %p )\n", arg1, arg2 ); + must_be_readable_asciiz( "init_module(name)", arg1 ); + must_be_readable( "init_module(image)", arg2, + sizeof(struct module) ); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_ioperm: /* syscall 101 */ + /* int ioperm(unsigned long from, unsigned long num, int turn_on); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("ioperm ( %d, %d, %d )\n", arg1, arg2, arg3 ); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_capget: /* syscall 184 */ + /* int capget(cap_user_header_t header, cap_user_data_t data); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("capget ( %p, %p )\n", arg1, arg2 ); + must_be_readable( "capget(header)", arg1, + sizeof(vki_cap_user_header_t) ); + must_be_writable( "capget(data)", arg2, + sizeof( vki_cap_user_data_t) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && arg2 != (Addr)NULL) + make_readable ( arg2, 
sizeof( vki_cap_user_data_t) ); + break; + + /* !!!!!!!!!!!!!!!!!!!!! mutant ones !!!!!!!!!!!!!!!!!!!!! */ + + case __NR_execve: + /* int execve (const char *filename, + char *const argv [], + char *const envp[]); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("execve ( %p(%s), %p, %p ) --- NOT CHECKED\n", + arg1, arg1, arg2, arg3); + /* Make any binding for LD_PRELOAD disappear, so that child + processes don't get traced into. */ + if (!VG_(clo_trace_children)) { + Int i; + Char** envp = (Char**)arg3; + for (i = 0; envp[i] != NULL; i++) { + if (VG_(strncmp)(envp[i], "LD_PRELOAD=", 11) == 0) { + VG_(mash_LD_PRELOAD_string)(&envp[i][11]); + } + } + } + KERNEL_DO_SYSCALL(res); + /* Should we still be alive here? Don't think so. */ + /* Actually, above comment is wrong. execve can fail, just + like any other syscall -- typically the file to exec does + not exist. Hence: */ + vg_assert(VG_(is_kerror)(res)); + break; + + case __NR_exit: /* syscall 1 */ + /* void _exit(int status); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("exit ( %d )\n", arg1); + VG_(message)(Vg_UserMsg, + "Warning: client exiting by calling exit(%d). Bye!", + arg1); + + KERNEL_DO_SYSCALL(res); + /* Definitely should not be alive here :) */ + break; + + /* !!!!!!!!!!!!!!!!!!!!! end !!!!!!!!!!!!!!!!!!!!! */ + + case __NR_access: /* syscall 33 */ + /* int access(const char *pathname, int mode); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("access ( %p, %d )\n", arg1,arg2); + must_be_readable_asciiz( "access(pathname)", arg1 ); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_alarm: /* syscall 27 */ + /* unsigned int alarm(unsigned int seconds); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("alarm ( %d )\n", arg1); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_brk: /* syscall 45 */ + /* Haven't a clue if this is really right. 
*/ + /* int brk(void *end_data_segment); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("brk ( %p ) --> ",arg1); + KERNEL_DO_SYSCALL(res); + if (VG_(clo_trace_syscalls)) + VG_(printf)("0x%x\n", res); + + if (!VG_(is_kerror)(res)) { + if (arg1 == 0) { + /* Just asking where the current end is. (???) */ + VGM_(curr_dataseg_end) = res; + } else + if (arg1 < VGM_(curr_dataseg_end)) { + /* shrinking the data segment. */ + make_noaccess( (Addr)arg1, + VGM_(curr_dataseg_end)-arg1 ); + VGM_(curr_dataseg_end) = arg1; + } else + if (arg1 > VGM_(curr_dataseg_end) && res != 0) { + /* asked for more memory, and got it */ + /* + VG_(printf)("BRK: new area %x .. %x\n", + VGM_(curr_dataseg_end, arg1-1 ); + */ + make_writable ( (Addr)VGM_(curr_dataseg_end), + arg1-VGM_(curr_dataseg_end) ); + VGM_(curr_dataseg_end) = arg1; + } + } + break; + + case __NR_chdir: /* syscall 12 */ + /* int chdir(const char *path); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("chdir ( %p )\n", arg1); + must_be_readable_asciiz( "chdir(path)", arg1 ); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_chmod: /* syscall 15 */ + /* int chmod(const char *path, mode_t mode); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("chmod ( %p, %d )\n", arg1,arg2); + must_be_readable_asciiz( "chmod(path)", arg1 ); + KERNEL_DO_SYSCALL(res); + break; + +# if defined(__NR_chown32) + case __NR_chown32: /* syscall 212 */ +# endif +# if defined(__NR_lchown32) + case __NR_lchown32: /* syscall 198 */ +# endif + case __NR_chown: /* syscall 16 */ + /* int chown(const char *path, uid_t owner, gid_t group); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("chown ( %p, 0x%x, 0x%x )\n", arg1,arg2,arg3); + must_be_readable_asciiz( "chown(path)", arg1 ); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_close: /* syscall 6 */ + /* int close(int fd); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("close ( %d )\n",arg1); + /* Detect and negate attempts by the client to close Valgrind's + logfile fd ... 
*/ + if (arg1 == VG_(clo_logfile_fd)) { + VG_(message)(Vg_UserMsg, + "Warning: client attempted to close " + "Valgrind's logfile fd (%d).", + VG_(clo_logfile_fd)); + VG_(message)(Vg_UserMsg, + " Use --logfile-fd= to select an " + "alternative logfile fd." ); + } else { + KERNEL_DO_SYSCALL(res); + } + break; + + case __NR_dup: /* syscall 41 */ + /* int dup(int oldfd); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("dup ( %d ) --> ", arg1); + KERNEL_DO_SYSCALL(res); + if (VG_(clo_trace_syscalls)) + VG_(printf)("%d\n", res); + break; + + case __NR_dup2: /* syscall 63 */ + /* int dup2(int oldfd, int newfd); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("dup2 ( %d, %d ) ...\n", arg1,arg2); + KERNEL_DO_SYSCALL(res); + if (VG_(clo_trace_syscalls)) + VG_(printf)("SYSCALL[%d, %d] dup2 ( %d, %d ) = %d\n", + VG_(syscall_depth), VG_(getpid)(), + arg1, arg2, res); + break; + + case __NR_fcntl: /* syscall 55 */ + /* int fcntl(int fd, int cmd); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("fcntl ( %d, %d )\n",arg1,arg2); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_fchdir: /* syscall 133 */ + /* int fchdir(int fd); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("fchdir ( %d )\n", arg1); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_fchmod: /* syscall 94 */ + /* int fchmod(int fildes, mode_t mode); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("fchmod ( %d, %d )\n", arg1,arg2); + KERNEL_DO_SYSCALL(res); + break; +# if defined(__NR_fcntl64) + case __NR_fcntl64: /* syscall 221 */ + /* I don't know what the prototype for this is supposed to be. */ + /* ??? int fcntl(int fd, int cmd); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("fcntl64 (?!) 
( %d, %d )\n", arg1,arg2); + KERNEL_DO_SYSCALL(res); + break; +# endif + + case __NR_fstat: /* syscall 108 */ + /* int fstat(int filedes, struct stat *buf); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("fstat ( %d, %p )\n",arg1,arg2); + must_be_writable( "fstat", arg2, sizeof(struct stat) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res)) + make_readable( arg2, sizeof(struct stat) ); + break; + + case __NR_vfork: /* syscall 190 */ + /* pid_t vfork(void); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("vfork ( ) ... becomes ... "); + /* KLUDGE: we prefer to do a fork rather than vfork. + vfork gives a SIGSEGV, and the stated semantics looks + pretty much impossible for us. */ + VG_(baseBlock)[VGOFF_(m_eax)] = __NR_fork; + /* fall through ... */ + case __NR_fork: /* syscall 2 */ + /* pid_t fork(void); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("fork ()\n"); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_fsync: /* syscall 118 */ + /* int fsync(int fd); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("fsync ( %d )\n", arg1); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_ftruncate: /* syscall 93 */ + /* int ftruncate(int fd, size_t length); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("ftruncate ( %d, %d )\n", arg1,arg2); + KERNEL_DO_SYSCALL(res); + break; + +# if defined(__NR_getalife) + case __NR_getalife: /* syscall 999 */ + /* If you've read this far, you're a really sad person. Turn + off your computer, leave the building, meet people, and get + a life. Go learn to dance, or some such. 
*/ + break; +# endif + + case __NR_getdents: /* syscall 141 */ + /* int getdents(unsigned int fd, struct dirent *dirp, + unsigned int count); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getdents ( %d, %p, %d )\n",arg1,arg2,arg3); + must_be_writable( "getdents(dirp)", arg2, arg3 ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res > 0) + make_readable( arg2, res ); + break; + +# if defined(__NR_getdents64) + case __NR_getdents64: /* syscall 220 */ + /* int getdents(unsigned int fd, struct dirent64 *dirp, + unsigned int count); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getdents64 ( %d, %p, %d )\n",arg1,arg2,arg3); + must_be_writable( "getdents64(dirp)", arg2, arg3 ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res > 0) + make_readable( arg2, res ); + break; +# endif + +# if defined(__NR_getgroups32) + case __NR_getgroups32: /* syscall 205 */ +# endif + case __NR_getgroups: /* syscall 80 */ + /* int getgroups(int size, gid_t list[]); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getgroups ( %d, %p )\n", arg1, arg2); + if (arg1 > 0) + must_be_writable ( "getgroups(list)", arg2, + arg1 * sizeof(gid_t) ); + KERNEL_DO_SYSCALL(res); + if (arg1 > 0 && !VG_(is_kerror)(res) && res > 0) + make_readable ( arg2, res * sizeof(gid_t) ); + break; + + case __NR_getcwd: /* syscall 183 */ + /* char *getcwd(char *buf, size_t size); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getcwd ( %p, %d )\n",arg1,arg2); + must_be_writable( "getcwd(buf)", arg1, arg2 ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res != (Addr)NULL) + make_readable ( arg1, arg2 ); + /* Not really right -- really we should have the asciiz + string starting at arg1 readable, or up to arg2 bytes, + whichever finishes first. 
*/ + break; + + case __NR_geteuid: /* syscall 49 */ + /* uid_t geteuid(void); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("geteuid ( )\n"); + KERNEL_DO_SYSCALL(res); + break; + +# if defined(__NR_geteuid32) + case __NR_geteuid32: /* syscall 201 */ + /* ?? uid_t geteuid32(void); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("geteuid32(?) ( )\n"); + KERNEL_DO_SYSCALL(res); + break; +# endif + + case __NR_getegid: /* syscall 50 */ + /* gid_t getegid(void); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getegid ()\n"); + KERNEL_DO_SYSCALL(res); + break; + +# if defined(__NR_getegid32) + case __NR_getegid32: /* syscall 202 */ + /* gid_t getegid32(void); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getegid32 ()\n"); + KERNEL_DO_SYSCALL(res); + break; +# endif + + case __NR_getgid: /* syscall 47 */ + /* gid_t getgid(void); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getgid ()\n"); + KERNEL_DO_SYSCALL(res); + break; + +# if defined(__NR_getgid32) + case __NR_getgid32: /* syscall 200 */ + /* gid_t getgid32(void); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getgid32 ()\n"); + KERNEL_DO_SYSCALL(res); + break; +# endif + + case __NR_getpid: /* syscall 20 */ + /* pid_t getpid(void); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getpid ()\n"); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_getpgid: /* syscall 132 */ + /* pid_t getpgid(pid_t pid); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getpgid ( %d )\n", arg1); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_getpgrp: /* syscall 65 */ + /* pid_t getpprp(void); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getpgrp ()\n"); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_getppid: /* syscall 64 */ + /* pid_t getppid(void); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getppid ()\n"); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_getresgid: /* syscall 171 */ + /* int getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getresgid ( %p, %p, %p 
)\n", arg1,arg2,arg3); + must_be_writable ( "getresgid(rgid)", arg1, sizeof(gid_t) ); + must_be_writable ( "getresgid(egid)", arg2, sizeof(gid_t) ); + must_be_writable ( "getresgid(sgid)", arg3, sizeof(gid_t) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) { + make_readable ( arg1, sizeof(gid_t) ); + make_readable ( arg2, sizeof(gid_t) ); + make_readable ( arg3, sizeof(gid_t) ); + } + break; + +# if defined(__NR_getresgid32) + case __NR_getresgid32: /* syscall 211 */ + /* int getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getresgid32 ( %p, %p, %p )\n", arg1,arg2,arg3); + must_be_writable ( "getresgid32(rgid)", arg1, sizeof(gid_t) ); + must_be_writable ( "getresgid32(egid)", arg2, sizeof(gid_t) ); + must_be_writable ( "getresgid32(sgid)", arg3, sizeof(gid_t) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) { + make_readable ( arg1, sizeof(gid_t) ); + make_readable ( arg2, sizeof(gid_t) ); + make_readable ( arg3, sizeof(gid_t) ); + } + break; +# endif + + case __NR_getresuid: /* syscall 165 */ + /* int getresuid(uid_t *ruid, uid_t *euid, uid_t *suid); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getresuid ( %p, %p, %p )\n", arg1,arg2,arg3); + must_be_writable ( "getresuid(ruid)", arg1, sizeof(uid_t) ); + must_be_writable ( "getresuid(euid)", arg2, sizeof(uid_t) ); + must_be_writable ( "getresuid(suid)", arg3, sizeof(uid_t) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) { + make_readable ( arg1, sizeof(uid_t) ); + make_readable ( arg2, sizeof(uid_t) ); + make_readable ( arg3, sizeof(uid_t) ); + } + break; + +# if defined(__NR_getresuid32) + case __NR_getresuid32: /* syscall 209 */ + /* int getresuid(uid_t *ruid, uid_t *euid, uid_t *suid); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getresuid32 ( %p, %p, %p )\n", arg1,arg2,arg3); + must_be_writable ( "getresuid32(ruid)", arg1, sizeof(uid_t) ); + must_be_writable ( "getresuid32(euid)", arg2, 
sizeof(uid_t) ); + must_be_writable ( "getresuid32(suid)", arg3, sizeof(uid_t) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) { + make_readable ( arg1, sizeof(uid_t) ); + make_readable ( arg2, sizeof(uid_t) ); + make_readable ( arg3, sizeof(uid_t) ); + } + break; +# endif + +# if defined(__NR_ugetrlimit) + case __NR_ugetrlimit: /* syscall 191 */ +# endif + case __NR_getrlimit: /* syscall 76 */ + /* int getrlimit (int resource, struct rlimit *rlim); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getrlimit ( %d, %p )\n", arg1,arg2); + must_be_writable( "getrlimit(rlim)", arg2, sizeof(struct rlimit) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) + make_readable( arg2, sizeof(struct rlimit) ); + break; + + case __NR_getrusage: /* syscall 77 */ + /* int getrusage (int who, struct rusage *usage); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getrusage ( %d, %p )\n", arg1,arg2); + must_be_writable( "getrusage(usage)", arg2, sizeof(struct rusage) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) + make_readable(arg2, sizeof(struct rusage) ); + break; + + case __NR_gettimeofday: /* syscall 78 */ + /* int gettimeofday(struct timeval *tv, struct timezone *tz); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("gettimeofday ( %p, %p )\n",arg1,arg2); + must_be_writable( "gettimeofday(tv)", arg1, sizeof(struct timeval) ); + if (arg2 != 0) + must_be_writable( "gettimeofday(tz)", arg2, + sizeof(struct timezone) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) { + make_readable( arg1, sizeof(struct timeval) ); + if (arg2 != 0) + make_readable( arg2, sizeof(struct timezone) ); + } + break; + + case __NR_getuid: /* syscall 24 */ + /* uid_t getuid(void); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("getuid ( )\n"); + KERNEL_DO_SYSCALL(res); + break; + +# if defined(__NR_getuid32) + case __NR_getuid32: /* syscall 199 */ + /* ???uid_t getuid32(void); */ + if (VG_(clo_trace_syscalls)) + 
VG_(printf)("getuid32 ( )\n"); + KERNEL_DO_SYSCALL(res); + break; +# endif + + case __NR_ipc: /* syscall 117 */ + /* int ipc ( unsigned int call, int first, int second, + int third, void *ptr, long fifth); */ + { + UInt arg6 = VG_(baseBlock)[VGOFF_(m_ebp)]; + + if (VG_(clo_trace_syscalls)) + VG_(printf)("ipc ( %d, %d, %d, %d, %p, %d )\n", + arg1,arg2,arg3,arg4,arg5,arg6); + switch (arg1 /* call */) { + case 1: /* IPCOP_semop */ + case 2: /* IPCOP_semget */ + case 3: /* IPCOP_semctl */ + case 11: /* IPCOP_msgsnd */ + case 12: /* IPCOP_msgrcv */ + case 13: /* IPCOP_msgget */ + case 14: /* IPCOP_msgctl */ + KERNEL_DO_SYSCALL(res); + break; + case 21: /* IPCOP_shmat */ + { + Int shmid = arg2; + Int shmflag = arg3; + UInt addr; + + KERNEL_DO_SYSCALL(res); + + if ( VG_(is_kerror) ( res ) ) + break; + + /* force readability. before the syscall it is + * indeed uninitialized, as can be seen in + * glibc/sysdeps/unix/sysv/linux/shmat.c */ + make_readable ( arg4, sizeof( ULong ) ); + + addr = safe_dereference ( arg4, 0 ); + if ( addr > 0 ) { + UInt segmentSize = get_shm_size ( shmid ); + if ( segmentSize > 0 ) { + if ( shmflag & SHM_RDONLY ) + make_readable ( addr, segmentSize ); + else + make_readwritable ( addr, segmentSize ); + } + } + break; + } + case 22: /* IPCOP_shmdt */ + KERNEL_DO_SYSCALL(res); + /* ### FIXME: this should call make_noaccess on the + * area passed to shmdt. But there's no way to + * figure out the size of the shared memory segment + * just from the address... Maybe we want to keep a + * copy of the exiting mappings inside valgrind? 
*/ + break; + case 23: /* IPCOP_shmget */ + KERNEL_DO_SYSCALL(res); + break; + case 24: /* IPCOP_shmctl */ + { + if ( arg3 > 0 ) { + must_be_readable ( "shmctl(buf)", arg3, + sizeof( struct shmid_ds ) ); + + if ( arg2 == SHM_STAT ) + must_be_writable( "shmctl(IPC_STAT,buf)", arg3, + sizeof( struct shmid_ds ) ); + } + + KERNEL_DO_SYSCALL(res); + break; + } + default: + VG_(message)(Vg_DebugMsg, + "FATAL: unhandled syscall(ipc) %d", + arg1 ); + VG_(panic)("... bye!\n"); + break; /*NOTREACHED*/ + } + } + break; + + case __NR_ioctl: /* syscall 54 */ + /* int ioctl(int d, int request, ...) + [The "third" argument is traditionally char *argp, + and will be so named for this discussion.] + */ + /* + VG_(message)( + Vg_DebugMsg, + "is an IOCTL, request = 0x%x, d = %d, argp = 0x%x", + arg2,arg1,arg3); + */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("ioctl ( %d, 0x%x, %p )\n",arg1,arg2,arg3); + switch (arg2 /* request */) { + case TCSETS: + case TCSETSW: + case TCSETSF: + must_be_readable( "ioctl(TCSETSW)", arg3, + VKI_SIZEOF_STRUCT_TERMIOS ); + KERNEL_DO_SYSCALL(res); + break; + case TCGETS: + must_be_writable( "ioctl(TCGETS)", arg3, + VKI_SIZEOF_STRUCT_TERMIOS ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) + make_readable ( arg3, VKI_SIZEOF_STRUCT_TERMIOS ); + break; + case TCSBRK: + case TCSBRKP: + case TCFLSH: + /* These just take an int by value */ + KERNEL_DO_SYSCALL(res); + break; + case TIOCGWINSZ: + must_be_writable( "ioctl(TIOCGWINSZ)", arg3, + sizeof(struct winsize) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) + make_readable ( arg3, sizeof(struct winsize) ); + break; + case TIOCSWINSZ: + must_be_readable( "ioctl(TIOCSWINSZ)", arg3, + sizeof(struct winsize) ); + KERNEL_DO_SYSCALL(res); + break; + case TIOCGPTN: /* Get Pty Number (of pty-mux device) */ + must_be_writable("ioctl(TIOCGPTN)", arg3, sizeof(int) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) + make_readable ( arg3, sizeof(int)); + 
break; + case TIOCSPTLCK: /* Lock/unlock Pty */ + must_be_readable( "ioctl(TIOCSPTLCK)", arg3, sizeof(int) ); + KERNEL_DO_SYSCALL(res); + break; + case FIONBIO: + must_be_readable( "ioctl(FIONBIO)", arg3, sizeof(int) ); + KERNEL_DO_SYSCALL(res); + break; + case FIOASYNC: + must_be_readable( "ioctl(FIOASYNC)", arg3, sizeof(int) ); + KERNEL_DO_SYSCALL(res); + break; + case FIONREAD: + must_be_writable( "ioctl(FIONREAD)", arg3, sizeof(int) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) + make_readable( arg3, sizeof(int) ); + break; + + /* If you get compilation problems here, change the #if + 1 to #if 0 and get rid of in + vg_unsafe.h. */ +# if 1 + case SG_SET_COMMAND_Q: + must_be_readable( "ioctl(SG_SET_COMMAND_Q)", arg3, sizeof(int) ); + KERNEL_DO_SYSCALL(res); + break; +# if defined(SG_IO) + case SG_IO: + must_be_writable( "ioctl(SG_IO)", arg3, + sizeof(struct sg_io_hdr) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) + make_readable (arg3, sizeof(struct sg_io_hdr)); + break; +# endif /* SG_IO */ + case SG_GET_SCSI_ID: + /* Note: sometimes sg_scsi_id is called sg_scsi_id_t */ + must_be_writable( "ioctl(SG_GET_SCSI_ID)", arg3, + sizeof(struct sg_scsi_id) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) + make_readable (arg3, sizeof(struct sg_scsi_id)); + break; + case SG_SET_RESERVED_SIZE: + must_be_readable( "ioctl(SG_SET_RESERVED_SIZE)", + arg3, sizeof(int) ); + KERNEL_DO_SYSCALL(res); + break; + case SG_SET_TIMEOUT: + must_be_readable( "ioctl(SG_SET_TIMEOUT)", arg3, sizeof(int) ); + KERNEL_DO_SYSCALL(res); + break; + case SG_GET_RESERVED_SIZE: + must_be_writable( "ioctl(SG_GET_RESERVED_SIZE)", arg3, + sizeof(int) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) + make_readable (arg3, sizeof(int)); + break; + case SG_GET_TIMEOUT: + must_be_writable( "ioctl(SG_GET_TIMEOUT)", arg3, sizeof(int) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) + make_readable (arg3, 
sizeof(int)); + break; + case SG_GET_VERSION_NUM: + must_be_readable( "ioctl(SG_GET_VERSION_NUM)", + arg3, sizeof(int) ); + KERNEL_DO_SYSCALL(res); + break; +# endif + + case IIOCGETCPS: + /* In early 2.4 kernels, ISDN_MAX_CHANNELS was only defined + * when KERNEL was. I never saw a larger value than 64 though */ +# ifndef ISDN_MAX_CHANNELS +# define ISDN_MAX_CHANNELS 64 +# endif + must_be_writable( "ioctl(IIOCGETCPS)", arg3, + ISDN_MAX_CHANNELS + * 2 * sizeof(unsigned long) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) + make_readable ( arg3, ISDN_MAX_CHANNELS + * 2 * sizeof(unsigned long) ); + break; + case IIOCNETGPN: + must_be_readable( "ioctl(IIOCNETGPN)", + (UInt)&((isdn_net_ioctl_phone *)arg3)->name, + sizeof(((isdn_net_ioctl_phone *)arg3)->name) ); + must_be_writable( "ioctl(IIOCNETGPN)", arg3, + sizeof(isdn_net_ioctl_phone) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) + make_readable ( arg3, sizeof(isdn_net_ioctl_phone) ); + break; + + /* These all use struct ifreq AFAIK */ + case SIOCGIFINDEX: + case SIOCGIFFLAGS: /* get flags */ + case SIOCGIFHWADDR: /* Get hardware address */ + case SIOCGIFMTU: /* get MTU size */ + case SIOCGIFADDR: /* get PA address */ + case SIOCGIFNETMASK: /* get network PA mask */ + case SIOCGIFMETRIC: /* get metric */ + case SIOCGIFMAP: /* Get device parameters */ + case SIOCGIFTXQLEN: /* Get the tx queue length */ + case SIOCGIFDSTADDR: /* get remote PA address */ + case SIOCGIFBRDADDR: /* get broadcast PA address */ + case SIOCGIFNAME: /* get iface name */ + must_be_writable("ioctl(SIOCGIFINDEX)", arg3, + sizeof(struct ifreq)); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) + make_readable (arg3, sizeof(struct ifreq)); + break; + case SIOCGIFCONF: /* get iface list */ + /* WAS: + must_be_writable("ioctl(SIOCGIFCONF)", arg3, + sizeof(struct ifconf)); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) + make_readable (arg3, sizeof(struct ifconf)); + */ 
+ must_be_readable("ioctl(SIOCGIFCONF)", arg3, + sizeof(struct ifconf)); + if ( arg3 ) { + // TODO len must be readable and writable + // buf pointer only needs to be readable + struct ifconf *ifc = (struct ifconf *) arg3; + must_be_writable("ioctl(SIOCGIFCONF).ifc_buf", + (Addr)(ifc->ifc_buf), (UInt)(ifc->ifc_len) ); + } + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0 && arg3 ) { + struct ifconf *ifc = (struct ifconf *) arg3; + make_readable ( (Addr)(ifc->ifc_buf), (UInt)(ifc->ifc_len) ); + } + break; + case SIOCGSTAMP: + must_be_writable("ioctl(SIOCGSTAMP)", arg3, + sizeof(struct timeval)); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) + make_readable (arg3, sizeof(struct timeval)); + break; + case SIOCGRARP: /* get RARP table entry */ + case SIOCGARP: /* get ARP table entry */ + must_be_writable("ioctl(SIOCGARP)", arg3, + sizeof(struct arpreq)); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) + make_readable (arg3, sizeof(struct arpreq)); + break; + + case SIOCSIFFLAGS: /* set flags */ + case SIOCSIFMAP: /* Set device parameters */ + case SIOCSIFTXQLEN: /* Set the tx queue length */ + case SIOCSIFDSTADDR: /* set remote PA address */ + case SIOCSIFBRDADDR: /* set broadcast PA address */ + case SIOCSIFNETMASK: /* set network PA mask */ + case SIOCSIFMETRIC: /* set metric */ + case SIOCSIFADDR: /* set PA address */ + case SIOCSIFMTU: /* set MTU size */ + case SIOCSIFHWADDR: /* set hardware address */ + must_be_readable("ioctl(SIOCSIFFLAGS)", arg3, + sizeof(struct ifreq)); + KERNEL_DO_SYSCALL(res); + break; + /* Routing table calls. */ + case SIOCADDRT: /* add routing table entry */ + case SIOCDELRT: /* delete routing table entry */ + must_be_readable("ioctl(SIOCADDRT/DELRT)", arg3, + sizeof(struct rtentry)); + KERNEL_DO_SYSCALL(res); + break; + + /* RARP cache control calls. */ + case SIOCDRARP: /* delete RARP table entry */ + case SIOCSRARP: /* set RARP table entry */ + /* ARP cache control calls. 
*/ + case SIOCSARP: /* set ARP table entry */ + case SIOCDARP: /* delete ARP table entry */ + must_be_readable("ioctl(SIOCSIFFLAGS)", arg3, + sizeof(struct ifreq)); + KERNEL_DO_SYSCALL(res); + break; + + case SIOCSPGRP: + must_be_readable( "ioctl(SIOCSPGRP)", arg3, sizeof(int) ); + KERNEL_DO_SYSCALL(res); + break; + + /* linux/soundcard interface (OSS) */ + case SNDCTL_SEQ_GETOUTCOUNT: + case SNDCTL_SEQ_GETINCOUNT: + case SNDCTL_SEQ_PERCMODE: + case SNDCTL_SEQ_TESTMIDI: + case SNDCTL_SEQ_RESETSAMPLES: + case SNDCTL_SEQ_NRSYNTHS: + case SNDCTL_SEQ_NRMIDIS: + case SNDCTL_SEQ_GETTIME: + case SNDCTL_DSP_GETFMTS: + case SNDCTL_DSP_GETTRIGGER: + case SNDCTL_DSP_GETODELAY: +# if defined(SNDCTL_DSP_GETSPDIF) + case SNDCTL_DSP_GETSPDIF: +# endif + case SNDCTL_DSP_GETCAPS: + case SOUND_PCM_READ_RATE: + case SOUND_PCM_READ_CHANNELS: + case SOUND_PCM_READ_BITS: + case (SOUND_PCM_READ_BITS|0x40000000): /* what the fuck ? */ + case SOUND_PCM_READ_FILTER: + must_be_writable("ioctl(SNDCTL_XXX|SOUND_XXX (SIOR, int))", arg3, + sizeof(int)); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) + make_readable (arg3, sizeof(int)); + break; + case SNDCTL_SEQ_CTRLRATE: + case SNDCTL_DSP_SPEED: + case SNDCTL_DSP_STEREO: + case SNDCTL_DSP_GETBLKSIZE: + case SNDCTL_DSP_CHANNELS: + case SOUND_PCM_WRITE_FILTER: + case SNDCTL_DSP_SUBDIVIDE: + case SNDCTL_DSP_SETFRAGMENT: + case SNDCTL_DSP_GETCHANNELMASK: + case SNDCTL_DSP_BIND_CHANNEL: + case SNDCTL_TMR_TIMEBASE: + case SNDCTL_TMR_TEMPO: + case SNDCTL_TMR_SOURCE: + case SNDCTL_MIDI_PRETIME: + case SNDCTL_MIDI_MPUMODE: + must_be_readable("ioctl(SNDCTL_XXX|SOUND_XXX (SIOWR, int))", + arg3, sizeof(int)); + must_be_writable("ioctl(SNDCTL_XXX|SOUND_XXX (SIOWR, int))", + arg3, sizeof(int)); + KERNEL_DO_SYSCALL(res); + break; + case SNDCTL_DSP_GETOSPACE: + case SNDCTL_DSP_GETISPACE: + must_be_writable("ioctl(SNDCTL_XXX|SOUND_XXX " + "(SIOR, audio_buf_info))", arg3, + sizeof(audio_buf_info)); + KERNEL_DO_SYSCALL(res); + if 
(!VG_(is_kerror)(res) && res == 0) + make_readable (arg3, sizeof(audio_buf_info)); + break; + case SNDCTL_DSP_SETTRIGGER: + must_be_readable("ioctl(SNDCTL_XXX|SOUND_XXX (SIOW, int))", + arg3, sizeof(int)); + KERNEL_DO_SYSCALL(res); + break; + + /* We don't have any specific information on it, so + try to do something reasonable based on direction and + size bits. The encoding scheme is described in + /usr/include/asm/ioctl.h. */ + default: { + UInt dir = _IOC_DIR(arg2); + UInt size = _IOC_SIZE(arg2); + if (/* size == 0 || */ dir == _IOC_NONE) { + VG_(message)(Vg_UserMsg, + "Warning: noted but unhandled ioctl 0x%x" + " with no size/direction hints", + arg2); + VG_(message)(Vg_UserMsg, + " This could cause spurious value errors" + " to appear."); + VG_(message)(Vg_UserMsg, + " See README_MISSING_SYSCALL_OR_IOCTL for guidance on" + " writing a proper wrapper." ); + } else { + if ((dir & _IOC_READ) && size > 0) + must_be_readable("ioctl(generic)", arg3, size); + if ((dir & _IOC_WRITE) && size > 0) + must_be_writable("ioctl(generic)", arg3, size); + } + KERNEL_DO_SYSCALL(res); + if (size > 0 && (dir & _IOC_WRITE) + && !VG_(is_kerror)(res) && res == 0) + make_readable (arg3, size); + break; + } + } + break; + + case __NR_kill: /* syscall 37 */ + /* int kill(pid_t pid, int sig); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("kill ( %d, %d )\n", arg1,arg2); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_link: /* syscall 9 */ + /* int link(const char *oldpath, const char *newpath); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("link ( %p, %p)\n", arg1, arg2); + must_be_readable_asciiz( "link(oldpath)", arg1); + must_be_readable_asciiz( "link(newpath)", arg2); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_lseek: /* syscall 19 */ + /* off_t lseek(int fildes, off_t offset, int whence); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("lseek ( %d, %d, %d )\n",arg1,arg2,arg3); + KERNEL_DO_SYSCALL(res); + break; + + case __NR__llseek: /* syscall 140 */ + /* int 
_llseek(unsigned int fd, unsigned long offset_high, + unsigned long offset_low, + loff_t * result, unsigned int whence); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("llseek ( %d, 0x%x, 0x%x, %p, %d )\n", + arg1,arg2,arg3,arg4,arg5); + must_be_writable( "llseek(result)", arg4, sizeof(loff_t)); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) + make_readable( arg4, sizeof(loff_t) ); + break; + + case __NR_lstat: /* syscall 107 */ + /* int lstat(const char *file_name, struct stat *buf); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("lstat ( %p, %p )\n",arg1,arg2); + must_be_readable_asciiz( "lstat(file_name)", arg1 ); + must_be_writable( "lstat(buf)", arg2, sizeof(struct stat) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) { + make_readable( arg2, sizeof(struct stat) ); + } + break; + +# if defined(__NR_lstat64) + case __NR_lstat64: /* syscall 196 */ + /* int lstat64(const char *file_name, struct stat64 *buf); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("lstat64 ( %p, %p )\n",arg1,arg2); + must_be_readable_asciiz( "lstat64(file_name)", arg1 ); + must_be_writable( "lstat64(buf)", arg2, sizeof(struct stat64) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0) { + make_readable( arg2, sizeof(struct stat64) ); + } + break; +# endif + + case __NR_mkdir: /* syscall 39 */ + /* int mkdir(const char *pathname, mode_t mode); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("mkdir ( %p, %d )\n", arg1,arg2); + must_be_readable_asciiz( "mkdir(pathname)", arg1 ); + KERNEL_DO_SYSCALL(res); + break; + +# if defined(__NR_mmap2) + case __NR_mmap2: /* syscall 192 */ + /* My impression is that this is exactly like __NR_mmap + except that all 6 args are passed in regs, rather than in + a memory-block. 
*/ + /* void* mmap(void *start, size_t length, int prot, + int flags, int fd, off_t offset); + */ + { + UInt arg6 = VG_(baseBlock)[VGOFF_(m_ebp)]; + if (VG_(clo_trace_syscalls)) + VG_(printf)("mmap2 ( %p, %d, %d, %d, %d, %d )\n", + arg1, arg2, arg3, arg4, arg5, arg6 ); + KERNEL_DO_SYSCALL(res); + /* !!! shouldn't we also be doing the symtab loading stuff as + in __NR_mmap ? */ + if (!VG_(is_kerror)(res)) + approximate_mmap_permissions( (Addr)res, arg2, arg3 ); + } + break; +# endif + + case __NR_mmap: /* syscall 90 */ + /* void* mmap(void *start, size_t length, int prot, + int flags, int fd, off_t offset); + */ + { + Bool arg_block_readable + = VG_(clo_instrument) + ? VGM_(check_readable)(arg1, 6*sizeof(UInt), NULL) + : True; + must_be_readable( "mmap(args)", arg1, 6*sizeof(UInt) ); + if (arg_block_readable) { + UInt* arg_block = (UInt*)arg1; + UInt arg6; + arg1 = arg_block[0]; + arg2 = arg_block[1]; + arg3 = arg_block[2]; + arg4 = arg_block[3]; + arg5 = arg_block[4]; + arg6 = arg_block[5]; + if (VG_(clo_trace_syscalls)) + VG_(printf)("mmap ( %p, %d, %d, %d, %d, %d )\n", + arg1, arg2, arg3, arg4, arg5, arg6 ); + } + KERNEL_DO_SYSCALL(res); + if (arg_block_readable && !VG_(is_kerror)(res)) + approximate_mmap_permissions( (Addr)res, arg2, arg3 ); + if (arg_block_readable && !VG_(is_kerror)(res) + && (arg3 & PROT_EXEC)) { + /* The client mmap'ed a segment with executable + permissions. Tell the symbol-table loader, so that it + has an opportunity to pick up more symbols if this mmap + was caused by the client loading a new .so via + dlopen(). This is important for debugging KDE. */ + VG_(read_symbols)(); + } + } + + break; + + case __NR_mprotect: /* syscall 125 */ + /* int mprotect(const void *addr, size_t len, int prot); */ + /* should addr .. addr+len-1 be checked before the call? 
*/ + if (VG_(clo_trace_syscalls)) + VG_(printf)("mprotect ( %p, %d, %d )\n", arg1,arg2,arg3); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res)) + approximate_mmap_permissions ( arg1, arg2, arg3 ); + break; + + case __NR_munmap: /* syscall 91 */ + /* int munmap(void *start, size_t length); */ + /* should start .. start+length-1 be checked before the call? */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("munmap ( %p, %d )\n", arg1,arg2); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res)) { + /* Mash around start and length so that the area passed to + make_noaccess() exactly covers an integral number of + pages. If we don't do that, our idea of addressible + memory diverges from that of the kernel's, which causes + the leak detector to crash. */ + Addr start = arg1; + Addr length = arg2; + while ((start % VKI_BYTES_PER_PAGE) > 0) { start--; length++; } + while (((start+length) % VKI_BYTES_PER_PAGE) > 0) { length++; } + /* + VG_(printf)("MUNMAP: correct (%p for %d) to (%p for %d) %s\n", + arg1, arg2, start, length, (arg1!=start || arg2!=length) + ? "CHANGE" : ""); + */ + make_noaccess( start, length ); + /* Tell our symbol table machinery about this, so that if + this happens to be a .so being unloaded, the relevant + symbols are removed too. */ + VG_(symtab_notify_munmap) ( start, length ); + } + break; + + case __NR_nanosleep: /* syscall 162 */ + /* int nanosleep(const struct timespec *req, struct timespec *rem); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("nanosleep ( %p, %p )\n", arg1,arg2); + must_be_readable ( "nanosleep(req)", arg1, + sizeof(struct timespec) ); + if (arg2 != (UInt)NULL) + must_be_writable ( "nanosleep(rem)", arg2, + sizeof(struct timespec) ); + KERNEL_DO_SYSCALL(res); + /* Somewhat bogus ... is only written by the kernel if + res == -1 && errno == EINTR. 
*/ + if (!VG_(is_kerror)(res) && arg2 != (UInt)NULL) + make_readable ( arg2, sizeof(struct timespec) ); + break; + + case __NR__newselect: /* syscall 142 */ + /* int select(int n, + fd_set *readfds, fd_set *writefds, fd_set *exceptfds, + struct timeval *timeout); + */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("newselect ( %d, %p, %p, %p, %p )\n", + arg1,arg2,arg3,arg4,arg5); + if (arg2 != 0) + must_be_readable( "newselect(readfds)", + arg2, arg1/8 /* __FD_SETSIZE/8 */ ); + if (arg3 != 0) + must_be_readable( "newselect(writefds)", + arg3, arg1/8 /* __FD_SETSIZE/8 */ ); + if (arg4 != 0) + must_be_readable( "newselect(exceptfds)", + arg4, arg1/8 /* __FD_SETSIZE/8 */ ); + if (arg5 != 0) + must_be_readable( "newselect(timeout)", arg5, + sizeof(struct timeval) ); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_open: /* syscall 5 */ + /* int open(const char *pathname, int flags); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("open ( %p(%s), %d ) --> ",arg1,arg1,arg2); + must_be_readable_asciiz( "open(pathname)", arg1 ); + KERNEL_DO_SYSCALL(res); + if (VG_(clo_trace_syscalls)) + VG_(printf)("%d\n",res); + break; + + case __NR_pipe: /* syscall 42 */ + /* int pipe(int filedes[2]); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("pipe ( %p ) ...\n", arg1); + must_be_writable( "pipe(filedes)", arg1, 2*sizeof(int) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res)) + make_readable ( arg1, 2*sizeof(int) ); + if (VG_(clo_trace_syscalls) && !VG_(is_kerror)(res)) + VG_(printf)("SYSCALL[%d, %d] pipe --> (rd %d, wr %d)\n", + VG_(syscall_depth), VG_(getpid)(), + ((UInt*)arg1)[0], ((UInt*)arg1)[1] ); + break; + + case __NR_poll: /* syscall 168 */ + /* struct pollfd { + int fd; -- file descriptor + short events; -- requested events + short revents; -- returned events + }; + int poll(struct pollfd *ufds, unsigned int nfds, + int timeout) + */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("poll ( %d, %d, %d )\n",arg1,arg2,arg3); + /* In fact some parts of this struct should be 
readable too. + This should be fixed properly. */ + must_be_writable( "poll(ufds)", arg1, arg2 * sizeof(struct pollfd) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res > 0) { + Int i; + struct pollfd * arr = (struct pollfd *)arg1; + for (i = 0; i < arg2; i++) + make_readable( (Addr)(&arr[i].revents), sizeof(Short) ); + } + break; + + case __NR_read: /* syscall 3 */ + /* size_t read(int fd, void *buf, size_t count); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("read ( %d, %p, %d ) ...\n",arg1,arg2,arg3); + must_be_writable( "read(buf)", arg2, arg3 ); + KERNEL_DO_SYSCALL(res); + if (VG_(clo_trace_syscalls)) + VG_(printf)("SYSCALL[%d, %d] read ( %d, %p, %d ) --> %d\n", + VG_(syscall_depth), VG_(getpid)(), + arg1, arg2, arg3, res); + if (!VG_(is_kerror)(res) && res > 0) { + make_readable( arg2, res ); + } + break; + + case __NR_readlink: /* syscall 85 */ + /* int readlink(const char *path, char *buf, size_t bufsiz); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("readlink ( %p, %p, %d )\n", arg1,arg2,arg3); + must_be_readable_asciiz( "readlink(path)", arg1 ); + must_be_writable ( "readlink(buf)", arg2,arg3 ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res > 0) { + make_readable ( arg2, res ); + } + break; + + case __NR_readv: { /* syscall 145 */ + /* int readv(int fd, const struct iovec * vector, size_t count); */ + UInt i; + struct iovec * vec; + if (VG_(clo_trace_syscalls)) + VG_(printf)("readv ( %d, %p, %d )\n",arg1,arg2,arg3); + must_be_readable( "readv(vector)", + arg2, arg3 * sizeof(struct iovec) ); + /* ToDo: don't do any of the following if the vector is invalid */ + vec = (struct iovec *)arg2; + for (i = 0; i < arg3; i++) + must_be_writable( "readv(vector[...])", + (UInt)vec[i].iov_base,vec[i].iov_len ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res > 0) { + /* res holds the number of bytes read. 
*/ + for (i = 0; i < arg3; i++) { + Int nReadThisBuf = vec[i].iov_len; + if (nReadThisBuf > res) nReadThisBuf = res; + make_readable( (UInt)vec[i].iov_base, nReadThisBuf ); + res -= nReadThisBuf; + if (res < 0) VG_(panic)("vg_wrap_syscall: readv: res < 0"); + } + } + break; + } + + case __NR_rename: /* syscall 38 */ + /* int rename(const char *oldpath, const char *newpath); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("rename ( %p, %p )\n", arg1, arg2 ); + must_be_readable_asciiz( "rename(oldpath)", arg1 ); + must_be_readable_asciiz( "rename(newpath)", arg2 ); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_rmdir: /* syscall 40 */ + /* int rmdir(const char *pathname); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("rmdir ( %p )\n", arg1); + must_be_readable_asciiz( "rmdir(pathname)", arg1 ); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_sched_setparam: + case __NR_sched_getparam: + case __NR_sched_yield: + case __NR_sched_get_priority_min: + if (VG_(clo_instrument)) { + VG_(message)(Vg_UserMsg, + "Warning: noted but unhandled __NR_sched_* syscall (%d).", + syscallno); + VG_(message)(Vg_UserMsg, + " This could cause spurious value errors" + " to appear."); + } + KERNEL_DO_SYSCALL(res); + break; + + case __NR_select: /* syscall 82 */ + /* struct sel_arg_struct { + unsigned long n; + fd_set *inp, *outp, *exp; + struct timeval *tvp; + }; + int old_select(struct sel_arg_struct *arg); + */ + { + Bool arg_block_readable + = VG_(clo_instrument) + ? 
VGM_(check_readable)(arg1, 5*sizeof(UInt), NULL) + : True; + must_be_readable ( "select(args)", arg1, 5*sizeof(UInt) ); + if (arg_block_readable) { + UInt* arg_struct = (UInt*)arg1; + arg1 = arg_struct[0]; + arg2 = arg_struct[1]; + arg3 = arg_struct[2]; + arg4 = arg_struct[3]; + arg5 = arg_struct[4]; + + if (VG_(clo_trace_syscalls)) + VG_(printf)("select ( %d, %p, %p, %p, %p )\n", + arg1,arg2,arg3,arg4,arg5); + if (arg2 != (Addr)NULL) + must_be_readable("select(readfds)", arg2, + arg1/8 /* __FD_SETSIZE/8 */ ); + if (arg3 != (Addr)NULL) + must_be_readable("select(writefds)", arg3, + arg1/8 /* __FD_SETSIZE/8 */ ); + if (arg4 != (Addr)NULL) + must_be_readable("select(exceptfds)", arg4, + arg1/8 /* __FD_SETSIZE/8 */ ); + if (arg5 != (Addr)NULL) + must_be_readable("select(timeout)", arg5, + sizeof(struct timeval) ); + } + } + KERNEL_DO_SYSCALL(res); + break; + + case __NR_setitimer: /* syscall 104 */ + /* setitimer(int which, const struct itimerval *value, + struct itimerval *ovalue); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("setitimer ( %d, %p, %p )\n", arg1,arg2,arg3); + must_be_readable("setitimer(value)", + arg2, sizeof(struct itimerval) ); + if (arg3 != (Addr)NULL) + must_be_writable("setitimer(ovalue)", + arg3, sizeof(struct itimerval)); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && arg3 != (Addr)NULL) { + make_readable(arg3, sizeof(struct itimerval)); + } + break; + +# if defined(__NR_setfsgid32) + case __NR_setfsgid32: /* syscall 216 */ + /* int setfsgid(uid_t fsgid); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("setfsgid ( %d )\n", arg1); + KERNEL_DO_SYSCALL(res); + break; +# endif + +# if defined(__NR_setgid32) + case __NR_setgid32: /* syscall 214 */ +# endif + case __NR_setgid: /* syscall 46 */ + /* int setgid(gid_t gid); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("setgid ( %d )\n", arg1); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_setsid: /* syscall 66 */ + /* pid_t setsid(void); */ + if (VG_(clo_trace_syscalls)) + 
VG_(printf)("setsid ()\n"); + KERNEL_DO_SYSCALL(res); + break; + +# if defined(__NR_setgroups32) + case __NR_setgroups32: /* syscall 206 */ + /* int setgroups(size_t size, const gid_t *list); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("setgroups ( %d, %p )\n", arg1, arg2); + if (arg1 > 0) + must_be_readable ( "setgroups(list)", arg2, + arg1 * sizeof(gid_t) ); + KERNEL_DO_SYSCALL(res); + break; +# endif + + case __NR_setpgid: /* syscall 57 */ + /* int setpgid(pid_t pid, pid_t pgid); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("setpgid ( %d, %d )\n", arg1, arg2); + KERNEL_DO_SYSCALL(res); + break; + +# if defined(__NR_setregid32) + case __NR_setregid32: /* syscall 204 */ + /* int setregid(gid_t rgid, gid_t egid); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("setregid32(?) ( %d, %d )\n", arg1, arg2); + KERNEL_DO_SYSCALL(res); + break; +# endif + +# if defined(__NR_setresuid32) + case __NR_setresuid32: /* syscall 208 */ + /* int setresuid(uid_t ruid, uid_t euid, uid_t suid); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("setresuid32(?) 
( %d, %d, %d )\n", arg1, arg2, arg3); + KERNEL_DO_SYSCALL(res); + break; +# endif + +# if defined(__NR_setreuid32) + case __NR_setreuid32: /* syscall 203 */ +# endif + case __NR_setreuid: /* syscall 70 */ + /* int setreuid(uid_t ruid, uid_t euid); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("setreuid ( 0x%x, 0x%x )\n", arg1, arg2); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_setrlimit: /* syscall 75 */ + /* int setrlimit (int resource, const struct rlimit *rlim); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("setrlimit ( %d, %p )\n", arg1,arg2); + must_be_readable( "setrlimit(rlim)", arg2, sizeof(struct rlimit) ); + KERNEL_DO_SYSCALL(res); + break; + +# if defined(__NR_setuid32) + case __NR_setuid32: /* syscall 213 */ +# endif + case __NR_setuid: /* syscall 23 */ + /* int setuid(uid_t uid); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("setuid ( %d )\n", arg1); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_socketcall: /* syscall 102 */ + /* int socketcall(int call, unsigned long *args); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("socketcall ( %d, %p )\n",arg1,arg2); + switch (arg1 /* request */) { + + case SYS_SOCKETPAIR: + /* int socketpair(int d, int type, int protocol, int sv[2]); */ + must_be_readable( "socketcall.socketpair(args)", + arg2, 4*sizeof(Addr) ); + must_be_writable( "socketcall.socketpair(sv)", + ((UInt*)arg2)[3], 2*sizeof(int) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res)) + make_readable ( ((UInt*)arg2)[3], 2*sizeof(int) ); + break; + + case SYS_SOCKET: + /* int socket(int domain, int type, int protocol); */ + must_be_readable( "socketcall.socket(args)", + arg2, 3*sizeof(Addr) ); + KERNEL_DO_SYSCALL(res); + break; + + case SYS_BIND: + /* int bind(int sockfd, struct sockaddr *my_addr, + int addrlen); */ + must_be_readable( "socketcall.bind(args)", + arg2, 3*sizeof(Addr) ); + must_be_readable( "socketcall.bind(my_addr)", + ((UInt*)arg2)[1], ((UInt*)arg2)[2] ); + KERNEL_DO_SYSCALL(res); + break; + + case SYS_LISTEN: + 
/* int listen(int s, int backlog); */ + must_be_readable( "socketcall.listen(args)", + arg2, 2*sizeof(Addr) ); + KERNEL_DO_SYSCALL(res); + break; + + case SYS_ACCEPT: { + /* int accept(int s, struct sockaddr *addr, int *p_addrlen); */ + Addr addr; + Addr p_addrlen; + UInt addrlen_in, addrlen_out; + must_be_readable( "socketcall.accept(args)", + arg2, 3*sizeof(Addr) ); + addr = ((UInt*)arg2)[1]; + p_addrlen = ((UInt*)arg2)[2]; + if (p_addrlen != (Addr)NULL) { + must_be_readable ( "socketcall.accept(addrlen)", + p_addrlen, sizeof(int) ); + addrlen_in = safe_dereference( p_addrlen, 0 ); + must_be_writable ( "socketcall.accept(addr)", + addr, addrlen_in ); + } + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res >= 0 && p_addrlen != (Addr)NULL) { + addrlen_out = safe_dereference( p_addrlen, 0 ); + if (addrlen_out > 0) + make_readable( addr, addrlen_out ); + } + break; + } + + case SYS_SENDTO: + /* int sendto(int s, const void *msg, int len, + unsigned int flags, + const struct sockaddr *to, int tolen); */ + must_be_readable( "socketcall.sendto(args)", arg2, + 6*sizeof(Addr) ); + must_be_readable( "socketcall.sendto(msg)", + ((UInt*)arg2)[1], /* msg */ + ((UInt*)arg2)[2] /* len */ ); + must_be_readable( "socketcall.sendto(to)", + ((UInt*)arg2)[4], /* to */ + ((UInt*)arg2)[5] /* tolen */ ); + KERNEL_DO_SYSCALL(res); + break; + + case SYS_SEND: + /* int send(int s, const void *msg, size_t len, int flags); */ + must_be_readable( "socketcall.send(args)", arg2, + 4*sizeof(Addr) ); + must_be_readable( "socketcall.send(msg)", + ((UInt*)arg2)[1], /* msg */ + ((UInt*)arg2)[2] /* len */ ); + KERNEL_DO_SYSCALL(res); + break; + + case SYS_RECVFROM: + /* int recvfrom(int s, void *buf, int len, unsigned int flags, + struct sockaddr *from, int *fromlen); */ + must_be_readable( "socketcall.recvfrom(args)", + arg2, 6*sizeof(Addr) ); + if ( ((UInt*)arg2)[4] /* from */ != 0) { + must_be_readable( "socketcall.recvfrom(fromlen)", + ((UInt*)arg2)[5] /* fromlen */, + sizeof(int) ); + 
must_be_writable( "socketcall.recvfrom(from)", + ((UInt*)arg2)[4], /*from*/ + safe_dereference( (Addr) + ((UInt*)arg2)[5], 0 ) ); + } + must_be_writable( "socketcall.recvfrom(buf)", + ((UInt*)arg2)[1], /* buf */ + ((UInt*)arg2)[2] /* len */ ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res >= 0) { + make_readable( ((UInt*)arg2)[1], /* buf */ + ((UInt*)arg2)[2] /* len */ ); + if ( ((UInt*)arg2)[4] /* from */ != 0) { + make_readable( + ((UInt*)arg2)[4], /*from*/ + safe_dereference( (Addr) ((UInt*)arg2)[5], 0 ) ); + } + } + /* phew! */ + break; + + case SYS_RECV: + /* int recv(int s, void *buf, int len, unsigned int flags); */ + /* man 2 recv says: + The recv call is normally used only on a connected socket + (see connect(2)) and is identical to recvfrom with a NULL + from parameter. + */ + must_be_readable( "socketcall.recv(args)", + arg2, 4*sizeof(Addr) ); + must_be_writable( "socketcall.recv(buf)", + ((UInt*)arg2)[1], /* buf */ + ((UInt*)arg2)[2] /* len */ ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res >= 0 + && ((UInt*)arg2)[1] != (UInt)NULL) { + make_readable( ((UInt*)arg2)[1], /* buf */ + ((UInt*)arg2)[2] /* len */ ); + } + break; + + case SYS_CONNECT: { + struct sockaddr *sa; + /* int connect(int sockfd, + struct sockaddr *serv_addr, int addrlen ); */ + must_be_readable( "socketcall.connect(args)", + arg2, 3*sizeof(Addr) ); + must_be_readable( "socketcall.connect(serv_addr.sa_family)", + ((UInt*)arg2)[1], /* serv_addr */ + sizeof (sa_family_t)); + sa = (struct sockaddr *) (((UInt*)arg2)[1]); + if (sa->sa_family == AF_UNIX) + must_be_readable_asciiz( + "socketcall.connect(serv_addr.sun_path)", + (UInt) ((struct sockaddr_un *) sa)->sun_path); + /* XXX There probably should be more cases here since not + all of the struct sockaddr_XXX must be initialized. But + wait until something pops up. 
*/ + else + must_be_readable( "socketcall.connect(serv_addr)", + ((UInt*)arg2)[1], /* serv_addr */ + ((UInt*)arg2)[2] /* addrlen */ ); + KERNEL_DO_SYSCALL(res); + break; + } + + case SYS_SETSOCKOPT: + /* int setsockopt(int s, int level, int optname, + const void *optval, int optlen); */ + must_be_readable( "socketcall.setsockopt(args)", + arg2, 5*sizeof(Addr) ); + must_be_readable( "socketcall.setsockopt(optval)", + ((UInt*)arg2)[3], /* optval */ + ((UInt*)arg2)[4] /* optlen */ ); + KERNEL_DO_SYSCALL(res); + break; + + case SYS_GETSOCKOPT: + /* int setsockopt(int s, int level, int optname, + void *optval, socklen_t *optlen); */ + must_be_readable( "socketcall.getsockopt(args)", + arg2, 5*sizeof(Addr) ); + { + Addr optval_p = ((UInt*)arg2)[3]; + Addr optlen_p = ((UInt*)arg2)[4]; + //vg_assert(sizeof(socklen_t) == sizeof(UInt)); + UInt optlen_after; + UInt optlen = safe_dereference ( optlen_p, 0 ); + if (optlen > 0) + must_be_writable( "socketcall.getsockopt(optval)", + optval_p, optlen ); + KERNEL_DO_SYSCALL(res); + optlen_after = safe_dereference ( optlen_p, 0 ); + if (!VG_(is_kerror)(res) && optlen > 0 && optlen_after > 0) + make_readable( optval_p, optlen_after ); + } + break; + + case SYS_GETSOCKNAME: + /* int getsockname(int s, struct sockaddr* name, + int* namelen) */ + must_be_readable( "socketcall.getsockname(args)", + arg2, 3*sizeof(Addr) ); + { + UInt namelen = safe_dereference( (Addr) ((UInt*)arg2)[2], 0); + if (namelen > 0) + must_be_writable( "socketcall.getsockname(name)", + ((UInt*)arg2)[1], namelen ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res)) { + namelen = safe_dereference( (Addr) ((UInt*)arg2)[2], 0); + if (namelen > 0 + && ((UInt*)arg2)[1] != (UInt)NULL) + make_readable( ((UInt*)arg2)[1], namelen ); + } + } + break; + + case SYS_GETPEERNAME: + /* int getpeername(int s, struct sockaddr* name, + int* namelen) */ + must_be_readable( "socketcall.getpeername(args)", + arg2, 3*sizeof(Addr) ); + { + UInt namelen = safe_dereference( (Addr) 
((UInt*)arg2)[2], 0); + if (namelen > 0) + must_be_writable( "socketcall.getpeername(name)", + ((UInt*)arg2)[1], namelen ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res)) { + namelen = safe_dereference( (Addr) ((UInt*)arg2)[2], 0); + if (namelen > 0 + && ((UInt*)arg2)[1] != (UInt)NULL) + make_readable( ((UInt*)arg2)[1], namelen ); + } + } + break; + + case SYS_SHUTDOWN: + /* int shutdown(int s, int how); */ + must_be_readable( "socketcall.shutdown(args)", + arg2, 2*sizeof(Addr) ); + KERNEL_DO_SYSCALL(res); + break; + + case SYS_SENDMSG: + { + /* int sendmsg(int s, const struct msghdr *msg, int flags); */ + + /* this causes warnings, and I don't get why. glibc bug? + * (after all it's glibc providing the arguments array) + must_be_readable( "socketcall.sendmsg(args)", + arg2, 3*sizeof(Addr) ); + */ + + struct msghdr *msg = (struct msghdr *)((UInt *)arg2)[ 1 ]; + msghdr_foreachfield ( msg, must_be_readable_sendmsg ); + + KERNEL_DO_SYSCALL(res); + break; + } + + case SYS_RECVMSG: + { + /* int recvmsg(int s, struct msghdr *msg, int flags); */ + + /* this causes warnings, and I don't get why. glibc bug? + * (after all it's glibc providing the arguments array) + must_be_readable( "socketcall.recvmsg(args)", + arg2, 3*sizeof(Addr) ); + */ + + struct msghdr *msg = (struct msghdr *)((UInt *)arg2)[ 1 ]; + msghdr_foreachfield ( msg, must_be_writable_recvmsg ); + + KERNEL_DO_SYSCALL(res); + + if ( !VG_(is_kerror)( res ) ) + msghdr_foreachfield( msg, make_readable_recvmsg ); + + break; + } + + default: + VG_(message)(Vg_DebugMsg,"FATAL: unhandled socketcall 0x%x",arg1); + VG_(panic)("... 
bye!\n"); + break; /*NOTREACHED*/ + } + break; + + case __NR_stat: /* syscall 106 */ + /* int stat(const char *file_name, struct stat *buf); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("stat ( %p, %p )\n",arg1,arg2); + must_be_readable_asciiz( "stat(file_name)", arg1 ); + must_be_writable( "stat(buf)", arg2, sizeof(struct stat) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res)) + make_readable( arg2, sizeof(struct stat) ); + break; + + case __NR_statfs: /* syscall 99 */ + /* int statfs(const char *path, struct statfs *buf); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("statfs ( %p, %p )\n",arg1,arg2); + must_be_readable_asciiz( "statfs(path)", arg1 ); + must_be_writable( "stat(buf)", arg2, sizeof(struct statfs) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res)) + make_readable( arg2, sizeof(struct statfs) ); + break; + + case __NR_symlink: /* syscall 83 */ + /* int symlink(const char *oldpath, const char *newpath); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("symlink ( %p, %p )\n",arg1,arg2); + must_be_readable_asciiz( "symlink(oldpath)", arg1 ); + must_be_readable_asciiz( "symlink(newpath)", arg2 ); + KERNEL_DO_SYSCALL(res); + break; + +# if defined(__NR_stat64) + case __NR_stat64: /* syscall 195 */ + /* int stat64(const char *file_name, struct stat64 *buf); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("stat64 ( %p, %p )\n",arg1,arg2); + must_be_readable_asciiz( "stat64(file_name)", arg1 ); + must_be_writable( "stat64(buf)", arg2, sizeof(struct stat64) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res)) + make_readable( arg2, sizeof(struct stat64) ); + break; +# endif + +# if defined(__NR_fstat64) + case __NR_fstat64: /* syscall 197 */ + /* int fstat64(int filedes, struct stat64 *buf); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("fstat64 ( %d, %p )\n",arg1,arg2); + must_be_writable( "fstat64(buf)", arg2, sizeof(struct stat64) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res)) + make_readable( arg2, sizeof(struct stat64) ); 
+ break; +# endif + + case __NR_sysinfo: /* syscall 116 */ + /* int sysinfo(struct sysinfo *info); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("sysinfo ( %p )\n",arg1); + must_be_writable( "sysinfo(info)", arg1, sizeof(struct sysinfo) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res)) + make_readable( arg1, sizeof(struct sysinfo) ); + break; + + case __NR_time: /* syscall 13 */ + /* time_t time(time_t *t); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("time ( %p )\n",arg1); + if (arg1 != (UInt)NULL) { + must_be_writable( "time", arg1, sizeof(time_t) ); + } + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && arg1 != (UInt)NULL) { + make_readable( arg1, sizeof(time_t) ); + } + break; + + case __NR_times: /* syscall 43 */ + /* clock_t times(struct tms *buf); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("times ( %p )\n",arg1); + must_be_writable( "times(buf)", arg1, sizeof(struct tms) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && arg1 != (UInt)NULL) { + make_readable( arg1, sizeof(struct tms) ); + } + break; + + case __NR_truncate: /* syscall 92 */ + /* int truncate(const char *path, size_t length); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("truncate ( %p, %d )\n", arg1,arg2); + must_be_readable_asciiz( "truncate(path)", arg1 ); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_umask: /* syscall 60 */ + /* mode_t umask(mode_t mask); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("umask ( %d )\n", arg1); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_unlink: /* syscall 10 */ + /* int unlink(const char *pathname) */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("ulink ( %p )\n",arg1); + must_be_readable_asciiz( "unlink(pathname)", arg1 ); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_uname: /* syscall 122 */ + /* int uname(struct utsname *buf); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("uname ( %p )\n",arg1); + must_be_writable( "uname(buf)", arg1, sizeof(struct utsname) ); + KERNEL_DO_SYSCALL(res); + if 
(!VG_(is_kerror)(res) && arg1 != (UInt)NULL) { + make_readable( arg1, sizeof(struct utsname) ); + } + break; + + case __NR_utime: /* syscall 30 */ + /* int utime(const char *filename, struct utimbuf *buf); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("utime ( %p, %p )\n", arg1,arg2); + must_be_readable_asciiz( "utime(filename)", arg1 ); + if (arg2 != (UInt)NULL) + must_be_readable( "utime(buf)", arg2, + sizeof(struct utimbuf) ); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_wait4: /* syscall 114 */ + /* pid_t wait4(pid_t pid, int *status, int options, + struct rusage *rusage) */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("wait4 ( %d, %p, %d, %p )\n", + arg1,arg2,arg3,arg4); + if (arg2 != (Addr)NULL) + must_be_writable( "wait4(status)", arg2, sizeof(int) ); + if (arg4 != (Addr)NULL) + must_be_writable( "wait4(rusage)", arg4, sizeof(struct rusage) ); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res)) { + if (arg2 != (Addr)NULL) + make_readable( arg2, sizeof(int) ); + if (arg4 != (Addr)NULL) + make_readable( arg4, sizeof(struct rusage) ); + } + break; + + case __NR_write: /* syscall 4 */ + /* size_t write(int fd, const void *buf, size_t count); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("write ( %d, %p, %d )\n",arg1,arg2,arg3); + must_be_readable( "write(buf)", arg2, arg3 ); + KERNEL_DO_SYSCALL(res); + break; + + case __NR_writev: { /* syscall 146 */ + /* int writev(int fd, const struct iovec * vector, size_t count); */ + UInt i; + struct iovec * vec; + if (VG_(clo_trace_syscalls)) + VG_(printf)("writev ( %d, %p, %d )\n",arg1,arg2,arg3); + must_be_readable( "writev(vector)", + arg2, arg3 * sizeof(struct iovec) ); + /* ToDo: don't do any of the following if the vector is invalid */ + vec = (struct iovec *)arg2; + for (i = 0; i < arg3; i++) + must_be_readable( "writev(vector[...])", + (UInt)vec[i].iov_base,vec[i].iov_len ); + KERNEL_DO_SYSCALL(res); + break; + } + + /*-------------------------- SIGNALS --------------------------*/ + + /* Normally set to 
1, so that Valgrind's signal-simulation machinery + is engaged. Sometimes useful to disable (set to 0), for + debugging purposes, to make clients more deterministic. */ +# define SIGNAL_SIMULATION 1 + + case __NR_rt_sigaction: + case __NR_sigaction: + /* int sigaction(int signum, struct k_sigaction *act, + struct k_sigaction *oldact); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("sigaction ( %d, %p, %p )\n",arg1,arg2,arg3); + if (arg2 != (UInt)NULL) + must_be_readable( "sigaction(act)", + arg2, sizeof(vki_ksigaction)); + if (arg3 != (UInt)NULL) + must_be_writable( "sigaction(oldact)", + arg3, sizeof(vki_ksigaction)); + /* We do this one ourselves! */ +# if SIGNAL_SIMULATION + VG_(do__NR_sigaction)(); + res = VG_(baseBlock)[VGOFF_(m_eax)]; +# else + /* debugging signals; when we don't handle them. */ + KERNEL_DO_SYSCALL(res); +# endif + if (!VG_(is_kerror)(res) && res == 0 && arg3 != (UInt)NULL) + make_readable( arg3, sizeof(vki_ksigaction)); + break; + + case __NR_rt_sigprocmask: + case __NR_sigprocmask: + /* int sigprocmask(int how, k_sigset_t *set, + k_sigset_t *oldset); */ + if (VG_(clo_trace_syscalls)) + VG_(printf)("sigprocmask ( %d, %p, %p )\n",arg1,arg2,arg3); + if (arg2 != (UInt)NULL) + must_be_readable( "sigprocmask(set)", + arg2, sizeof(vki_ksigset_t)); + if (arg3 != (UInt)NULL) + must_be_writable( "sigprocmask(oldset)", + arg3, sizeof(vki_ksigset_t)); + KERNEL_DO_SYSCALL(res); + if (!VG_(is_kerror)(res) && res == 0 && arg3 != (UInt)NULL) + make_readable( arg3, sizeof(vki_ksigset_t)); +# if SIGNAL_SIMULATION + /* For the reason why both the kernel and Valgrind process + sigprocmask, see the detailed comment at + vg_do__NR_sigprocmask(). */ + VG_(do__NR_sigprocmask) ( arg1 /*how*/, (vki_ksigset_t*) arg2 ); +# endif + break; + + default: + VG_(message) + (Vg_DebugMsg,"FATAL: unhandled syscall: %d",syscallno); + VG_(message) + (Vg_DebugMsg,"Do not panic. 
You may be able to fix this easily."); + VG_(message) + (Vg_DebugMsg,"Read the file README_MISSING_SYSCALL_OR_IOCTL."); + VG_(unimplemented)("no wrapper for the above system call"); + vg_assert(3+3 == 7); + break; /*NOTREACHED*/ + } + + /* Tell the signal handler machinery that we've finished the + syscall. */ + VG_(syscall_depth) --; + + /* { void zzzmemscan(void); zzzmemscan(); } */ + + /* Finish off with some sanity checks. */ + vg_assert( VG_(syscall_depth) == syscall_depth_saved ); + + if (! VG_(first_and_last_secondaries_look_plausible)) + sane_before_call = False; + + if (sane_before_call && (!sane_after_call)) { + VG_(message)(Vg_DebugMsg, "valgrind syscall handler: "); + VG_(message)(Vg_DebugMsg, + "probable sanity check failure for syscall number %d\n", + syscallno ); + VG_(panic)("aborting due to the above ... bye!"); + } + + VGP_POPCC; +} + + +/*--------------------------------------------------------------------*/ +/*--- end vg_syscall_mem.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_to_ucode.c b/vg_to_ucode.c new file mode 100644 index 000000000..f31214889 --- /dev/null +++ b/vg_to_ucode.c @@ -0,0 +1,4309 @@ + +/*--------------------------------------------------------------------*/ +/*--- The JITter: translate x86 code to ucode. ---*/ +/*--- vg_to_ucode.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" + + +/*------------------------------------------------------------*/ +/*--- Renamings of frequently-used global functions. ---*/ +/*------------------------------------------------------------*/ + +#define uInstr0 VG_(newUInstr0) +#define uInstr1 VG_(newUInstr1) +#define uInstr2 VG_(newUInstr2) +#define uInstr3 VG_(newUInstr3) +#define dis VG_(disassemble) +#define nameIReg VG_(nameOfIntReg) +#define nameISize VG_(nameOfIntSize) +#define newTemp VG_(getNewTemp) +#define uLiteral VG_(setLiteralField) + + +/*------------------------------------------------------------*/ +/*--- Here so it can be inlined everywhere. ---*/ +/*------------------------------------------------------------*/ + +/* Allocate a new temp reg number. */ +__inline__ Int VG_(getNewTemp) ( UCodeBlock* cb ) +{ + Int t = cb->nextTemp; + cb->nextTemp += 2; + return t; +} + +Int VG_(getNewShadow) ( UCodeBlock* cb ) +{ + Int t = cb->nextTemp; + cb->nextTemp += 2; + return SHADOW(t); +} + +/* Handy predicates. */ +#define SMC_IF_SOME(cb) \ + do { \ + if (VG_(clo_smc_check) >= VG_CLO_SMC_SOME) { \ + LAST_UINSTR((cb)).smc_check = True; \ + } \ + } while (0) + +#define SMC_IF_ALL(cb) \ + do { \ + if (VG_(clo_smc_check) == VG_CLO_SMC_ALL) { \ + LAST_UINSTR((cb)).smc_check = True; \ + } \ + } while (0) + + +/*------------------------------------------------------------*/ +/*--- Helper bits and pieces for deconstructing the ---*/ +/*--- x86 insn stream. 
---*/ +/*------------------------------------------------------------*/ + +static Char* nameGrp1 ( Int opc_aux ) +{ + static Char* grp1_names[8] + = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" }; + if (opc_aux < 0 || opc_aux > 7) VG_(panic)("nameGrp1"); + return grp1_names[opc_aux]; +} + +static Char* nameGrp2 ( Int opc_aux ) +{ + static Char* grp2_names[8] + = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" }; + if (opc_aux < 0 || opc_aux > 7) VG_(panic)("nameGrp2"); + return grp2_names[opc_aux]; +} + +static Char* nameGrp4 ( Int opc_aux ) +{ + static Char* grp4_names[8] + = { "inc", "dec", "???", "???", "???", "???", "???", "???" }; + if (opc_aux < 0 || opc_aux > 1) VG_(panic)("nameGrp4"); + return grp4_names[opc_aux]; +} + +static Char* nameGrp5 ( Int opc_aux ) +{ + static Char* grp5_names[8] + = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" }; + if (opc_aux < 0 || opc_aux > 6) VG_(panic)("nameGrp5"); + return grp5_names[opc_aux]; +} + +static Char* nameGrp8 ( Int opc_aux ) +{ + static Char* grp8_names[8] + = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" }; + if (opc_aux < 4 || opc_aux > 7) VG_(panic)("nameGrp8"); + return grp8_names[opc_aux]; +} + +Char* VG_(nameOfIntReg) ( Int size, Int reg ) +{ + static Char* ireg32_names[8] + = { "%eax", "%ecx", "%edx", "%ebx", + "%esp", "%ebp", "%esi", "%edi" }; + static Char* ireg16_names[8] + = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" }; + static Char* ireg8_names[8] + = { "%al", "%cl", "%dl", "%bl", "%ah{sp}", "%ch{bp}", "%dh{si}", "%bh{di}" }; + if (reg < 0 || reg > 7) goto bad; + switch (size) { + case 4: return ireg32_names[reg]; + case 2: return ireg16_names[reg]; + case 1: return ireg8_names[reg]; + } + bad: + VG_(panic)("nameOfIntReg"); + return NULL; /*notreached*/ +} + +Char VG_(nameOfIntSize) ( Int size ) +{ + switch (size) { + case 4: return 'l'; + case 2: return 'w'; + case 1: return 'b'; + default: VG_(panic)("nameOfIntSize"); + } +} + 
+__inline__ UInt VG_(extend_s_8to32) ( UInt x ) +{ + return (UInt)((((Int)x) << 24) >> 24); +} + +__inline__ static UInt extend_s_16to32 ( UInt x ) +{ + return (UInt)((((Int)x) << 16) >> 16); +} + + +/* Get a byte value out of the insn stream and sign-extend to 32 + bits. */ +__inline__ static UInt getSDisp8 ( Addr eip0 ) +{ + UChar* eip = (UChar*)eip0; + return VG_(extend_s_8to32)( (UInt) (eip[0]) ); +} + +__inline__ static UInt getSDisp16 ( Addr eip0 ) +{ + UChar* eip = (UChar*)eip0; + UInt d = *eip++; + d |= ((*eip++) << 8); + return extend_s_16to32(d); +} + +/* Get a 32-bit value out of the insn stream. */ +__inline__ static UInt getUDisp32 ( Addr eip0 ) +{ + UChar* eip = (UChar*)eip0; + UInt v = eip[3]; v <<= 8; + v |= eip[2]; v <<= 8; + v |= eip[1]; v <<= 8; + v |= eip[0]; + return v; +} + +__inline__ static UInt getUDisp16 ( Addr eip0 ) +{ + UChar* eip = (UChar*)eip0; + UInt v = eip[1]; v <<= 8; + v |= eip[0]; + return v; +} + +__inline__ static UChar getUChar ( Addr eip0 ) +{ + UChar* eip = (UChar*)eip0; + return eip[0]; +} + +__inline__ static UInt LOW24 ( UInt x ) +{ + return x & 0x00FFFFFF; +} + +__inline__ static UInt HI8 ( UInt x ) +{ + return x >> 24; +} + +__inline__ static UInt getUDisp ( Int size, Addr eip ) +{ + switch (size) { + case 4: return getUDisp32(eip); + case 2: return getUDisp16(eip); + case 1: return getUChar(eip); + default: VG_(panic)("getUDisp"); + } + return 0; /*notreached*/ +} + +__inline__ static UInt getSDisp ( Int size, Addr eip ) +{ + switch (size) { + case 4: return getUDisp32(eip); + case 2: return getSDisp16(eip); + case 1: return getSDisp8(eip); + default: VG_(panic)("getUDisp"); + } + return 0; /*notreached*/ +} + + +/*------------------------------------------------------------*/ +/*--- Flag-related helpers. ---*/ +/*------------------------------------------------------------*/ + +/* For the last uinsn inserted into cb, set the read, written and + undefined flags. 
Undefined flags are counted as written, but it + seems worthwhile to distinguish them. +*/ +static __inline__ void uFlagsRWU ( UCodeBlock* cb, + FlagSet rr, FlagSet ww, FlagSet uu ) +{ + VG_(setFlagRW)( + &LAST_UINSTR(cb), rr, VG_UNION_FLAG_SETS(ww,uu) + ); +} + + +static void setFlagsFromUOpcode ( UCodeBlock* cb, Int uopc ) +{ + switch (uopc) { + case XOR: case OR: case AND: + uFlagsRWU(cb, FlagsEmpty, FlagsOSZCP, FlagA); break; + case ADC: case SBB: + uFlagsRWU(cb, FlagC, FlagsOSZACP, FlagsEmpty); break; + case ADD: case SUB: case NEG: + uFlagsRWU(cb, FlagsEmpty, FlagsOSZACP, FlagsEmpty); break; + case INC: case DEC: + uFlagsRWU(cb, FlagsEmpty, FlagsOSZAP, FlagsEmpty); break; + case SHR: case SAR: case SHL: + uFlagsRWU(cb, FlagsEmpty, FlagsOSZCP, FlagA); break; + case ROL: case ROR: + uFlagsRWU(cb, FlagsEmpty, FlagsOC, FlagsEmpty); break; + case RCR: case RCL: + uFlagsRWU(cb, FlagC, FlagsOC, FlagsEmpty); break; + case NOT: + uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); break; + default: + VG_(printf)("unhandled case is %s\n", + VG_(nameUOpcode)(True, uopc)); + VG_(panic)("setFlagsFromUOpcode: unhandled case"); + } +} + +static __inline__ void uCond ( UCodeBlock* cb, Condcode cond ) +{ + LAST_UINSTR(cb).cond = cond; +} + + +/*------------------------------------------------------------*/ +/*--- Disassembling addressing modes ---*/ +/*------------------------------------------------------------*/ + +/* Generate ucode to calculate an address indicated by a ModRM and + following SIB bytes, getting the value in a new temporary. The + temporary, and the number of bytes in the address mode, are + returned, as a pair (length << 8) | temp. Note that this fn should + not be called if the R/M part of the address denotes a register + instead of memory. If buf is non-NULL, text of the addressing mode + is placed therein. 
*/ + +static UInt disAMode ( UCodeBlock* cb, Addr eip0, UChar* buf ) +{ + UChar* eip = (UChar*)eip0; + UChar mod_reg_rm = *eip++; + Int tmp = newTemp(cb); + + /* squeeze out the reg field from mod_reg_rm, since a 256-entry + jump table seems a bit excessive. + */ + mod_reg_rm &= 0xC7; /* is now XX000YYY */ + mod_reg_rm |= (mod_reg_rm >> 3); /* is now XX0XXYYY */ + mod_reg_rm &= 0x1F; /* is now 000XXYYY */ + switch (mod_reg_rm) { + + /* (%eax) .. (%edi), not including (%esp) or (%ebp). + --> GET %reg, t + */ + case 0x00: case 0x01: case 0x02: case 0x03: + /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: + { UChar rm = mod_reg_rm; + uInstr2(cb, GET, 4, ArchReg, rm, TempReg, tmp); + if (buf) VG_(sprintf)(buf,"(%s)", nameIReg(4,rm)); + return (1<<24 | tmp); + } + + /* d8(%eax) ... d8(%edi), not including d8(%esp) + --> GET %reg, t ; ADDL d8, t + */ + case 0x08: case 0x09: case 0x0A: case 0x0B: + /* ! 0C */ case 0x0D: case 0x0E: case 0x0F: + { UChar rm = mod_reg_rm & 7; + Int tmq = newTemp(cb); + UInt d = getSDisp8((Addr)eip); eip++; + uInstr2(cb, GET, 4, ArchReg, rm, TempReg, tmq); + uInstr2(cb, LEA1, 4, TempReg, tmq, TempReg, tmp); + LAST_UINSTR(cb).lit32 = d; + if (buf) VG_(sprintf)(buf,"%d(%s)", d, nameIReg(4,rm)); + return (2<<24 | tmp); + } + + /* d32(%eax) ... d32(%edi), not including d32(%esp) + --> GET %reg, t ; ADDL d8, t + */ + case 0x10: case 0x11: case 0x12: case 0x13: + /* ! 14 */ case 0x15: case 0x16: case 0x17: + { UChar rm = mod_reg_rm & 7; + Int tmq = newTemp(cb); + UInt d = getUDisp32((Addr)eip); eip += 4; + uInstr2(cb, GET, 4, ArchReg, rm, TempReg, tmq); + uInstr2(cb, LEA1, 4, TempReg, tmq, TempReg, tmp); + LAST_UINSTR(cb).lit32 = d; + if (buf) VG_(sprintf)(buf,"0x%x(%s)", d, nameIReg(4,rm)); + return (5<<24 | tmp); + } + + /* a register, %eax .. %edi. This shouldn't happen. 
*/ + case 0x18: case 0x19: case 0x1A: case 0x1B: + case 0x1C: case 0x1D: case 0x1E: case 0x1F: + VG_(panic)("disAMode: not an addr!"); + + /* a 32-bit literal address + --> MOV d32, tmp + */ + case 0x05: + { UInt d = getUDisp32((Addr)eip); eip += 4; + uInstr2(cb, MOV, 4, Literal, 0, TempReg, tmp); + uLiteral(cb, d); + if (buf) VG_(sprintf)(buf,"(0x%x)", d); + return (5<<24 | tmp); + } + + case 0x04: { + /* SIB, with no displacement. Special cases: + -- %esp cannot act as an index value. + If index_r indicates %esp, zero is used for the index. + -- when mod is zero and base indicates EBP, base is instead + a 32-bit literal. + It's all madness, I tell you. Extract %index, %base and + scale from the SIB byte. The value denoted is then: + | %index == %ESP && %base == %EBP + = d32 following SIB byte + | %index == %ESP && %base != %EBP + = %base + | %index != %ESP && %base == %EBP + = d32 following SIB byte + (%index << scale) + | %index != %ESP && %base != %ESP + = %base + (%index << scale) + + What happens to the souls of CPU architects who dream up such + horrendous schemes, do you suppose? 
+ */ + UChar sib = *eip++; + UChar scale = (sib >> 6) & 3; + UChar index_r = (sib >> 3) & 7; + UChar base_r = sib & 7; + + if (index_r != R_ESP && base_r != R_EBP) { + Int index_tmp = newTemp(cb); + Int base_tmp = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, index_r, TempReg, index_tmp); + uInstr2(cb, GET, 4, ArchReg, base_r, TempReg, base_tmp); + uInstr3(cb, LEA2, 4, TempReg, base_tmp, TempReg, index_tmp, + TempReg, tmp); + LAST_UINSTR(cb).lit32 = 0; + LAST_UINSTR(cb).extra4b = 1 << scale; + if (buf) VG_(sprintf)(buf,"(%s,%s,%d)", nameIReg(4,base_r), + nameIReg(4,index_r),1<> 6) & 3; + UChar index_r = (sib >> 3) & 7; + UChar base_r = sib & 7; + UInt d = getSDisp8((Addr)eip); eip++; + + if (index_r == R_ESP) { + Int tmq = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, base_r, TempReg, tmq); + uInstr2(cb, LEA1, 4, TempReg, tmq, TempReg, tmp); + LAST_UINSTR(cb).lit32 = d; + if (buf) VG_(sprintf)(buf,"%d(%s,,)", d, nameIReg(4,base_r)); + return (3<<24 | tmp); + } else { + Int index_tmp = newTemp(cb); + Int base_tmp = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, index_r, TempReg, index_tmp); + uInstr2(cb, GET, 4, ArchReg, base_r, TempReg, base_tmp); + uInstr3(cb, LEA2, 4, TempReg, base_tmp, TempReg, index_tmp, + TempReg, tmp); + LAST_UINSTR(cb).lit32 = d; + LAST_UINSTR(cb).extra4b = 1 << scale; + if (buf) VG_(sprintf)(buf,"%d(%s,%s,%d)", d, nameIReg(4,base_r), + nameIReg(4,index_r), 1<> 6) & 3; + UChar index_r = (sib >> 3) & 7; + UChar base_r = sib & 7; + UInt d = getUDisp32((Addr)eip); eip += 4; + + if (index_r == R_ESP) { + Int tmq = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, base_r, TempReg, tmq); + uInstr2(cb, LEA1, 4, TempReg, tmq, TempReg, tmp); + LAST_UINSTR(cb).lit32 = d; + if (buf) VG_(sprintf)(buf,"%d(%s,,)", d, nameIReg(4,base_r)); + return (6<<24 | tmp); + } else { + Int index_tmp = newTemp(cb); + Int base_tmp = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, index_r, TempReg, index_tmp); + uInstr2(cb, GET, 4, ArchReg, base_r, TempReg, base_tmp); + uInstr3(cb, 
LEA2, 4, TempReg, base_tmp, TempReg, index_tmp, + TempReg, tmp); + LAST_UINSTR(cb).lit32 = d; + LAST_UINSTR(cb).extra4b = 1 << scale; + if (buf) VG_(sprintf)(buf,"%d(%s,%s,%d)", d, nameIReg(4,base_r), + nameIReg(4,index_r), 1<> 3); /* is now XX0XXYYY */ + mod_reg_rm &= 0x1F; /* is now 000XXYYY */ + switch (mod_reg_rm) { + + /* (%eax) .. (%edi), not including (%esp) or (%ebp). */ + case 0x00: case 0x01: case 0x02: case 0x03: + /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: + return 1; + + /* d8(%eax) ... d8(%edi), not including d8(%esp). */ + case 0x08: case 0x09: case 0x0A: case 0x0B: + /* ! 0C */ case 0x0D: case 0x0E: case 0x0F: + return 2; + + /* d32(%eax) ... d32(%edi), not including d32(%esp). */ + case 0x10: case 0x11: case 0x12: case 0x13: + /* ! 14 */ case 0x15: case 0x16: case 0x17: + return 5; + + /* a register, %eax .. %edi. (Not an addr, but still handled.) */ + case 0x18: case 0x19: case 0x1A: case 0x1B: + case 0x1C: case 0x1D: case 0x1E: case 0x1F: + return 1; + + /* a 32-bit literal address. */ + case 0x05: return 5; + + /* SIB, no displacement. */ + case 0x04: { + UChar sib = *eip++; + UChar base_r = sib & 7; + if (base_r == R_EBP) return 6; else return 2; + } + /* SIB, with 8-bit displacement. */ + case 0x0C: return 3; + + /* SIB, with 32-bit displacement. */ + case 0x14: return 6; + + default: + VG_(panic)("amode_from_RM"); + return 0; /*notreached*/ + } +} + + +/* Extract the reg field from a modRM byte. */ +static __inline__ Int gregOfRM ( UChar mod_reg_rm ) +{ + return (Int)( (mod_reg_rm >> 3) & 7 ); +} + +/* Figure out whether the mod and rm parts of a modRM byte refer to a + register or memory. If so, the byte will have the form 11XXXYYY, + where YYY is the register number. */ +static __inline__ Bool epartIsReg ( UChar mod_reg_rm ) +{ + return (0xC0 == (mod_reg_rm & 0xC0)); +} + +/* ... and extract the register number ... 
*/ +static __inline__ Int eregOfRM ( UChar mod_reg_rm ) +{ + return (Int)(mod_reg_rm & 0x7); +} + + +/*------------------------------------------------------------*/ +/*--- Disassembling common idioms ---*/ +/*------------------------------------------------------------*/ + +static +void codegen_XOR_reg_with_itself ( UCodeBlock* cb, Int size, + Int ge_reg, Int tmp ) +{ + if (dis) + VG_(printf)("xor%c %s, %s\n", nameISize(size), + nameIReg(size,ge_reg), nameIReg(size,ge_reg) ); + uInstr2(cb, MOV, size, Literal, 0, TempReg, tmp); + uLiteral(cb, 0); + uInstr2(cb, XOR, size, TempReg, tmp, TempReg, tmp); + setFlagsFromUOpcode(cb, XOR); + uInstr2(cb, PUT, size, TempReg, tmp, ArchReg, ge_reg); +} + + +/* Handle binary integer instructions of the form + op E, G meaning + op reg-or-mem, reg + Is passed the a ptr to the modRM byte, the actual operation, and the + data size. Returns the address advanced completely over this + instruction. + + E(src) is reg-or-mem + G(dst) is reg. + + If E is reg, --> GET %G, tmp + OP %E, tmp + PUT tmp, %G + + If E is mem and OP is not reversible, + --> (getAddr E) -> tmpa + LD (tmpa), tmpa + GET %G, tmp2 + OP tmpa, tmp2 + PUT tmp2, %G + + If E is mem and OP is reversible + --> (getAddr E) -> tmpa + LD (tmpa), tmpa + OP %G, tmpa + PUT tmpa, %G +*/ +static +Addr dis_op2_E_G ( UCodeBlock* cb, + Opcode opc, + Bool keep, + Int size, + Addr eip0, + Char* t_x86opc ) +{ + Bool reversible; + UChar rm = getUChar(eip0); + UChar dis_buf[50]; + + if (epartIsReg(rm)) { + Int tmp = newTemp(cb); + + /* Specially handle XOR reg,reg, because that doesn't really + depend on reg, and doing the obvious thing potentially + generates a spurious value check failure due to the bogus + dependency. 
*/ + if (opc == XOR && gregOfRM(rm) == eregOfRM(rm)) { + codegen_XOR_reg_with_itself ( cb, size, gregOfRM(rm), tmp ); + return 1+eip0; + } + + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tmp); + if (opc == AND || opc == OR) { + Int tao = newTemp(cb); + uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, tao); + uInstr2(cb, opc, size, TempReg, tao, TempReg, tmp); + setFlagsFromUOpcode(cb, opc); + } else { + uInstr2(cb, opc, size, ArchReg, eregOfRM(rm), TempReg, tmp); + setFlagsFromUOpcode(cb, opc); + } + if (keep) + uInstr2(cb, PUT, size, TempReg, tmp, ArchReg, gregOfRM(rm)); + if (dis) VG_(printf)("%s%c %s,%s\n", t_x86opc, nameISize(size), + nameIReg(size,eregOfRM(rm)), + nameIReg(size,gregOfRM(rm))); + return 1+eip0; + } + + /* E refers to memory */ + reversible + = (opc == ADD || opc == OR || opc == AND || opc == XOR || opc == ADC) + ? True : False; + if (reversible) { + UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); + Int tmpa = LOW24(pair); + uInstr2(cb, LOAD, size, TempReg, tmpa, TempReg, tmpa); + + if (opc == AND || opc == OR) { + Int tao = newTemp(cb); + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tao); + uInstr2(cb, opc, size, TempReg, tao, TempReg, tmpa); + setFlagsFromUOpcode(cb, opc); + } else { + uInstr2(cb, opc, size, ArchReg, gregOfRM(rm), TempReg, tmpa); + setFlagsFromUOpcode(cb, opc); + } + if (keep) + uInstr2(cb, PUT, size, TempReg, tmpa, ArchReg, gregOfRM(rm)); + if (dis) VG_(printf)("%s%c %s,%s\n", t_x86opc, nameISize(size), + dis_buf,nameIReg(size,gregOfRM(rm))); + return HI8(pair)+eip0; + } else { + UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); + Int tmpa = LOW24(pair); + Int tmp2 = newTemp(cb); + uInstr2(cb, LOAD, size, TempReg, tmpa, TempReg, tmpa); + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tmp2); + uInstr2(cb, opc, size, TempReg, tmpa, TempReg, tmp2); + setFlagsFromUOpcode(cb, opc); + if (keep) + uInstr2(cb, PUT, size, TempReg, tmp2, ArchReg, gregOfRM(rm)); + if (dis) VG_(printf)("%s%c 
%s,%s\n", t_x86opc, nameISize(size), + dis_buf,nameIReg(size,gregOfRM(rm))); + return HI8(pair)+eip0; + } +} + + + +/* Handle binary integer instructions of the form + op G, E meaning + op reg, reg-or-mem + Is passed the a ptr to the modRM byte, the actual operation, and the + data size. Returns the address advanced completely over this + instruction. + + G(src) is reg. + E(dst) is reg-or-mem + + If E is reg, --> GET %E, tmp + OP %G, tmp + PUT tmp, %E + + If E is mem, --> (getAddr E) -> tmpa + LD (tmpa), tmpv + OP %G, tmpv + ST tmpv, (tmpa) +*/ +static +Addr dis_op2_G_E ( UCodeBlock* cb, + Opcode opc, + Bool keep, + Int size, + Addr eip0, + Char* t_x86opc ) +{ + UChar rm = getUChar(eip0); + UChar dis_buf[50]; + + if (epartIsReg(rm)) { + Int tmp = newTemp(cb); + + /* Specially handle XOR reg,reg, because that doesn't really + depend on reg, and doing the obvious thing potentially + generates a spurious value check failure due to the bogus + dependency. */ + if (opc == XOR && gregOfRM(rm) == eregOfRM(rm)) { + codegen_XOR_reg_with_itself ( cb, size, gregOfRM(rm), tmp ); + return 1+eip0; + } + + uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, tmp); + + if (opc == AND || opc == OR) { + Int tao = newTemp(cb); + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tao); + uInstr2(cb, opc, size, TempReg, tao, TempReg, tmp); + setFlagsFromUOpcode(cb, opc); + } else { + uInstr2(cb, opc, size, ArchReg, gregOfRM(rm), TempReg, tmp); + setFlagsFromUOpcode(cb, opc); + } + if (keep) + uInstr2(cb, PUT, size, TempReg, tmp, ArchReg, eregOfRM(rm)); + if (dis) VG_(printf)("%s%c %s,%s\n", t_x86opc, nameISize(size), + nameIReg(size,gregOfRM(rm)), + nameIReg(size,eregOfRM(rm))); + return 1+eip0; + } + + /* E refers to memory */ + { + UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); + Int tmpa = LOW24(pair); + Int tmpv = newTemp(cb); + uInstr2(cb, LOAD, size, TempReg, tmpa, TempReg, tmpv); + + if (opc == AND || opc == OR) { + Int tao = newTemp(cb); + uInstr2(cb, GET, size, 
ArchReg, gregOfRM(rm), TempReg, tao); + uInstr2(cb, opc, size, TempReg, tao, TempReg, tmpv); + setFlagsFromUOpcode(cb, opc); + } else { + uInstr2(cb, opc, size, ArchReg, gregOfRM(rm), TempReg, tmpv); + setFlagsFromUOpcode(cb, opc); + } + if (keep) { + uInstr2(cb, STORE, size, TempReg, tmpv, TempReg, tmpa); + SMC_IF_ALL(cb); + } + if (dis) VG_(printf)("%s%c %s,%s\n", t_x86opc, nameISize(size), + nameIReg(size,gregOfRM(rm)), dis_buf); + return HI8(pair)+eip0; + } +} + + +/* Handle move instructions of the form + mov E, G meaning + mov reg-or-mem, reg + Is passed the a ptr to the modRM byte, and the data size. Returns + the address advanced completely over this instruction. + + E(src) is reg-or-mem + G(dst) is reg. + + If E is reg, --> GET %G, tmpv + PUT tmpv, %G + + If E is mem --> (getAddr E) -> tmpa + LD (tmpa), tmpb + PUT tmpb, %G +*/ +static +Addr dis_mov_E_G ( UCodeBlock* cb, + Int size, + Addr eip0 ) +{ + UChar rm = getUChar(eip0); + UChar dis_buf[50]; + + if (epartIsReg(rm)) { + Int tmpv = newTemp(cb); + uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, tmpv); + uInstr2(cb, PUT, size, TempReg, tmpv, ArchReg, gregOfRM(rm)); + if (dis) VG_(printf)("mov%c %s,%s\n", nameISize(size), + nameIReg(size,eregOfRM(rm)), + nameIReg(size,gregOfRM(rm))); + return 1+eip0; + } + + /* E refers to memory */ + { + UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); + Int tmpa = LOW24(pair); + Int tmpb = newTemp(cb); + uInstr2(cb, LOAD, size, TempReg, tmpa, TempReg, tmpb); + uInstr2(cb, PUT, size, TempReg, tmpb, ArchReg, gregOfRM(rm)); + if (dis) VG_(printf)("mov%c %s,%s\n", nameISize(size), + dis_buf,nameIReg(size,gregOfRM(rm))); + return HI8(pair)+eip0; + } +} + + +/* Handle move instructions of the form + mov G, E meaning + mov reg, reg-or-mem + Is passed the a ptr to the modRM byte, and the data size. Returns + the address advanced completely over this instruction. + + G(src) is reg. 
+ E(dst) is reg-or-mem + + If E is reg, --> GET %G, tmp + PUT tmp, %E + + If E is mem, --> (getAddr E) -> tmpa + GET %G, tmpv + ST tmpv, (tmpa) +*/ +static +Addr dis_mov_G_E ( UCodeBlock* cb, + Int size, + Addr eip0 ) +{ + UChar rm = getUChar(eip0); + UChar dis_buf[50]; + + if (epartIsReg(rm)) { + Int tmpv = newTemp(cb); + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tmpv); + uInstr2(cb, PUT, size, TempReg, tmpv, ArchReg, eregOfRM(rm)); + if (dis) VG_(printf)("mov%c %s,%s\n", nameISize(size), + nameIReg(size,gregOfRM(rm)), + nameIReg(size,eregOfRM(rm))); + return 1+eip0; + } + + /* E refers to memory */ + { + UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); + Int tmpa = LOW24(pair); + Int tmpv = newTemp(cb); + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tmpv); + uInstr2(cb, STORE, size, TempReg, tmpv, TempReg, tmpa); + SMC_IF_SOME(cb); + if (dis) VG_(printf)("mov%c %s,%s\n", nameISize(size), + nameIReg(size,gregOfRM(rm)), dis_buf); + return HI8(pair)+eip0; + } +} + + +/* op $immediate, AL/AX/EAX. */ +static +Addr dis_op_imm_A ( UCodeBlock* cb, + Int size, + Opcode opc, + Bool keep, + Addr eip, + Char* t_x86opc ) +{ + Int tmp = newTemp(cb); + UInt lit = getUDisp(size,eip); + uInstr2(cb, GET, size, ArchReg, R_EAX, TempReg, tmp); + if (opc == AND || opc == OR) { + Int tao = newTemp(cb); + uInstr2(cb, MOV, size, Literal, 0, TempReg, tao); + uLiteral(cb, lit); + uInstr2(cb, opc, size, TempReg, tao, TempReg, tmp); + setFlagsFromUOpcode(cb, opc); + } else { + uInstr2(cb, opc, size, Literal, 0, TempReg, tmp); + uLiteral(cb, lit); + setFlagsFromUOpcode(cb, opc); + } + if (keep) + uInstr2(cb, PUT, size, TempReg, tmp, ArchReg, R_EAX); + if (dis) VG_(printf)("%s%c $0x%x, %s\n", t_x86opc, nameISize(size), + lit, nameIReg(size,R_EAX)); + return eip+size; +} + + +/* Sign- and Zero-extending moves. 
*/ +static +Addr dis_movx_E_G ( UCodeBlock* cb, + Addr eip, Int szs, Int szd, Bool sign_extend ) +{ + UChar dis_buf[50]; + UChar rm = getUChar(eip); + if (epartIsReg(rm)) { + Int tmpv = newTemp(cb); + uInstr2(cb, GET, szs, ArchReg, eregOfRM(rm), TempReg, tmpv); + uInstr1(cb, WIDEN, szd, TempReg, tmpv); + LAST_UINSTR(cb).extra4b = szs; + LAST_UINSTR(cb).signed_widen = sign_extend; + uInstr2(cb, PUT, szd, TempReg, tmpv, ArchReg, gregOfRM(rm)); + if (dis) VG_(printf)("mov%c%c%c %s,%s\n", + sign_extend ? 's' : 'z', + nameISize(szs), nameISize(szd), + nameIReg(szs,eregOfRM(rm)), + nameIReg(szd,gregOfRM(rm))); + return 1+eip; + } + + /* E refers to memory */ + { + UInt pair = disAMode ( cb, eip, dis?dis_buf:NULL); + Int tmpa = LOW24(pair); + uInstr2(cb, LOAD, szs, TempReg, tmpa, TempReg, tmpa); + uInstr1(cb, WIDEN, szd, TempReg, tmpa); + LAST_UINSTR(cb).extra4b = szs; + LAST_UINSTR(cb).signed_widen = sign_extend; + uInstr2(cb, PUT, szd, TempReg, tmpa, ArchReg, gregOfRM(rm)); + if (dis) VG_(printf)("mov%c%c%c %s,%s\n", + sign_extend ? 's' : 'z', + nameISize(szs), nameISize(szd), + dis_buf, + nameIReg(szd,gregOfRM(rm))); + return HI8(pair)+eip; + } +} + + +/* Generate code to divide ArchRegs EDX:EAX / DX:AX / AX by the 32 / + 16 / 8 bit quantity in the given TempReg. */ +static +void codegen_div ( UCodeBlock* cb, Int sz, Int t, Bool signed_divide ) +{ + Int helper; + Int ta = newTemp(cb); + Int td = newTemp(cb); + + switch (sz) { + case 4: helper = (signed_divide ? VGOFF_(helper_idiv_64_32) + : VGOFF_(helper_div_64_32)); + break; + case 2: helper = (signed_divide ? VGOFF_(helper_idiv_32_16) + : VGOFF_(helper_div_32_16)); + break; + case 1: helper = (signed_divide ? 
VGOFF_(helper_idiv_16_8) + : VGOFF_(helper_div_16_8)); + break; + default: VG_(panic)("codegen_div"); + } + uInstr0(cb, CALLM_S, 0); + if (sz == 4 || sz == 2) { + uInstr1(cb, PUSH, sz, TempReg, t); + uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, ta); + uInstr1(cb, PUSH, sz, TempReg, ta); + uInstr2(cb, GET, sz, ArchReg, R_EDX, TempReg, td); + uInstr1(cb, PUSH, sz, TempReg, td); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsOSZACP); + uInstr1(cb, POP, sz, TempReg, t); + uInstr2(cb, PUT, sz, TempReg, t, ArchReg, R_EDX); + uInstr1(cb, POP, sz, TempReg, t); + uInstr2(cb, PUT, sz, TempReg, t, ArchReg, R_EAX); + uInstr1(cb, CLEAR, 0, Lit16, 4); + } else { + uInstr1(cb, PUSH, 1, TempReg, t); + uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, ta); + uInstr1(cb, PUSH, 2, TempReg, ta); + uInstr2(cb, MOV, 1, Literal, 0, TempReg, td); + uLiteral(cb, 0); + uInstr1(cb, PUSH, 1, TempReg, td); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsOSZACP); + uInstr1(cb, POP, 1, TempReg, t); + uInstr2(cb, PUT, 1, TempReg, t, ArchReg, R_AL); + uInstr1(cb, POP, 1, TempReg, t); + uInstr2(cb, PUT, 1, TempReg, t, ArchReg, R_AH); + uInstr1(cb, CLEAR, 0, Lit16, 4); + } + uInstr0(cb, CALLM_E, 0); +} + + +static +Addr dis_Grp1 ( UCodeBlock* cb, Addr eip, UChar modrm, + Int am_sz, Int d_sz, Int sz, UInt d32 ) +{ + Int t1, t2, uopc; + UInt pair; + UChar dis_buf[50]; + if (epartIsReg(modrm)) { + vg_assert(am_sz == 1); + t1 = newTemp(cb); + uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); + switch (gregOfRM(modrm)) { + case 0: uopc = ADD; break; case 1: uopc = OR; break; + case 2: uopc = ADC; break; case 3: uopc = SBB; break; + case 4: uopc = AND; break; case 5: uopc = SUB; break; + case 6: uopc = XOR; break; case 7: uopc = SUB; break; + default: VG_(panic)("dis_Grp1(Reg): unhandled case"); + } + if (uopc == AND || uopc == OR) { + Int tao = newTemp(cb); + uInstr2(cb, MOV, sz, Literal, 0, TempReg, tao); + uLiteral(cb, 
d32); + uInstr2(cb, uopc, sz, TempReg, tao, TempReg, t1); + setFlagsFromUOpcode(cb, uopc); + } else { + uInstr2(cb, uopc, sz, Literal, 0, TempReg, t1); + uLiteral(cb, d32); + setFlagsFromUOpcode(cb, uopc); + } + if (gregOfRM(modrm) < 7) + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm)); + eip += (am_sz + d_sz); + if (dis) + VG_(printf)("%s%c $0x%x, %s\n", + nameGrp1(gregOfRM(modrm)), nameISize(sz), d32, + nameIReg(sz,eregOfRM(modrm))); + } else { + pair = disAMode ( cb, eip, dis?dis_buf:NULL); + t1 = LOW24(pair); + t2 = newTemp(cb); + eip += HI8(pair); + eip += d_sz; + uInstr2(cb, LOAD, sz, TempReg, t1, TempReg, t2); + switch (gregOfRM(modrm)) { + case 0: uopc = ADD; break; case 1: uopc = OR; break; + case 2: uopc = ADC; break; case 3: uopc = SBB; break; + case 4: uopc = AND; break; case 5: uopc = SUB; break; + case 6: uopc = XOR; break; case 7: uopc = SUB; break; + default: VG_(panic)("dis_Grp1(Mem): unhandled case"); + } + if (uopc == AND || uopc == OR) { + Int tao = newTemp(cb); + uInstr2(cb, MOV, sz, Literal, 0, TempReg, tao); + uLiteral(cb, d32); + uInstr2(cb, uopc, sz, TempReg, tao, TempReg, t2); + setFlagsFromUOpcode(cb, uopc); + } else { + uInstr2(cb, uopc, sz, Literal, 0, TempReg, t2); + uLiteral(cb, d32); + setFlagsFromUOpcode(cb, uopc); + } + if (gregOfRM(modrm) < 7) { + uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t1); + SMC_IF_ALL(cb); + } + if (dis) + VG_(printf)("%s%c $0x%x, %s\n", + nameGrp1(gregOfRM(modrm)), nameISize(sz), d32, + dis_buf); + } + return eip; +} + + +/* Group 2 extended opcodes. */ +static +Addr dis_Grp2 ( UCodeBlock* cb, Addr eip, UChar modrm, + Int am_sz, Int d_sz, Int sz, + Tag orig_src_tag, UInt orig_src_val ) +{ + /* orig_src_tag and orig_src_val denote either ArchReg(%CL) or a + Literal. And eip on entry points at the modrm byte. */ + Int t1, t2, uopc; + UInt pair; + UChar dis_buf[50]; + UInt src_val; + Tag src_tag; + + /* Get the amount to be shifted by into src_tag/src_val. 
*/ + if (orig_src_tag == ArchReg) { + src_val = newTemp(cb); + src_tag = TempReg; + uInstr2(cb, GET, 1, orig_src_tag, orig_src_val, TempReg, src_val); + } else { + src_val = orig_src_val; + src_tag = Literal; + } + + if (epartIsReg(modrm)) { + vg_assert(am_sz == 1); + t1 = newTemp(cb); + uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); + switch (gregOfRM(modrm)) { + case 0: uopc = ROL; break; case 1: uopc = ROR; break; + case 2: uopc = RCL; break; case 3: uopc = RCR; break; + case 4: uopc = SHL; break; case 5: uopc = SHR; break; + case 7: uopc = SAR; break; + default: VG_(panic)("dis_Grp2(Reg): unhandled case"); + } + if (src_tag == Literal) { + uInstr2(cb, uopc, sz, Literal, 0, TempReg, t1); + uLiteral(cb, src_val); + } else { + uInstr2(cb, uopc, sz, src_tag, src_val, TempReg, t1); + } + setFlagsFromUOpcode(cb, uopc); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm)); + eip += (am_sz + d_sz); + if (dis) { + if (orig_src_tag == Literal) + VG_(printf)("%s%c $0x%x, %s\n", + nameGrp2(gregOfRM(modrm)), nameISize(sz), + orig_src_val, nameIReg(sz,eregOfRM(modrm))); + else + VG_(printf)("%s%c %s, %s\n", + nameGrp2(gregOfRM(modrm)), nameISize(sz), + nameIReg(1,orig_src_val), + nameIReg(sz,eregOfRM(modrm))); + } + } else { + pair = disAMode ( cb, eip, dis?dis_buf:NULL); + t1 = LOW24(pair); + t2 = newTemp(cb); + eip += HI8(pair); + eip += d_sz; + uInstr2(cb, LOAD, sz, TempReg, t1, TempReg, t2); + switch (gregOfRM(modrm)) { + case 0: uopc = ROL; break; case 1: uopc = ROR; break; + case 2: uopc = RCL; break; case 3: uopc = RCR; break; + case 4: uopc = SHL; break; case 5: uopc = SHR; break; + case 7: uopc = SAR; break; + default: VG_(panic)("dis_Grp2(Reg): unhandled case"); + } + if (src_tag == Literal) { + uInstr2(cb, uopc, sz, Literal, 0, TempReg, t2); + uLiteral(cb, src_val); + } else { + uInstr2(cb, uopc, sz, src_tag, src_val, TempReg, t2); + } + setFlagsFromUOpcode(cb, uopc); + uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t1); + SMC_IF_ALL(cb); + if 
(dis) { + if (orig_src_tag == Literal) + VG_(printf)("%s%c $0x%x, %s\n", + nameGrp2(gregOfRM(modrm)), nameISize(sz), + orig_src_val, dis_buf); + else + VG_(printf)("%s%c %s, %s\n", + nameGrp2(gregOfRM(modrm)), nameISize(sz), + nameIReg(1,orig_src_val), + dis_buf); + } + } + return eip; +} + + + +/* Group 8 extended opcodes. */ +static +Addr dis_Grp8 ( UCodeBlock* cb, Addr eip, UChar modrm, + Int am_sz, Int sz, UInt src_val ) +{ + /* src_val denotes a d8. + And eip on entry points at the modrm byte. */ + Int t1, t2, helper; + UInt pair; + UChar dis_buf[50]; + + switch (gregOfRM(modrm)) { + case 4: helper = VGOFF_(helper_bt); break; + case 5: helper = VGOFF_(helper_bts); break; + case 6: helper = VGOFF_(helper_btr); break; + case 7: helper = VGOFF_(helper_btc); break; + /* If this needs to be extended, be careful to do the flag + setting in the parts below correctly. */ + default: VG_(panic)("dis_Grp8"); + } + + t1 = newTemp(cb); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t1); + uLiteral(cb, src_val); + uInstr0(cb, CALLM_S, 0); + uInstr1(cb, PUSH, 4, TempReg, t1); + + if (epartIsReg(modrm)) { + vg_assert(am_sz == 1); + t2 = newTemp(cb); + uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t2); + uInstr1(cb, PUSH, sz, TempReg, t2); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagC, FlagsOSZAP); + uInstr1(cb, POP, sz, TempReg, t2); + uInstr2(cb, PUT, sz, TempReg, t2, ArchReg, eregOfRM(modrm)); + uInstr1(cb, CLEAR, 0, Lit16, 4); + eip += (am_sz + 1); + if (dis) + VG_(printf)("%s%c $0x%x, %s\n", + nameGrp8(gregOfRM(modrm)), nameISize(sz), + src_val, + nameIReg(sz,eregOfRM(modrm))); + } else { + pair = disAMode ( cb, eip, dis?dis_buf:NULL); + t1 = LOW24(pair); + t2 = newTemp(cb); + eip += HI8(pair); + eip += 1; + uInstr2(cb, LOAD, sz, TempReg, t1, TempReg, t2); + uInstr1(cb, PUSH, sz, TempReg, t2); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagC, FlagsOSZAP); + uInstr1(cb, POP, sz, TempReg, t2); + uInstr2(cb, STORE, 
sz, TempReg, t2, TempReg, t1); + SMC_IF_ALL(cb); + uInstr1(cb, CLEAR, 0, Lit16, 4); + if (dis) + VG_(printf)("%s%c $0x%x, %s\n", + nameGrp8(gregOfRM(modrm)), nameISize(sz), src_val, + dis_buf); + } + uInstr0(cb, CALLM_E, 0); + return eip; +} + + + + +/* Generate ucode to multiply the value in EAX/AX/AL by the register + specified by the ereg of modrm, and park the result in + EDX:EAX/DX:AX/AX. */ +static void codegen_mul_A_D_Reg ( UCodeBlock* cb, Int sz, + UChar modrm, Bool signed_multiply ) +{ + Int helper = signed_multiply + ? + (sz==1 ? VGOFF_(helper_imul_8_16) + : (sz==2 ? VGOFF_(helper_imul_16_32) + : VGOFF_(helper_imul_32_64))) + : + (sz==1 ? VGOFF_(helper_mul_8_16) + : (sz==2 ? VGOFF_(helper_mul_16_32) + : VGOFF_(helper_mul_32_64))); + Int t1 = newTemp(cb); + Int ta = newTemp(cb); + uInstr0(cb, CALLM_S, 0); + uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); + uInstr1(cb, PUSH, sz, TempReg, t1); + uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, ta); + uInstr1(cb, PUSH, sz, TempReg, ta); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagsOC, FlagsSZAP); + if (sz > 1) { + uInstr1(cb, POP, sz, TempReg, t1); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, R_EDX); + uInstr1(cb, POP, sz, TempReg, t1); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, R_EAX); + } else { + uInstr1(cb, CLEAR, 0, Lit16, 4); + uInstr1(cb, POP, 2, TempReg, t1); + uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX); + } + uInstr0(cb, CALLM_E, 0); + if (dis) VG_(printf)("%s%c %s\n", signed_multiply ? "imul" : "mul", + nameISize(sz), nameIReg(sz, eregOfRM(modrm))); + +} + + +/* Generate ucode to multiply the value in EAX/AX/AL by the value in + TempReg temp, and park the result in EDX:EAX/DX:AX/AX. */ +static void codegen_mul_A_D_Temp ( UCodeBlock* cb, Int sz, + Int temp, Bool signed_multiply, + UChar* dis_buf ) +{ + Int helper = signed_multiply + ? + (sz==1 ? VGOFF_(helper_imul_8_16) + : (sz==2 ? VGOFF_(helper_imul_16_32) + : VGOFF_(helper_imul_32_64))) + : + (sz==1 ? 
VGOFF_(helper_mul_8_16) + : (sz==2 ? VGOFF_(helper_mul_16_32) + : VGOFF_(helper_mul_32_64))); + Int t1 = newTemp(cb); + uInstr0(cb, CALLM_S, 0); + uInstr1(cb, PUSH, sz, TempReg, temp); + uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, t1); + uInstr1(cb, PUSH, sz, TempReg, t1); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagsOC, FlagsSZAP); + if (sz > 1) { + uInstr1(cb, POP, sz, TempReg, t1); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, R_EDX); + uInstr1(cb, POP, sz, TempReg, t1); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, R_EAX); + } else { + uInstr1(cb, CLEAR, 0, Lit16, 4); + uInstr1(cb, POP, 2, TempReg, t1); + uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX); + } + uInstr0(cb, CALLM_E, 0); + if (dis) VG_(printf)("%s%c %s\n", signed_multiply ? "imul" : "mul", + nameISize(sz), dis_buf); +} + + +/* Group 3 extended opcodes. */ +static +Addr dis_Grp3 ( UCodeBlock* cb, Int sz, Addr eip ) +{ + Int t1, t2; + UInt pair, d32; + UChar modrm; + UChar dis_buf[50]; + t1 = t2 = INVALID_TEMPREG; + modrm = getUChar(eip); + if (epartIsReg(modrm)) { + t1 = newTemp(cb); + switch (gregOfRM(modrm)) { + case 0: { /* TEST */ + Int tao = newTemp(cb); + eip++; d32 = getUDisp(sz, eip); eip += sz; + uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); + uInstr2(cb, MOV, sz, Literal, 0, TempReg, tao); + uLiteral(cb, d32); + uInstr2(cb, AND, sz, TempReg, tao, TempReg, t1); + setFlagsFromUOpcode(cb, AND); + if (dis) + VG_(printf)("test%c $0x%x, %s\n", + nameISize(sz), d32, nameIReg(sz, eregOfRM(modrm))); + break; + } + case 2: /* NOT */ + eip++; + uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); + uInstr1(cb, NOT, sz, TempReg, t1); + setFlagsFromUOpcode(cb, NOT); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm)); + if (dis) + VG_(printf)("not%c %s\n", + nameISize(sz), nameIReg(sz, eregOfRM(modrm))); + break; + case 3: /* NEG */ + eip++; + uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); + uInstr1(cb, NEG, sz, TempReg, t1); + 
setFlagsFromUOpcode(cb, NEG); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm)); + if (dis) + VG_(printf)("neg%c %s\n", + nameISize(sz), nameIReg(sz, eregOfRM(modrm))); + break; + case 4: /* MUL */ + eip++; + codegen_mul_A_D_Reg ( cb, sz, modrm, False ); + break; + case 5: /* IMUL */ + eip++; + codegen_mul_A_D_Reg ( cb, sz, modrm, True ); + break; + case 6: /* DIV */ + eip++; + uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); + codegen_div ( cb, sz, t1, False ); + if (dis) + VG_(printf)("div%c %s\n", nameISize(sz), + nameIReg(sz, eregOfRM(modrm))); + break; + case 7: /* IDIV */ + eip++; + uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); + codegen_div ( cb, sz, t1, True ); + if (dis) + VG_(printf)("idiv%c %s\n", nameISize(sz), + nameIReg(sz, eregOfRM(modrm))); + break; + default: + VG_(printf)( + "unhandled Grp3(R) case %d\n", (UInt)gregOfRM(modrm)); + VG_(panic)("Grp3"); + } + } else { + pair = disAMode ( cb, eip, dis?dis_buf:NULL ); + t2 = LOW24(pair); + t1 = newTemp(cb); + eip += HI8(pair); + uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1); + switch (gregOfRM(modrm)) { + case 0: { /* TEST */ + Int tao = newTemp(cb); + d32 = getUDisp(sz, eip); eip += sz; + uInstr2(cb, MOV, sz, Literal, 0, TempReg, tao); + uLiteral(cb, d32); + uInstr2(cb, AND, sz, TempReg, tao, TempReg, t1); + setFlagsFromUOpcode(cb, AND); + if (dis) + VG_(printf)("test%c $0x%x, %s\n", + nameISize(sz), d32, dis_buf); + break; + } + case 2: /* NOT */ + uInstr1(cb, NOT, sz, TempReg, t1); + setFlagsFromUOpcode(cb, NOT); + uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); + SMC_IF_ALL(cb); + if (dis) + VG_(printf)("not%c %s\n", nameISize(sz), dis_buf); + break; + case 3: /* NEG */ + uInstr1(cb, NEG, sz, TempReg, t1); + setFlagsFromUOpcode(cb, NEG); + uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); + SMC_IF_ALL(cb); + if (dis) + VG_(printf)("neg%c %s\n", nameISize(sz), dis_buf); + break; + case 4: /* MUL */ + codegen_mul_A_D_Temp ( cb, sz, t1, False, + 
dis?dis_buf:NULL ); + break; + case 5: /* IMUL */ + codegen_mul_A_D_Temp ( cb, sz, t1, True, dis?dis_buf:NULL ); + break; + case 6: /* DIV */ + codegen_div ( cb, sz, t1, False ); + if (dis) + VG_(printf)("div%c %s\n", nameISize(sz), dis_buf); + break; + case 7: /* IDIV */ + codegen_div ( cb, sz, t1, True ); + if (dis) + VG_(printf)("idiv%c %s\n", nameISize(sz), dis_buf); + break; + default: + VG_(printf)( + "unhandled Grp3(M) case %d\n", (UInt)gregOfRM(modrm)); + VG_(panic)("Grp3"); + } + } + return eip; +} + + +/* Group 4 extended opcodes. */ +static +Addr dis_Grp4 ( UCodeBlock* cb, Addr eip ) +{ + Int t1, t2; + UInt pair; + UChar modrm; + UChar dis_buf[50]; + t1 = t2 = INVALID_TEMPREG; + + modrm = getUChar(eip); + if (epartIsReg(modrm)) { + t1 = newTemp(cb); + uInstr2(cb, GET, 1, ArchReg, eregOfRM(modrm), TempReg, t1); + switch (gregOfRM(modrm)) { + case 0: /* INC */ + uInstr1(cb, INC, 1, TempReg, t1); + setFlagsFromUOpcode(cb, INC); + uInstr2(cb, PUT, 1, TempReg, t1, ArchReg, eregOfRM(modrm)); + break; + case 1: /* DEC */ + uInstr1(cb, DEC, 1, TempReg, t1); + setFlagsFromUOpcode(cb, DEC); + uInstr2(cb, PUT, 1, TempReg, t1, ArchReg, eregOfRM(modrm)); + break; + default: + VG_(printf)( + "unhandled Grp4(R) case %d\n", (UInt)gregOfRM(modrm)); + VG_(panic)("Grp4"); + } + eip++; + if (dis) + VG_(printf)("%sb %s\n", nameGrp4(gregOfRM(modrm)), + nameIReg(1, eregOfRM(modrm))); + } else { + pair = disAMode ( cb, eip, dis?dis_buf:NULL ); + t2 = LOW24(pair); + t1 = newTemp(cb); + uInstr2(cb, LOAD, 1, TempReg, t2, TempReg, t1); + switch (gregOfRM(modrm)) { + case 0: /* INC */ + uInstr1(cb, INC, 1, TempReg, t1); + setFlagsFromUOpcode(cb, INC); + uInstr2(cb, STORE, 1, TempReg, t1, TempReg, t2); + SMC_IF_ALL(cb); + break; + case 1: /* DEC */ + uInstr1(cb, DEC, 1, TempReg, t1); + setFlagsFromUOpcode(cb, DEC); + uInstr2(cb, STORE, 1, TempReg, t1, TempReg, t2); + SMC_IF_ALL(cb); + break; + default: + VG_(printf)( + "unhandled Grp4(M) case %d\n", (UInt)gregOfRM(modrm)); + 
VG_(panic)("Grp4"); + } + eip += HI8(pair); + if (dis) + VG_(printf)("%sb %s\n", nameGrp4(gregOfRM(modrm)), dis_buf); + } + return eip; +} + + +/* Group 5 extended opcodes. */ +static +Addr dis_Grp5 ( UCodeBlock* cb, Int sz, Addr eip, Bool* isEnd ) +{ + Int t1, t2, t3, t4; + UInt pair; + UChar modrm; + UChar dis_buf[50]; + t1 = t2 = t3 = t4 = INVALID_TEMPREG; + + modrm = getUChar(eip); + if (epartIsReg(modrm)) { + t1 = newTemp(cb); + uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); + switch (gregOfRM(modrm)) { + case 0: /* INC */ + uInstr1(cb, INC, sz, TempReg, t1); + setFlagsFromUOpcode(cb, INC); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm)); + break; + case 1: /* DEC */ + uInstr1(cb, DEC, sz, TempReg, t1); + setFlagsFromUOpcode(cb, DEC); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm)); + break; + case 2: /* call Ev */ + t3 = newTemp(cb); t4 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3); + uInstr2(cb, SUB, 4, Literal, 0, TempReg, t3); + uLiteral(cb, 4); + uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ESP); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4); + uLiteral(cb, eip+1); + uInstr2(cb, STORE, 4, TempReg, t4, TempReg, t3); + SMC_IF_ALL(cb); + uInstr1(cb, JMP, 0, TempReg, t1); + uCond(cb, CondAlways); + LAST_UINSTR(cb).call_dispatch = True; + *isEnd = True; + break; + case 4: /* jmp Ev */ + uInstr1(cb, JMP, 0, TempReg, t1); + uCond(cb, CondAlways); + *isEnd = True; + break; + default: + VG_(printf)( + "unhandled Grp5(R) case %d\n", (UInt)gregOfRM(modrm)); + VG_(panic)("Grp5"); + } + eip++; + if (dis) + VG_(printf)("%s%c %s\n", nameGrp5(gregOfRM(modrm)), + nameISize(sz), nameIReg(sz, eregOfRM(modrm))); + } else { + pair = disAMode ( cb, eip, dis?dis_buf:NULL ); + t2 = LOW24(pair); + t1 = newTemp(cb); + uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1); + switch (gregOfRM(modrm)) { + case 0: /* INC */ + uInstr1(cb, INC, sz, TempReg, t1); + setFlagsFromUOpcode(cb, INC); + uInstr2(cb, STORE, sz, TempReg, t1, 
TempReg, t2); + SMC_IF_ALL(cb); + break; + case 1: /* DEC */ + uInstr1(cb, DEC, sz, TempReg, t1); + setFlagsFromUOpcode(cb, DEC); + uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); + SMC_IF_ALL(cb); + break; + case 2: /* call Ev */ + t3 = newTemp(cb); t4 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3); + uInstr2(cb, SUB, 4, Literal, 0, TempReg, t3); + uLiteral(cb, 4); + uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ESP); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4); + uLiteral(cb, eip+HI8(pair)); + uInstr2(cb, STORE, 4, TempReg, t4, TempReg, t3); + SMC_IF_ALL(cb); + uInstr1(cb, JMP, 0, TempReg, t1); + uCond(cb, CondAlways); + LAST_UINSTR(cb).call_dispatch = True; + *isEnd = True; + break; + case 4: /* JMP Ev */ + uInstr1(cb, JMP, 0, TempReg, t1); + uCond(cb, CondAlways); + *isEnd = True; + break; + case 6: /* PUSH Ev */ + t3 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3); + uInstr2(cb, SUB, 4, Literal, 0, TempReg, t3); + uLiteral(cb, sz); + uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ESP); + uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t3); + SMC_IF_ALL(cb); + break; + default: + VG_(printf)( + "unhandled Grp5(M) case %d\n", (UInt)gregOfRM(modrm)); + VG_(panic)("Grp5"); + } + eip += HI8(pair); + if (dis) + VG_(printf)("%s%c %s\n", nameGrp5(gregOfRM(modrm)), + nameISize(sz), dis_buf); + } + return eip; +} + + +/* Template for REPE CMPS. Assumes this insn is the last one in + the basic block, and so emits a jump to the next insn. 
*/
static
void codegen_REPE_CMPS ( UCodeBlock* cb, Int sz, Addr eip, Addr eip_next )
{
   /* Temps are allocated in the same order as the uses below require;
      names reflect the guest registers/values they shadow. */
   Int t_dival = newTemp(cb);   /* value loaded from (%EDI) */
   Int t_sival = newTemp(cb);   /* value loaded from (%ESI) */
   Int t_di    = newTemp(cb);   /* %EDI */
   Int t_si    = newTemp(cb);   /* %ESI */
   Int t_cx    = newTemp(cb);   /* %ECX */

   /* Count already zero?  Fall straight through to the next insn. */
   uInstr2(cb, GET,  4, ArchReg, R_ECX, TempReg, t_cx);
   uInstr2(cb, JIFZ, 4, TempReg, t_cx, Literal, 0);
   uLiteral(cb, eip_next);
   uInstr1(cb, DEC,  4, TempReg, t_cx);
   uInstr2(cb, PUT,  4, TempReg, t_cx, ArchReg, R_ECX);

   /* Compare the sz-byte operands at (%EDI) and (%ESI); sets flags. */
   uInstr2(cb, GET,  4, ArchReg, R_EDI, TempReg, t_di);
   uInstr2(cb, GET,  4, ArchReg, R_ESI, TempReg, t_si);
   uInstr2(cb, LOAD, sz, TempReg, t_di, TempReg, t_dival);
   uInstr2(cb, LOAD, sz, TempReg, t_si, TempReg, t_sival);
   uInstr2(cb, SUB,  sz, TempReg, t_dival, TempReg, t_sival);
   setFlagsFromUOpcode(cb, SUB);

   /* Fetch the D-flag-dependent step via the helper; t_dival is dead
      now, so reuse it for the result. */
   uInstr0(cb, CALLM_S, 0);
   uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_dival);
   uLiteral(cb, 0);
   uInstr1(cb, PUSH, 4, TempReg, t_dival);
   uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag));
   uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);
   uInstr1(cb, POP, 4, TempReg, t_dival);
   uInstr0(cb, CALLM_E, 0);

   /* Scale the step to the operand size (<< 1 for sz==2, << 2 for
      sz==4; bytes need no scaling). */
   if (sz == 4 || sz == 2) {
      uInstr2(cb, SHL, 4, Literal, 0, TempReg, t_dival);
      uLiteral(cb, sz/2);
   }

   /* Advance both pointers and write them back. */
   uInstr2(cb, ADD, 4, TempReg, t_dival, TempReg, t_di);
   uInstr2(cb, ADD, 4, TempReg, t_dival, TempReg, t_si);
   uInstr2(cb, PUT, 4, TempReg, t_di, ArchReg, R_EDI);
   uInstr2(cb, PUT, 4, TempReg, t_si, ArchReg, R_ESI);

   /* Operands equal (Z set): re-execute this insn; else continue. */
   uInstr1(cb, JMP, 0, Literal, 0);
   uLiteral(cb, eip);
   uCond(cb, CondZ);
   uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty);
   uInstr1(cb, JMP, 0, Literal, 0);
   uLiteral(cb, eip_next);
   uCond(cb, CondAlways);
}


/* Template for REPNE SCAS.  Assumes this insn is the last one in
   the basic block, and so emits a jump to the next insn.
*/
static
void codegen_REPNE_SCAS ( UCodeBlock* cb, Int sz, Addr eip, Addr eip_next )
{
   Int t_ax  = newTemp(cb);  /* %EAX/AX/AL -- the value scanned for */
   Int t_cx  = newTemp(cb);  /* %ECX */
   Int t_val = newTemp(cb);  /* value loaded from (%EDI) */
   Int t_di  = newTemp(cb);  /* %EDI */

   /* Count already zero?  Fall straight through to the next insn. */
   uInstr2(cb, GET,  4, ArchReg, R_ECX, TempReg, t_cx);
   uInstr2(cb, JIFZ, 4, TempReg, t_cx, Literal, 0);
   uLiteral(cb, eip_next);
   uInstr1(cb, DEC,  4, TempReg, t_cx);
   uInstr2(cb, PUT,  4, TempReg, t_cx, ArchReg, R_ECX);

   /* Compare (%EDI) with the accumulator; sets the flags. */
   uInstr2(cb, GET,  sz, ArchReg, R_EAX, TempReg, t_ax);
   uInstr2(cb, GET,  4,  ArchReg, R_EDI, TempReg, t_di);
   uInstr2(cb, LOAD, sz, TempReg, t_di, TempReg, t_val);
   /* The SUB kills t_ax, but that's ok -- it isn't needed again. */
   uInstr2(cb, SUB,  sz, TempReg, t_val, TempReg, t_ax);
   setFlagsFromUOpcode(cb, SUB);

   /* D-flag-dependent step from the helper, into the now-dead t_val. */
   uInstr0(cb, CALLM_S, 0);
   uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_val);
   uLiteral(cb, 0);
   uInstr1(cb, PUSH, 4, TempReg, t_val);
   uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag));
   uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);
   uInstr1(cb, POP, 4, TempReg, t_val);
   uInstr0(cb, CALLM_E, 0);

   /* Scale the step to the operand size. */
   if (sz == 4 || sz == 2) {
      uInstr2(cb, SHL, 4, Literal, 0, TempReg, t_val);
      uLiteral(cb, sz/2);
   }
   uInstr2(cb, ADD, 4, TempReg, t_val, TempReg, t_di);
   uInstr2(cb, PUT, 4, TempReg, t_di, ArchReg, R_EDI);

   /* Not equal (Z clear): re-execute this insn; else continue. */
   uInstr1(cb, JMP, 0, Literal, 0);
   uLiteral(cb, eip);
   uCond(cb, CondNZ);
   uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty);
   uInstr1(cb, JMP, 0, Literal, 0);
   uLiteral(cb, eip_next);
   uCond(cb, CondAlways);
}


/* Template for REPE MOVS.  Assumes this insn is the last one in
   the basic block, and so emits a jump to the next insn.
*/
static
void codegen_REPE_MOVS ( UCodeBlock* cb, Int sz, Addr eip, Addr eip_next )
{
   Int t_cx  = newTemp(cb);  /* %ECX */
   Int t_di  = newTemp(cb);  /* %EDI */
   Int t_si  = newTemp(cb);  /* %ESI */
   Int t_val = newTemp(cb);  /* the datum being copied */

   /* Count already zero?  Fall straight through to the next insn. */
   uInstr2(cb, GET,  4, ArchReg, R_ECX, TempReg, t_cx);
   uInstr2(cb, JIFZ, 4, TempReg, t_cx, Literal, 0);
   uLiteral(cb, eip_next);
   uInstr1(cb, DEC,  4, TempReg, t_cx);
   uInstr2(cb, PUT,  4, TempReg, t_cx, ArchReg, R_ECX);

   /* Move one sz-byte datum from (%ESI) to (%EDI). */
   uInstr2(cb, GET,   4, ArchReg, R_EDI, TempReg, t_di);
   uInstr2(cb, GET,   4, ArchReg, R_ESI, TempReg, t_si);
   uInstr2(cb, LOAD,  sz, TempReg, t_si,  TempReg, t_val);
   uInstr2(cb, STORE, sz, TempReg, t_val, TempReg, t_di);
   SMC_IF_SOME(cb);

   /* D-flag-dependent step via the helper (t_val is dead; reuse it). */
   uInstr0(cb, CALLM_S, 0);
   uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_val);
   uLiteral(cb, 0);
   uInstr1(cb, PUSH, 4, TempReg, t_val);
   uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag));
   uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);
   uInstr1(cb, POP, 4, TempReg, t_val);
   uInstr0(cb, CALLM_E, 0);

   /* Scale the step to the operand size. */
   if (sz == 4 || sz == 2) {
      uInstr2(cb, SHL, 4, Literal, 0, TempReg, t_val);
      uLiteral(cb, sz/2);
   }

   /* Bump both pointers and write them back. */
   uInstr2(cb, ADD, 4, TempReg, t_val, TempReg, t_di);
   uInstr2(cb, ADD, 4, TempReg, t_val, TempReg, t_si);
   uInstr2(cb, PUT, 4, TempReg, t_di, ArchReg, R_EDI);
   uInstr2(cb, PUT, 4, TempReg, t_si, ArchReg, R_ESI);

   /* Unconditionally re-execute this insn; the JIFZ above is the
      only loop exit. */
   uInstr1(cb, JMP, 0, Literal, 0);
   uLiteral(cb, eip);
   uCond(cb, CondAlways);
}


/* Template for REPE STOS.  Assumes this insn is the last one in
   the basic block, and so emits a jump to the next insn.
*/
static
void codegen_REPE_STOS ( UCodeBlock* cb, Int sz, Addr eip, Addr eip_next )
{
   Int t_ax = newTemp(cb);  /* %EAX/AX/AL -- the datum to store */
   Int t_cx = newTemp(cb);  /* %ECX */
   Int t_di = newTemp(cb);  /* %EDI */

   /* Count already zero?  Fall straight through to the next insn. */
   uInstr2(cb, GET,  4, ArchReg, R_ECX, TempReg, t_cx);
   uInstr2(cb, JIFZ, 4, TempReg, t_cx, Literal, 0);
   uLiteral(cb, eip_next);
   uInstr1(cb, DEC,  4, TempReg, t_cx);
   uInstr2(cb, PUT,  4, TempReg, t_cx, ArchReg, R_ECX);

   /* Store the accumulator at (%EDI). */
   uInstr2(cb, GET,   sz, ArchReg, R_EAX, TempReg, t_ax);
   uInstr2(cb, GET,   4,  ArchReg, R_EDI, TempReg, t_di);
   uInstr2(cb, STORE, sz, TempReg, t_ax, TempReg, t_di);
   SMC_IF_SOME(cb);

   /* D-flag-dependent step via the helper (t_ax is dead; reuse it). */
   uInstr0(cb, CALLM_S, 0);
   uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_ax);
   uLiteral(cb, 0);
   uInstr1(cb, PUSH, 4, TempReg, t_ax);
   uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag));
   uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);
   uInstr1(cb, POP, 4, TempReg, t_ax);
   uInstr0(cb, CALLM_E, 0);

   /* Scale the step to the operand size, advance EDI, write back. */
   if (sz == 4 || sz == 2) {
      uInstr2(cb, SHL, 4, Literal, 0, TempReg, t_ax);
      uLiteral(cb, sz/2);
   }
   uInstr2(cb, ADD, 4, TempReg, t_ax, TempReg, t_di);
   uInstr2(cb, PUT, 4, TempReg, t_di, ArchReg, R_EDI);

   /* Unconditionally re-execute this insn; the JIFZ above is the
      only loop exit. */
   uInstr1(cb, JMP, 0, Literal, 0);
   uLiteral(cb, eip);
   uCond(cb, CondAlways);
}


/* Template for CMPS, _not_ preceded by a REP prefix.
*/
static
void codegen_CMPS ( UCodeBlock* cb, Int sz )
{
   Int t_dival = newTemp(cb);  /* value loaded from (%EDI) */
   Int t_sival = newTemp(cb);  /* value loaded from (%ESI) */
   Int t_di    = newTemp(cb);  /* %EDI */
   Int t_si    = newTemp(cb);  /* %ESI */

   /* Compare the sz-byte operands at (%EDI) and (%ESI); sets flags. */
   uInstr2(cb, GET,  4, ArchReg, R_EDI, TempReg, t_di);
   uInstr2(cb, GET,  4, ArchReg, R_ESI, TempReg, t_si);
   uInstr2(cb, LOAD, sz, TempReg, t_di, TempReg, t_dival);
   uInstr2(cb, LOAD, sz, TempReg, t_si, TempReg, t_sival);
   uInstr2(cb, SUB,  sz, TempReg, t_dival, TempReg, t_sival);
   setFlagsFromUOpcode(cb, SUB);

   /* D-flag-dependent step via the helper (t_dival is dead; reuse). */
   uInstr0(cb, CALLM_S, 0);
   uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_dival);
   uLiteral(cb, 0);
   uInstr1(cb, PUSH, 4, TempReg, t_dival);
   uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag));
   uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);
   uInstr1(cb, POP, 4, TempReg, t_dival);
   uInstr0(cb, CALLM_E, 0);

   /* Scale the step, advance both pointers, write them back. */
   if (sz == 4 || sz == 2) {
      uInstr2(cb, SHL, 4, Literal, 0, TempReg, t_dival);
      uLiteral(cb, sz/2);
   }
   uInstr2(cb, ADD, 4, TempReg, t_dival, TempReg, t_di);
   uInstr2(cb, ADD, 4, TempReg, t_dival, TempReg, t_si);
   uInstr2(cb, PUT, 4, TempReg, t_di, ArchReg, R_EDI);
   uInstr2(cb, PUT, 4, TempReg, t_si, ArchReg, R_ESI);
}


/* Template for MOVS, _not_ preceded by a REP prefix.
*/ +static +void codegen_MOVS ( UCodeBlock* cb, Int sz ) +{ + Int tv, /* the value being copied */ + td, /* EDI */ ts /* ESI */; + tv = newTemp(cb); + td = newTemp(cb); + ts = newTemp(cb); + + uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td); + uInstr2(cb, GET, 4, ArchReg, R_ESI, TempReg, ts); + + uInstr2(cb, LOAD, sz, TempReg, ts, TempReg, tv); + uInstr2(cb, STORE, sz, TempReg, tv, TempReg, td); + SMC_IF_SOME(cb); + + uInstr0(cb, CALLM_S, 0); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, tv); + uLiteral(cb, 0); + uInstr1(cb, PUSH, 4, TempReg, tv); + + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag)); + uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty); + + uInstr1(cb, POP, 4, TempReg, tv); + uInstr0(cb, CALLM_E, 0); + + if (sz == 4 || sz == 2) { + uInstr2(cb, SHL, 4, Literal, 0, TempReg, tv); + uLiteral(cb, sz/2); + } + uInstr2(cb, ADD, 4, TempReg, tv, TempReg, td); + uInstr2(cb, ADD, 4, TempReg, tv, TempReg, ts); + + uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI); + uInstr2(cb, PUT, 4, TempReg, ts, ArchReg, R_ESI); +} + + +/* Template for STOS, _not_ preceded by a REP prefix. */ +static +void codegen_STOS ( UCodeBlock* cb, Int sz ) +{ + Int ta /* EAX */, td /* EDI */; + ta = newTemp(cb); + td = newTemp(cb); + + uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, ta); + uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td); + uInstr2(cb, STORE, sz, TempReg, ta, TempReg, td); + SMC_IF_SOME(cb); + + uInstr0(cb, CALLM_S, 0); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, ta); + uLiteral(cb, 0); + uInstr1(cb, PUSH, 4, TempReg, ta); + + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag)); + uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty); + + uInstr1(cb, POP, 4, TempReg, ta); + uInstr0(cb, CALLM_E, 0); + + if (sz == 4 || sz == 2) { + uInstr2(cb, SHL, 4, Literal, 0, TempReg, ta); + uLiteral(cb, sz/2); + } + uInstr2(cb, ADD, 4, TempReg, ta, TempReg, td); + uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI); +} + + +/* Template for LODS, _not_ preceded by a REP prefix. 
*/ +static +void codegen_LODS ( UCodeBlock* cb, Int sz ) +{ + Int ta /* EAX */, ts /* ESI */; + ta = newTemp(cb); + ts = newTemp(cb); + + uInstr2(cb, GET, 4, ArchReg, R_ESI, TempReg, ts); + uInstr2(cb, LOAD, sz, TempReg, ts, TempReg, ta); + uInstr2(cb, PUT, sz, TempReg, ta, ArchReg, R_EAX); + + uInstr0(cb, CALLM_S, 0); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, ta); + uLiteral(cb, 0); + uInstr1(cb, PUSH, 4, TempReg, ta); + + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag)); + uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty); + + uInstr1(cb, POP, 4, TempReg, ta); + uInstr0(cb, CALLM_E, 0); + + if (sz == 4 || sz == 2) { + uInstr2(cb, SHL, 4, Literal, 0, TempReg, ta); + uLiteral(cb, sz/2); + } + uInstr2(cb, ADD, 4, TempReg, ta, TempReg, ts); + uInstr2(cb, PUT, 4, TempReg, ts, ArchReg, R_ESI); +} + + +/* Template for REPNE SCAS, _not_ preceded by a REP prefix. */ +static +void codegen_SCAS ( UCodeBlock* cb, Int sz ) +{ + Int ta /* EAX */, td /* EDI */, tv; + ta = newTemp(cb); + tv = newTemp(cb); + td = newTemp(cb); + + uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, ta); + uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td); + uInstr2(cb, LOAD, sz, TempReg, td, TempReg, tv); + /* next uinstr kills ta, but that's ok -- don't need it again */ + uInstr2(cb, SUB, sz, TempReg, tv, TempReg, ta); + setFlagsFromUOpcode(cb, SUB); + + uInstr0(cb, CALLM_S, 0); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, tv); + uLiteral(cb, 0); + uInstr1(cb, PUSH, 4, TempReg, tv); + + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag)); + uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty); + + uInstr1(cb, POP, 4, TempReg, tv); + uInstr0(cb, CALLM_E, 0); + + if (sz == 4 || sz == 2) { + uInstr2(cb, SHL, 4, Literal, 0, TempReg, tv); + uLiteral(cb, sz/2); + } + uInstr2(cb, ADD, 4, TempReg, tv, TempReg, td); + uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI); +} + + +/* (I)MUL E, G. Supplied eip points to the modR/M byte. 
*/ +static +Addr dis_mul_E_G ( UCodeBlock* cb, + Int size, + Addr eip0, + Bool signed_multiply ) +{ + Int ta, tg, te, helper; + UChar dis_buf[50]; + UChar rm = getUChar(eip0); + ta = INVALID_TEMPREG; + te = newTemp(cb); + tg = newTemp(cb); + + switch (size) { + case 4: helper = signed_multiply ? VGOFF_(helper_imul_32_64) + : VGOFF_(helper_mul_32_64); + break; + case 2: helper = signed_multiply ? VGOFF_(helper_imul_16_32) + : VGOFF_(helper_mul_16_32); + break; + case 1: helper = signed_multiply ? VGOFF_(helper_imul_8_16) + : VGOFF_(helper_mul_8_16); + break; + default: VG_(panic)("dis_mul_E_G"); + } + + uInstr0(cb, CALLM_S, 0); + if (epartIsReg(rm)) { + uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, te); + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tg); + uInstr1(cb, PUSH, size, TempReg, te); + uInstr1(cb, PUSH, size, TempReg, tg); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagsOC, FlagsSZAP); + uInstr1(cb, CLEAR, 0, Lit16, 4); + uInstr1(cb, POP, size, TempReg, tg); + uInstr2(cb, PUT, size, TempReg, tg, ArchReg, gregOfRM(rm)); + uInstr0(cb, CALLM_E, 0); + if (dis) VG_(printf)("%smul%c %s, %s\n", + signed_multiply ? "i" : "", + nameISize(size), + nameIReg(size,eregOfRM(rm)), + nameIReg(size,gregOfRM(rm))); + return 1+eip0; + } else { + UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); + ta = LOW24(pair); + uInstr2(cb, LOAD, size, TempReg, ta, TempReg, te); + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tg); + uInstr1(cb, PUSH, size, TempReg, te); + uInstr1(cb, PUSH, size, TempReg, tg); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagsOC, FlagsSZAP); + uInstr1(cb, CLEAR, 0, Lit16, 4); + uInstr1(cb, POP, size, TempReg, tg); + uInstr2(cb, PUT, size, TempReg, tg, ArchReg, gregOfRM(rm)); + uInstr0(cb, CALLM_E, 0); + if (dis) VG_(printf)("%smul%c %s, %s\n", + signed_multiply ? 
"i" : "", + nameISize(size), + dis_buf,nameIReg(size,gregOfRM(rm))); + return HI8(pair)+eip0; + } +} + + +/* IMUL I * E -> G. Supplied eip points to the modR/M byte. */ +static +Addr dis_imul_I_E_G ( UCodeBlock* cb, + Int size, + Addr eip, + Int litsize ) +{ + Int ta, te, tl, helper, d32; + UChar dis_buf[50]; + UChar rm = getUChar(eip); + ta = INVALID_TEMPREG; + te = newTemp(cb); + tl = newTemp(cb); + + switch (size) { + case 4: helper = VGOFF_(helper_imul_32_64); break; + case 2: helper = VGOFF_(helper_imul_16_32); break; + case 1: helper = VGOFF_(helper_imul_8_16); break; + default: VG_(panic)("dis_imul_I_E_G"); + } + + uInstr0(cb, CALLM_S, 0); + if (epartIsReg(rm)) { + uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, te); + uInstr1(cb, PUSH, size, TempReg, te); + eip++; + } else { + UInt pair = disAMode ( cb, eip, dis?dis_buf:NULL); + ta = LOW24(pair); + uInstr2(cb, LOAD, size, TempReg, ta, TempReg, te); + uInstr1(cb, PUSH, size, TempReg, te); + eip += HI8(pair); + } + + d32 = getSDisp(litsize,eip); + eip += litsize; + + uInstr2(cb, MOV, size, Literal, 0, TempReg, tl); + uLiteral(cb, d32); + uInstr1(cb, PUSH, size, TempReg, tl); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagsOC, FlagsSZAP); + uInstr1(cb, CLEAR, 0, Lit16, 4); + uInstr1(cb, POP, size, TempReg, te); + uInstr2(cb, PUT, size, TempReg, te, ArchReg, gregOfRM(rm)); + uInstr0(cb, CALLM_E, 0); + + if (dis) { + if (epartIsReg(rm)) { + VG_(printf)("imul %d, %s, %s\n", d32, nameIReg(size,eregOfRM(rm)), + nameIReg(size,gregOfRM(rm))); + } else { + VG_(printf)("imul %d, %s, %s\n", d32, dis_buf, + nameIReg(size,gregOfRM(rm))); + } + } + + return eip; +} + + +/* Handle FPU insns which read/write memory. On entry, eip points to + the second byte of the insn (the one following D8 .. DF). 
*/ +static +Addr dis_fpu_mem ( UCodeBlock* cb, Int size, Bool is_write, + Addr eip, UChar first_byte ) +{ + Int ta; + UInt pair; + UChar dis_buf[50]; + UChar second_byte = getUChar(eip); + vg_assert(second_byte < 0xC0); + second_byte &= 0x38; + pair = disAMode ( cb, eip, dis?dis_buf:NULL ); + ta = LOW24(pair); + eip += HI8(pair); + uInstr2(cb, is_write ? FPU_W : FPU_R, size, + Lit16, + (((UShort)first_byte) << 8) | ((UShort)second_byte), + TempReg, ta); + if (is_write) SMC_IF_ALL(cb); + if (dis) { + if (is_write) + VG_(printf)("fpu_w_%d 0x%x:0x%x, %s\n", + size, (UInt)first_byte, + (UInt)second_byte, dis_buf ); + else + VG_(printf)("fpu_r_%d %s, 0x%x:0x%x\n", + size, dis_buf, + (UInt)first_byte, + (UInt)second_byte ); + } + return eip; +} + + +/* Handle FPU insns which don't reference memory. On entry, eip points to + the second byte of the insn (the one following D8 .. DF). */ +static +Addr dis_fpu_no_mem ( UCodeBlock* cb, Addr eip, UChar first_byte ) +{ + UChar second_byte = getUChar(eip); eip++; + vg_assert(second_byte >= 0xC0); + uInstr1(cb, FPU, 0, + Lit16, + (((UShort)first_byte) << 8) | ((UShort)second_byte) + ); + if (dis) VG_(printf)("fpu 0x%x:0x%x\n", + (UInt)first_byte, (UInt)second_byte ); + return eip; +} + + +/* Top-level handler for all FPU insns. On entry, eip points to the + second byte of the insn. */ +static +Addr dis_fpu ( UCodeBlock* cb, UChar first_byte, Addr eip ) +{ + const Bool rd = False; + const Bool wr = True; + UChar second_byte = getUChar(eip); + + /* Handle FSTSW %ax specially. 
*/ + if (first_byte == 0xDF && second_byte == 0xE0) { + Int t1 = newTemp(cb); + uInstr0(cb, CALLM_S, 0); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t1); + uLiteral(cb, 0); + uInstr1(cb, PUSH, 4, TempReg, t1); + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_fstsw_AX) ); + uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); + uInstr1(cb, POP, 2, TempReg, t1); + uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX); + uInstr0(cb, CALLM_E, 0); + if (dis) VG_(printf)("fstsw %%ax\n"); + eip++; + return eip; + } + + /* Handle all non-memory FPU ops simply. */ + if (second_byte >= 0xC0) + return dis_fpu_no_mem ( cb, eip, first_byte ); + + /* The insn references memory; need to determine + whether it reads or writes, and at what size. */ + switch (first_byte) { + + case 0xD8: + switch ((second_byte >> 3) & 7) { + case 0: /* FADDs */ + case 1: /* FMULs */ + case 2: /* FCOMs */ + case 3: /* FCOMPs */ + case 4: /* FSUBs */ + case 5: /* FSUBRs */ + case 6: /* FDIVs */ + case 7: /* FDIVRs */ + return dis_fpu_mem(cb, 4, rd, eip, first_byte); + default: + goto unhandled; + } + break; + + case 0xD9: + switch ((second_byte >> 3) & 7) { + case 0: /* FLDs */ + return dis_fpu_mem(cb, 4, rd, eip, first_byte); + case 2: /* FSTs */ + case 3: /* FSTPs */ + return dis_fpu_mem(cb, 4, wr, eip, first_byte); + case 5: /* FLDCW */ + return dis_fpu_mem(cb, 2, rd, eip, first_byte); + case 7: /* FSTCW */ + /* HACK! FSTCW actually writes 2 bytes, not 4. glibc + gets lots of moaning in __floor() if we do the right + thing here. */ + /* Later ... hack disabled .. we do do the Right Thing. 
*/ + return dis_fpu_mem(cb, /*4*/ 2, wr, eip, first_byte); + default: + goto unhandled; + } + break; + + case 0xDA: + switch ((second_byte >> 3) & 7) { + case 0: /* FIADD */ + case 1: /* FIMUL */ + case 2: /* FICOM */ + case 3: /* FICOMP */ + case 4: /* FISUB */ + case 5: /* FISUBR */ + case 6: /* FIDIV */ + case 7: /* FIDIVR */ + return dis_fpu_mem(cb, 4, rd, eip, first_byte); + default: + goto unhandled; + } + break; + + case 0xDB: + switch ((second_byte >> 3) & 7) { + case 0: /* FILD dword-integer */ + return dis_fpu_mem(cb, 4, rd, eip, first_byte); + case 2: /* FIST dword-integer */ + return dis_fpu_mem(cb, 4, wr, eip, first_byte); + case 3: /* FISTPl */ + return dis_fpu_mem(cb, 4, wr, eip, first_byte); + case 5: /* FLD extended-real */ + return dis_fpu_mem(cb, 10, rd, eip, first_byte); + case 7: /* FSTP extended-real */ + return dis_fpu_mem(cb, 10, wr, eip, first_byte); + default: + goto unhandled; + } + break; + + case 0xDC: + switch ((second_byte >> 3) & 7) { + case 0: /* FADD double-real */ + case 1: /* FMUL double-real */ + case 2: /* FCOM double-real */ + case 3: /* FCOMP double-real */ + case 4: /* FSUB double-real */ + case 5: /* FSUBR double-real */ + case 6: /* FDIV double-real */ + case 7: /* FDIVR double-real */ + return dis_fpu_mem(cb, 8, rd, eip, first_byte); + default: + goto unhandled; + } + break; + + case 0xDD: + switch ((second_byte >> 3) & 7) { + case 0: /* FLD double-real */ + return dis_fpu_mem(cb, 8, rd, eip, first_byte); + case 2: /* FST double-real */ + case 3: /* FSTP double-real */ + return dis_fpu_mem(cb, 8, wr, eip, first_byte); + default: + goto unhandled; + } + break; + + case 0xDF: + switch ((second_byte >> 3) & 7) { + case 0: /* FILD word-integer */ + return dis_fpu_mem(cb, 2, rd, eip, first_byte); + case 2: /* FIST word-integer */ + return dis_fpu_mem(cb, 2, wr, eip, first_byte); + case 3: /* FISTP word-integer */ + return dis_fpu_mem(cb, 2, wr, eip, first_byte); + case 5: /* FILD qword-integer */ + return dis_fpu_mem(cb, 8, 
rd, eip, first_byte); + case 7: /* FISTP qword-integer */ + return dis_fpu_mem(cb, 8, wr, eip, first_byte); + default: + goto unhandled; + } + break; + + default: goto unhandled; + } + + unhandled: + VG_(printf)("dis_fpu: unhandled memory case 0x%2x:0x%2x(%d)\n", + (UInt)first_byte, (UInt)second_byte, + (UInt)((second_byte >> 3) & 7) ); + VG_(panic)("dis_fpu: unhandled opcodes"); +} + + +/* Double length left shifts. Apparently only required in v-size (no + b- variant). */ +static +Addr dis_SHLRD_Gv_Ev ( UCodeBlock* cb, Addr eip, UChar modrm, + Int sz, + Tag amt_tag, UInt amt_val, + Bool left_shift ) +{ + /* amt_tag and amt_val denote either ArchReg(%CL) or a Literal. + And eip on entry points at the modrm byte. */ + Int t, t1, t2, ta, helper; + UInt pair; + UChar dis_buf[50]; + + vg_assert(sz == 2 || sz == 4); + + helper = left_shift + ? (sz==4 ? VGOFF_(helper_shldl) + : VGOFF_(helper_shldw)) + : (sz==4 ? VGOFF_(helper_shrdl) + : VGOFF_(helper_shrdw)); + + /* Get the amount to be shifted by onto the stack. */ + t = newTemp(cb); + t1 = newTemp(cb); + t2 = newTemp(cb); + if (amt_tag == ArchReg) { + vg_assert(amt_val == R_CL); + uInstr2(cb, GET, 1, ArchReg, amt_val, TempReg, t); + } else { + uInstr2(cb, MOV, 1, Literal, 0, TempReg, t); + uLiteral(cb, amt_val); + } + + uInstr0(cb, CALLM_S, 0); + uInstr1(cb, PUSH, 1, TempReg, t); + + /* The E-part is the destination; this is shifted. The G-part + supplies bits to be shifted into the E-part, but is not + changed. 
*/ + + uInstr2(cb, GET, sz, ArchReg, gregOfRM(modrm), TempReg, t1); + uInstr1(cb, PUSH, sz, TempReg, t1); + + if (epartIsReg(modrm)) { + eip++; + uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t2); + uInstr1(cb, PUSH, sz, TempReg, t2); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagsOSZACP, FlagsEmpty); + uInstr1(cb, POP, sz, TempReg, t); + uInstr2(cb, PUT, sz, TempReg, t, ArchReg, eregOfRM(modrm)); + if (dis) + VG_(printf)("shld%c %%cl, %s, %s\n", + nameISize(sz), nameIReg(sz, gregOfRM(modrm)), + nameIReg(sz, eregOfRM(modrm))); + } else { + pair = disAMode ( cb, eip, dis?dis_buf:NULL ); + ta = LOW24(pair); + eip += HI8(pair); + uInstr2(cb, LOAD, sz, TempReg, ta, TempReg, t2); + uInstr1(cb, PUSH, sz, TempReg, t2); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagsOSZACP, FlagsEmpty); + uInstr1(cb, POP, sz, TempReg, t); + uInstr2(cb, STORE, sz, TempReg, t, TempReg, ta); + SMC_IF_ALL(cb); + if (dis) + VG_(printf)("shld%c %%cl, %s, %s\n", + nameISize(sz), nameIReg(sz, gregOfRM(modrm)), + dis_buf); + } + + if (amt_tag == Literal) eip++; + uInstr1(cb, CLEAR, 0, Lit16, 8); + + uInstr0(cb, CALLM_E, 0); + return eip; +} + + +/* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not + required. 
*/ + +typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp; + +static Char* nameBtOp ( BtOp op ) +{ + switch (op) { + case BtOpNone: return ""; + case BtOpSet: return "s"; + case BtOpReset: return "r"; + case BtOpComp: return "c"; + default: VG_(panic)("nameBtOp"); + } +} + +static +Addr dis_bt_G_E ( UCodeBlock* cb, Int sz, Addr eip, BtOp op ) +{ + Int t, t2, ta, helper; + UInt pair; + UChar dis_buf[50]; + UChar modrm; + + vg_assert(sz == 2 || sz == 4); + vg_assert(sz == 4); + switch (op) { + case BtOpNone: helper = VGOFF_(helper_bt); break; + case BtOpSet: helper = VGOFF_(helper_bts); break; + case BtOpReset: helper = VGOFF_(helper_btr); break; + case BtOpComp: helper = VGOFF_(helper_btc); break; + default: VG_(panic)("dis_bt_G_E"); + } + + modrm = getUChar(eip); + + t = newTemp(cb); + t2 = newTemp(cb); + uInstr0(cb, CALLM_S, 0); + uInstr2(cb, GET, sz, ArchReg, gregOfRM(modrm), TempReg, t); + uInstr1(cb, PUSH, sz, TempReg, t); + + if (epartIsReg(modrm)) { + eip++; + uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t2); + uInstr1(cb, PUSH, sz, TempReg, t2); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagC, FlagsOSZAP); + uInstr1(cb, POP, sz, TempReg, t); + uInstr2(cb, PUT, sz, TempReg, t, ArchReg, eregOfRM(modrm)); + if (dis) + VG_(printf)("bt%s%c %s, %s\n", + nameBtOp(op), + nameISize(sz), nameIReg(sz, gregOfRM(modrm)), + nameIReg(sz, eregOfRM(modrm))); + } else { + pair = disAMode ( cb, eip, dis?dis_buf:NULL ); + ta = LOW24(pair); + eip += HI8(pair); + uInstr2(cb, LOAD, sz, TempReg, ta, TempReg, t2); + uInstr1(cb, PUSH, sz, TempReg, t2); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagC, FlagsOSZAP); + uInstr1(cb, POP, sz, TempReg, t); + uInstr2(cb, STORE, sz, TempReg, t, TempReg, ta); + SMC_IF_ALL(cb); + if (dis) + VG_(printf)("bt%s%c %s, %s\n", + nameBtOp(op), + nameISize(sz), nameIReg(sz, gregOfRM(modrm)), + dis_buf); + } + + uInstr1(cb, CLEAR, 0, Lit16, 4); + uInstr0(cb, CALLM_E, 0); + + return 
eip; +} + + +/* Handle BSF/BSR. Only v-size seems necessary. */ +static +Addr dis_bs_E_G ( UCodeBlock* cb, Int sz, Addr eip, Bool fwds ) +{ + Int t, ta, helper; + UInt pair; + UChar dis_buf[50]; + UChar modrm; + + vg_assert(sz == 2 || sz == 4); + vg_assert(sz==4); + + helper = fwds ? VGOFF_(helper_bsf) : VGOFF_(helper_bsr); + modrm = getUChar(eip); + t = newTemp(cb); + + if (epartIsReg(modrm)) { + eip++; + uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t); + if (dis) + VG_(printf)("bs%c%c %s, %s\n", + fwds ? 'f' : 'r', + nameISize(sz), nameIReg(sz, eregOfRM(modrm)), + nameIReg(sz, gregOfRM(modrm))); + } else { + pair = disAMode ( cb, eip, dis?dis_buf:NULL ); + ta = LOW24(pair); + eip += HI8(pair); + uInstr2(cb, LOAD, sz, TempReg, ta, TempReg, t); + if (dis) + VG_(printf)("bs%c%c %s, %s\n", + fwds ? 'f' : 'r', + nameISize(sz), dis_buf, + nameIReg(sz, gregOfRM(modrm))); + } + + uInstr0(cb, CALLM_S, 0); + uInstr1(cb, PUSH, sz, TempReg, t); + uInstr1(cb, CALLM, 0, Lit16, helper); + uFlagsRWU(cb, FlagsEmpty, FlagZ, FlagsOSACP); + uInstr1(cb, POP, sz, TempReg, t); + uInstr2(cb, PUT, sz, TempReg, t, ArchReg, gregOfRM(modrm)); + uInstr0(cb, CALLM_E, 0); + + return eip; +} + + +static +void codegen_xchg_eAX_Reg ( UCodeBlock* cb, Int sz, Int reg ) +{ + Int t1, t2; + vg_assert(sz == 2 || sz == 4); + t1 = newTemp(cb); + t2 = newTemp(cb); + uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, t1); + uInstr2(cb, GET, sz, ArchReg, reg, TempReg, t2); + uInstr2(cb, PUT, sz, TempReg, t2, ArchReg, R_EAX); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, reg); + if (dis) + VG_(printf)("xchg%c %s, %s\n", nameISize(sz), + nameIReg(sz, R_EAX), nameIReg(sz, reg)); +} + + +static +void codegen_SAHF ( UCodeBlock* cb ) +{ + Int t = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t); + uInstr0(cb, CALLM_S, 0); + uInstr1(cb, PUSH, 4, TempReg, t); + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_SAHF)); + uFlagsRWU(cb, FlagsEmpty, FlagsSZACP, FlagsEmpty); + uInstr1(cb, CLEAR, 0, Lit16, 
4); + uInstr0(cb, CALLM_E, 0); +} + + +static +Addr dis_cmpxchg_G_E ( UCodeBlock* cb, + Int size, + Addr eip0 ) +{ + Int ta, junk, dest, src, acc; + UChar dis_buf[50]; + UChar rm; + + rm = getUChar(eip0); + acc = newTemp(cb); + src = newTemp(cb); + dest = newTemp(cb); + junk = newTemp(cb); + /* Only needed to get gcc's dataflow analyser off my back. */ + ta = INVALID_TEMPREG; + + if (epartIsReg(rm)) { + uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, dest); + eip0++; + if (dis) VG_(printf)("cmpxchg%c %s,%s\n", + nameISize(size), + nameIReg(size,gregOfRM(rm)), + nameIReg(size,eregOfRM(rm)) ); + nameIReg(size,eregOfRM(rm)); + } else { + UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL ); + ta = LOW24(pair); + uInstr2(cb, LOAD, size, TempReg, ta, TempReg, dest); + eip0 += HI8(pair); + if (dis) VG_(printf)("cmpxchg%c %s,%s\n", nameISize(size), + nameIReg(size,gregOfRM(rm)), dis_buf); + } + + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, src); + uInstr2(cb, GET, size, ArchReg, R_EAX, TempReg, acc); + uInstr2(cb, MOV, size, TempReg, acc, TempReg, junk); + uInstr2(cb, SUB, size, TempReg, dest, TempReg, junk); + setFlagsFromUOpcode(cb, SUB); + + uInstr2(cb, CMOV, 4, TempReg, src, TempReg, dest); + uCond(cb, CondZ); + uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); + uInstr2(cb, CMOV, 4, TempReg, dest, TempReg, acc); + uCond(cb, CondNZ); + uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); + + uInstr2(cb, PUT, size, TempReg, acc, ArchReg, R_EAX); + if (epartIsReg(rm)) { + uInstr2(cb, PUT, size, TempReg, dest, ArchReg, eregOfRM(rm)); + } else { + uInstr2(cb, STORE, size, TempReg, dest, TempReg, ta); + } + + return eip0; +} + + +/* Handle conditional move instructions of the form + cmovcc E(reg-or-mem), G(reg) + + E(src) is reg-or-mem + G(dst) is reg. 
+ + If E is reg, --> GET %E, tmps + GET %G, tmpd + CMOVcc tmps, tmpd + PUT tmpd, %G + + If E is mem --> (getAddr E) -> tmpa + LD (tmpa), tmps + GET %G, tmpd + CMOVcc tmps, tmpd + PUT tmpd, %G +*/ +static +Addr dis_cmov_E_G ( UCodeBlock* cb, + Int size, + Condcode cond, + Addr eip0 ) +{ + UChar rm = getUChar(eip0); + UChar dis_buf[50]; + + Int tmps = newTemp(cb); + Int tmpd = newTemp(cb); + + if (epartIsReg(rm)) { + uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, tmps); + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tmpd); + uInstr2(cb, CMOV, 4, TempReg, tmps, TempReg, tmpd); + uCond(cb, cond); + uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); + uInstr2(cb, PUT, size, TempReg, tmpd, ArchReg, gregOfRM(rm)); + if (dis) VG_(printf)("cmov%c%s %s,%s\n", + nameISize(size), + VG_(nameCondcode)(cond), + nameIReg(size,eregOfRM(rm)), + nameIReg(size,gregOfRM(rm))); + return 1+eip0; + } + + /* E refers to memory */ + { + UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); + Int tmpa = LOW24(pair); + uInstr2(cb, LOAD, size, TempReg, tmpa, TempReg, tmps); + uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tmpd); + uInstr2(cb, CMOV, 4, TempReg, tmps, TempReg, tmpd); + uCond(cb, cond); + uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); + uInstr2(cb, PUT, size, TempReg, tmpd, ArchReg, gregOfRM(rm)); + if (dis) VG_(printf)("cmov%c%s %s,%s\n", + nameISize(size), + VG_(nameCondcode)(cond), + dis_buf, + nameIReg(size,gregOfRM(rm))); + return HI8(pair)+eip0; + } +} + + +static +Addr dis_xadd_G_E ( UCodeBlock* cb, + Int sz, + Addr eip0 ) +{ + UChar rm = getUChar(eip0); + UChar dis_buf[50]; + + Int tmpd = newTemp(cb); + Int tmpt = newTemp(cb); + + if (epartIsReg(rm)) { + uInstr2(cb, GET, sz, ArchReg, eregOfRM(rm), TempReg, tmpd); + uInstr2(cb, GET, sz, ArchReg, gregOfRM(rm), TempReg, tmpt); + uInstr2(cb, ADD, sz, TempReg, tmpd, TempReg, tmpt); + setFlagsFromUOpcode(cb, ADD); + uInstr2(cb, PUT, sz, TempReg, tmpt, ArchReg, eregOfRM(rm)); + uInstr2(cb, PUT, sz, 
TempReg, tmpd, ArchReg, gregOfRM(rm)); + if (dis) + VG_(printf)("xadd%c %s, %s\n", nameISize(sz), + nameIReg(sz,gregOfRM(rm)), + nameIReg(sz,eregOfRM(rm))); + return 1+eip0; + } else { + UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); + Int tmpa = LOW24(pair); + uInstr2(cb, LOAD, sz, TempReg, tmpa, TempReg, tmpd); + uInstr2(cb, GET, sz, ArchReg, gregOfRM(rm), TempReg, tmpt); + uInstr2(cb, ADD, sz, TempReg, tmpd, TempReg, tmpt); + setFlagsFromUOpcode(cb, ADD); + uInstr2(cb, STORE, sz, TempReg, tmpt, TempReg, tmpa); + SMC_IF_SOME(cb); + uInstr2(cb, PUT, sz, TempReg, tmpd, ArchReg, gregOfRM(rm)); + if (dis) + VG_(printf)("xadd%c %s, %s\n", nameISize(sz), + nameIReg(sz,gregOfRM(rm)), + dis_buf); + return HI8(pair)+eip0; + } +} + + +/* Push %ECX, %EBX and %EAX, call helper_do_client_request, and put + the resulting %EAX value back. */ +static +void dis_ClientRequest ( UCodeBlock* cb ) +{ + Int tmpc = newTemp(cb); + Int tmpb = newTemp(cb); + Int tmpa = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, tmpc); + uInstr2(cb, GET, 4, ArchReg, R_EBX, TempReg, tmpb); + uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, tmpa); + uInstr0(cb, CALLM_S, 0); + uInstr1(cb, PUSH, 4, TempReg, tmpc); + uInstr1(cb, PUSH, 4, TempReg, tmpb); + uInstr1(cb, PUSH, 4, TempReg, tmpa); + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_do_client_request)); + uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); + uInstr1(cb, POP, 4, TempReg, tmpa); + uInstr1(cb, CLEAR, 0, Lit16, 8); + uInstr0(cb, CALLM_E, 0); + uInstr2(cb, PUT, 4, TempReg, tmpa, ArchReg, R_EAX); + if (dis) + VG_(printf)("%%eax = client_request ( %%eax, %%ebx, %%ecx )\n"); +} + + +/*------------------------------------------------------------*/ +/*--- Disassembling entire basic blocks ---*/ +/*------------------------------------------------------------*/ + +/* Disassemble a single instruction into ucode, returning the update + eip, and setting *isEnd to True if this is the last insn in a basic + block. 
Also do debug printing if (dis). */ + +static Addr disInstr ( UCodeBlock* cb, Addr eip, Bool* isEnd ) +{ + UChar opc, modrm, abyte; + UInt d32, pair; + Int t1, t2, t3, t4; + UChar dis_buf[50]; + Int am_sz, d_sz; + + Int sz = 4; + Int first_uinstr = cb->used; + *isEnd = False; + t1 = t2 = t3 = t4 = INVALID_TEMPREG; + + if (dis) VG_(printf)("\t0x%x: ", eip); + + /* Spot the client-request magic sequence, if required. */ + if (VG_(clo_client_perms)) { + UChar* myeip = (UChar*)eip; + /* Spot this: + C1C01D roll $29, %eax + C1C003 roll $3, %eax + C1C01B roll $27, %eax + C1C005 roll $5, %eax + */ + if (myeip[0] == 0xC1 && myeip[1] == 0xC0 && myeip[2] == 0x1D && + myeip[3] == 0xC1 && myeip[4] == 0xC0 && myeip[5] == 0x03 && + myeip[6] == 0xC1 && myeip[7] == 0xC0 && myeip[8] == 0x1B && + myeip[9] == 0xC1 && myeip[10] == 0xC0 && myeip[11] == 0x05) { + vg_assert(VG_(clo_instrument)); + dis_ClientRequest(cb); + eip += 12; + return eip; + } + } + + /* Skip a LOCK prefix. */ + if (getUChar(eip) == 0xF0) eip++; + + /* Crap out if we see a segment override prefix. */ + if (getUChar(eip) == 0x65) { + VG_(message)(Vg_DebugMsg, ""); + VG_(message)(Vg_DebugMsg, "Possible workaround for the following abort: do not use special"); + VG_(message)(Vg_DebugMsg, "PII/PIII-specific pthreads library (possibly in /lib/i686/*.so)."); + VG_(message)(Vg_DebugMsg, "You might be able to kludge around this by renaming /lib/i686 to"); + VG_(message)(Vg_DebugMsg, "/lib/i686-HIDDEN. On RedHat 7.2 this causes ld.so to fall back"); + VG_(message)(Vg_DebugMsg, "to using the less specialised versions in /lib instead, which"); + VG_(message)(Vg_DebugMsg, "valgrind might be able to better deal with."); + VG_(message)(Vg_DebugMsg, ""); + VG_(message)(Vg_DebugMsg, "WARNING. WARNING. WARNING. WARNING. WARNING. WARNING. WARNING."); + VG_(message)(Vg_DebugMsg, "WARNING: The suggested kludge may also render your system unbootable"); + VG_(message)(Vg_DebugMsg, "WARNING: or otherwise totally screw it up. 
Only try this if you"); + VG_(message)(Vg_DebugMsg, "WARNING: know what you are doing, and are prepared to take risks."); + VG_(message)(Vg_DebugMsg, "YOU HAVE BEEN WARNED. YOU HAVE BEEN WARNED. YOU HAVE BEEN WARNED."); + VG_(message)(Vg_DebugMsg, ""); + VG_(message)(Vg_DebugMsg, "Another consideration is that this may well mean your application"); + VG_(message)(Vg_DebugMsg, "uses threads, which valgrind doesn't currently support, so even if"); + VG_(message)(Vg_DebugMsg, "you work around this problem, valgrind may abort later if it sees"); + VG_(message)(Vg_DebugMsg, "a clone() system call."); + VG_(unimplemented)("x86 segment override (SEG=GS) prefix; see above for details"); + } + + /* Detect operand-size overrides. */ + if (getUChar(eip) == 0x66) { sz = 2; eip++; }; + + opc = getUChar(eip); eip++; + + switch (opc) { + + /* ------------------------ Control flow --------------- */ + + case 0xC2: /* RET imm16 */ + d32 = getUDisp16(eip); eip += 2; + goto do_Ret; + case 0xC3: /* RET */ + d32 = 0; + goto do_Ret; + do_Ret: + t1 = newTemp(cb); t2 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t1); + uInstr2(cb, LOAD, 4, TempReg, t1, TempReg, t2); + uInstr2(cb, ADD, 4, Literal, 0, TempReg, t1); + uLiteral(cb, 4+d32); + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ESP); + uInstr1(cb, JMP, 0, TempReg, t2); + uCond(cb, CondAlways); + + if (d32 == 0) + LAST_UINSTR(cb).ret_dispatch = True; + + *isEnd = True; + if (dis) { + if (d32 == 0) VG_(printf)("ret\n"); + else VG_(printf)("ret %d\n", d32); + } + break; + + case 0xE8: /* CALL J4 */ + d32 = getUDisp32(eip); eip += 4; + d32 += eip; /* eip now holds return-to addr, d32 is call-to addr */ + if (d32 == (Addr)&VG_(shutdown)) { + /* Set vg_dispatch_ctr to 1, vg_interrupt_reason to VG_Y_EXIT, + and get back to the dispatch loop. We ask for a jump to this + CALL insn because vg_dispatch will ultimately transfer control + to the real CPU, and we want this call to be the first insn + it does. 
*/ + uInstr0(cb, CALLM_S, 0); + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_request_normal_exit)); + uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); + uInstr0(cb, CALLM_E, 0); + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, eip-5); + uCond(cb, CondAlways); + *isEnd = True; + if (dis) VG_(printf)("call 0x%x\n",d32); + } else + if (d32 == eip && getUChar(eip) >= 0x58 + && getUChar(eip) <= 0x5F) { + /* Specially treat the position-independent-code idiom + call X + X: popl %reg + as + movl %eip, %reg. + since this generates better code, but for no other reason. */ + Int archReg = getUChar(eip) - 0x58; + /* VG_(printf)("-- fPIC thingy\n"); */ + t1 = newTemp(cb); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t1); + uLiteral(cb, eip); + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, archReg); + eip++; /* Step over the POP */ + if (dis) + VG_(printf)("call 0x%x ; popl %s\n",d32,nameIReg(4,archReg)); + } else { + /* The normal sequence for a call. */ + t1 = newTemp(cb); t2 = newTemp(cb); t3 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3); + uInstr2(cb, MOV, 4, TempReg, t3, TempReg, t1); + uInstr2(cb, SUB, 4, Literal, 0, TempReg, t1); + uLiteral(cb, 4); + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ESP); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2); + uLiteral(cb, eip); + uInstr2(cb, STORE, 4, TempReg, t2, TempReg, t1); + SMC_IF_ALL(cb); + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, d32); + uCond(cb, CondAlways); + LAST_UINSTR(cb).call_dispatch = True; + *isEnd = True; + if (dis) VG_(printf)("call 0x%x\n",d32); + } + break; + + case 0xC9: /* LEAVE */ + t1 = newTemp(cb); t2 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_EBP, TempReg, t1); + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ESP); + uInstr2(cb, LOAD, 4, TempReg, t1, TempReg, t2); + uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP); + uInstr2(cb, ADD, 4, Literal, 0, TempReg, t1); + uLiteral(cb, 4); + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ESP); + if (dis) VG_(printf)("leave"); + break; + + /* 
------------------------ CWD/CDQ -------------------- */ + + case 0x98: /* CBW */ + t1 = newTemp(cb); + if (sz == 4) { + uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, t1); + uInstr1(cb, WIDEN, 4, TempReg, t1); /* 4 == dst size */ + LAST_UINSTR(cb).extra4b = 2; /* the source size */ + LAST_UINSTR(cb).signed_widen = True; + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_EAX); + if (dis) VG_(printf)("cwd\n"); + } else { + vg_assert(sz == 2); + uInstr2(cb, GET, 1, ArchReg, R_EAX, TempReg, t1); + uInstr1(cb, WIDEN, 2, TempReg, t1); /* 2 == dst size */ + LAST_UINSTR(cb).extra4b = 1; /* the source size */ + LAST_UINSTR(cb).signed_widen = True; + uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX); + if (dis) VG_(printf)("cbw\n"); + } + break; + + case 0x99: /* CWD/CDQ */ + t1 = newTemp(cb); + uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, t1); + uInstr2(cb, SAR, sz, Literal, 0, TempReg, t1); + uLiteral(cb, sz == 2 ? 15 : 31); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, R_EDX); + if (dis) VG_(printf)(sz == 2 ? "cwdq\n" : "cdqq\n"); + break; + + /* ------------------------ FPU ops -------------------- */ + + case 0x9E: /* SAHF */ + codegen_SAHF ( cb ); + if (dis) VG_(printf)("sahf\n"); + break; + + case 0x9B: /* FWAIT */ + /* ignore? 
*/ + if (dis) VG_(printf)("fwait\n"); + break; + + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: + eip = dis_fpu ( cb, opc, eip ); + break; + + /* ------------------------ INC & DEC ------------------ */ + + case 0x40: /* INC eAX */ + case 0x41: /* INC eCX */ + case 0x42: /* INC eDX */ + case 0x43: /* INC eBX */ + case 0x45: /* INC eBP */ + case 0x46: /* INC eSI */ + case 0x47: /* INC eDI */ + t1 = newTemp(cb); + uInstr2(cb, GET, sz, ArchReg, (UInt)(opc - 0x40), + TempReg, t1); + uInstr1(cb, INC, sz, TempReg, t1); + setFlagsFromUOpcode(cb, INC); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, + (UInt)(opc - 0x40)); + if (dis) + VG_(printf)("inc%c %s\n", nameISize(sz), nameIReg(sz,opc-0x40)); + break; + + case 0x48: /* DEC eAX */ + case 0x49: /* DEC eCX */ + case 0x4A: /* DEC eDX */ + case 0x4B: /* DEC eBX */ + case 0x4D: /* DEC eBP */ + case 0x4E: /* DEC eSI */ + case 0x4F: /* DEC eDI */ + t1 = newTemp(cb); + uInstr2(cb, GET, sz, ArchReg, (UInt)(opc - 0x48), + TempReg, t1); + uInstr1(cb, DEC, sz, TempReg, t1); + setFlagsFromUOpcode(cb, DEC); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, + (UInt)(opc - 0x48)); + if (dis) + VG_(printf)("dec%c %s\n", nameISize(sz), nameIReg(sz,opc-0x48)); + break; + + /* ------------------------ INT ------------------------ */ + + case 0xCD: /* INT imm8 */ + d32 = getUChar(eip); eip++; + if (d32 != 0x80) VG_(panic)("disInstr: INT but not 0x80 !"); + /* It's important that all ArchRegs carry their up-to-date value + at this point. So we declare an end-of-block here, which + forces any TempRegs caching ArchRegs to be flushed. 
*/ + t1 = newTemp(cb); + uInstr0(cb, CALLM_S, 0); + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_do_syscall) ); + uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); + uInstr0(cb, CALLM_E, 0); + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, eip); + uCond(cb, CondAlways); + *isEnd = True; + if (dis) VG_(printf)("int $0x80\n"); + break; + + /* ------------------------ Jcond, byte offset --------- */ + + case 0xEB: /* Jb (jump, byte offset) */ + d32 = (eip+1) + getSDisp8(eip); eip++; + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, d32); + uCond(cb, CondAlways); + *isEnd = True; + if (dis) + VG_(printf)("jmp-8 0x%x\n", d32); + break; + + case 0xE9: /* Jv (jump, 16/32 offset) */ + d32 = (eip+sz) + getSDisp(sz,eip); eip += sz; + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, d32); + uCond(cb, CondAlways); + *isEnd = True; + if (dis) + VG_(printf)("jmp 0x%x\n", d32); + break; + + case 0x70: + case 0x71: + case 0x72: /* JBb/JNAEb (jump below) */ + case 0x73: /* JNBb/JAEb (jump not below) */ + case 0x74: /* JZb/JEb (jump zero) */ + case 0x75: /* JNZb/JNEb (jump not zero) */ + case 0x76: /* JBEb/JNAb (jump below or equal) */ + case 0x77: /* JNBEb/JAb (jump not below or equal) */ + case 0x78: /* JSb (jump negative) */ + case 0x79: /* JSb (jump not negative) */ + case 0x7A: /* JP (jump parity even) */ + case 0x7B: /* JNP/JPO (jump parity odd) */ + case 0x7C: /* JLb/JNGEb (jump less) */ + case 0x7D: /* JGEb/JNLb (jump greater or equal) */ + case 0x7E: /* JLEb/JNGb (jump less or equal) */ + case 0x7F: /* JGb/JNLEb (jump greater) */ + d32 = (eip+1) + getSDisp8(eip); eip++; + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, d32); + uCond(cb, (Condcode)(opc - 0x70)); + uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); + /* It's actually acceptable not to end this basic block at a + control transfer, reducing the number of jumps through + vg_dispatch, at the expense of possibly translating the insns + following this jump twice. 
This does give faster code, but + on the whole I don't think the effort is worth it. */ + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, eip); + uCond(cb, CondAlways); + *isEnd = True; + /* The above 3 lines would be removed if the bb was not to end + here. */ + if (dis) + VG_(printf)("j%s-8 0x%x\n", VG_(nameCondcode)(opc - 0x70), d32); + break; + + case 0xE3: /* JECXZ or perhaps JCXZ, depending on OSO ? Intel + manual says it depends on address size override, + which doesn't sound right to me. */ + d32 = (eip+1) + getSDisp8(eip); eip++; + t1 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, t1); + uInstr2(cb, JIFZ, 4, TempReg, t1, Literal, 0); + uLiteral(cb, d32); + if (dis) + VG_(printf)("j%sz 0x%x\n", nameIReg(sz, R_ECX), d32); + break; + + case 0xE2: /* LOOP disp8 */ + /* Again, the docs say this uses ECX/CX as a count depending on + the address size override, not the operand one. Since we + don't handle address size overrides, I guess that means + ECX. */ + d32 = (eip+1) + getSDisp8(eip); eip++; + t1 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, t1); + uInstr1(cb, DEC, 4, TempReg, t1); + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ECX); + uInstr2(cb, JIFZ, 4, TempReg, t1, Literal, 0); + uLiteral(cb, eip); + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, d32); + uCond(cb, CondAlways); + *isEnd = True; + if (dis) + VG_(printf)("loop 0x%x\n", d32); + break; + + /* ------------------------ IMUL ----------------------- */ + + case 0x69: /* IMUL Iv, Ev, Gv */ + eip = dis_imul_I_E_G ( cb, sz, eip, sz ); + break; + case 0x6B: /* IMUL Ib, Ev, Gv */ + eip = dis_imul_I_E_G ( cb, sz, eip, 1 ); + break; + + /* ------------------------ MOV ------------------------ */ + + case 0x88: /* MOV Gb,Eb */ + eip = dis_mov_G_E(cb, 1, eip); + break; + + case 0x89: /* MOV Gv,Ev */ + eip = dis_mov_G_E(cb, sz, eip); + break; + + case 0x8A: /* MOV Eb,Gb */ + eip = dis_mov_E_G(cb, 1, eip); + break; + + case 0x8B: /* MOV Ev,Gv */ + eip = dis_mov_E_G(cb, sz, 
eip); + break; + + case 0x8D: /* LEA M,Gv */ + modrm = getUChar(eip); + if (epartIsReg(modrm)) + VG_(panic)("LEA M,Gv: modRM refers to register"); + pair = disAMode ( cb, eip, dis?dis_buf:NULL ); + eip += HI8(pair); + t1 = LOW24(pair); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, gregOfRM(modrm)); + if (dis) + VG_(printf)("lea%c %s, %s\n", nameISize(sz), dis_buf, + nameIReg(sz,gregOfRM(modrm))); + break; + + case 0xA0: /* MOV Ob,AL */ + sz = 1; + /* Fall through ... */ + case 0xA1: /* MOV Ov,eAX */ + d32 = getUDisp32(eip); eip += 4; + t1 = newTemp(cb); t2 = newTemp(cb); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2); + uLiteral(cb, d32); + uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, R_EAX); + if (dis) VG_(printf)("mov%c 0x%x,%s\n", nameISize(sz), + d32, nameIReg(sz,R_EAX)); + break; + + case 0xA2: /* MOV AL,Ob */ + sz = 1; + /* Fall through ... */ + case 0xA3: /* MOV eAX,Ov */ + d32 = getUDisp32(eip); eip += 4; + t1 = newTemp(cb); t2 = newTemp(cb); + uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, t1); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2); + uLiteral(cb, d32); + uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); + SMC_IF_SOME(cb); + if (dis) VG_(printf)("mov%c %s,0x%x\n", nameISize(sz), + nameIReg(sz,R_EAX), d32); + break; + + case 0xB0: /* MOV imm,AL */ + case 0xB1: /* MOV imm,CL */ + case 0xB2: /* MOV imm,DL */ + case 0xB3: /* MOV imm,BL */ + case 0xB4: /* MOV imm,AH */ + case 0xB5: /* MOV imm,CH */ + case 0xB6: /* MOV imm,DH */ + case 0xB7: /* MOV imm,BH */ + d32 = getUChar(eip); eip += 1; + t1 = newTemp(cb); + uInstr2(cb, MOV, 1, Literal, 0, TempReg, t1); + uLiteral(cb, d32); + uInstr2(cb, PUT, 1, TempReg, t1, ArchReg, opc-0xB0); + if (dis) VG_(printf)("movb $0x%x,%s\n", d32, + nameIReg(1,opc-0xB0)); + break; + + case 0xB8: /* MOV imm,eAX */ + case 0xB9: /* MOV imm,eCX */ + case 0xBA: /* MOV imm,eDX */ + case 0xBB: /* MOV imm,eBX */ + case 0xBD: /* MOV imm,eBP */ + case 0xBE: /* MOV imm,eSI */ + case 
0xBF: /* MOV imm,eDI */ + d32 = getUDisp(sz,eip); eip += sz; + t1 = newTemp(cb); + uInstr2(cb, MOV, sz, Literal, 0, TempReg, t1); + uLiteral(cb, d32); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, opc-0xB8); + if (dis) VG_(printf)("mov%c $0x%x,%s\n", nameISize(sz), d32, + nameIReg(sz,opc-0xB8)); + break; + + case 0xC6: /* MOV Ib,Eb */ + sz = 1; + goto do_Mov_I_E; + case 0xC7: /* MOV Iv,Ev */ + goto do_Mov_I_E; + + do_Mov_I_E: + modrm = getUChar(eip); + if (epartIsReg(modrm)) { + d32 = getUDisp(sz,eip); eip += sz; + t1 = newTemp(cb); + uInstr2(cb, MOV, sz, Literal, 0, TempReg, t1); + uLiteral(cb, d32); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm)); + if (dis) VG_(printf)("mov%c $0x%x, %s\n", nameISize(sz), d32, + nameIReg(sz,eregOfRM(modrm))); + } else { + pair = disAMode ( cb, eip, dis?dis_buf:NULL ); + eip += HI8(pair); + d32 = getUDisp(sz,eip); eip += sz; + t1 = newTemp(cb); + t2 = LOW24(pair); + uInstr2(cb, MOV, sz, Literal, 0, TempReg, t1); + uLiteral(cb, d32); + uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); + SMC_IF_SOME(cb); + if (dis) VG_(printf)("mov%c $0x%x, %s\n", nameISize(sz), d32, dis_buf); + } + break; + + /* ------------------------ opl imm, A ----------------- */ + + case 0x04: /* ADD Ib, AL */ + eip = dis_op_imm_A(cb, 1, ADD, True, eip, "add" ); + break; + case 0x05: /* ADD Iv, eAX */ + eip = dis_op_imm_A(cb, sz, ADD, True, eip, "add" ); + break; + + case 0x0C: /* OR Ib, AL */ + eip = dis_op_imm_A(cb, 1, OR, True, eip, "or" ); + break; + case 0x0D: /* OR Iv, eAX */ + eip = dis_op_imm_A(cb, sz, OR, True, eip, "or" ); + break; + + case 0x24: /* AND Ib, AL */ + eip = dis_op_imm_A(cb, 1, AND, True, eip, "and" ); + break; + case 0x25: /* AND Iv, eAX */ + eip = dis_op_imm_A(cb, sz, AND, True, eip, "and" ); + break; + + case 0x2C: /* SUB Ib, AL */ + eip = dis_op_imm_A(cb, 1, SUB, True, eip, "sub" ); + break; + case 0x2D: /* SUB Iv, eAX */ + eip = dis_op_imm_A(cb, sz, SUB, True, eip, "sub" ); + break; + + case 0x34: /* XOR Ib, AL */ + 
eip = dis_op_imm_A(cb, 1, XOR, True, eip, "xor" ); + break; + case 0x35: /* XOR Iv, eAX */ + eip = dis_op_imm_A(cb, sz, XOR, True, eip, "xor" ); + break; + + case 0x3C: /* CMP Ib, AL */ + eip = dis_op_imm_A(cb, 1, SUB, False, eip, "cmp" ); + break; + case 0x3D: /* CMP Iv, eAX */ + eip = dis_op_imm_A(cb, sz, SUB, False, eip, "cmp" ); + break; + + case 0xA8: /* TEST Ib, AL */ + eip = dis_op_imm_A(cb, 1, AND, False, eip, "test" ); + break; + case 0xA9: /* TEST Iv, eAX */ + eip = dis_op_imm_A(cb, sz, AND, False, eip, "test" ); + break; + + /* ------------------------ opl Ev, Gv ----------------- */ + + case 0x02: /* ADD Eb,Gb */ + eip = dis_op2_E_G ( cb, ADD, True, 1, eip, "add" ); + break; + case 0x03: /* ADD Ev,Gv */ + eip = dis_op2_E_G ( cb, ADD, True, sz, eip, "add" ); + break; + + case 0x0A: /* OR Eb,Gb */ + eip = dis_op2_E_G ( cb, OR, True, 1, eip, "or" ); + break; + case 0x0B: /* OR Ev,Gv */ + eip = dis_op2_E_G ( cb, OR, True, sz, eip, "or" ); + break; + + case 0x13: /* ADC Ev,Gv */ + eip = dis_op2_E_G ( cb, ADC, True, sz, eip, "adc" ); + break; + + case 0x1B: /* SBB Ev,Gv */ + eip = dis_op2_E_G ( cb, SBB, True, sz, eip, "sbb" ); + break; + + case 0x22: /* AND Eb,Gb */ + eip = dis_op2_E_G ( cb, AND, True, 1, eip, "and" ); + break; + case 0x23: /* AND Ev,Gv */ + eip = dis_op2_E_G ( cb, AND, True, sz, eip, "and" ); + break; + + case 0x2A: /* SUB Eb,Gb */ + eip = dis_op2_E_G ( cb, SUB, True, 1, eip, "sub" ); + break; + case 0x2B: /* SUB Ev,Gv */ + eip = dis_op2_E_G ( cb, SUB, True, sz, eip, "sub" ); + break; + + case 0x32: /* XOR Eb,Gb */ + eip = dis_op2_E_G ( cb, XOR, True, 1, eip, "xor" ); + break; + case 0x33: /* XOR Ev,Gv */ + eip = dis_op2_E_G ( cb, XOR, True, sz, eip, "xor" ); + break; + + case 0x3A: /* CMP Eb,Gb */ + eip = dis_op2_E_G ( cb, SUB, False, 1, eip, "cmp" ); + break; + case 0x3B: /* CMP Ev,Gv */ + eip = dis_op2_E_G ( cb, SUB, False, sz, eip, "cmp" ); + break; + + case 0x84: /* TEST Eb,Gb */ + eip = dis_op2_E_G ( cb, AND, False, 1, eip, "test" ); + 
break; + case 0x85: /* TEST Ev,Gv */ + eip = dis_op2_E_G ( cb, AND, False, sz, eip, "test" ); + break; + + /* ------------------------ opl Gv, Ev ----------------- */ + + case 0x00: /* ADD Gb,Eb */ + eip = dis_op2_G_E ( cb, ADD, True, 1, eip, "add" ); + break; + case 0x01: /* ADD Gv,Ev */ + eip = dis_op2_G_E ( cb, ADD, True, sz, eip, "add" ); + break; + + case 0x08: /* OR Gb,Eb */ + eip = dis_op2_G_E ( cb, OR, True, 1, eip, "or" ); + break; + case 0x09: /* OR Gv,Ev */ + eip = dis_op2_G_E ( cb, OR, True, sz, eip, "or" ); + break; + + case 0x11: /* ADC Gv,Ev */ + eip = dis_op2_G_E ( cb, ADC, True, sz, eip, "adc" ); + break; + + case 0x19: /* SBB Gv,Ev */ + eip = dis_op2_G_E ( cb, SBB, True, sz, eip, "sbb" ); + break; + + case 0x20: /* AND Gb,Eb */ + eip = dis_op2_G_E ( cb, AND, True, 1, eip, "and" ); + break; + case 0x21: /* AND Gv,Ev */ + eip = dis_op2_G_E ( cb, AND, True, sz, eip, "and" ); + break; + + case 0x28: /* SUB Gb,Eb */ + eip = dis_op2_G_E ( cb, SUB, True, 1, eip, "sub" ); + break; + case 0x29: /* SUB Gv,Ev */ + eip = dis_op2_G_E ( cb, SUB, True, sz, eip, "sub" ); + break; + + case 0x30: /* XOR Gb,Eb */ + eip = dis_op2_G_E ( cb, XOR, True, 1, eip, "xor" ); + break; + case 0x31: /* XOR Gv,Ev */ + eip = dis_op2_G_E ( cb, XOR, True, sz, eip, "xor" ); + break; + + case 0x38: /* CMP Gb,Eb */ + eip = dis_op2_G_E ( cb, SUB, False, 1, eip, "cmp" ); + break; + case 0x39: /* CMP Gv,Ev */ + eip = dis_op2_G_E ( cb, SUB, False, sz, eip, "cmp" ); + break; + + /* ------------------------ POP ------------------------ */ + + case 0x58: /* POP eAX */ + case 0x59: /* POP eCX */ + case 0x5A: /* POP eDX */ + case 0x5B: /* POP eBX */ + case 0x5C: /* POP eSP */ + case 0x5D: /* POP eBP */ + case 0x5E: /* POP eSI */ + case 0x5F: /* POP eDI */ + t1 = newTemp(cb); t2 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2); + uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1); + uInstr2(cb, ADD, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + uInstr2(cb, PUT, 4, TempReg, 
t2, ArchReg, R_ESP); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, opc-0x58); + if (dis) + VG_(printf)("pop%c %s\n", nameISize(sz), nameIReg(sz,opc-0x58)); + break; + + case 0x9D: /* POPF */ + vg_assert(sz == 2 || sz == 4); + t1 = newTemp(cb); t2 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2); + uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1); + uInstr2(cb, ADD, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); + uInstr1(cb, PUTF, sz, TempReg, t1); + /* PUTF writes all the flags we are interested in */ + uFlagsRWU(cb, FlagsEmpty, FlagsALL, FlagsEmpty); + if (dis) + VG_(printf)("popf%c\n", nameISize(sz)); + break; + + case 0x61: /* POPA */ + { Int reg; + /* Just to keep things sane, we assert for a size 4. It's + probably OK for size 2 as well, but I'd like to find a test + case; ie, have the assertion fail, before committing to it. + If it fails for you, uncomment the sz == 2 bit, try again, + and let me know whether or not it works. (jseward@acm.org). */ + vg_assert(sz == 4 /* || sz == 2 */); + + /* Eight values are popped, one per register, but the value of + %esp on the stack is ignored and instead incremented (in one + hit at the end) for each of the values. */ + t1 = newTemp(cb); t2 = newTemp(cb); t3 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2); + uInstr2(cb, MOV, 4, TempReg, t2, TempReg, t3); + + /* Do %edi, %esi, %ebp */ + for (reg = 7; reg >= 5; reg--) { + uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1); + uInstr2(cb, ADD, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, reg); + } + /* Ignore (skip) value of %esp on stack. 
*/ + uInstr2(cb, ADD, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + /* Do %ebx, %edx, %ecx, %eax */ + for (reg = 3; reg >= 0; reg--) { + uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1); + uInstr2(cb, ADD, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, reg); + } + uInstr2(cb, ADD, 4, Literal, 0, TempReg, t3); + uLiteral(cb, sz * 8); /* One 'sz' per register */ + uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ESP); + if (dis) + VG_(printf)("popa%c\n", nameISize(sz)); + break; + } + + /* ------------------------ PUSH ----------------------- */ + + case 0x50: /* PUSH eAX */ + case 0x51: /* PUSH eCX */ + case 0x52: /* PUSH eDX */ + case 0x54: /* PUSH eSP */ + case 0x53: /* PUSH eBX */ + case 0x55: /* PUSH eBP */ + case 0x56: /* PUSH eSI */ + case 0x57: /* PUSH eDI */ + /* This is the Right Way, in that the value to be pushed is + established before %esp is changed, so that pushl %esp + correctly pushes the old value. */ + t1 = newTemp(cb); t2 = newTemp(cb); t3 = newTemp(cb); + uInstr2(cb, GET, sz, ArchReg, opc-0x50, TempReg, t1); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3); + uInstr2(cb, MOV, 4, TempReg, t3, TempReg, t2); + uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); + uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); + SMC_IF_ALL(cb); + if (dis) + VG_(printf)("push%c %s\n", nameISize(sz), nameIReg(sz,opc-0x50)); + break; + + case 0x68: /* PUSH Iv */ + d32 = getUDisp(sz,eip); eip += sz; + goto do_push_I; + case 0x6A: /* PUSH Ib, sign-extended to sz */ + d32 = getSDisp8(eip); eip += 1; + goto do_push_I; + do_push_I: + t1 = newTemp(cb); t2 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t1); + uInstr2(cb, SUB, 4, Literal, 0, TempReg, t1); + uLiteral(cb, sz); + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ESP); + uInstr2(cb, MOV, sz, Literal, 0, TempReg, t2); + uLiteral(cb, d32); + uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t1); + 
SMC_IF_ALL(cb); + if (dis) + VG_(printf)("push%c $0x%x\n", nameISize(sz), d32); + break; + + case 0x9C: /* PUSHF */ + vg_assert(sz == 2 || sz == 4); + t1 = newTemp(cb); t2 = newTemp(cb); t3 = newTemp(cb); + uInstr1(cb, GETF, sz, TempReg, t1); + /* GETF reads all the flags we are interested in */ + uFlagsRWU(cb, FlagsALL, FlagsEmpty, FlagsEmpty); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3); + uInstr2(cb, MOV, 4, TempReg, t3, TempReg, t2); + uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); + uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); + SMC_IF_ALL(cb); + if (dis) + VG_(printf)("pushf%c\n", nameISize(sz)); + break; + + case 0x60: /* PUSHA */ + { Int reg; + /* Just to keep things sane, we assert for a size 4. It's + probably OK for size 2 as well, but I'd like to find a test + case; ie, have the assertion fail, before committing to it. + If it fails for you, uncomment the sz == 2 bit, try again, + and let me know whether or not it works. (jseward@acm.org). */ + vg_assert(sz == 4 /* || sz == 2 */); + + /* This is the Right Way, in that the value to be pushed is + established before %esp is changed, so that pusha + correctly pushes the old %esp value. New value of %esp is + pushed at start. */ + t1 = newTemp(cb); t2 = newTemp(cb); t3 = newTemp(cb); + t4 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3); + uInstr2(cb, MOV, 4, TempReg, t3, TempReg, t2); + uInstr2(cb, MOV, 4, TempReg, t3, TempReg, t4); + uInstr2(cb, SUB, 4, Literal, 0, TempReg, t4); + uLiteral(cb, sz * 8); /* One 'sz' per register. 
*/ + uInstr2(cb, PUT, 4, TempReg, t4, ArchReg, R_ESP); + /* Do %eax, %ecx, %edx, %ebx */ + for (reg = 0; reg <= 3; reg++) { + uInstr2(cb, GET, sz, ArchReg, reg, TempReg, t1); + uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); + SMC_IF_ALL(cb); + } + /* Push old value of %esp */ + uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + uInstr2(cb, STORE, sz, TempReg, t3, TempReg, t2); + SMC_IF_ALL(cb); + /* Do %ebp, %esi, %edi */ + for (reg = 5; reg <= 7; reg++) { + uInstr2(cb, GET, sz, ArchReg, reg, TempReg, t1); + uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); + uLiteral(cb, sz); + uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); + SMC_IF_ALL(cb); + } + if (dis) + VG_(printf)("pusha%c\n", nameISize(sz)); + break; + } + + /* ------------------------ SCAS et al ----------------- */ + + case 0xA4: /* MOVSb, no REP prefix */ + codegen_MOVS ( cb, 1 ); + if (dis) VG_(printf)("movsb\n"); + break; + case 0xA5: /* MOVSv, no REP prefix */ + codegen_MOVS ( cb, sz ); + if (dis) VG_(printf)("movs%c\n", nameISize(sz)); + break; + + case 0xA6: /* CMPSb, no REP prefix */ + codegen_CMPS ( cb, 1 ); + if (dis) VG_(printf)("cmpsb\n"); + break; + + case 0xAA: /* STOSb, no REP prefix */ + codegen_STOS ( cb, 1 ); + if (dis) VG_(printf)("stosb\n"); + break; + case 0xAB: /* STOSv, no REP prefix */ + codegen_STOS ( cb, sz ); + if (dis) VG_(printf)("stos%c\n", nameISize(sz)); + break; + + case 0xAC: /* LODSb, no REP prefix */ + codegen_LODS ( cb, 1 ); + if (dis) VG_(printf)("lodsb\n"); + break; + + case 0xAE: /* SCASb, no REP prefix */ + codegen_SCAS ( cb, 1 ); + if (dis) VG_(printf)("scasb\n"); + break; + + case 0xFC: /* CLD */ + uInstr0(cb, CALLM_S, 0); + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_CLD)); + uFlagsRWU(cb, FlagsEmpty, FlagD, FlagsEmpty); + uInstr0(cb, CALLM_E, 0); + if (dis) VG_(printf)("cld\n"); + break; + + case 0xFD: /* STD */ + uInstr0(cb, CALLM_S, 0); + uInstr1(cb, CALLM, 0, Lit16, 
VGOFF_(helper_STD)); + uFlagsRWU(cb, FlagsEmpty, FlagD, FlagsEmpty); + uInstr0(cb, CALLM_E, 0); + if (dis) VG_(printf)("std\n"); + break; + + case 0xF2: { /* REPNE prefix insn */ + Addr eip_orig = eip - 1; + abyte = getUChar(eip); eip++; + if (abyte == 0x66) { sz = 2; abyte = getUChar(eip); eip++; } + + if (abyte == 0xAE || 0xAF) { /* REPNE SCAS */ + if (abyte == 0xAE) sz = 1; + codegen_REPNE_SCAS ( cb, sz, eip_orig, eip ); + *isEnd = True; + if (dis) VG_(printf)("repne scas%c\n", nameISize(sz)); + } + else { + VG_(printf)("REPNE then 0x%x\n", (UInt)abyte); + VG_(panic)("Unhandled REPNE case"); + } + break; + } + + case 0xF3: { /* REPE prefix insn */ + Addr eip_orig = eip - 1; + abyte = getUChar(eip); eip++; + if (abyte == 0x66) { sz = 2; abyte = getUChar(eip); eip++; } + + if (abyte == 0xA4 || abyte == 0xA5) { /* REPE MOV */ + if (abyte == 0xA4) sz = 1; + codegen_REPE_MOVS ( cb, sz, eip_orig, eip ); + *isEnd = True; + if (dis) VG_(printf)("repe mov%c\n", nameISize(sz)); + } + else + if (abyte == 0xA6 || abyte == 0xA7) { /* REPE CMP */ + if (abyte == 0xA6) sz = 1; + codegen_REPE_CMPS ( cb, sz, eip_orig, eip ); + *isEnd = True; + if (dis) VG_(printf)("repe cmps%c\n", nameISize(sz)); + } + else + if (abyte == 0xAA || abyte == 0xAB) { /* REPE STOS */ + if (abyte == 0xAA) sz = 1; + codegen_REPE_STOS ( cb, sz, eip_orig, eip ); + *isEnd = True; + if (dis) VG_(printf)("repe stos%c\n", nameISize(sz)); + } else { + VG_(printf)("REPE then 0x%x\n", (UInt)abyte); + VG_(panic)("Unhandled REPE case"); + } + break; + } + + /* ------------------------ XCHG ----------------------- */ + + case 0x86: /* XCHG Gb,Eb */ + sz = 1; + /* Fall through ... 
*/ + case 0x87: /* XCHG Gv,Ev */ + modrm = getUChar(eip); + t1 = newTemp(cb); t2 = newTemp(cb); + if (epartIsReg(modrm)) { + uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); + uInstr2(cb, GET, sz, ArchReg, gregOfRM(modrm), TempReg, t2); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, gregOfRM(modrm)); + uInstr2(cb, PUT, sz, TempReg, t2, ArchReg, eregOfRM(modrm)); + eip++; + if (dis) + VG_(printf)("xchg%c %s, %s\n", nameISize(sz), + nameIReg(sz,gregOfRM(modrm)), + nameIReg(sz,eregOfRM(modrm))); + } else { + pair = disAMode ( cb, eip, dis?dis_buf:NULL); + t3 = LOW24(pair); + uInstr2(cb, LOAD, sz, TempReg, t3, TempReg, t1); + uInstr2(cb, GET, sz, ArchReg, gregOfRM(modrm), TempReg, t2); + uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t3); + SMC_IF_SOME(cb); + uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, gregOfRM(modrm)); + eip += HI8(pair); + if (dis) + VG_(printf)("xchg%c %s, %s\n", nameISize(sz), + nameIReg(sz,gregOfRM(modrm)), + dis_buf); + } + break; + + case 0x90: /* XCHG eAX,eAX */ + if (dis) VG_(printf)("nop\n"); + break; + case 0x91: /* XCHG eCX,eSI */ + case 0x96: /* XCHG eAX,eSI */ + case 0x97: /* XCHG eAX,eDI */ + codegen_xchg_eAX_Reg ( cb, sz, opc - 0x90 ); + break; + + /* ------------------------ (Grp1 extensions) ---------- */ + + case 0x80: /* Grp1 Ib,Eb */ + modrm = getUChar(eip); + am_sz = lengthAMode(eip); + sz = 1; + d_sz = 1; + d32 = getSDisp8(eip + am_sz); + eip = dis_Grp1 ( cb, eip, modrm, am_sz, d_sz, sz, d32 ); + break; + + case 0x81: /* Grp1 Iv,Ev */ + modrm = getUChar(eip); + am_sz = lengthAMode(eip); + d_sz = sz; + d32 = getUDisp(d_sz, eip + am_sz); + eip = dis_Grp1 ( cb, eip, modrm, am_sz, d_sz, sz, d32 ); + break; + + case 0x83: /* Grp1 Ib,Ev */ + modrm = getUChar(eip); + am_sz = lengthAMode(eip); + d_sz = 1; + d32 = getSDisp8(eip + am_sz); + eip = dis_Grp1 ( cb, eip, modrm, am_sz, d_sz, sz, d32 ); + break; + + /* ------------------------ (Grp2 extensions) ---------- */ + + case 0xC0: /* Grp2 Ib,Eb */ + modrm = getUChar(eip); + am_sz 
= lengthAMode(eip); + d_sz = 1; + d32 = getSDisp8(eip + am_sz); + sz = 1; + eip = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, Literal, d32 ); + break; + + case 0xC1: /* Grp2 Ib,Ev */ + modrm = getUChar(eip); + am_sz = lengthAMode(eip); + d_sz = 1; + d32 = getSDisp8(eip + am_sz); + eip = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, Literal, d32 ); + break; + + case 0xD0: /* Grp2 1,Eb */ + modrm = getUChar(eip); + am_sz = lengthAMode(eip); + d_sz = 0; + d32 = 1; + sz = 1; + eip = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, Literal, d32 ); + break; + + case 0xD1: /* Grp2 1,Ev */ + modrm = getUChar(eip); + am_sz = lengthAMode(eip); + d_sz = 0; + d32 = 1; + eip = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, Literal, d32 ); + break; + + case 0xD3: /* Grp2 CL,Ev */ + modrm = getUChar(eip); + am_sz = lengthAMode(eip); + d_sz = 0; + eip = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, ArchReg, R_ECX ); + break; + + /* ------------------------ (Grp3 extensions) ---------- */ + + case 0xF6: /* Grp3 Eb */ + eip = dis_Grp3 ( cb, 1, eip ); + break; + case 0xF7: /* Grp3 Ev */ + eip = dis_Grp3 ( cb, sz, eip ); + break; + + /* ------------------------ (Grp4 extensions) ---------- */ + + case 0xFE: /* Grp4 Eb */ + eip = dis_Grp4 ( cb, eip ); + break; + + /* ------------------------ (Grp5 extensions) ---------- */ + + case 0xFF: /* Grp5 Ev */ + eip = dis_Grp5 ( cb, sz, eip, isEnd ); + break; + + /* ------------------------ Escapes to 2-byte opcodes -- */ + + case 0x0F: { + opc = getUChar(eip); eip++; + switch (opc) { + + /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */ + + case 0xBA: /* Grp8 Ib,Ev */ + modrm = getUChar(eip); + am_sz = lengthAMode(eip); + d32 = getSDisp8(eip + am_sz); + eip = dis_Grp8 ( cb, eip, modrm, am_sz, sz, d32 ); + break; + + /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */ + + case 0xBC: /* BSF Gv,Ev */ + eip = dis_bs_E_G ( cb, sz, eip, True ); + break; + case 0xBD: /* BSR Gv,Ev */ + eip = dis_bs_E_G ( cb, sz, eip, False ); + break; + + /* 
=-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */ + + case 0xC8: /* BSWAP %eax */ + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: /* BSWAP %edi */ + /* AFAICS from the Intel docs, this only exists at size 4. */ + vg_assert(sz == 4); + t1 = newTemp(cb); + uInstr2(cb, GET, 4, ArchReg, opc-0xC8, TempReg, t1); + uInstr1(cb, BSWAP, 4, TempReg, t1); + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, opc-0xC8); + if (dis) VG_(printf)("bswapl %s\n", nameIReg(4, opc-0xC8)); + break; + + /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */ + + case 0xA3: /* BT Gv,Ev */ + eip = dis_bt_G_E ( cb, sz, eip, BtOpNone ); + break; + case 0xB3: /* BTR Gv,Ev */ + eip = dis_bt_G_E ( cb, sz, eip, BtOpReset ); + break; + case 0xAB: /* BTS Gv,Ev */ + eip = dis_bt_G_E ( cb, sz, eip, BtOpSet ); + break; + case 0xBB: /* BTC Gv,Ev */ + eip = dis_bt_G_E ( cb, sz, eip, BtOpComp ); + break; + + /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */ + + case 0x40: + case 0x41: + case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */ + case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */ + case 0x44: /* CMOVZb/CMOVEb (cmov zero) */ + case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */ + case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */ + case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */ + case 0x48: /* CMOVSb (cmov negative) */ + case 0x49: /* CMOVSb (cmov not negative) */ + case 0x4A: /* CMOVP (cmov parity even) */ + case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */ + case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */ + case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */ + case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */ + eip = dis_cmov_E_G(cb, sz, (Condcode)(opc - 0x40), eip); + break; + + /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */ + + case 0xB1: /* CMPXCHG Gv,Ev */ + eip = dis_cmpxchg_G_E ( cb, sz, eip ); + break; + + /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */ + + case 0xA2: /* CPUID */ + t1 = newTemp(cb); + t2 = newTemp(cb); + t3 = 
newTemp(cb); + t4 = newTemp(cb); + uInstr0(cb, CALLM_S, 0); + + uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t1); + uInstr1(cb, PUSH, 4, TempReg, t1); + + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2); + uLiteral(cb, 0); + uInstr1(cb, PUSH, 4, TempReg, t2); + + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t3); + uLiteral(cb, 0); + uInstr1(cb, PUSH, 4, TempReg, t3); + + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4); + uLiteral(cb, 0); + uInstr1(cb, PUSH, 4, TempReg, t4); + + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_CPUID)); + uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); + + uInstr1(cb, POP, 4, TempReg, t4); + uInstr2(cb, PUT, 4, TempReg, t4, ArchReg, R_EDX); + + uInstr1(cb, POP, 4, TempReg, t3); + uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ECX); + + uInstr1(cb, POP, 4, TempReg, t2); + uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBX); + + uInstr1(cb, POP, 4, TempReg, t1); + uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_EAX); + + uInstr0(cb, CALLM_E, 0); + if (dis) VG_(printf)("cpuid\n"); + break; + + /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */ + + case 0xB6: /* MOVZXb Eb,Gv */ + eip = dis_movx_E_G ( cb, eip, 1, 4, False ); + break; + case 0xB7: /* MOVZXw Ew,Gv */ + eip = dis_movx_E_G ( cb, eip, 2, 4, False ); + break; + + case 0xBE: /* MOVSXb Eb,Gv */ + eip = dis_movx_E_G ( cb, eip, 1, 4, True ); + break; + case 0xBF: /* MOVSXw Ew,Gv */ + eip = dis_movx_E_G ( cb, eip, 2, 4, True ); + break; + + /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */ + + case 0xAF: /* IMUL Ev, Gv */ + eip = dis_mul_E_G ( cb, sz, eip, True ); + break; + + /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */ + case 0x80: + case 0x81: + case 0x82: /* JBb/JNAEb (jump below) */ + case 0x83: /* JNBb/JAEb (jump not below) */ + case 0x84: /* JZb/JEb (jump zero) */ + case 0x85: /* JNZb/JNEb (jump not zero) */ + case 0x86: /* JBEb/JNAb (jump below or equal) */ + case 0x87: /* JNBEb/JAb (jump not below or equal) */ + case 0x88: /* JSb (jump negative) */ + case 0x89: /* JSb (jump not 
negative) */ + case 0x8A: /* JP (jump parity even) */ + case 0x8C: /* JLb/JNGEb (jump less) */ + case 0x8D: /* JGEb/JNLb (jump greater or equal) */ + case 0x8E: /* JLEb/JNGb (jump less or equal) */ + case 0x8F: /* JGb/JNLEb (jump greater) */ + d32 = (eip+4) + getUDisp32(eip); eip += 4; + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, d32); + uCond(cb, (Condcode)(opc - 0x80)); + uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, eip); + uCond(cb, CondAlways); + *isEnd = True; + if (dis) + VG_(printf)("j%s-32 0x%x\n", + VG_(nameCondcode)(opc - 0x80), d32); + break; + + /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */ + + case 0x31: /* RDTSC */ + t1 = newTemp(cb); + t2 = newTemp(cb); + t3 = newTemp(cb); + uInstr0(cb, CALLM_S, 0); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t1); + uLiteral(cb, 0); + uInstr1(cb, PUSH, 4, TempReg, t1); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2); + uLiteral(cb, 0); + uInstr1(cb, PUSH, 4, TempReg, t2); + uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_RDTSC)); + uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); + uInstr1(cb, POP, 4, TempReg, t3); + uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_EDX); + uInstr1(cb, POP, 4, TempReg, t3); + uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_EAX); + uInstr0(cb, CALLM_E, 0); + if (dis) VG_(printf)("rdtsc\n"); + break; + + /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */ + case 0x90: + case 0x91: + case 0x92: /* set-Bb/set-NAEb (jump below) */ + case 0x93: /* set-NBb/set-AEb (jump not below) */ + case 0x94: /* set-Zb/set-Eb (jump zero) */ + case 0x95: /* set-NZb/set-NEb (jump not zero) */ + case 0x96: /* set-BEb/set-NAb (jump below or equal) */ + case 0x97: /* set-NBEb/set-Ab (jump not below or equal) */ + case 0x98: /* set-Sb (jump negative) */ + case 0x99: /* set-Sb (jump not negative) */ + case 0x9A: /* set-P (jump parity even) */ + case 0x9B: /* set-NP (jump parity odd) */ + case 0x9C: /* set-Lb/set-NGEb (jump less) */ + case 0x9D: /* 
set-GEb/set-NLb (jump greater or equal) */ + case 0x9E: /* set-LEb/set-NGb (jump less or equal) */ + case 0x9F: /* set-Gb/set-NLEb (jump greater) */ + modrm = getUChar(eip); + t1 = newTemp(cb); + if (epartIsReg(modrm)) { + eip++; + uInstr1(cb, CC2VAL, 1, TempReg, t1); + uCond(cb, (Condcode)(opc-0x90)); + uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); + uInstr2(cb, PUT, 1, TempReg, t1, ArchReg, eregOfRM(modrm)); + if (dis) VG_(printf)("set%s %s\n", + VG_(nameCondcode)(opc-0x90), + nameIReg(1,eregOfRM(modrm))); + } else { + pair = disAMode ( cb, eip, dis?dis_buf:NULL ); + t2 = LOW24(pair); + eip += HI8(pair); + uInstr1(cb, CC2VAL, 1, TempReg, t1); + uCond(cb, (Condcode)(opc-0x90)); + uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); + uInstr2(cb, STORE, 1, TempReg, t1, TempReg, t2); + SMC_IF_ALL(cb); + if (dis) VG_(printf)("set%s %s\n", + VG_(nameCondcode)(opc-0x90), + dis_buf); + } + break; + + /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */ + + case 0xA4: /* SHLDv imm8,Gv,Ev */ + modrm = getUChar(eip); + eip = dis_SHLRD_Gv_Ev ( + cb, eip, modrm, sz, + Literal, getUChar(eip + lengthAMode(eip)), + True ); + break; + case 0xA5: /* SHLDv %cl,Gv,Ev */ + modrm = getUChar(eip); + eip = dis_SHLRD_Gv_Ev ( + cb, eip, modrm, sz, ArchReg, R_CL, True ); + break; + + case 0xAC: /* SHRDv imm8,Gv,Ev */ + modrm = getUChar(eip); + eip = dis_SHLRD_Gv_Ev ( + cb, eip, modrm, sz, + Literal, getUChar(eip + lengthAMode(eip)), + False ); + break; + case 0xAD: /* SHRDv %cl,Gv,Ev */ + modrm = getUChar(eip); + eip = dis_SHLRD_Gv_Ev ( + cb, eip, modrm, sz, ArchReg, R_CL, False ); + break; + + /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */ + + case 0xC1: /* XADD Gv,Ev */ + eip = dis_xadd_G_E ( cb, sz, eip ); + break; + + /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */ + + default: + VG_(printf)("disInstr: unhandled 2-byte opcode 0x%x\n", + (UInt)opc); + VG_(unimplemented)("unhandled x86 0x0F 2-byte opcode"); + } + + break; + } + + /* ------------------------ ??? 
------------------------ */ + + default: + VG_(printf)("disInstr: unhandled opcode 0x%x then 0x%x\n", + (UInt)opc, (UInt)getUChar(eip)); + VG_(panic)("unhandled x86 opcode"); + } + + if (dis) + VG_(printf)("\n"); + for (; first_uinstr < cb->used; first_uinstr++) { + Bool sane = VG_(saneUInstr)(True, &cb->instrs[first_uinstr]); + if (dis || !sane) + VG_(ppUInstr)(sane ? first_uinstr : -1, + &cb->instrs[first_uinstr]); + vg_assert(sane); + } + + return eip; +} + + +/* Disassemble a complete basic block, starting at eip, and dumping + the ucode into cb. Returns the size, in bytes, of the basic + block. */ + +Int VG_(disBB) ( UCodeBlock* cb, Addr eip0 ) +{ + Addr eip = eip0; + Bool isEnd = False; + Bool block_sane; + if (dis) VG_(printf)("\n"); + + if (VG_(clo_single_step)) { + eip = disInstr ( cb, eip, &isEnd ); + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, eip); + uCond(cb, CondAlways); + if (dis) VG_(ppUInstr)(cb->used-1, &cb->instrs[cb->used-1]); + } else { + Int delta = 0; + Addr eip2; + while (True) { + if (isEnd) break; + eip2 = disInstr ( cb, eip, &isEnd ); + delta += (eip2 - eip); + eip = eip2; + if (delta > 4 && !isEnd) { + uInstr1(cb, INCEIP, 0, Lit16, delta); + if (dis) VG_(ppUInstr)(cb->used-1, &cb->instrs[cb->used-1]); + delta = 0; + } + /* Split up giant basic blocks into pieces, so the + translations fall within 64k. 
*/ + if (eip - eip0 > 2000) { + if (VG_(clo_verbosity) > 0) + VG_(message)(Vg_DebugMsg, + "Warning: splitting giant basic block into pieces"); + uInstr1(cb, JMP, 0, Literal, 0); + uLiteral(cb, eip); + uCond(cb, CondAlways); + if (dis) VG_(ppUInstr)(cb->used-1, &cb->instrs[cb->used-1]); + if (dis) VG_(printf)("\n"); + break; + } + if (dis) VG_(printf)("\n"); + } + } + + block_sane = VG_(saneUCodeBlock)(cb); + if (!block_sane) { + VG_(ppUCodeBlock)(cb, "block failing sanity check"); + vg_assert(block_sane); + } + + return eip - eip0; +} + + +/*--------------------------------------------------------------------*/ +/*--- end vg_to_ucode.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_translate.c b/vg_translate.c new file mode 100644 index 000000000..430aebab8 --- /dev/null +++ b/vg_translate.c @@ -0,0 +1,3096 @@ + +/*--------------------------------------------------------------------*/ +/*--- The JITter proper: register allocation & code improvement ---*/ +/*--- vg_translate.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" + + +/*------------------------------------------------------------*/ +/*--- Renamings of frequently-used global functions. ---*/ +/*------------------------------------------------------------*/ + +#define uInstr1 VG_(newUInstr1) +#define uInstr2 VG_(newUInstr2) +#define uInstr3 VG_(newUInstr3) +#define dis VG_(disassemble) +#define nameIReg VG_(nameOfIntReg) +#define nameISize VG_(nameOfIntSize) +#define uLiteral VG_(setLiteralField) +#define newTemp VG_(getNewTemp) +#define newShadow VG_(getNewShadow) + + +/*------------------------------------------------------------*/ +/*--- Memory management for the translater. ---*/ +/*------------------------------------------------------------*/ + +#define N_JITBLOCKS 4 +#define N_JITBLOCK_SZ 5000 + +static UChar jitstorage[N_JITBLOCKS][N_JITBLOCK_SZ]; +static Bool jitstorage_inuse[N_JITBLOCKS]; +static Bool jitstorage_initdone = False; + +static __inline__ void jitstorage_initialise ( void ) +{ + Int i; + if (jitstorage_initdone) return; + jitstorage_initdone = True; + for (i = 0; i < N_JITBLOCKS; i++) + jitstorage_inuse[i] = False; +} + +void* VG_(jitmalloc) ( Int nbytes ) +{ + Int i; + jitstorage_initialise(); + if (nbytes > N_JITBLOCK_SZ) { + /* VG_(printf)("too large: %d\n", nbytes); */ + return VG_(malloc)(VG_AR_PRIVATE, nbytes); + } + for (i = 0; i < N_JITBLOCKS; i++) { + if (!jitstorage_inuse[i]) { + jitstorage_inuse[i] = True; + /* VG_(printf)("alloc %d -> %d\n", nbytes, i ); */ + return & jitstorage[i][0]; + } + } + VG_(panic)("out of slots in vg_jitmalloc\n"); + return VG_(malloc)(VG_AR_PRIVATE, nbytes); +} + +void VG_(jitfree) ( void* ptr ) +{ + Int i; + jitstorage_initialise(); + for (i = 0; i < 
N_JITBLOCKS; i++) { + if (ptr == & jitstorage[i][0]) { + vg_assert(jitstorage_inuse[i]); + jitstorage_inuse[i] = False; + return; + } + } + VG_(free)(VG_AR_PRIVATE, ptr); +} + +/*------------------------------------------------------------*/ +/*--- Basics ---*/ +/*------------------------------------------------------------*/ + +static UCodeBlock* allocCodeBlock ( void ) +{ + UCodeBlock* cb = VG_(malloc)(VG_AR_PRIVATE, sizeof(UCodeBlock)); + cb->used = cb->size = cb->nextTemp = 0; + cb->instrs = NULL; + return cb; +} + + +static void freeCodeBlock ( UCodeBlock* cb ) +{ + if (cb->instrs) VG_(free)(VG_AR_PRIVATE, cb->instrs); + VG_(free)(VG_AR_PRIVATE, cb); +} + + +/* Ensure there's enough space in a block to add one uinstr. */ +static __inline__ +void ensureUInstr ( UCodeBlock* cb ) +{ + if (cb->used == cb->size) { + if (cb->instrs == NULL) { + vg_assert(cb->size == 0); + vg_assert(cb->used == 0); + cb->size = 8; + cb->instrs = VG_(malloc)(VG_AR_PRIVATE, 8 * sizeof(UInstr)); + } else { + Int i; + UInstr* instrs2 = VG_(malloc)(VG_AR_PRIVATE, + 2 * sizeof(UInstr) * cb->size); + for (i = 0; i < cb->used; i++) + instrs2[i] = cb->instrs[i]; + cb->size *= 2; + VG_(free)(VG_AR_PRIVATE, cb->instrs); + cb->instrs = instrs2; + } + } + + vg_assert(cb->used < cb->size); +} + + +__inline__ +void VG_(emptyUInstr) ( UInstr* u ) +{ + u->val1 = u->val2 = u->val3 = 0; + u->tag1 = u->tag2 = u->tag3 = NoValue; + u->flags_r = u->flags_w = FlagsEmpty; + u->call_dispatch = False; + u->smc_check = u->signed_widen = u->ret_dispatch = False; + u->lit32 = 0; + u->opcode = 0; + u->size = 0; + u->cond = 0; + u->extra4b = 0; +} + + +/* Add an instruction to a ucode block, and return the index of the + instruction. 
*/ +__inline__ +void VG_(newUInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz, + Tag tag1, UInt val1, + Tag tag2, UInt val2, + Tag tag3, UInt val3 ) +{ + UInstr* ui; + ensureUInstr(cb); + ui = & cb->instrs[cb->used]; + cb->used++; + VG_(emptyUInstr)(ui); + ui->val1 = val1; + ui->val2 = val2; + ui->val3 = val3; + ui->opcode = opcode; + ui->tag1 = tag1; + ui->tag2 = tag2; + ui->tag3 = tag3; + ui->size = sz; + if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG); + if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG); + if (tag3 == TempReg) vg_assert(val3 != INVALID_TEMPREG); +} + + +__inline__ +void VG_(newUInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz, + Tag tag1, UInt val1, + Tag tag2, UInt val2 ) +{ + UInstr* ui; + ensureUInstr(cb); + ui = & cb->instrs[cb->used]; + cb->used++; + VG_(emptyUInstr)(ui); + ui->val1 = val1; + ui->val2 = val2; + ui->opcode = opcode; + ui->tag1 = tag1; + ui->tag2 = tag2; + ui->size = sz; + if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG); + if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG); +} + + +__inline__ +void VG_(newUInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz, + Tag tag1, UInt val1 ) +{ + UInstr* ui; + ensureUInstr(cb); + ui = & cb->instrs[cb->used]; + cb->used++; + VG_(emptyUInstr)(ui); + ui->val1 = val1; + ui->opcode = opcode; + ui->tag1 = tag1; + ui->size = sz; + if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG); +} + + +__inline__ +void VG_(newUInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz ) +{ + UInstr* ui; + ensureUInstr(cb); + ui = & cb->instrs[cb->used]; + cb->used++; + VG_(emptyUInstr)(ui); + ui->opcode = opcode; + ui->size = sz; +} + + +/* Copy an instruction into the given codeblock. */ +static __inline__ +void copyUInstr ( UCodeBlock* cb, UInstr* instr ) +{ + ensureUInstr(cb); + cb->instrs[cb->used] = *instr; + cb->used++; +} + + +/* Copy auxiliary info from one uinstr to another. 
*/ +static __inline__ +void copyAuxInfoFromTo ( UInstr* src, UInstr* dst ) +{ + dst->cond = src->cond; + dst->extra4b = src->extra4b; + dst->smc_check = src->smc_check; + dst->signed_widen = src->signed_widen; + dst->ret_dispatch = src->ret_dispatch; + dst->call_dispatch = src->call_dispatch; + dst->flags_r = src->flags_r; + dst->flags_w = src->flags_w; +} + + +/* Set the flag R/W sets on a uinstr. */ +void VG_(setFlagRW) ( UInstr* u, FlagSet fr, FlagSet fw ) +{ + /* VG_(ppUInstr)(-1,u); */ + vg_assert(fr == (fr & FlagsALL)); + vg_assert(fw == (fw & FlagsALL)); + u->flags_r = fr; + u->flags_w = fw; +} + + +/* Set the lit32 field of the most recent uinsn. */ +void VG_(setLiteralField) ( UCodeBlock* cb, UInt lit32 ) +{ + LAST_UINSTR(cb).lit32 = lit32; +} + + +Bool VG_(anyFlagUse) ( UInstr* u ) +{ + return (u->flags_r != FlagsEmpty + || u->flags_w != FlagsEmpty); +} + + + + +/* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel + register number. This effectively defines the order in which real + registers are allocated. %ebp is excluded since it is permanently + reserved for pointing at VG_(baseBlock). %edi is a general spare + temp used for Left4 and various misc tag ops. + + Important! If you change the set of allocatable registers from + %eax, %ebx, %ecx, %edx, %esi you must change the + save/restore sequences in vg_helper_smc_check4 to match! +*/ +__inline__ Int VG_(rankToRealRegNo) ( Int rank ) +{ + switch (rank) { +# if 1 + /* Probably the best allocation ordering. */ + case 0: return R_EAX; + case 1: return R_EBX; + case 2: return R_ECX; + case 3: return R_EDX; + case 4: return R_ESI; +# else + /* Contrary; probably the worst. Helpful for debugging, tho. */ + case 4: return R_EAX; + case 3: return R_EBX; + case 2: return R_ECX; + case 1: return R_EDX; + case 0: return R_ESI; +# endif + default: VG_(panic)("rankToRealRegNo"); + } +} + + +/*------------------------------------------------------------*/ +/*--- Sanity checking uinstrs. 
---*/ +/*------------------------------------------------------------*/ + +/* This seems as good a place as any to record some important stuff + about ucode semantics. + + * TempRegs are 32 bits wide. LOADs of 8/16 bit values into a + TempReg are defined to zero-extend the loaded value to 32 bits. + This is needed to make the translation of movzbl et al work + properly. + + * Similarly, GETs of a 8/16 bit ArchRegs are zero-extended. + + * Arithmetic on TempRegs is at the specified size. For example, + SUBW t1, t2 has to result in a real 16 bit x86 subtraction + being emitted -- not a 32 bit one. + + * On some insns we allow the cc bit to be set. If so, the + intention is that the simulated machine's %eflags register + is copied into that of the real machine before the insn, + and copied back again afterwards. This means that the + code generated for that insn must be very careful only to + update %eflags in the intended way. This is particularly + important for the routines referenced by CALL insns. +*/ + +/* Meaning of operand kinds is as follows: + + ArchReg is a register of the simulated CPU, stored in memory, + in vg_m_state.m_eax .. m_edi. These values are stored + using the Intel register encoding. + + RealReg is a register of the real CPU. There are VG_MAX_REALREGS + available for allocation. As with ArchRegs, these values + are stored using the Intel register encoding. + + TempReg is a temporary register used to express the results of + disassembly. There is an unlimited supply of them -- + register allocation and spilling eventually assigns them + to RealRegs. + + SpillNo is a spill slot number. The number of required spill + slots is VG_MAX_PSEUDOS, in general. Only allowed + as the ArchReg operand of GET and PUT. + + Lit16 is a signed 16-bit literal value. + + Literal is a 32-bit literal value. Each uinstr can only hold + one of these. + + The disassembled code is expressed purely in terms of ArchReg, + TempReg and Literal operands. 
Eventually, register allocation + removes all the TempRegs, giving a result using ArchRegs, RealRegs, + and Literals. New x86 code can easily be synthesised from this. + There are carefully designed restrictions on which insns can have + which operands, intended to make it possible to generate x86 code + from the result of register allocation on the ucode efficiently and + without need of any further RealRegs. + + Restrictions on insns (as generated by the disassembler) are as + follows: + + A=ArchReg S=SpillNo T=TempReg L=Literal R=RealReg + N=NoValue + + GETF T N N + PUTF T N N + + GET A,S T N + PUT T A,S N + LOAD T T N + STORE T T N + MOV T,L T N + CMOV T T N + WIDEN T N N + JMP T,L N N + CALLM L N N + CALLM_S N N N + CALLM_E N N N + PUSH,POP T N N + CLEAR L N N + + AND, OR + T T N + + ADD, ADC, XOR, SUB, SBB + A,L,T T N + + SHL, SHR, SAR, ROL, ROR, RCL, RCR + L,T T N + + NOT, NEG, INC, DEC, CC2VAL, BSWAP + T N N + + JIFZ T L N + + FPU_R L T N + FPU_W L T N + FPU L T N + + LEA1 T T (const in a seperate field) + LEA2 T T T (const & shift ditto) + + INCEIP L N N + + and for instrumentation insns: + + LOADV T T N + STOREV T,L T N + GETV A T N + PUTV T,L A N + GETVF T N N + PUTVF T N N + WIDENV T N N + TESTV A,T N N + SETV A,T N N + TAG1 T N N + TAG2 T T N + + Before register allocation, S operands should not appear anywhere. + After register allocation, all T operands should have been + converted into Rs, and S operands are allowed in GET and PUT -- + denoting spill saves/restores. + + The size field should be 0 for insns for which it is meaningless, + ie those which do not directly move/operate on data. +*/ +Bool VG_(saneUInstr) ( Bool beforeRA, UInstr* u ) +{ +# define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg)) +# define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg)) +# define TR3 (beforeRA ? 
(u->tag3 == TempReg) : (u->tag3 == RealReg)) +# define A1 (u->tag1 == ArchReg) +# define A2 (u->tag2 == ArchReg) +# define AS1 ((u->tag1 == ArchReg) || ((!beforeRA && (u->tag1 == SpillNo)))) +# define AS2 ((u->tag2 == ArchReg) || ((!beforeRA && (u->tag2 == SpillNo)))) +# define AS3 ((u->tag3 == ArchReg) || ((!beforeRA && (u->tag3 == SpillNo)))) +# define L1 (u->tag1 == Literal && u->val1 == 0) +# define L2 (u->tag2 == Literal && u->val2 == 0) +# define Ls1 (u->tag1 == Lit16) +# define Ls3 (u->tag3 == Lit16) +# define N1 (u->tag1 == NoValue) +# define N2 (u->tag2 == NoValue) +# define N3 (u->tag3 == NoValue) +# define SZ4 (u->size == 4) +# define SZ2 (u->size == 2) +# define SZ1 (u->size == 1) +# define SZ0 (u->size == 0) +# define CC0 (u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty) +# define FLG_RD (u->flags_r == FlagsALL && u->flags_w == FlagsEmpty) +# define FLG_WR (u->flags_r == FlagsEmpty && u->flags_w == FlagsALL) +# define CC1 (!(CC0)) +# define SZ4_IF_TR1 ((u->tag1 == TempReg || u->tag1 == RealReg) \ + ? (u->size == 4) : True) + + Int n_lits = 0; + if (u->tag1 == Literal) n_lits++; + if (u->tag2 == Literal) n_lits++; + if (u->tag3 == Literal) n_lits++; + if (n_lits > 1) + return False; + + switch (u->opcode) { + case GETF: + return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_RD; + case PUTF: + return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_WR; + case CALLM_S: case CALLM_E: + return SZ0 && N1 && N2 && N3; + case INCEIP: + return SZ0 && CC0 && Ls1 && N2 && N3; + case LEA1: + return CC0 && TR1 && TR2 && N3 && SZ4; + case LEA2: + return CC0 && TR1 && TR2 && TR3 && SZ4; + case NOP: + return SZ0 && CC0 && N1 && N2 && N3; + case GET: + return CC0 && AS1 && TR2 && N3; + case PUT: + return CC0 && TR1 && AS2 && N3; + case LOAD: case STORE: + return CC0 && TR1 && TR2 && N3; + case MOV: + return CC0 && (TR1 || L1) && TR2 && N3 && SZ4_IF_TR1; + case CMOV: + return CC1 && TR1 && TR2 && N3 && SZ4; + case JMP: + return (u->cond==CondAlways ? 
CC0 : CC1) + && (TR1 || L1) && N2 && SZ0 && N3; + case CLEAR: + return CC0 && Ls1 && N2 && SZ0 && N3; + case CALLM: + return SZ0 && Ls1 && N2 && N3; + case PUSH: case POP: + return CC0 && TR1 && N2 && N3; + case AND: case OR: + return TR1 && TR2 && N3; + case ADD: case ADC: case XOR: case SUB: case SBB: + return (A1 || TR1 || L1) && TR2 && N3; + case SHL: case SHR: case SAR: case ROL: case ROR: case RCL: case RCR: + return (TR1 || L1) && TR2 && N3; + case NOT: case NEG: case INC: case DEC: + return TR1 && N2 && N3; + case BSWAP: + return TR1 && N2 && N3 && CC0 && SZ4; + case CC2VAL: + return CC1 && SZ1 && TR1 && N2 && N3; + case JIFZ: + return CC0 && SZ4 && TR1 && L2 && N3; + case FPU_R: case FPU_W: + return CC0 && Ls1 && TR2 && N3; + case FPU: + return SZ0 && CC0 && Ls1 && N2 && N3; + case LOADV: + return CC0 && TR1 && TR2 && N3; + case STOREV: + return CC0 && (TR1 || L1) && TR2 && N3; + case GETV: + return CC0 && A1 && TR2 && N3; + case PUTV: + return CC0 && (TR1 || L1) && A2 && N3; + case GETVF: + return CC0 && TR1 && N2 && N3 && SZ0; + case PUTVF: + return CC0 && TR1 && N2 && N3 && SZ0; + case WIDEN: + return CC0 && TR1 && N2 && N3; + case TESTV: + return CC0 && (A1 || TR1) && N2 && N3; + case SETV: + return CC0 && (A1 || TR1) && N2 && N3; + case TAG1: + return CC0 && TR1 && N2 && Ls3 && SZ0; + case TAG2: + return CC0 && TR1 && TR2 && Ls3 && SZ0; + default: + VG_(panic)("vg_saneUInstr: unhandled opcode"); + } +# undef SZ4_IF_TR1 +# undef CC0 +# undef CC1 +# undef SZ4 +# undef SZ2 +# undef SZ1 +# undef SZ0 +# undef TR1 +# undef TR2 +# undef TR3 +# undef A1 +# undef A2 +# undef AS1 +# undef AS2 +# undef AS3 +# undef L1 +# undef Ls1 +# undef L2 +# undef Ls3 +# undef N1 +# undef N2 +# undef N3 +# undef FLG_RD +# undef FLG_WR +} + + +/* Sanity checks to do with CALLMs in UCodeBlocks. 
*/ +Bool VG_(saneUCodeBlock) ( UCodeBlock* cb ) +{ + Int callm = 0; + Int callm_s = 0; + Int callm_e = 0; + Int callm_ptr, calls_ptr; + Int i, j, t; + Bool incall = False; + + /* Ensure the number of CALLM, CALLM_S and CALLM_E are the same. */ + + for (i = 0; i < cb->used; i++) { + switch (cb->instrs[i].opcode) { + case CALLM: + if (!incall) return False; + callm++; + break; + case CALLM_S: + if (incall) return False; + incall = True; + callm_s++; + break; + case CALLM_E: + if (!incall) return False; + incall = False; + callm_e++; + break; + case PUSH: case POP: case CLEAR: + if (!incall) return False; + break; + default: + break; + } + } + if (incall) return False; + if (callm != callm_s || callm != callm_e) return False; + + /* Check the sections between CALLM_S and CALLM's. Ensure that no + PUSH uinsn pushes any TempReg that any other PUSH in the same + section pushes. Ie, check that the TempReg args to PUSHes in + the section are unique. If not, the instrumenter generates + incorrect code for CALLM insns. */ + + callm_ptr = 0; + + find_next_CALLM: + /* Search for the next interval, making calls_ptr .. callm_ptr + bracket it. */ + while (callm_ptr < cb->used + && cb->instrs[callm_ptr].opcode != CALLM) + callm_ptr++; + if (callm_ptr == cb->used) + return True; + vg_assert(cb->instrs[callm_ptr].opcode == CALLM); + + calls_ptr = callm_ptr - 1; + while (cb->instrs[calls_ptr].opcode != CALLM_S) + calls_ptr--; + vg_assert(cb->instrs[calls_ptr].opcode == CALLM_S); + vg_assert(calls_ptr >= 0); + + /* VG_(printf)("interval from %d to %d\n", calls_ptr, callm_ptr ); */ + + /* For each PUSH insn in the interval ... */ + for (i = calls_ptr + 1; i < callm_ptr; i++) { + if (cb->instrs[i].opcode != PUSH) continue; + t = cb->instrs[i].val1; + /* Ensure no later PUSH insns up to callm_ptr push the same + TempReg. Return False if any such are found. 
*/ + for (j = i+1; j < callm_ptr; j++) { + if (cb->instrs[j].opcode == PUSH && + cb->instrs[j].val1 == t) + return False; + } + } + + /* This interval is clean. Keep going ... */ + callm_ptr++; + goto find_next_CALLM; +} + + +/*------------------------------------------------------------*/ +/*--- Printing uinstrs. ---*/ +/*------------------------------------------------------------*/ + +Char* VG_(nameCondcode) ( Condcode cond ) +{ + switch (cond) { + case CondO: return "o"; + case CondNO: return "no"; + case CondB: return "b"; + case CondNB: return "nb"; + case CondZ: return "z"; + case CondNZ: return "nz"; + case CondBE: return "be"; + case CondNBE: return "nbe"; + case CondS: return "s"; + case ConsNS: return "ns"; + case CondP: return "p"; + case CondNP: return "np"; + case CondL: return "l"; + case CondNL: return "nl"; + case CondLE: return "le"; + case CondNLE: return "nle"; + case CondAlways: return "MP"; /* hack! */ + default: VG_(panic)("nameCondcode"); + } +} + + +static void vg_ppFlagSet ( Char* prefix, FlagSet set ) +{ + VG_(printf)("%s", prefix); + if (set & FlagD) VG_(printf)("D"); + if (set & FlagO) VG_(printf)("O"); + if (set & FlagS) VG_(printf)("S"); + if (set & FlagZ) VG_(printf)("Z"); + if (set & FlagA) VG_(printf)("A"); + if (set & FlagC) VG_(printf)("C"); + if (set & FlagP) VG_(printf)("P"); +} + + +static void ppTempReg ( Int tt ) +{ + if ((tt & 1) == 0) + VG_(printf)("t%d", tt); + else + VG_(printf)("q%d", tt-1); +} + + +static void ppUOperand ( UInstr* u, Int operandNo, Int sz, Bool parens ) +{ + UInt tag, val; + switch (operandNo) { + case 1: tag = u->tag1; val = u->val1; break; + case 2: tag = u->tag2; val = u->val2; break; + case 3: tag = u->tag3; val = u->val3; break; + default: VG_(panic)("ppUOperand(1)"); + } + if (tag == Literal) val = u->lit32; + + if (parens) VG_(printf)("("); + switch (tag) { + case TempReg: ppTempReg(val); break; + case RealReg: VG_(printf)("%s",nameIReg(sz==0 ? 
4 : sz,val)); break; + case Literal: VG_(printf)("$0x%x", val); break; + case Lit16: VG_(printf)("$0x%x", val); break; + case NoValue: VG_(printf)("NoValue"); break; + case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break; + case SpillNo: VG_(printf)("spill%d", val); break; + default: VG_(panic)("ppUOperand(2)"); + } + if (parens) VG_(printf)(")"); +} + + +Char* VG_(nameUOpcode) ( Bool upper, Opcode opc ) +{ + switch (opc) { + case ADD: return (upper ? "ADD" : "add"); + case ADC: return (upper ? "ADC" : "adc"); + case AND: return (upper ? "AND" : "and"); + case OR: return (upper ? "OR" : "or"); + case XOR: return (upper ? "XOR" : "xor"); + case SUB: return (upper ? "SUB" : "sub"); + case SBB: return (upper ? "SBB" : "sbb"); + case SHL: return (upper ? "SHL" : "shl"); + case SHR: return (upper ? "SHR" : "shr"); + case SAR: return (upper ? "SAR" : "sar"); + case ROL: return (upper ? "ROL" : "rol"); + case ROR: return (upper ? "ROR" : "ror"); + case RCL: return (upper ? "RCL" : "rcl"); + case RCR: return (upper ? "RCR" : "rcr"); + case NOT: return (upper ? "NOT" : "not"); + case NEG: return (upper ? "NEG" : "neg"); + case INC: return (upper ? "INC" : "inc"); + case DEC: return (upper ? "DEC" : "dec"); + case BSWAP: return (upper ? 
"BSWAP" : "bswap"); + default: break; + } + if (!upper) VG_(panic)("vg_nameUOpcode: invalid !upper"); + switch (opc) { + case GETVF: return "GETVF"; + case PUTVF: return "PUTVF"; + case TAG1: return "TAG1"; + case TAG2: return "TAG2"; + case CALLM_S: return "CALLM_S"; + case CALLM_E: return "CALLM_E"; + case INCEIP: return "INCEIP"; + case LEA1: return "LEA1"; + case LEA2: return "LEA2"; + case NOP: return "NOP"; + case GET: return "GET"; + case PUT: return "PUT"; + case GETF: return "GETF"; + case PUTF: return "PUTF"; + case LOAD: return "LD" ; + case STORE: return "ST" ; + case MOV: return "MOV"; + case CMOV: return "CMOV"; + case WIDEN: return "WIDEN"; + case JMP: return "J" ; + case JIFZ: return "JIFZ" ; + case CALLM: return "CALLM"; + case PUSH: return "PUSH" ; + case POP: return "POP" ; + case CLEAR: return "CLEAR"; + case CC2VAL: return "CC2VAL"; + case FPU_R: return "FPU_R"; + case FPU_W: return "FPU_W"; + case FPU: return "FPU" ; + case LOADV: return "LOADV"; + case STOREV: return "STOREV"; + case GETV: return "GETV"; + case PUTV: return "PUTV"; + case TESTV: return "TESTV"; + case SETV: return "SETV"; + default: VG_(panic)("nameUOpcode: unhandled case"); + } +} + + +void VG_(ppUInstr) ( Int instrNo, UInstr* u ) +{ + VG_(printf)("\t%4d: %s", instrNo, + VG_(nameUOpcode)(True, u->opcode)); + if (u->opcode == JMP || u->opcode == CC2VAL) + VG_(printf)("%s", VG_(nameCondcode(u->cond))); + + switch (u->size) { + case 0: VG_(printf)("o"); break; + case 1: VG_(printf)("B"); break; + case 2: VG_(printf)("W"); break; + case 4: VG_(printf)("L"); break; + case 8: VG_(printf)("Q"); break; + default: VG_(printf)("%d", (Int)u->size); break; + } + + switch (u->opcode) { + + case TAG1: + VG_(printf)("\t"); + ppUOperand(u, 1, 4, False); + VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 )); + ppUOperand(u, 1, 4, False); + VG_(printf)(" )"); + break; + + case TAG2: + VG_(printf)("\t"); + ppUOperand(u, 2, 4, False); + VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 )); + 
ppUOperand(u, 1, 4, False); + VG_(printf)(", "); + ppUOperand(u, 2, 4, False); + VG_(printf)(" )"); + break; + + case CALLM_S: case CALLM_E: + break; + + case INCEIP: + VG_(printf)("\t$%d", u->val1); + break; + + case LEA2: + VG_(printf)("\t%d(" , u->lit32); + ppUOperand(u, 1, 4, False); + VG_(printf)(","); + ppUOperand(u, 2, 4, False); + VG_(printf)(",%d), ", (Int)u->extra4b); + ppUOperand(u, 3, 4, False); + break; + + case LEA1: + VG_(printf)("\t%d" , u->lit32); + ppUOperand(u, 1, 4, True); + VG_(printf)(", "); + ppUOperand(u, 2, 4, False); + break; + + case NOP: + break; + + case FPU_W: + VG_(printf)("\t0x%x:0x%x, ", + (u->val1 >> 8) & 0xFF, u->val1 & 0xFF ); + ppUOperand(u, 2, 4, True); + break; + + case FPU_R: + VG_(printf)("\t"); + ppUOperand(u, 2, 4, True); + VG_(printf)(", 0x%x:0x%x", + (u->val1 >> 8) & 0xFF, u->val1 & 0xFF ); + break; + + case FPU: + VG_(printf)("\t0x%x:0x%x", + (u->val1 >> 8) & 0xFF, u->val1 & 0xFF ); + break; + + case STOREV: case LOADV: + case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV: + VG_(printf)("\t"); + ppUOperand(u, 1, u->size, u->opcode==LOAD || u->opcode==LOADV); + VG_(printf)(", "); + ppUOperand(u, 2, u->size, u->opcode==STORE || u->opcode==STOREV); + break; + + case GETF: case PUTF: + VG_(printf)("\t"); + ppUOperand(u, 1, u->size, False); + break; + + case JMP: case CC2VAL: + case PUSH: case POP: case CLEAR: case CALLM: + if (u->opcode == JMP && u->ret_dispatch) + VG_(printf)("-r"); + if (u->opcode == JMP && u->call_dispatch) + VG_(printf)("-c"); + VG_(printf)("\t"); + ppUOperand(u, 1, u->size, False); + break; + + case JIFZ: + VG_(printf)("\t"); + ppUOperand(u, 1, u->size, False); + VG_(printf)(", "); + ppUOperand(u, 2, u->size, False); + break; + + case PUTVF: case GETVF: + VG_(printf)("\t"); + ppUOperand(u, 1, 0, False); + break; + + case NOT: case NEG: case INC: case DEC: case BSWAP: + VG_(printf)("\t"); + ppUOperand(u, 1, u->size, False); + break; + + case ADD: case ADC: case AND: case OR: + case XOR: case 
SUB: case SBB: + case SHL: case SHR: case SAR: + case ROL: case ROR: case RCL: case RCR: + VG_(printf)("\t"); + ppUOperand(u, 1, u->size, False); + VG_(printf)(", "); + ppUOperand(u, 2, u->size, False); + break; + + case GETV: case PUTV: + VG_(printf)("\t"); + ppUOperand(u, 1, u->opcode==PUTV ? 4 : u->size, False); + VG_(printf)(", "); + ppUOperand(u, 2, u->opcode==GETV ? 4 : u->size, False); + break; + + case WIDEN: + VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)), + u->signed_widen?'s':'z'); + VG_(printf)("\t"); + ppUOperand(u, 1, u->size, False); + break; + + case TESTV: case SETV: + VG_(printf)("\t"); + ppUOperand(u, 1, u->size, False); + break; + + default: VG_(panic)("ppUInstr: unhandled opcode"); + } + + if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) { + VG_(printf)(" ("); + if (u->flags_r != FlagsEmpty) + vg_ppFlagSet("-r", u->flags_r); + if (u->flags_w != FlagsEmpty) + vg_ppFlagSet("-w", u->flags_w); + VG_(printf)(")"); + } + VG_(printf)("\n"); +} + + +void VG_(ppUCodeBlock) ( UCodeBlock* cb, Char* title ) +{ + Int i; + VG_(printf)("\n%s\n", title); + for (i = 0; i < cb->used; i++) + if (0 || cb->instrs[i].opcode != NOP) + VG_(ppUInstr) ( i, &cb->instrs[i] ); + VG_(printf)("\n"); +} + + +/*------------------------------------------------------------*/ +/*--- uinstr helpers for register allocation ---*/ +/*--- and code improvement. ---*/ +/*------------------------------------------------------------*/ + +/* A structure for communicating temp uses, and for indicating + temp->real register mappings for patchUInstr. */ +typedef + struct { + Int realNo; + Int tempNo; + Bool isWrite; + } + TempUse; + + +/* Get the temp use of a uinstr, parking them in an array supplied by + the caller, which is assumed to be big enough. Return the number + of entries. Insns which read _and_ write a register wind up + mentioning it twice. 
Entries are placed in the array in program + order, so that if a reg is read-modified-written, it appears first + as a read and then as a write. +*/ +static __inline__ +Int getTempUsage ( UInstr* u, TempUse* arr ) +{ + +# define RD(ono) \ + if (mycat(u->tag,ono) == TempReg) \ + { arr[n].tempNo = mycat(u->val,ono); \ + arr[n].isWrite = False; n++; } +# define WR(ono) \ + if (mycat(u->tag,ono) == TempReg) \ + { arr[n].tempNo = mycat(u->val,ono); \ + arr[n].isWrite = True; n++; } + + Int n = 0; + switch (u->opcode) { + case LEA1: RD(1); WR(2); break; + case LEA2: RD(1); RD(2); WR(3); break; + + case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E: break; + case FPU_R: case FPU_W: RD(2); break; + + case GETF: WR(1); break; + case PUTF: RD(1); break; + + case GET: WR(2); break; + case PUT: RD(1); break; + case LOAD: RD(1); WR(2); break; + case STORE: RD(1); RD(2); break; + case MOV: RD(1); WR(2); break; + + case JMP: RD(1); break; + case CLEAR: case CALLM: break; + + case PUSH: RD(1); break; + case POP: WR(1); break; + + case TAG2: + case CMOV: + case ADD: case ADC: case AND: case OR: + case XOR: case SUB: case SBB: + RD(1); RD(2); WR(2); break; + + case SHL: case SHR: case SAR: + case ROL: case ROR: case RCL: case RCR: + RD(1); RD(2); WR(2); break; + + case NOT: case NEG: case INC: case DEC: case TAG1: case BSWAP: + RD(1); WR(1); break; + + case WIDEN: RD(1); WR(1); break; + + case CC2VAL: WR(1); break; + case JIFZ: RD(1); break; + + /* These sizes are only ever consulted when the instrumentation + code is being added, so the following can return + manifestly-bogus sizes. 
*/ + case LOADV: RD(1); WR(2); break; + case STOREV: RD(1); RD(2); break; + case GETV: WR(2); break; + case PUTV: RD(1); break; + case TESTV: RD(1); break; + case SETV: WR(1); break; + case PUTVF: RD(1); break; + case GETVF: WR(1); break; + + default: VG_(panic)("getTempUsage: unhandled opcode"); + } + return n; + +# undef RD +# undef WR +} + + +/* Change temp regs in u into real regs, as directed by tmap. */ +static __inline__ +void patchUInstr ( UInstr* u, TempUse* tmap, Int n_tmap ) +{ + Int i; + if (u->tag1 == TempReg) { + for (i = 0; i < n_tmap; i++) + if (tmap[i].tempNo == u->val1) break; + if (i == n_tmap) VG_(panic)("patchUInstr(1)"); + u->tag1 = RealReg; + u->val1 = tmap[i].realNo; + } + if (u->tag2 == TempReg) { + for (i = 0; i < n_tmap; i++) + if (tmap[i].tempNo == u->val2) break; + if (i == n_tmap) VG_(panic)("patchUInstr(2)"); + u->tag2 = RealReg; + u->val2 = tmap[i].realNo; + } + if (u->tag3 == TempReg) { + for (i = 0; i < n_tmap; i++) + if (tmap[i].tempNo == u->val3) break; + if (i == n_tmap) VG_(panic)("patchUInstr(3)"); + u->tag3 = RealReg; + u->val3 = tmap[i].realNo; + } +} + + +/* Tedious x86-specific hack which compensates for the fact that the + register numbers for %ah .. %dh do not correspond to those for %eax + .. %edx. It maps a (reg size, reg no) pair to the number of the + containing 32-bit reg. */ +static __inline__ +Int containingArchRegOf ( Int sz, Int aregno ) +{ + switch (sz) { + case 4: return aregno; + case 2: return aregno; + case 1: return aregno >= 4 ? aregno-4 : aregno; + default: VG_(panic)("containingArchRegOf"); + } +} + + +/* If u reads an ArchReg, return the number of the containing arch + reg. Otherwise return -1. Used in redundant-PUT elimination. 
*/ +static __inline__ +Int maybe_uinstrReadsArchReg ( UInstr* u ) +{ + switch (u->opcode) { + case GET: + case ADD: case ADC: case AND: case OR: + case XOR: case SUB: case SBB: + case SHL: case SHR: case SAR: case ROL: + case ROR: case RCL: case RCR: + if (u->tag1 == ArchReg) + return containingArchRegOf ( u->size, u->val1 ); + else + return -1; + + case GETF: case PUTF: + case CALLM_S: case CALLM_E: + case INCEIP: + case LEA1: + case LEA2: + case NOP: + case PUT: + case LOAD: + case STORE: + case MOV: + case CMOV: + case JMP: + case CALLM: case CLEAR: case PUSH: case POP: + case NOT: case NEG: case INC: case DEC: case BSWAP: + case CC2VAL: + case JIFZ: + case FPU: case FPU_R: case FPU_W: + case WIDEN: + return -1; + + default: + VG_(ppUInstr)(0,u); + VG_(panic)("maybe_uinstrReadsArchReg: unhandled opcode"); + } +} + +static __inline__ +Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg ) +{ + Int i, k; + TempUse tempUse[3]; + k = getTempUsage ( u, &tempUse[0] ); + for (i = 0; i < k; i++) + if (tempUse[i].tempNo == tempreg) + return True; + return False; +} + + +/*------------------------------------------------------------*/ +/*--- ucode improvement. ---*/ +/*------------------------------------------------------------*/ + +/* Improve the code in cb by doing + -- Redundant ArchReg-fetch elimination + -- Redundant PUT elimination + -- Redundant cond-code restore/save elimination + The overall effect of these is to allow target registers to be + cached in host registers over multiple target insns. +*/ +static void vg_improve ( UCodeBlock* cb ) +{ + Int i, j, k, m, n, ar, tr, told, actual_areg; + Int areg_map[8]; + Bool annul_put[8]; + TempUse tempUse[3]; + UInstr* u; + Bool wr; + Int* last_live_before; + FlagSet future_dead_flags; + + if (cb->nextTemp > 0) + last_live_before = VG_(jitmalloc) ( cb->nextTemp * sizeof(Int) ); + else + last_live_before = NULL; + + + /* PASS 1: redundant GET elimination. 
(Actually, more general than + that -- eliminates redundant fetches of ArchRegs). */ + + /* Find the live-range-ends for all temporaries. Duplicates code + in the register allocator :-( */ + + for (i = 0; i < cb->nextTemp; i++) last_live_before[i] = -1; + + for (i = cb->used-1; i >= 0; i--) { + u = &cb->instrs[i]; + + k = getTempUsage(u, &tempUse[0]); + + /* For each temp usage ... bwds in program order. */ + for (j = k-1; j >= 0; j--) { + tr = tempUse[j].tempNo; + wr = tempUse[j].isWrite; + if (last_live_before[tr] == -1) { + vg_assert(tr >= 0 && tr < cb->nextTemp); + last_live_before[tr] = wr ? (i+1) : i; + } + } + + } + +# define BIND_ARCH_TO_TEMP(archreg,tempreg)\ + { Int q; \ + /* Invalidate any old binding(s) to tempreg. */ \ + for (q = 0; q < 8; q++) \ + if (areg_map[q] == tempreg) areg_map[q] = -1; \ + /* Add the new binding. */ \ + areg_map[archreg] = (tempreg); \ + } + + /* Set up the A-reg map. */ + for (i = 0; i < 8; i++) areg_map[i] = -1; + + /* Scan insns. */ + for (i = 0; i < cb->used; i++) { + u = &cb->instrs[i]; + if (u->opcode == GET && u->size == 4) { + /* GET; see if it can be annulled. */ + vg_assert(u->tag1 == ArchReg); + vg_assert(u->tag2 == TempReg); + ar = u->val1; + tr = u->val2; + told = areg_map[ar]; + if (told != -1 && last_live_before[told] <= i) { + /* ar already has an old mapping to told, but that runs + out here. Annul this GET, rename tr to told for the + rest of the block, and extend told's live range to that + of tr. */ + u->opcode = NOP; + u->tag1 = u->tag2 = NoValue; + n = last_live_before[tr] + 1; + if (n > cb->used) n = cb->used; + last_live_before[told] = last_live_before[tr]; + last_live_before[tr] = i-1; + if (VG_(disassemble)) + VG_(printf)( + "at %d: delete GET, rename t%d to t%d in (%d .. 
%d)\n", + i, tr, told,i+1, n-1); + for (m = i+1; m < n; m++) { + if (cb->instrs[m].tag1 == TempReg + && cb->instrs[m].val1 == tr) + cb->instrs[m].val1 = told; + if (cb->instrs[m].tag2 == TempReg + && cb->instrs[m].val2 == tr) + cb->instrs[m].val2 = told; + } + BIND_ARCH_TO_TEMP(ar,told); + } + else + BIND_ARCH_TO_TEMP(ar,tr); + } + else if (u->opcode == GET && u->size != 4) { + /* Invalidate any mapping for this archreg. */ + actual_areg = containingArchRegOf ( u->size, u->val1 ); + areg_map[actual_areg] = -1; + } + else if (u->opcode == PUT && u->size == 4) { + /* PUT; re-establish t -> a binding */ + vg_assert(u->tag1 == TempReg); + vg_assert(u->tag2 == ArchReg); + BIND_ARCH_TO_TEMP(u->val2, u->val1); + } + else if (u->opcode == PUT && u->size != 4) { + /* Invalidate any mapping for this archreg. */ + actual_areg = containingArchRegOf ( u->size, u->val2 ); + areg_map[actual_areg] = -1; + } else { + + /* see if insn has an archreg as a read operand; if so try to + map it. */ + if (u->tag1 == ArchReg && u->size == 4 + && areg_map[u->val1] != -1) { + switch (u->opcode) { + case ADD: case SUB: case AND: case OR: case XOR: + case ADC: case SBB: + case SHL: case SHR: case SAR: case ROL: case ROR: + case RCL: case RCR: + if (VG_(disassemble)) + VG_(printf)( + "at %d: change ArchReg %S to TempReg t%d\n", + i, nameIReg(4,u->val1), areg_map[u->val1]); + u->tag1 = TempReg; + u->val1 = areg_map[u->val1]; + /* Remember to extend the live range of the TempReg, + if necessary. */ + if (last_live_before[u->val1] < i) + last_live_before[u->val1] = i; + break; + default: + break; + } + } + + /* boring insn; invalidate any mappings to temps it writes */ + k = getTempUsage(u, &tempUse[0]); + + for (j = 0; j < k; j++) { + wr = tempUse[j].isWrite; + if (!wr) continue; + tr = tempUse[j].tempNo; + for (m = 0; m < 8; m++) + if (areg_map[m] == tr) areg_map[m] = -1; + } + } + + } + +# undef BIND_ARCH_TO_TEMP + + /* PASS 2: redundant PUT elimination. 
If doing instrumentation, + don't annul (delay) puts of %ESP, since the memory check + machinery always requires the in-memory value of %ESP to be up + to date. + */ + for (j = 0; j < 8; j++) + annul_put[j] = False; + + for (i = cb->used-1; i >= 0; i--) { + u = &cb->instrs[i]; + if (u->opcode == NOP) continue; + + if (u->opcode == PUT && u->size == 4) { + vg_assert(u->tag2 == ArchReg); + actual_areg = containingArchRegOf ( 4, u->val2 ); + if (annul_put[actual_areg]) { + u->opcode = NOP; + u->tag1 = u->tag2 = NoValue; + if (VG_(disassemble)) + VG_(printf)("at %d: delete PUT\n", i ); + } else { + if (!(VG_(clo_instrument) && actual_areg == R_ESP)) + annul_put[actual_areg] = True; + } + } + else if (u->opcode == PUT && u->size != 4) { + actual_areg = containingArchRegOf ( u->size, u->val2 ); + annul_put[actual_areg] = False; + } + else if (u->opcode == JMP || u->opcode == JIFZ + || u->opcode == CALLM) { + for (j = 0; j < 8; j++) + annul_put[j] = False; + } + else { + /* If an instruction reads an ArchReg, the immediately + preceding PUT cannot be annulled. */ + actual_areg = maybe_uinstrReadsArchReg ( u ); + if (actual_areg != -1) + annul_put[actual_areg] = False; + } + } + + /* PASS 2a: redundant-move elimination. Given MOV t1, t2 and t1 is + dead after this point, annul the MOV insn and rename t2 to t1. + Further modifies the last_live_before map. */ + +# if 0 + VG_(ppUCodeBlock)(cb, "Before MOV elimination" ); + for (i = 0; i < cb->nextTemp; i++) + VG_(printf)("llb[t%d]=%d ", i, last_live_before[i]); + VG_(printf)("\n"); +# endif + + for (i = 0; i < cb->used-1; i++) { + u = &cb->instrs[i]; + if (u->opcode != MOV) continue; + if (u->tag1 == Literal) continue; + vg_assert(u->tag1 == TempReg); + vg_assert(u->tag2 == TempReg); + if (last_live_before[u->val1] == i) { + if (VG_(disassemble)) + VG_(printf)( + "at %d: delete MOV, rename t%d to t%d in (%d .. 
%d)\n", + i, u->val2, u->val1, i+1, last_live_before[u->val2] ); + for (j = i+1; j <= last_live_before[u->val2]; j++) { + if (cb->instrs[j].tag1 == TempReg + && cb->instrs[j].val1 == u->val2) + cb->instrs[j].val1 = u->val1; + if (cb->instrs[j].tag2 == TempReg + && cb->instrs[j].val2 == u->val2) + cb->instrs[j].val2 = u->val1; + } + last_live_before[u->val1] = last_live_before[u->val2]; + last_live_before[u->val2] = i-1; + u->opcode = NOP; + u->tag1 = u->tag2 = NoValue; + } + } + + /* PASS 3: redundant condition-code restore/save elimination. + Scan backwards from the end. future_dead_flags records the set + of flags which are dead at this point, that is, will be written + before they are next read. Earlier uinsns which write flags + already in future_dead_flags can have their writes annulled. + */ + future_dead_flags = FlagsEmpty; + + for (i = cb->used-1; i >= 0; i--) { + u = &cb->instrs[i]; + + /* We might never make it to insns beyond this one, so be + conservative. */ + if (u->opcode == JIFZ || u->opcode == JMP) { + future_dead_flags = FlagsEmpty; + continue; + } + + /* We can annul the flags written by this insn if it writes a + subset (or eq) of the set of flags known to be dead after + this insn. If not, just record the flags also written by + this insn.*/ + if (u->flags_w != FlagsEmpty + && VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) { + if (VG_(disassemble)) { + VG_(printf)("at %d: annul flag write ", i); + vg_ppFlagSet("", u->flags_w); + VG_(printf)(" due to later "); + vg_ppFlagSet("", future_dead_flags); + VG_(printf)("\n"); + } + u->flags_w = FlagsEmpty; + } else { + future_dead_flags + = VG_UNION_FLAG_SETS ( u->flags_w, future_dead_flags ); + } + + /* If this insn also reads flags, empty out future_dead_flags so + as to force preceding writes not to be annulled. 
*/ + if (u->flags_r != FlagsEmpty) + future_dead_flags = FlagsEmpty; + } + + if (last_live_before) + VG_(jitfree) ( last_live_before ); +} + + +/*------------------------------------------------------------*/ +/*--- The new register allocator. ---*/ +/*------------------------------------------------------------*/ + +typedef + struct { + /* Becomes live for the first time after this insn ... */ + Int live_after; + /* Becomes dead for the last time after this insn ... */ + Int dead_before; + /* The "home" spill slot, if needed. Never changes. */ + Int spill_no; + /* Where is it? VG_NOVALUE==in a spill slot; else in reg. */ + Int real_no; + } + TempInfo; + + +/* Take a ucode block and allocate its TempRegs to RealRegs, or put + them in spill locations, and add spill code, if there are not + enough real regs. The usual register allocation deal, in short. + + Important redundancy of representation: + + real_to_temp maps real reg ranks (RRRs) to TempReg nos, or + to VG_NOVALUE if the real reg has no currently assigned TempReg. + + The .real_no field of a TempInfo gives the current RRR for + this TempReg, or VG_NOVALUE if the TempReg is currently + in memory, in which case it is in the SpillNo denoted by + spillno. + + These pieces of information (a fwds-bwds mapping, really) must + be kept consistent! + + This allocator uses the so-called Second Chance Bin Packing + algorithm, as described in "Quality and Speed in Linear-scan + Register Allocation" (Traub, Holloway and Smith, ACM PLDI98, + pp142-151). It is simple and fast and remarkably good at + minimising the amount of spill code introduced. +*/ + +static +UCodeBlock* vg_do_register_allocation ( UCodeBlock* c1 ) +{ + TempInfo* temp_info; + Int real_to_temp[VG_MAX_REALREGS]; + Bool is_spill_cand[VG_MAX_REALREGS]; + Int ss_busy_until_before[VG_MAX_SPILLSLOTS]; + Int i, j, k, m, r, tno, max_ss_no; + Bool wr, defer, isRead, spill_reqd; + TempUse tempUse[3]; + UCodeBlock* c2; + + /* Used to denote ... 
well, "no value" in this fn. */ +# define VG_NOTHING (-2) + + /* Initialise the TempReg info. */ + if (c1->nextTemp > 0) + temp_info = VG_(jitmalloc)(c1->nextTemp * sizeof(TempInfo) ); + else + temp_info = NULL; + + for (i = 0; i < c1->nextTemp; i++) { + temp_info[i].live_after = VG_NOTHING; + temp_info[i].dead_before = VG_NOTHING; + temp_info[i].spill_no = VG_NOTHING; + /* temp_info[i].real_no is not yet relevant. */ + } + + spill_reqd = False; + + /* Scan fwds to establish live ranges. */ + + for (i = 0; i < c1->used; i++) { + k = getTempUsage(&c1->instrs[i], &tempUse[0]); + vg_assert(k >= 0 && k <= 3); + + /* For each temp usage ... fwds in program order */ + for (j = 0; j < k; j++) { + tno = tempUse[j].tempNo; + wr = tempUse[j].isWrite; + if (wr) { + /* Writes hold a reg live until after this insn. */ + if (temp_info[tno].live_after == VG_NOTHING) + temp_info[tno].live_after = i; + if (temp_info[tno].dead_before < i + 1) + temp_info[tno].dead_before = i + 1; + } else { + /* First use of a tmp should be a write. */ + vg_assert(temp_info[tno].live_after != VG_NOTHING); + /* Reads only hold it live until before this insn. */ + if (temp_info[tno].dead_before < i) + temp_info[tno].dead_before = i; + } + } + } + +# if 0 + /* Sanity check on live ranges. Expensive but correct. */ + for (i = 0; i < c1->nextTemp; i++) { + vg_assert( (temp_info[i].live_after == VG_NOTHING + && temp_info[i].dead_before == VG_NOTHING) + || (temp_info[i].live_after != VG_NOTHING + && temp_info[i].dead_before != VG_NOTHING) ); + } +# endif + + /* Do a rank-based allocation of TempRegs to spill slot numbers. + We put as few as possible values in spill slots, but + nevertheless need to have an assignment to them just in case. */ + + max_ss_no = -1; + + for (i = 0; i < VG_MAX_SPILLSLOTS; i++) + ss_busy_until_before[i] = 0; + + for (i = 0; i < c1->nextTemp; i++) { + + /* True iff this temp is unused. 
*/ + if (temp_info[i].live_after == VG_NOTHING) + continue; + + /* Find the lowest-numbered spill slot which is available at the + start point of this interval, and assign the interval to + it. */ + for (j = 0; j < VG_MAX_SPILLSLOTS; j++) + if (ss_busy_until_before[j] <= temp_info[i].live_after) + break; + if (j == VG_MAX_SPILLSLOTS) { + VG_(printf)("VG_MAX_SPILLSLOTS is too low; increase and recompile.\n"); + VG_(panic)("register allocation failed -- out of spill slots"); + } + ss_busy_until_before[j] = temp_info[i].dead_before; + temp_info[i].spill_no = j; + if (j > max_ss_no) + max_ss_no = j; + } + + VG_(total_reg_rank) += (max_ss_no+1); + + /* Show live ranges and assigned spill slot nos. */ + + if (VG_(disassemble)) { + VG_(printf)("Live Range Assignments\n"); + + for (i = 0; i < c1->nextTemp; i++) { + if (temp_info[i].live_after == VG_NOTHING) + continue; + VG_(printf)( + " LR %d is after %d to before %d spillno %d\n", + i, + temp_info[i].live_after, + temp_info[i].dead_before, + temp_info[i].spill_no + ); + } + } + + /* Now that we've established a spill slot number for each used + temporary, we can go ahead and do the core of the "Second-chance + binpacking" allocation algorithm. */ + + /* Resulting code goes here. We generate it all in a forwards + pass. */ + c2 = allocCodeBlock(); + + /* At the start, no TempRegs are assigned to any real register. + Correspondingly, all temps claim to be currently resident in + their spill slots, as computed by the previous two passes. */ + for (i = 0; i < VG_MAX_REALREGS; i++) + real_to_temp[i] = VG_NOTHING; + for (i = 0; i < c1->nextTemp; i++) + temp_info[i].real_no = VG_NOTHING; + + if (VG_(disassemble)) + VG_(printf)("\n"); + + /* Process each insn in turn. */ + for (i = 0; i < c1->used; i++) { + + if (c1->instrs[i].opcode == NOP) continue; + VG_(uinstrs_prealloc)++; + +# if 0 + /* Check map consistency. Expensive but correct. 
*/ + for (r = 0; r < VG_MAX_REALREGS; r++) { + if (real_to_temp[r] != VG_NOTHING) { + tno = real_to_temp[r]; + vg_assert(tno >= 0 && tno < c1->nextTemp); + vg_assert(temp_info[tno].real_no == r); + } + } + for (tno = 0; tno < c1->nextTemp; tno++) { + if (temp_info[tno].real_no != VG_NOTHING) { + r = temp_info[tno].real_no; + vg_assert(r >= 0 && r < VG_MAX_REALREGS); + vg_assert(real_to_temp[r] == tno); + } + } +# endif + + if (VG_(disassemble)) + VG_(ppUInstr)(i, &c1->instrs[i]); + + /* First, free up enough real regs for this insn. This may + generate spill stores since we may have to evict some TempRegs + currently in real regs. Also generates spill loads. */ + + k = getTempUsage(&c1->instrs[i], &tempUse[0]); + vg_assert(k >= 0 && k <= 3); + + /* For each ***different*** temp mentioned in the insn .... */ + for (j = 0; j < k; j++) { + + /* First check if the temp is mentioned again later; if so, + ignore this mention. We only want to process each temp + used by the insn once, even if it is mentioned more than + once. */ + defer = False; + tno = tempUse[j].tempNo; + for (m = j+1; m < k; m++) + if (tempUse[m].tempNo == tno) + defer = True; + if (defer) + continue; + + /* Now we're trying to find a register for tempUse[j].tempNo. + First of all, if it already has a register assigned, we + don't need to do anything more. */ + if (temp_info[tno].real_no != VG_NOTHING) + continue; + + /* No luck. The next thing to do is see if there is a + currently unassigned register available. If so, bag it. */ + for (r = 0; r < VG_MAX_REALREGS; r++) { + if (real_to_temp[r] == VG_NOTHING) + break; + } + if (r < VG_MAX_REALREGS) { + real_to_temp[r] = tno; + temp_info[tno].real_no = r; + continue; + } + + /* Unfortunately, that didn't pan out either. So we'll have + to eject some other unfortunate TempReg into a spill slot + in order to free up a register. Of course, we need to be + careful not to eject some other TempReg needed by this + insn. + + Select r in 0 .. 
VG_MAX_REALREGS-1 such that + real_to_temp[r] is not mentioned in + tempUse[0 .. k-1].tempNo, since it would be just plain + wrong to eject some other TempReg which we need to use in + this insn. + + It is here that it is important to make a good choice of + register to spill. */ + + /* First, mark those regs which are not spill candidates. */ + for (r = 0; r < VG_MAX_REALREGS; r++) { + is_spill_cand[r] = True; + for (m = 0; m < k; m++) { + if (real_to_temp[r] == tempUse[m].tempNo) { + is_spill_cand[r] = False; + break; + } + } + } + + /* We can choose any r satisfying is_spill_cand[r]. However, + try to make a good choice. First, try and find r such + that the associated TempReg is already dead. */ + for (r = 0; r < VG_MAX_REALREGS; r++) { + if (is_spill_cand[r] && + temp_info[real_to_temp[r]].dead_before <= i) + goto have_spill_cand; + } + + /* No spill cand is mapped to a dead TempReg. Now we really + _do_ have to generate spill code. Choose r so that the + next use of its associated TempReg is as far ahead as + possible, in the hope that this will minimise the number of + consequent reloads required. This is a bit expensive, but + we don't have to do it very often. */ + { + Int furthest_r = VG_MAX_REALREGS; + Int furthest = 0; + for (r = 0; r < VG_MAX_REALREGS; r++) { + if (!is_spill_cand[r]) continue; + for (m = i+1; m < c1->used; m++) + if (uInstrMentionsTempReg(&c1->instrs[m], + real_to_temp[r])) + break; + if (m > furthest) { + furthest = m; + furthest_r = r; + } + } + r = furthest_r; + goto have_spill_cand; + } + + have_spill_cand: + if (r == VG_MAX_REALREGS) + VG_(panic)("new reg alloc: out of registers ?!"); + + /* Eject r. Important refinement: don't bother if the + associated TempReg is now dead. 
*/ + vg_assert(real_to_temp[r] != VG_NOTHING); + vg_assert(real_to_temp[r] != tno); + temp_info[real_to_temp[r]].real_no = VG_NOTHING; + if (temp_info[real_to_temp[r]].dead_before > i) { + uInstr2(c2, PUT, 4, + RealReg, VG_(rankToRealRegNo)(r), + SpillNo, temp_info[real_to_temp[r]].spill_no); + VG_(uinstrs_spill)++; + spill_reqd = True; + if (VG_(disassemble)) + VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2)); + } + + /* Decide if tno is read. */ + isRead = False; + for (m = 0; m < k; m++) + if (tempUse[m].tempNo == tno && !tempUse[m].isWrite) + isRead = True; + + /* If so, generate a spill load. */ + if (isRead) { + uInstr2(c2, GET, 4, + SpillNo, temp_info[tno].spill_no, + RealReg, VG_(rankToRealRegNo)(r) ); + VG_(uinstrs_spill)++; + spill_reqd = True; + if (VG_(disassemble)) + VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2)); + } + + /* Update the forwards and backwards maps. */ + real_to_temp[r] = tno; + temp_info[tno].real_no = r; + } + + /* By this point, all TempRegs mentioned by the insn have been + bought into real regs. We now copy the insn to the output + and use patchUInstr to convert its rTempRegs into + realregs. */ + for (j = 0; j < k; j++) + tempUse[j].realNo + = VG_(rankToRealRegNo)(temp_info[tempUse[j].tempNo].real_no); + copyUInstr(c2, &c1->instrs[i]); + patchUInstr(&LAST_UINSTR(c2), &tempUse[0], k); + + if (VG_(disassemble)) { + VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2)); + VG_(printf)("\n"); + } + } + + if (temp_info != NULL) + VG_(jitfree)(temp_info); + + freeCodeBlock(c1); + + if (spill_reqd) + VG_(translations_needing_spill)++; + + return c2; + +# undef VG_NOTHING + +} + + +/*------------------------------------------------------------*/ +/*--- New instrumentation machinery. 
---*/ +/*------------------------------------------------------------*/ + +static +VgTagOp get_VgT_ImproveOR_TQ ( Int sz ) +{ + switch (sz) { + case 4: return VgT_ImproveOR4_TQ; + case 2: return VgT_ImproveOR2_TQ; + case 1: return VgT_ImproveOR1_TQ; + default: VG_(panic)("get_VgT_ImproveOR_TQ"); + } +} + + +static +VgTagOp get_VgT_ImproveAND_TQ ( Int sz ) +{ + switch (sz) { + case 4: return VgT_ImproveAND4_TQ; + case 2: return VgT_ImproveAND2_TQ; + case 1: return VgT_ImproveAND1_TQ; + default: VG_(panic)("get_VgT_ImproveAND_TQ"); + } +} + + +static +VgTagOp get_VgT_Left ( Int sz ) +{ + switch (sz) { + case 4: return VgT_Left4; + case 2: return VgT_Left2; + case 1: return VgT_Left1; + default: VG_(panic)("get_VgT_Left"); + } +} + + +static +VgTagOp get_VgT_UifU ( Int sz ) +{ + switch (sz) { + case 4: return VgT_UifU4; + case 2: return VgT_UifU2; + case 1: return VgT_UifU1; + case 0: return VgT_UifU0; + default: VG_(panic)("get_VgT_UifU"); + } +} + + +static +VgTagOp get_VgT_DifD ( Int sz ) +{ + switch (sz) { + case 4: return VgT_DifD4; + case 2: return VgT_DifD2; + case 1: return VgT_DifD1; + default: VG_(panic)("get_VgT_DifD"); + } +} + + +static +VgTagOp get_VgT_PCast ( Int szs, Int szd ) +{ + if (szs == 4 && szd == 0) return VgT_PCast40; + if (szs == 2 && szd == 0) return VgT_PCast20; + if (szs == 1 && szd == 0) return VgT_PCast10; + if (szs == 0 && szd == 1) return VgT_PCast01; + if (szs == 0 && szd == 2) return VgT_PCast02; + if (szs == 0 && szd == 4) return VgT_PCast04; + if (szs == 1 && szd == 4) return VgT_PCast14; + if (szs == 1 && szd == 2) return VgT_PCast12; + if (szs == 1 && szd == 1) return VgT_PCast11; + VG_(printf)("get_VgT_PCast(%d,%d)\n", szs, szd); + VG_(panic)("get_VgT_PCast"); +} + + +static +VgTagOp get_VgT_Widen ( Bool syned, Int szs, Int szd ) +{ + if (szs == 1 && szd == 2 && syned) return VgT_SWiden12; + if (szs == 1 && szd == 2 && !syned) return VgT_ZWiden12; + + if (szs == 1 && szd == 4 && syned) return VgT_SWiden14; + if (szs == 1 && szd 
== 4 && !syned) return VgT_ZWiden14; + + if (szs == 2 && szd == 4 && syned) return VgT_SWiden24; + if (szs == 2 && szd == 4 && !syned) return VgT_ZWiden24; + + VG_(printf)("get_VgT_Widen(%d,%d,%d)\n", (Int)syned, szs, szd); + VG_(panic)("get_VgT_Widen"); +} + +/* Pessimally cast the spec'd shadow from one size to another. */ +static +void create_PCast ( UCodeBlock* cb, Int szs, Int szd, Int tempreg ) +{ + if (szs == 0 && szd == 0) + return; + uInstr3(cb, TAG1, 0, TempReg, tempreg, + NoValue, 0, + Lit16, get_VgT_PCast(szs,szd)); +} + + +/* Create a signed or unsigned widen of the spec'd shadow from one + size to another. The only allowed size transitions are 1->2, 1->4 + and 2->4. */ +static +void create_Widen ( UCodeBlock* cb, Bool signed_widen, + Int szs, Int szd, Int tempreg ) +{ + if (szs == szd) return; + uInstr3(cb, TAG1, 0, TempReg, tempreg, + NoValue, 0, + Lit16, get_VgT_Widen(signed_widen,szs,szd)); +} + + +/* Get the condition codes into a new shadow, at the given size. */ +static +Int create_GETVF ( UCodeBlock* cb, Int sz ) +{ + Int tt = newShadow(cb); + uInstr1(cb, GETVF, 0, TempReg, tt); + create_PCast(cb, 0, sz, tt); + return tt; +} + + +/* Save the condition codes from the spec'd shadow. */ +static +void create_PUTVF ( UCodeBlock* cb, Int sz, Int tempreg ) +{ + if (sz == 0) { + uInstr1(cb, PUTVF, 0, TempReg, tempreg); + } else { + Int tt = newShadow(cb); + uInstr2(cb, MOV, 4, TempReg, tempreg, TempReg, tt); + create_PCast(cb, sz, 0, tt); + uInstr1(cb, PUTVF, 0, TempReg, tt); + } +} + + +/* Do Left on the spec'd shadow. */ +static +void create_Left ( UCodeBlock* cb, Int sz, Int tempreg ) +{ + uInstr3(cb, TAG1, 0, + TempReg, tempreg, + NoValue, 0, + Lit16, get_VgT_Left(sz)); +} + + +/* Do UifU on ts and td, putting the result in td. */ +static +void create_UifU ( UCodeBlock* cb, Int sz, Int ts, Int td ) +{ + uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td, + Lit16, get_VgT_UifU(sz)); +} + + +/* Do DifD on ts and td, putting the result in td. 
*/ +static +void create_DifD ( UCodeBlock* cb, Int sz, Int ts, Int td ) +{ + uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td, + Lit16, get_VgT_DifD(sz)); +} + + +/* Do HelpAND on value tval and tag tqqq, putting the result in + tqqq. */ +static +void create_ImproveAND_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq ) +{ + uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq, + Lit16, get_VgT_ImproveAND_TQ(sz)); +} + + +/* Do HelpOR on value tval and tag tqqq, putting the result in + tqqq. */ +static +void create_ImproveOR_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq ) +{ + uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq, + Lit16, get_VgT_ImproveOR_TQ(sz)); +} + + +/* Get the shadow for an operand described by (tag, val). Emit code + to do this and return the identity of the shadow holding the + result. The result tag is always copied into a new shadow, so it + can be modified without trashing the original.*/ +static +Int /* TempReg */ getOperandShadow ( UCodeBlock* cb, + Int sz, Int tag, Int val ) +{ + Int sh; + sh = newShadow(cb); + if (tag == TempReg) { + uInstr2(cb, MOV, 4, TempReg, SHADOW(val), TempReg, sh); + return sh; + } + if (tag == Literal) { + uInstr1(cb, SETV, sz, TempReg, sh); + return sh; + } + if (tag == ArchReg) { + uInstr2(cb, GETV, sz, ArchReg, val, TempReg, sh); + return sh; + } + VG_(panic)("getOperandShadow"); +} + + + +/* Create and return an instrumented version of cb_in. Free cb_in + before returning. */ +static UCodeBlock* vg_instrument ( UCodeBlock* cb_in ) +{ + UCodeBlock* cb; + Int i, j; + UInstr* u_in; + Int qs, qd, qt, qtt; + cb = allocCodeBlock(); + cb->nextTemp = cb_in->nextTemp; + + for (i = 0; i < cb_in->used; i++) { + qs = qd = qt = qtt = INVALID_TEMPREG; + u_in = &cb_in->instrs[i]; + + /* if (i > 0) uInstr1(cb, NOP, 0, NoValue, 0); */ + + /* VG_(ppUInstr)(0, u_in); */ + switch (u_in->opcode) { + + case NOP: + break; + + case INCEIP: + copyUInstr(cb, u_in); + break; + + /* Loads and stores. Test the V bits for the address. 
+ The LOADV/STOREV does an addressibility check for the + address. */ + case LOAD: + uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1)); + uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1)); + uInstr2(cb, LOADV, u_in->size, + TempReg, u_in->val1, + TempReg, SHADOW(u_in->val2)); + copyUInstr(cb, u_in); + break; + case STORE: + uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2)); + uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val2)); + uInstr2(cb, STOREV, u_in->size, + TempReg, SHADOW(u_in->val1), + TempReg, u_in->val2); + copyUInstr(cb, u_in); + break; + + /* Moving stuff around. Make the V bits follow accordingly, + but don't do anything else. */ + + case GET: + uInstr2(cb, GETV, u_in->size, + ArchReg, u_in->val1, + TempReg, SHADOW(u_in->val2)); + copyUInstr(cb, u_in); + break; + case PUT: + uInstr2(cb, PUTV, u_in->size, + TempReg, SHADOW(u_in->val1), + ArchReg, u_in->val2); + copyUInstr(cb, u_in); + break; + + case GETF: + /* This is not the smartest way to do it, but should work. */ + qd = create_GETVF(cb, u_in->size); + uInstr2(cb, MOV, 4, TempReg, qd, TempReg, SHADOW(u_in->val1)); + copyUInstr(cb, u_in); + break; + case PUTF: + create_PUTVF(cb, u_in->size, SHADOW(u_in->val1)); + copyUInstr(cb, u_in); + break; + + case MOV: + switch (u_in->tag1) { + case TempReg: + uInstr2(cb, MOV, 4, + TempReg, SHADOW(u_in->val1), + TempReg, SHADOW(u_in->val2)); + break; + case Literal: + uInstr1(cb, SETV, u_in->size, + TempReg, SHADOW(u_in->val2)); + break; + default: + VG_(panic)("vg_instrument: MOV"); + } + copyUInstr(cb, u_in); + break; + + /* Special case of add, where one of the operands is a literal. + lea1(t) = t + some literal. + Therefore: lea1#(qa) = left(qa) + */ + case LEA1: + vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in)); + qs = SHADOW(u_in->val1); + qd = SHADOW(u_in->val2); + uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qd); + create_Left(cb, u_in->size, qd); + copyUInstr(cb, u_in); + break; + + /* Another form of add. 
+ lea2(ts,tt,shift) = ts + (tt << shift); shift is a literal + and is 0,1,2 or 3. + lea2#(qs,qt) = left(qs `UifU` (qt << shift)). + Note, subtly, that the shift puts zeroes at the bottom of qt, + meaning Valid, since the corresponding shift of tt puts + zeroes at the bottom of tb. + */ + case LEA2: { + Int shift; + vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in)); + switch (u_in->extra4b) { + case 1: shift = 0; break; + case 2: shift = 1; break; + case 4: shift = 2; break; + case 8: shift = 3; break; + default: VG_(panic)( "vg_instrument(LEA2)" ); + } + qs = SHADOW(u_in->val1); + qt = SHADOW(u_in->val2); + qd = SHADOW(u_in->val3); + uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qd); + if (shift > 0) { + uInstr2(cb, SHL, 4, Literal, 0, TempReg, qd); + uLiteral(cb, shift); + } + create_UifU(cb, 4, qs, qd); + create_Left(cb, u_in->size, qd); + copyUInstr(cb, u_in); + break; + } + + /* inc#/dec#(qd) = q `UifU` left(qd) = left(qd) */ + case INC: case DEC: + qd = SHADOW(u_in->val1); + create_Left(cb, u_in->size, qd); + if (u_in->flags_w != FlagsEmpty) + create_PUTVF(cb, u_in->size, qd); + copyUInstr(cb, u_in); + break; + + /* This is a HACK (approximation :-) */ + /* rcl#/rcr#(qs,qd) + = let q0 = pcast-sz-0(qd) `UifU` pcast-sz-0(qs) `UifU` eflags# + eflags# = q0 + qd =pcast-0-sz(q0) + Ie, cast everything down to a single bit, then back up. + This assumes that any bad bits infect the whole word and + the eflags. + */ + case RCL: case RCR: + vg_assert(u_in->flags_r != FlagsEmpty); + /* The following assertion looks like it makes sense, but is + actually wrong. Consider this: + rcll %eax + imull %eax, %eax + The rcll writes O and C but so does the imull, so the O and C + write of the rcll is annulled by the prior improvement pass. + Noticed by Kevin Ryde + */ + /* vg_assert(u_in->flags_w != FlagsEmpty); */ + qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1); + /* We can safely modify qs; cast it to 0-size. 
*/ + create_PCast(cb, u_in->size, 0, qs); + qd = SHADOW(u_in->val2); + create_PCast(cb, u_in->size, 0, qd); + /* qs is cast-to-0(shift count#), and qd is cast-to-0(value#). */ + create_UifU(cb, 0, qs, qd); + /* qs is now free; reuse it for the flag definedness. */ + qs = create_GETVF(cb, 0); + create_UifU(cb, 0, qs, qd); + create_PUTVF(cb, 0, qd); + create_PCast(cb, 0, u_in->size, qd); + copyUInstr(cb, u_in); + break; + + /* for OP in shl shr sar rol ror + (qs is shift count#, qd is value to be OP#d) + OP(ts,td) + OP#(qs,qd) + = pcast-1-sz(qs) `UifU` OP(ts,qd) + So we apply OP to the tag bits too, and then UifU with + the shift count# to take account of the possibility of it + being undefined. + + A bit subtle: + ROL/ROR rearrange the tag bits as per the value bits. + SHL/SHR shifts zeroes into the value, and corresponding + zeroes indicating Definedness into the tag. + SAR copies the top bit of the value downwards, and therefore + SAR also copies the definedness of the top bit too. + So in all five cases, we just apply the same op to the tag + bits as is applied to the value bits. Neat! + */ + case SHL: + case SHR: case SAR: + case ROL: case ROR: { + Int t_amount = INVALID_TEMPREG; + vg_assert(u_in->tag1 == TempReg || u_in->tag1 == Literal); + vg_assert(u_in->tag2 == TempReg); + qd = SHADOW(u_in->val2); + + /* Make qs hold shift-count# and make + t_amount be a TempReg holding the shift count. */ + if (u_in->tag1 == Literal) { + t_amount = newTemp(cb); + uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_amount); + uLiteral(cb, u_in->lit32); + qs = SHADOW(t_amount); + uInstr1(cb, SETV, 1, TempReg, qs); + } else { + t_amount = u_in->val1; + qs = SHADOW(u_in->val1); + } + + uInstr2(cb, u_in->opcode, + u_in->size, + TempReg, t_amount, + TempReg, qd); + qt = newShadow(cb); + uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt); + create_PCast(cb, 1, u_in->size, qt); + create_UifU(cb, u_in->size, qt, qd); + copyUInstr(cb, u_in); + break; + } + + /* One simple tag operation. 
*/ + case WIDEN: + vg_assert(u_in->tag1 == TempReg); + create_Widen(cb, u_in->signed_widen, u_in->extra4b, u_in->size, + SHADOW(u_in->val1)); + copyUInstr(cb, u_in); + break; + + /* not#(x) = x (since bitwise independent) */ + case NOT: + vg_assert(u_in->tag1 == TempReg); + copyUInstr(cb, u_in); + break; + + /* neg#(x) = left(x) (derivable from case for SUB) */ + case NEG: + vg_assert(u_in->tag1 == TempReg); + create_Left(cb, u_in->size, SHADOW(u_in->val1)); + copyUInstr(cb, u_in); + break; + + /* bswap#(x) = bswap(x) */ + case BSWAP: + vg_assert(u_in->tag1 == TempReg); + vg_assert(u_in->size == 4); + qd = SHADOW(u_in->val1); + uInstr1(cb, BSWAP, 4, TempReg, qd); + copyUInstr(cb, u_in); + break; + + /* cc2val#(qd) = pcast-0-to-size(eflags#) */ + case CC2VAL: + vg_assert(u_in->tag1 == TempReg); + vg_assert(u_in->flags_r != FlagsEmpty); + qt = create_GETVF(cb, u_in->size); + uInstr2(cb, MOV, 4, TempReg, qt, TempReg, SHADOW(u_in->val1)); + copyUInstr(cb, u_in); + break; + + /* cmov#(qs,qd) = cmov(qs,qd) + That is, do the cmov of tags using the same flags as for + the data (obviously). However, first do a test on the + validity of the flags. + */ + case CMOV: + vg_assert(u_in->size == 4); + vg_assert(u_in->tag1 == TempReg); + vg_assert(u_in->tag2 == TempReg); + vg_assert(u_in->flags_r != FlagsEmpty); + vg_assert(u_in->flags_w == FlagsEmpty); + qs = SHADOW(u_in->val1); + qd = SHADOW(u_in->val2); + qt = create_GETVF(cb, 0); + uInstr1(cb, TESTV, 0, TempReg, qt); + /* qt should never be referred to again. Nevertheless + ... */ + uInstr1(cb, SETV, 0, TempReg, qt); + + uInstr2(cb, CMOV, 4, TempReg, qs, TempReg, qd); + LAST_UINSTR(cb).cond = u_in->cond; + LAST_UINSTR(cb).flags_r = u_in->flags_r; + + copyUInstr(cb, u_in); + break; + + /* add#/sub#(qs,qd) + = qs `UifU` qd `UifU` left(qs) `UifU` left(qd) + = left(qs) `UifU` left(qd) + = left(qs `UifU` qd) + adc#/sbb#(qs,qd) + = left(qs `UifU` qd) `UifU` pcast(eflags#) + Second arg (dest) is TempReg. 
+ First arg (src) is Literal or TempReg or ArchReg. + */ + case ADD: case SUB: + case ADC: case SBB: + qd = SHADOW(u_in->val2); + qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1); + create_UifU(cb, u_in->size, qs, qd); + create_Left(cb, u_in->size, qd); + if (u_in->opcode == ADC || u_in->opcode == SBB) { + vg_assert(u_in->flags_r != FlagsEmpty); + qt = create_GETVF(cb, u_in->size); + create_UifU(cb, u_in->size, qt, qd); + } + if (u_in->flags_w != FlagsEmpty) { + create_PUTVF(cb, u_in->size, qd); + } + copyUInstr(cb, u_in); + break; + + /* xor#(qs,qd) = qs `UifU` qd */ + case XOR: + qd = SHADOW(u_in->val2); + qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1); + create_UifU(cb, u_in->size, qs, qd); + if (u_in->flags_w != FlagsEmpty) { + create_PUTVF(cb, u_in->size, qd); + } + copyUInstr(cb, u_in); + break; + + /* and#/or#(qs,qd) + = (qs `UifU` qd) `DifD` improve(vs,qs) + `DifD` improve(vd,qd) + where improve is the relevant one of + Improve{AND,OR}_TQ + Use the following steps, with qt as a temp: + qt = improve(vd,qd) + qd = qs `UifU` qd + qd = qt `DifD` qd + qt = improve(vs,qs) + qd = qt `DifD` qd + */ + case AND: case OR: + vg_assert(u_in->tag1 == TempReg); + vg_assert(u_in->tag2 == TempReg); + qd = SHADOW(u_in->val2); + qs = SHADOW(u_in->val1); + qt = newShadow(cb); + + /* qt = improve(vd,qd) */ + uInstr2(cb, MOV, 4, TempReg, qd, TempReg, qt); + if (u_in->opcode == AND) + create_ImproveAND_TQ(cb, u_in->size, u_in->val2, qt); + else + create_ImproveOR_TQ(cb, u_in->size, u_in->val2, qt); + /* qd = qs `UifU` qd */ + create_UifU(cb, u_in->size, qs, qd); + /* qd = qt `DifD` qd */ + create_DifD(cb, u_in->size, qt, qd); + /* qt = improve(vs,qs) */ + uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt); + if (u_in->opcode == AND) + create_ImproveAND_TQ(cb, u_in->size, u_in->val1, qt); + else + create_ImproveOR_TQ(cb, u_in->size, u_in->val1, qt); + /* qd = qt `DifD` qd */ + create_DifD(cb, u_in->size, qt, qd); + /* So, finally qd is the result tag. 
*/ + if (u_in->flags_w != FlagsEmpty) { + create_PUTVF(cb, u_in->size, qd); + } + copyUInstr(cb, u_in); + break; + + /* Machinery to do with supporting CALLM. Copy the start and + end markers only to make the result easier to read + (debug); they generate no code and have no effect. + */ + case CALLM_S: case CALLM_E: + copyUInstr(cb, u_in); + break; + + /* Copy PUSH and POP verbatim. Arg/result absval + calculations are done when the associated CALL is + processed. CLEAR has no effect on absval calculations but + needs to be copied. + */ + case PUSH: case POP: case CLEAR: + copyUInstr(cb, u_in); + break; + + /* In short: + callm#(a1# ... an#) = (a1# `UifU` ... `UifU` an#) + We have to decide on a size to do the computation at, + although the choice doesn't affect correctness. We will + do a pcast to the final size anyway, so the only important + factor is to choose a size which minimises the total + number of casts needed. Valgrind: just use size 0, + regardless. It may not be very good for performance + but does simplify matters, mainly by reducing the number + of different pessimising casts which have to be implemented. + */ + case CALLM: { + UInstr* uu; + Bool res_used; + + /* Now generate the code. Get the final result absval + into qt. */ + qt = newShadow(cb); + qtt = newShadow(cb); + uInstr1(cb, SETV, 0, TempReg, qt); + for (j = i-1; cb_in->instrs[j].opcode != CALLM_S; j--) { + uu = & cb_in->instrs[j]; + if (uu->opcode != PUSH) continue; + /* cast via a temporary */ + uInstr2(cb, MOV, 4, TempReg, SHADOW(uu->val1), + TempReg, qtt); + create_PCast(cb, uu->size, 0, qtt); + create_UifU(cb, 0, qtt, qt); + } + /* Remembering also that flags read count as inputs. */ + if (u_in->flags_r != FlagsEmpty) { + qtt = create_GETVF(cb, 0); + create_UifU(cb, 0, qtt, qt); + } + + /* qt now holds the result tag. If any results from the + call are used, either by fetching with POP or + implicitly by writing the flags, we copy the result + absval to the relevant location. 
If not used, the call + must have been for its side effects, so we test qt here + and now. Note that this assumes that all values + removed by POP continue to be live. So dead args + *must* be removed with CLEAR, not by POPping them into + a dummy tempreg. + */ + res_used = False; + for (j = i+1; cb_in->instrs[j].opcode != CALLM_E; j++) { + uu = & cb_in->instrs[j]; + if (uu->opcode != POP) continue; + /* Cast via a temp. */ + uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qtt); + create_PCast(cb, 0, uu->size, qtt); + uInstr2(cb, MOV, 4, TempReg, qtt, + TempReg, SHADOW(uu->val1)); + res_used = True; + } + if (u_in->flags_w != FlagsEmpty) { + create_PUTVF(cb, 0, qt); + res_used = True; + } + if (!res_used) { + uInstr1(cb, TESTV, 0, TempReg, qt); + /* qt should never be referred to again. Nevertheless + ... */ + uInstr1(cb, SETV, 0, TempReg, qt); + } + copyUInstr(cb, u_in); + break; + } + /* Whew ... */ + + case JMP: + if (u_in->tag1 == TempReg) { + uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1)); + uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1)); + } else { + vg_assert(u_in->tag1 == Literal); + } + if (u_in->cond != CondAlways) { + vg_assert(u_in->flags_r != FlagsEmpty); + qt = create_GETVF(cb, 0); + uInstr1(cb, TESTV, 0, TempReg, qt); + /* qt should never be referred to again. Nevertheless + ... */ + uInstr1(cb, SETV, 0, TempReg, qt); + } + copyUInstr(cb, u_in); + break; + + case JIFZ: + uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1)); + uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1)); + copyUInstr(cb, u_in); + break; + + /* Emit a check on the address used. For FPU_R, the value + loaded into the FPU is checked at the time it is read from + memory (see synth_fpu_mem_check_actions). */ + case FPU_R: case FPU_W: + vg_assert(u_in->tag2 == TempReg); + uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2)); + uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val2)); + copyUInstr(cb, u_in); + break; + + /* For FPU insns not referencing memory, just copy thru. 
*/ + case FPU: + copyUInstr(cb, u_in); + break; + + default: + VG_(ppUInstr)(0, u_in); + VG_(panic)( "vg_instrument: unhandled case"); + + } /* end of switch (u_in->opcode) */ + + } /* end of for loop */ + + freeCodeBlock(cb_in); + return cb; +} + +/*------------------------------------------------------------*/ +/*--- Clean up mem check instrumentation. ---*/ +/*------------------------------------------------------------*/ + +#define VGC_IS_SHADOW(tempreg) ((tempreg % 2) == 1) +#define VGC_UNDEF ((UChar)100) +#define VGC_VALUE ((UChar)101) + +#define NOP_no_msg(uu) \ + do { uu->opcode = NOP; } while (False) + +#define NOP_tag1_op(uu) \ + do { uu->opcode = NOP; \ + if (VG_(disassemble)) \ + VG_(printf)("at %d: delete %s due to defd arg\n", \ + i, VG_(nameOfTagOp(u->val3))); \ + } while (False) + +#define SETV_tag1_op(uu,newsz) \ + do { uu->opcode = SETV; \ + uu->size = newsz; \ + uu->tag2 = uu->tag3 = NoValue; \ + if (VG_(disassemble)) \ + VG_(printf)("at %d: convert %s to SETV%d " \ + "due to defd arg\n", \ + i, VG_(nameOfTagOp(u->val3)), newsz); \ + } while (False) + + + +/* Run backwards and delete SETVs on shadow temps for which the next + action is a write. Needs an env saying whether or not the next + action is a write. The supplied UCodeBlock is destructively + modified. +*/ +static void vg_delete_redundant_SETVs ( UCodeBlock* cb ) +{ + Bool* next_is_write; + Int i, j, k, n_temps; + UInstr* u; + TempUse tempUse[3]; + + n_temps = cb->nextTemp; + if (n_temps == 0) return; + + next_is_write = VG_(jitmalloc)(n_temps * sizeof(Bool)); + + for (i = 0; i < n_temps; i++) next_is_write[i] = True; + + for (i = cb->used-1; i >= 0; i--) { + u = &cb->instrs[i]; + + if (u->opcode == MOV && VGC_IS_SHADOW(u->val2) + && next_is_write[u->val2]) { + /* This MOV is pointless because the target is dead at this + point. Delete it. 
*/ + u->opcode = NOP; + u->size = 0; + if (VG_(disassemble)) + VG_(printf)("at %d: delete MOV\n", i); + } else + + if (u->opcode == SETV) { + if (u->tag1 == TempReg) { + vg_assert(VGC_IS_SHADOW(u->val1)); + if (next_is_write[u->val1]) { + /* This write is pointless, so annul it. */ + u->opcode = NOP; + u->size = 0; + if (VG_(disassemble)) + VG_(printf)("at %d: delete SETV\n", i); + } else { + /* This write has a purpose; don't annul it, but do + notice that we did it. */ + next_is_write[u->val1] = True; + } + + } + + } else { + /* Find out what this insn does to the temps. */ + k = getTempUsage(u, &tempUse[0]); + vg_assert(k <= 3); + for (j = k-1; j >= 0; j--) { + next_is_write[ tempUse[j].tempNo ] + = tempUse[j].isWrite; + } + } + + } + + VG_(jitfree)(next_is_write); +} + + +/* Run forwards, propagating and using the is-completely-defined + property. This removes a lot of redundant tag-munging code. + Unfortunately it requires intimate knowledge of how each uinstr and + tagop modifies its arguments. This duplicates knowledge of uinstr + tempreg uses embodied in getTempUsage(), which is unfortunate. + The supplied UCodeBlock* is modified in-place. + + For each value temp, def[] should hold VGC_VALUE. + + For each shadow temp, def[] may hold 4,2,1 or 0 iff that shadow is + definitely known to be fully defined at that size. In all other + circumstances a shadow's def[] entry is VGC_UNDEF, meaning possibly + undefined. In cases of doubt, VGC_UNDEF is always safe. +*/ +static void vg_propagate_definedness ( UCodeBlock* cb ) +{ + UChar* def; + Int i, j, k, t, n_temps; + UInstr* u; + TempUse tempUse[3]; + + n_temps = cb->nextTemp; + if (n_temps == 0) return; + + def = VG_(jitmalloc)(n_temps * sizeof(UChar)); + for (i = 0; i < n_temps; i++) + def[i] = VGC_IS_SHADOW(i) ? VGC_UNDEF : VGC_VALUE; + + /* Run forwards, detecting and using the all-defined property. */ + + for (i = 0; i < cb->used; i++) { + u = &cb->instrs[i]; + switch (u->opcode) { + + /* Tag-handling uinstrs. 
*/ + + /* Deal with these quickly. */ + case NOP: + case INCEIP: + break; + + /* Make a tag defined. */ + case SETV: + vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); + def[u->val1] = u->size; + break; + + /* Check definedness of a tag. */ + case TESTV: + vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); + if (def[u->val1] <= 4) { + vg_assert(def[u->val1] == u->size); + NOP_no_msg(u); + if (VG_(disassemble)) + VG_(printf)("at %d: delete TESTV on defd arg\n", i); + } + break; + + /* Applies to both values and tags. Propagate Definedness + property through copies. Note that this isn't optional; + we *have* to do this to keep def[] correct. */ + case MOV: + vg_assert(u->tag2 == TempReg); + if (u->tag1 == TempReg) { + if (VGC_IS_SHADOW(u->val1)) { + vg_assert(VGC_IS_SHADOW(u->val2)); + def[u->val2] = def[u->val1]; + } + } + break; + + case PUTV: + vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); + if (def[u->val1] <= 4) { + vg_assert(def[u->val1] == u->size); + u->tag1 = Literal; + u->val1 = 0; + switch (u->size) { + case 4: u->lit32 = 0x00000000; break; + case 2: u->lit32 = 0xFFFF0000; break; + case 1: u->lit32 = 0xFFFFFF00; break; + default: VG_(panic)("vg_cleanup(PUTV)"); + } + if (VG_(disassemble)) + VG_(printf)( + "at %d: propagate definedness into PUTV\n", i); + } + break; + + case STOREV: + vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); + if (def[u->val1] <= 4) { + vg_assert(def[u->val1] == u->size); + u->tag1 = Literal; + u->val1 = 0; + switch (u->size) { + case 4: u->lit32 = 0x00000000; break; + case 2: u->lit32 = 0xFFFF0000; break; + case 1: u->lit32 = 0xFFFFFF00; break; + default: VG_(panic)("vg_cleanup(STOREV)"); + } + if (VG_(disassemble)) + VG_(printf)( + "at %d: propagate definedness into STandV\n", i); + } + break; + + /* Nothing interesting we can do with this, I think. */ + case PUTVF: + break; + + /* Tag handling operations. 
*/ + case TAG2: + vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2)); + vg_assert(u->tag3 == Lit16); + /* Ultra-paranoid "type" checking. */ + switch (u->val3) { + case VgT_ImproveAND4_TQ: case VgT_ImproveAND2_TQ: + case VgT_ImproveAND1_TQ: case VgT_ImproveOR4_TQ: + case VgT_ImproveOR2_TQ: case VgT_ImproveOR1_TQ: + vg_assert(u->tag1 == TempReg && !VGC_IS_SHADOW(u->val1)); + break; + default: + vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); + break; + } + switch (u->val3) { + Int sz; + case VgT_UifU4: + sz = 4; goto do_UifU; + case VgT_UifU2: + sz = 2; goto do_UifU; + case VgT_UifU1: + sz = 1; goto do_UifU; + case VgT_UifU0: + sz = 0; goto do_UifU; + do_UifU: + vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); + vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2)); + if (def[u->val1] <= 4) { + /* UifU. The first arg is defined, so result is + simply second arg. Delete this operation. */ + vg_assert(def[u->val1] == sz); + NOP_no_msg(u); + if (VG_(disassemble)) + VG_(printf)( + "at %d: delete UifU%d due to defd arg1\n", + i, sz); + } + else + if (def[u->val2] <= 4) { + /* UifU. The second arg is defined, so result is + simply first arg. Copy to second. */ + vg_assert(def[u->val2] == sz); + u->opcode = MOV; + u->size = 4; + u->tag3 = NoValue; + def[u->val2] = def[u->val1]; + if (VG_(disassemble)) + VG_(printf)( + "at %d: change UifU%d to MOV due to defd" + " arg2\n", + i, sz); + } + break; + case VgT_ImproveAND4_TQ: + sz = 4; goto do_ImproveAND; + case VgT_ImproveAND1_TQ: + sz = 1; goto do_ImproveAND; + do_ImproveAND: + /* Implements Q = T OR Q. So if Q is entirely defined, + ie all 0s, we get MOV T, Q. 
*/ + if (def[u->val2] <= 4) { + vg_assert(def[u->val2] == sz); + u->size = 4; /* Regardless of sz */ + u->opcode = MOV; + u->tag3 = NoValue; + def[u->val2] = VGC_UNDEF; + if (VG_(disassemble)) + VG_(printf)( + "at %d: change ImproveAND%d_TQ to MOV due " + "to defd arg2\n", + i, sz); + } + break; + default: + goto unhandled; + } + break; + + case TAG1: + vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); + if (def[u->val1] > 4) break; + /* We now know that the arg to the op is entirely defined. + If the op changes the size of the arg, we must replace + it with a SETV at the new size. If it doesn't change + the size, we can delete it completely. */ + switch (u->val3) { + /* Maintain the same size ... */ + case VgT_Left4: + vg_assert(def[u->val1] == 4); + NOP_tag1_op(u); + break; + case VgT_PCast11: + vg_assert(def[u->val1] == 1); + NOP_tag1_op(u); + break; + /* Change size ... */ + case VgT_PCast40: + vg_assert(def[u->val1] == 4); + SETV_tag1_op(u,0); + def[u->val1] = 0; + break; + case VgT_PCast14: + vg_assert(def[u->val1] == 1); + SETV_tag1_op(u,4); + def[u->val1] = 4; + break; + case VgT_PCast12: + vg_assert(def[u->val1] == 1); + SETV_tag1_op(u,2); + def[u->val1] = 2; + break; + case VgT_PCast10: + vg_assert(def[u->val1] == 1); + SETV_tag1_op(u,0); + def[u->val1] = 0; + break; + case VgT_PCast02: + vg_assert(def[u->val1] == 0); + SETV_tag1_op(u,2); + def[u->val1] = 2; + break; + default: + goto unhandled; + } + if (VG_(disassemble)) + VG_(printf)( + "at %d: delete TAG1 %s due to defd arg\n", + i, VG_(nameOfTagOp(u->val3))); + break; + + default: + unhandled: + /* We don't know how to handle this uinstr. Be safe, and + set to VGC_VALUE or VGC_UNDEF all temps written by it. */ + k = getTempUsage(u, &tempUse[0]); + vg_assert(k <= 3); + for (j = 0; j < k; j++) { + t = tempUse[j].tempNo; + vg_assert(t >= 0 && t < n_temps); + if (!tempUse[j].isWrite) { + /* t is read; ignore it. 
*/ + if (0&& VGC_IS_SHADOW(t) && def[t] <= 4) + VG_(printf)("ignoring def %d at %s %s\n", + def[t], + VG_(nameUOpcode)(True, u->opcode), + (u->opcode == TAG1 || u->opcode == TAG2) + ? VG_(nameOfTagOp)(u->val3) + : (Char*)""); + } else { + /* t is written; better nullify it. */ + def[t] = VGC_IS_SHADOW(t) ? VGC_UNDEF : VGC_VALUE; + } + } + } + } + + VG_(jitfree)(def); +} + + +/* Top level post-instrumentation cleanup function. */ +static void vg_cleanup ( UCodeBlock* cb ) +{ + vg_propagate_definedness ( cb ); + vg_delete_redundant_SETVs ( cb ); +} + + +/*------------------------------------------------------------*/ +/*--- Main entry point for the JITter. ---*/ +/*------------------------------------------------------------*/ + +/* Translate the basic block beginning at orig_addr, placing the + translation in a vg_malloc'd block, the address and size of which + are returned in trans_addr and trans_size. Length of the original + block is also returned in orig_size. If the latter three are NULL, + this call is being done for debugging purposes, in which case (a) + throw away the translation once it is made, and (b) produce a load + of debugging output. +*/ +void VG_(translate) ( Addr orig_addr, + UInt* orig_size, + Addr* trans_addr, + UInt* trans_size ) +{ + Int n_disassembled_bytes, final_code_size; + Bool debugging_translation; + UChar* final_code; + UCodeBlock* cb; + + VGP_PUSHCC(VgpTranslate); + debugging_translation + = orig_size == NULL || trans_addr == NULL || trans_size == NULL; + + dis = True; + dis = debugging_translation; + + /* Check if we're being asked to jump to a silly address, and if so + record an error message before potentially crashing the entire + system. 
*/ + if (VG_(clo_instrument) && !debugging_translation && !dis) { + Addr bad_addr; + Bool ok = VGM_(check_readable) ( orig_addr, 1, &bad_addr ); + if (!ok) { + VG_(record_jump_error)(bad_addr); + } + } + + /* if (VG_(overall_in_count) >= 4800) dis=True; */ + if (VG_(disassemble)) + VG_(printf)("\n"); + if (0 || dis + || (VG_(overall_in_count) > 0 && + (VG_(overall_in_count) % 1000 == 0))) { + if (0&& (VG_(clo_verbosity) > 1 || dis)) + VG_(message)(Vg_UserMsg, + "trans# %d, bb# %lu, in %d, out %d", + VG_(overall_in_count), + VG_(bbs_done), + VG_(overall_in_osize), VG_(overall_in_tsize), + orig_addr ); + } + cb = allocCodeBlock(); + + /* Disassemble this basic block into cb. */ + VGP_PUSHCC(VgpToUCode); + n_disassembled_bytes = VG_(disBB) ( cb, orig_addr ); + VGP_POPCC; + /* dis=True; */ + /* if (0&& VG_(translations_done) < 617) */ + /* dis=False; */ + /* Try and improve the code a bit. */ + if (VG_(clo_optimise)) { + VGP_PUSHCC(VgpImprove); + vg_improve ( cb ); + if (VG_(disassemble)) + VG_(ppUCodeBlock) ( cb, "Improved code:" ); + VGP_POPCC; + } + /* dis=False; */ + /* Add instrumentation code. */ + if (VG_(clo_instrument)) { + VGP_PUSHCC(VgpInstrument); + cb = vg_instrument(cb); + VGP_POPCC; + if (VG_(disassemble)) + VG_(ppUCodeBlock) ( cb, "Instrumented code:" ); + if (VG_(clo_cleanup)) { + VGP_PUSHCC(VgpCleanup); + vg_cleanup(cb); + VGP_POPCC; + if (VG_(disassemble)) + VG_(ppUCodeBlock) ( cb, "Cleaned-up instrumented code:" ); + } + } + + /* Allocate registers. */ + VGP_PUSHCC(VgpRegAlloc); + cb = vg_do_register_allocation ( cb ); + VGP_POPCC; + /* dis=False; */ + /* + if (VG_(disassemble)) + VG_(ppUCodeBlock) ( cb, "After Register Allocation:"); + */ + + VGP_PUSHCC(VgpFromUcode); + /* NB final_code is allocated with VG_(jitmalloc), not VG_(malloc) + and so must be VG_(jitfree)'d. */ + final_code = VG_(emit_code)(cb, &final_code_size ); + VGP_POPCC; + freeCodeBlock(cb); + + if (debugging_translation) { + /* Only done for debugging -- throw away final result. 
*/ + VG_(jitfree)(final_code); + } else { + /* Doing it for real -- return values to caller. */ + *orig_size = n_disassembled_bytes; + *trans_addr = (Addr)final_code; + *trans_size = final_code_size; + } + VGP_POPCC; +} + +/*--------------------------------------------------------------------*/ +/*--- end vg_translate.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_transtab.c b/vg_transtab.c new file mode 100644 index 000000000..b93fe40df --- /dev/null +++ b/vg_transtab.c @@ -0,0 +1,693 @@ + +/*--------------------------------------------------------------------*/ +/*--- Management of the translation table and cache. ---*/ +/*--- vg_transtab.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" +#include "vg_constants.h" + + +/*------------------------------------------------------------*/ +/*--- Management of the LRU-based translation table+cache. 
---*/ +/*------------------------------------------------------------*/ + +/* These sizes were set up so as to be able to debug large KDE 3 + applications (are there any small ones?) without excessive amounts + of code retranslation. */ + +/* Size of the translation cache, in bytes. */ +#define VG_TC_SIZE 16000000 + +/* Do a LRU pass when the translation cache becomes this full. */ +#define VG_TC_LIMIT_PERCENT 95 + +/* When doing an LRU pass, reduce TC fullness to this level. */ +#define VG_TC_TARGET_PERCENT 85 + +/* Number of entries in the translation table. This must be a prime + number in order to make the hashing work properly. */ +#define VG_TT_SIZE /*19997*/ /*29989*/ /*50497*/ /*75083*/ 100129 + +/* Do an LRU pass when the translation table becomes this full. */ +#define VG_TT_LIMIT_PERCENT /*67*/ 80 + +/* When doing an LRU pass, reduce TT fullness to this level. */ +#define VG_TT_TARGET_PERCENT /*60*/ 70 + +/* The number of age steps we track. 0 means the current epoch, + N_EPOCHS-1 means used the epoch N_EPOCHS-1 or more ago. */ +#define VG_N_EPOCHS /*2000*/ 4000 + +/* This TT entry is empty. */ +#define VG_TTE_EMPTY ((Addr)1) +/* This TT entry has been deleted. */ +#define VG_TTE_DELETED ((Addr)3) + +/* The TC. This used to be statically allocated, but that forces many + SecMap arrays to be pointlessly allocated at startup, bloating the + process size by about 22M and making startup slow. So now we + dynamically allocate it at startup time. + was: static UChar vg_tc[VG_TC_SIZE]; +*/ +static UChar* vg_tc = NULL; + +/* Count of bytes used in the TC. */ +static Int vg_tc_used = 0; + +/* The TT. Like TC, for the same reason, is dynamically allocated at + startup. + was: static TTEntry vg_tt[VG_TT_SIZE]; +*/ +static TTEntry* vg_tt = NULL; + +/* Count of non-empty, non-deleted TT entries. */ +static Int vg_tt_used = 0; + +/* Fast helper for the TT. 
A direct-mapped cache which holds a + pointer to a TT entry which may or may not be the correct one, but + which we hope usually is. This array is referred to directly from + vg_dispatch.S. */ +Addr VG_(tt_fast)[VG_TT_FAST_SIZE]; + +/* For reading/writing the misaligned TT-index word at immediately + preceding every translation in TC. */ +#define VG_READ_MISALIGNED_WORD(aaa) (*((UInt*)(aaa))) +#define VG_WRITE_MISALIGNED_WORD(aaa,vvv) *((UInt*)(aaa)) = ((UInt)(vvv)) + +/* Used for figuring out an age threshold for translations. */ +static Int vg_bytes_in_epoch[VG_N_EPOCHS]; +static Int vg_entries_in_epoch[VG_N_EPOCHS]; + + +/* Just so these counts can be queried without making them globally + visible. */ +void VG_(get_tt_tc_used) ( UInt* tt_used, UInt* tc_used ) +{ + *tt_used = vg_tt_used; + *tc_used = vg_tc_used; +} + + +/* Do the LRU thing on TT/TC, clearing them back to the target limits + if they are over the threshold limits. +*/ +void VG_(maybe_do_lru_pass) ( void ) +{ + Int i, j, r, w, thresh, ttno; + TTEntry* tte; + + const Int tc_limit = (Int)((VG_TC_SIZE * VG_TC_LIMIT_PERCENT) / 100.0); + const Int tt_limit = (Int)((VG_TT_SIZE * VG_TT_LIMIT_PERCENT) / 100.0); + const Int tc_target = (Int)((VG_TC_SIZE * VG_TC_TARGET_PERCENT) / 100.0); + const Int tt_target = (Int)((VG_TT_SIZE * VG_TT_TARGET_PERCENT) / 100.0); + + /* Decide quickly if we need to do an LRU pass ? */ + if (vg_tc_used <= tc_limit && vg_tt_used <= tt_limit) + return; + + VGP_PUSHCC(VgpDoLRU); + /* + VG_(printf)( + "limits: tc_limit %d, tt_limit %d, tc_target %d, tt_target %d\n", + tc_limit, tt_limit, tc_target, tt_target); + */ + + if (VG_(clo_verbosity) > 2) + VG_(printf)(" pre-LRU: tc %d (target %d), tt %d (target %d)\n", + vg_tc_used, tc_target, vg_tt_used, tt_target); + + /* Yes we do. Figure out what threshold age is required in order to + shrink both the TC and TT occupancy below TC_TARGET_PERCENT and + TT_TARGET_PERCENT respectively. 
*/ + + VG_(number_of_lrus)++; + + /* Count the number of TC bytes and TT entries in each epoch. */ + for (i = 0; i < VG_N_EPOCHS; i++) + vg_bytes_in_epoch[i] = vg_entries_in_epoch[i] = 0; + + for (i = 0; i < VG_TT_SIZE; i++) { + if (vg_tt[i].orig_addr == VG_TTE_EMPTY || + vg_tt[i].orig_addr == VG_TTE_DELETED) continue; + j = vg_tt[i].mru_epoch; + vg_assert(j <= VG_(current_epoch)); + j = VG_(current_epoch) - j; + if (j >= VG_N_EPOCHS) j = VG_N_EPOCHS-1; + vg_assert(0 <= j && j < VG_N_EPOCHS); + /* Greater j now means older. */ + vg_entries_in_epoch[j]++; + vg_bytes_in_epoch[j] += 4+vg_tt[i].trans_size; + } + + /* + for (i = 0; i < VG_N_EPOCHS; i++) + VG_(printf)("epoch %d: ents %d, bytes %d\n", + i, vg_entries_in_epoch[i], vg_bytes_in_epoch[i]); + */ + + /* Cumulatise. Make vg_{bytes,entries}_in_epoch[n] contain the + counts for itself and all younger epochs. */ + for (i = 1; i < VG_N_EPOCHS; i++) { + vg_entries_in_epoch[i] += vg_entries_in_epoch[i-1]; + vg_bytes_in_epoch[i] += vg_bytes_in_epoch[i-1]; + } + + for (thresh = 0; thresh < VG_N_EPOCHS; thresh++) { + if (vg_entries_in_epoch[thresh] > tt_target + || vg_bytes_in_epoch[thresh] >= tc_target) + break; + } + + if (VG_(clo_verbosity) > 2) + VG_(printf)( + " LRU: discard translations %d or more epochs since last use\n", + thresh + ); + + thresh = VG_(current_epoch) - thresh; + + /* Ok, so we will hit our targets if we retain all entries most + recently used at most thresh epochs ago. Traverse the TT and + mark such entries as deleted. 
*/ + for (i = 0; i < VG_TT_SIZE; i++) { + if (vg_tt[i].orig_addr == VG_TTE_EMPTY || + vg_tt[i].orig_addr == VG_TTE_DELETED) continue; + if (vg_tt[i].mru_epoch <= thresh) { + vg_tt[i].orig_addr = VG_TTE_DELETED; + vg_tt_used--; + VG_(this_epoch_out_count) ++; + VG_(this_epoch_out_osize) += vg_tt[i].orig_size; + VG_(this_epoch_out_tsize) += vg_tt[i].trans_size; + VG_(overall_out_count) ++; + VG_(overall_out_osize) += vg_tt[i].orig_size; + VG_(overall_out_tsize) += vg_tt[i].trans_size; + } + } + + vg_assert(vg_tt_used >= 0); + vg_assert(vg_tt_used <= tt_target); + + /* Now compact the TC, sliding live entries downwards to fill spaces + left by deleted entries. In this loop, r is the offset in TC of + the current translation under consideration, and w is the next + allocation point. */ + r = w = 0; + while (True) { + if (r >= vg_tc_used) break; + /* The first four bytes of every translation contain the index + of its TT entry. The TT entry's .trans_addr field points at + the start of the code proper, not at this 4-byte index, so + that we don't constantly have to keep adding 4 in the main + lookup/dispatch loop. */ + ttno = VG_READ_MISALIGNED_WORD(&vg_tc[r]); + vg_assert(ttno >= 0 && ttno < VG_TT_SIZE); + tte = & vg_tt[ ttno ]; + vg_assert(tte->orig_addr != VG_TTE_EMPTY); + if (tte->orig_addr != VG_TTE_DELETED) { + /* We want to keep this one alive. */ + /* Sanity check the pointer back to TC. */ + vg_assert(tte->trans_addr == (Addr)&vg_tc[r+4]); + for (i = 0; i < 4+tte->trans_size; i++) + vg_tc[w+i] = vg_tc[r+i]; + tte->trans_addr = (Addr)&vg_tc[w+4]; + w += 4+tte->trans_size; + } + r += 4+tte->trans_size; + } + /* should have traversed an exact number of translations, with no + slop at the end. */ + vg_assert(w <= r); + vg_assert(r == vg_tc_used); + vg_assert(w <= r); + vg_assert(w <= tc_target); + vg_tc_used = w; + + /* Invalidate the fast cache, since it is now out of date. It will get + reconstructed incrementally when the client resumes. 
   */
   VG_(invalidate_tt_fast)();

   if (VG_(clo_verbosity) > 2)
      VG_(printf)("post-LRU: tc %d (target %d), tt %d (target %d)\n",
                  vg_tc_used, tc_target, vg_tt_used, tt_target);

   if (VG_(clo_verbosity) > 1)
      VG_(message)(Vg_UserMsg,
         "epoch %d (bb %luk): thresh %d, "
         "out %d (%dk -> %dk), new TT %d, TC %dk",
         VG_(current_epoch),
         VG_(bbs_done) / 1000,
         VG_(current_epoch) - thresh,
         VG_(this_epoch_out_count),
         VG_(this_epoch_out_osize) / 1000,
         VG_(this_epoch_out_tsize) / 1000,
         vg_tt_used, vg_tc_used / 1000
      );

   /* Reconstruct the SMC detection structures. */

   VGP_POPCC;
}


/* Do a sanity check on TT/TC.
   Walks every TT entry, recounting live entries and their TC
   footprint (4 back-pointer bytes + translation size each), and
   cross-checks every live entry's TC back pointer.  Asserts that the
   recounts agree with the running totals vg_tt_used / vg_tc_used.
*/
void VG_(sanity_check_tc_tt) ( void )
{
   Int      i, counted_entries, counted_bytes;
   TTEntry* tte;
   counted_entries = 0;
   counted_bytes   = 0;
   for (i = 0; i < VG_TT_SIZE; i++) {
      tte = &vg_tt[i];
      if (tte->orig_addr == VG_TTE_EMPTY) continue;
      if (tte->orig_addr == VG_TTE_DELETED) continue;
      vg_assert(tte->mru_epoch >= 0);
      vg_assert(tte->mru_epoch <= VG_(current_epoch));
      counted_entries++;
      /* +4 accounts for the TT-index word preceding each translation
         in TC. */
      counted_bytes += 4+tte->trans_size;
      /* trans_addr points past the 4-byte back pointer, so the
         earliest legal value is &vg_tc[4]. */
      vg_assert(tte->trans_addr >= (Addr)&vg_tc[4]);
      vg_assert(tte->trans_addr < (Addr)&vg_tc[vg_tc_used]);
      /* The word immediately before the code must hold this entry's
         own TT index. */
      vg_assert(VG_READ_MISALIGNED_WORD(tte->trans_addr-4) == i);
   }
   vg_assert(counted_entries == vg_tt_used);
   vg_assert(counted_bytes == vg_tc_used);
}


/* Add this already-filled-in entry to the TT.  Assumes that the
   relevant code chunk has been placed in TC, along with a dummy back
   pointer, which is inserted here.
   Uses open-addressed hashing with linear probing; panics on a
   duplicate orig_addr.  VG_TT_SIZE is prime, so probing visits every
   slot, and the LRU pass keeps occupancy below 100%, so the probe
   loop terminates.
*/
extern void VG_(add_to_trans_tab) ( TTEntry* tte )
{
   Int i;
   /*
   VG_(printf)("add_to_trans_tab(%d) %x %d %x %d\n",
               vg_tt_used, tte->orig_addr, tte->orig_size,
               tte->trans_addr, tte->trans_size);
   */
   vg_assert(tte->orig_addr != VG_TTE_DELETED
             && tte->orig_addr != VG_TTE_EMPTY);
   /* Hash to get initial probe point.
   */
   i = ((UInt)(tte->orig_addr)) % VG_TT_SIZE;
   while (True) {
      if (vg_tt[i].orig_addr == tte->orig_addr)
         VG_(panic)("add_to_trans_tab: duplicate");
      if (vg_tt[i].orig_addr == VG_TTE_DELETED ||
          vg_tt[i].orig_addr == VG_TTE_EMPTY) {
         /* Put it here, and set the back pointer. */
         vg_tt[i] = *tte;
         VG_WRITE_MISALIGNED_WORD(tte->trans_addr-4, i);
         vg_tt_used++;
         return;
      }
      i++;
      if (i == VG_TT_SIZE) i = 0;   /* wrap around */
   }
}


/* Copy a new translation's code into TC, leaving a 4-byte hole for
   the back pointer, and returning a pointer to the code proper (not
   the hole) in TC.
   Panics if the translation (plus its 4-byte header) does not fit in
   the remaining TC space; VG_(maybe_do_lru_pass) is expected to keep
   enough space free.
*/
Addr VG_(copy_to_transcache) ( Addr trans_addr, Int trans_size )
{
   Int i;
   Addr ret_addr;
   if (4+trans_size > VG_TC_SIZE-vg_tc_used)
      VG_(panic)("copy_to_transcache: not enough free space?!");
   /* Leave a hole for the back pointer to the TT entry. */
   vg_tc_used += 4;
   /* ret_addr is the first byte after the hole; the caller's TT entry
      will record this as trans_addr. */
   ret_addr = (Addr)&vg_tc[vg_tc_used];
   for (i = 0; i < trans_size; i++)
      vg_tc[vg_tc_used+i] = ((UChar*)trans_addr)[i];
   vg_tc_used += trans_size;
   return ret_addr;
}


/* Invalidate the tt_fast cache, for whatever reason.  Tricky.  We
   have to find a TTE_EMPTY slot to point all entries at. */
void VG_(invalidate_tt_fast)( void )
{
   Int i, j;
   /* Find any empty slot; occupancy is kept below 100% so one must
      exist. */
   for (i = 0; i < VG_TT_SIZE && vg_tt[i].orig_addr != VG_TTE_EMPTY; i++)
      ;
   vg_assert(i < VG_TT_SIZE
             && vg_tt[i].orig_addr == VG_TTE_EMPTY);
   /* Pointing every fast-cache entry at an empty TTE guarantees every
      fast lookup misses and falls through to the slow path. */
   for (j = 0; j < VG_TT_FAST_SIZE; j++)
      VG_(tt_fast)[j] = (Addr)&vg_tt[i];
}


/* Search TT to find the translated address of the supplied original,
   or NULL if not found.  This routine is used when we miss in
   VG_(tt_fast).
   Linear-probe lookup matching the probe sequence used by
   VG_(add_to_trans_tab); an EMPTY slot terminates the probe chain
   (DELETED slots do not, so chains survive LRU deletions).
*/
static __inline__ TTEntry* search_trans_table ( Addr orig_addr )
{
   //static Int queries = 0;
   //static Int probes = 0;
   Int i;
   /* Hash to get initial probe point.
   */
   // if (queries == 10000) {
   //    VG_(printf)("%d queries, %d probes\n", queries, probes);
   //    queries = probes = 0;
   //}
   //queries++;
   i = ((UInt)orig_addr) % VG_TT_SIZE;
   while (True) {
      //probes++;
      if (vg_tt[i].orig_addr == orig_addr)
         return &vg_tt[i];
      if (vg_tt[i].orig_addr == VG_TTE_EMPTY)
         return NULL;
      i++;
      if (i == VG_TT_SIZE) i = 0;
   }
}


/* Find the translation address for a given (original) code address.
   If found, update VG_(tt_fast) so subsequent lookups are fast.  If
   no translation can be found, return zero.  This routine is (the
   only one) called from vg_run_innerloop.  */
Addr VG_(search_transtab) ( Addr original_addr )
{
   TTEntry* tte;
   VGP_PUSHCC(VgpSlowFindT);
   tte = search_trans_table ( original_addr );
   if (tte == NULL) {
      /* We didn't find it.  vg_run_innerloop will have to request a
         translation. */
      VGP_POPCC;
      return (Addr)0;
   } else {
      /* Found it.  Put the search result into the fast cache now.
         Also set the mru_epoch to mark this translation as used. */
      UInt cno = (UInt)original_addr & VG_TT_FAST_MASK;
      VG_(tt_fast)[cno] = (Addr)tte;
      /* We only reach here on a fast-cache miss, so count it. */
      VG_(tt_fast_misses)++;
      tte->mru_epoch = VG_(current_epoch);
      VGP_POPCC;
      return tte->trans_addr;
   }
}


/*------------------------------------------------------------*/
/*--- Detecting and handling self-modifying code.          ---*/
/*------------------------------------------------------------*/

/* This mechanism uses two data structures:

   vg_oldmap -- array[64k] of Bool, which approximately records
   parts of the address space corresponding to code for which
   a translation exists in the translation table.  vg_oldmap is
   consulted at each write, to determine whether that write might
   be writing a code address; if so, the program is stopped at
   the next jump, and the corresponding translations are invalidated.

   Precise semantics:  vg_oldmap[(a >> 8) & 0xFFFF] is true for all
   addresses a containing a code byte which has been translated.
So + it acts kind-of like a direct-mapped cache with 64k entries. + + The second structure is vg_CAW, a small array of addresses at which + vg_oldmap indicates a code write may have happened. This is + (effectively) checked at each control transfer (jump), so that + translations can be discarded before going on. An array is + somewhat overkill, since it strikes me as very unlikely that a + single basic block will do more than one code write. Nevertheless + ... + + ToDo: make this comment up-to-date. +*/ + + +/* Definitions for the self-modifying-code detection cache, intended + as a fast check which clears the vast majority of writes. */ + +#define VG_SMC_CACHE_HASH(aaa) \ + ((((UInt)a) >> VG_SMC_CACHE_SHIFT) & VG_SMC_CACHE_MASK) + +Bool VG_(smc_cache)[VG_SMC_CACHE_SIZE]; + + +/* Definitions for the fallback mechanism, which, more slowly, + provides a precise record of which words in the address space + belong to original code. */ + +typedef struct { UChar chars[2048]; } VgSmcSecondary; + +static VgSmcSecondary* vg_smc_primary[65536]; + +static VgSmcSecondary* vg_smc_new_secondary ( void ) +{ + Int i; + VgSmcSecondary* sec + = VG_(malloc) ( VG_AR_PRIVATE, sizeof(VgSmcSecondary) ); + for (i = 0; i < 2048; i++) + sec->chars[i] = 0; + return sec; +} + +#define GET_BIT_ARRAY(arr,indx) \ + (1 & ( ((UChar*)arr)[((UInt)indx) / 8] \ + >> ( ((UInt)indx) % 8) ) ) + +#define SET_BIT_ARRAY(arr,indx) \ + ((UChar*)arr)[((UInt)indx) / 8] |= (1 << ((UInt)indx) % 8) + + +/* Finally, a place to record the original-code-write addresses + detected in a basic block. */ + +#define VG_ORIGWRITES_SIZE 10 + +static Addr vg_origwrites[VG_ORIGWRITES_SIZE]; +static Int vg_origwrites_used; + + +/* Call here to check a written address. */ + +void VG_(smc_check4) ( Addr a ) +{ + UInt bit_index; + VgSmcSecondary* smc_secondary; + +# if VG_SMC_FASTCHECK_IN_C + VG_(smc_total_check4s)++; + + /* Try the fast check first. 
*/ + if (VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] == False) return; +# endif + + VG_(smc_cache_passed)++; + + /* Need to do a slow check. */ + smc_secondary = vg_smc_primary[a >> 16]; + if (smc_secondary == NULL) return; + + bit_index = (a & 0xFFFF) >> 2; + if (GET_BIT_ARRAY(smc_secondary->chars, bit_index) == 0) return; + + VG_(smc_fancy_passed)++; + + /* Detected a Real Live write to code which has been translated. + Note it. */ + if (vg_origwrites_used == VG_ORIGWRITES_SIZE) + VG_(panic)("VG_ORIGWRITES_SIZE is too small; " + "increase and recompile."); + vg_origwrites[vg_origwrites_used] = a; + vg_origwrites_used++; + + VG_(message)(Vg_DebugMsg, "self-modifying-code write at %p", a); + + /* Force an exit before the next basic block, so the translation + cache can be flushed appropriately. */ + VG_(dispatch_ctr_SAVED) = VG_(dispatch_ctr); + VG_(dispatch_ctr) = 1; + VG_(interrupt_reason) = VG_Y_SMC; +} + + +/* Mark an address range as containing an original translation, + updating both the fast-check cache and the slow-but-correct data + structure. +*/ +void VG_(smc_mark_original) ( Addr orig_addr, Int orig_size ) +{ + Addr a; + VgSmcSecondary* smc_secondary; + UInt bit_index; + + for (a = orig_addr; a < orig_addr+orig_size; a++) { + + VG_(smc_cache)[VG_SMC_CACHE_HASH(a)] = True; + + smc_secondary = vg_smc_primary[a >> 16]; + if (smc_secondary == NULL) + smc_secondary = + vg_smc_primary[a >> 16] = vg_smc_new_secondary(); + + bit_index = (a & 0xFFFF) >> 2; + SET_BIT_ARRAY(smc_secondary->chars, bit_index); + } +} + + +/* Discard any translations whose original code overlaps with the + range w_addr .. w_addr+3 inclusive. 
+*/ +__attribute__ ((unused)) +static void discard_translations_bracketing ( Addr w_addr ) +{ +# if 0 + Int i, rd, wr; + Addr o_start, o_end; + TTEntry* tt; + + for (i = 0; i < VG_TRANSTAB_SLOW_SIZE; i++) { + tt = vg_transtab[i]; + wr = 0; + for (rd = 0; rd < vg_transtab_used[i]; rd++) { + o_start = tt[rd].orig_addr; + o_end = o_start + tt[rd].orig_size; + if (w_addr > o_end || (w_addr+3) < o_start) { + /* No collision possible; keep this translation */ + VG_(smc_mark_original) ( tt[rd].orig_addr, tt[rd].orig_size ); + if (wr < rd) vg_transtab[wr] = vg_transtab[rd]; + wr++; + } else { + /* Possible collision; discard. */ + vg_smc_discards++; + VG_(message) (Vg_DebugMsg, + "discarding translation of %p .. %p", + tt[rd].orig_addr, + tt[rd].orig_addr + tt[rd].orig_size - 1); + VG_(free)((void*)tt[rd].trans_addr); + } + } + vg_transtab_used[i] = wr; + } +# endif +} + + +/* Top-level function in charge of discarding out-of-date translations + following the discovery of a (potential) original-code-write. +*/ +void VG_(flush_transtab) ( void ) +{ +# if 0 + Addr w_addr; + Int i, j; + + /* We shouldn't be here unless a code write was detected. */ + vg_assert(vg_origwrites_used > 0); + + /* Instead of incrementally fixing up the translation table cache, + just invalidate the whole darn thing. Pray this doesn't happen + very often :) */ + for (i = 0; i < VG_TRANSTAB_CACHE_SIZE; i++) + VG_(transtab_cache_orig)[i] = + VG_(transtab_cache_trans)[i] = (Addr)0; + + /* Clear out the fast cache; discard_translations_bracketing + reconstructs it. */ + for (i = 0; i < VG_SMC_CACHE_SIZE; i++) + VG_(smc_cache)[i] = False; + + /* And also clear the slow-but-correct table. */ + for (i = 0; i < 65536; i++) { + VgSmcSecondary* sec = vg_smc_primary[i]; + if (sec) + for (j = 0; j < 2048; j++) + sec->chars[j] = 0; + } + + /* This doesn't need to be particularly fast, since we (presumably) + don't have to handle particularly frequent writes to code + addresses. 
   */
   while (vg_origwrites_used > 0) {
      vg_origwrites_used--;
      w_addr = vg_origwrites[vg_origwrites_used];
      discard_translations_bracketing ( w_addr );
   }

   vg_assert(vg_origwrites_used == 0);
#  endif
}


/*------------------------------------------------------------*/
/*--- Initialisation.                                      ---*/
/*------------------------------------------------------------*/

/* One-time startup initialisation of the translation table, the
   translation cache, and the self-modifying-code detection
   structures.  TT and TC are mmap'd here rather than statically
   allocated, to avoid bloating the process image (see the comments
   at their definitions above).  Must be called exactly once: the
   asserts require vg_tc/vg_tt to still be NULL. */
void VG_(init_transtab_and_SMC) ( void )
{
   Int i;

   /* Allocate the translation table and translation cache. */
   vg_assert(vg_tc == NULL);
   vg_tc = VG_(get_memory_from_mmap) ( VG_TC_SIZE * sizeof(UChar) );
   vg_assert(vg_tc != NULL);

   vg_assert(vg_tt == NULL);
   vg_tt = VG_(get_memory_from_mmap) ( VG_TT_SIZE * sizeof(TTEntry) );
   vg_assert(vg_tt != NULL);

   /* The main translation table is empty. */
   vg_tt_used = 0;
   for (i = 0; i < VG_TT_SIZE; i++) {
      vg_tt[i].orig_addr = VG_TTE_EMPTY;
   }

   /* The translation table's fast cache is empty.  Point all entries
      at the first TT entry, which is, of course, empty. */
   for (i = 0; i < VG_TT_FAST_SIZE; i++)
      VG_(tt_fast)[i] = (Addr)(&vg_tt[0]);

   /* No part of the address space has any translations. */
   for (i = 0; i < 65536; i++)
      vg_smc_primary[i] = NULL;

   /* ... and the associated fast-check cache reflects this. */
   for (i = 0; i < VG_SMC_CACHE_SIZE; i++)
      VG_(smc_cache)[i] = False;

   /* Finally, no original-code-writes have been recorded. */
   vg_origwrites_used = 0;
}

/*--------------------------------------------------------------------*/
/*--- end                                             vg_transtab.c ---*/
/*--------------------------------------------------------------------*/
diff --git a/vg_unsafe.h b/vg_unsafe.h
new file mode 100644
index 000000000..9fa91539a
--- /dev/null
+++ b/vg_unsafe.h
@@ -0,0 +1,86 @@

/*--------------------------------------------------------------------*/
/*--- A header file for making sense of syscalls.  Unsafe in the  ---*/
/*--- sense that we don't call any functions mentioned herein.
---*/ +/*--- vg_unsafe.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + + +/* These includes are only used for making sense of the args for + system calls. 
*/ +#include /* for system call numbers */ +#include /* for PROT_* */ +#include /* for uname */ +#include /* for struct timeval & struct timezone */ +#include /* for the SYS_* constants */ +#include /* for struct rlimit */ +#include /* for struct shmid_ds & struct ipc_perm */ +#include /* for struct msghdr */ +#include /* for sockaddr_un */ +#include /* for struct ifreq et al */ +#include /* for struct arpreq */ +#include /* for struct rtentry */ + +#include /* for ISDN ioctls */ +#include /* for struct module */ +#include /* for the SG_* ioctls */ +#include /* for struct sched_param */ +#include /* for struct __sysctl_args */ + + +#define __USE_LARGEFILE64 +#include /* for struct stat */ +#undef __USE_LARGEFILE64 + +#include /* for stuff for dealing with ioctl :( */ +#include /* for various soundcard ioctl constants :( */ + +#include +#include + +/* 2.2 stuff ... */ +#include + +/* Both */ +#include +#include /* for struct tms */ + +/* 2.0 at least, for gid_t and loff_t */ +#include + +#include + +#include + +#include + + +/*--------------------------------------------------------------------*/ +/*--- end vg_unsafe.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_valgrinq_dummy.c b/vg_valgrinq_dummy.c new file mode 100644 index 000000000..5b09ddb0f --- /dev/null +++ b/vg_valgrinq_dummy.c @@ -0,0 +1,44 @@ + +/*--------------------------------------------------------------------*/ +/*--- Used to make a dummy valgrinq.so, which does nothing at all. ---*/ +/*--- vg_valgrinq_dummy.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. 
+ + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +/* For the rationale behind this file, look at + VG_(mash_LD_PRELOAD_string) in vg_main.c. */ + +/* Remember not to use a variable of this name in any program you want + to debug :-) */ +int dont_mess_with_the_RSCDS = 0; + +/* If you are bored, perhaps have a look at http://www.rscds.org. */ + +/*--------------------------------------------------------------------*/ +/*--- end vg_valgrinq_dummy.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_version.h b/vg_version.h new file mode 100644 index 000000000..b58a45b9f --- /dev/null +++ b/vg_version.h @@ -0,0 +1 @@ +#define VG_VERSION 20020317 diff --git a/vg_vtagops.c b/vg_vtagops.c new file mode 100644 index 000000000..8502ba5fa --- /dev/null +++ b/vg_vtagops.c @@ -0,0 +1,96 @@ + +/*--------------------------------------------------------------------*/ +/*--- Supporting routines for v-tag operations. 
---*/ +/*--- vg_vtagops.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" +#include "vg_constants.h" + + +/* --------------------------------------------------------------------- + Names of the tag ops. 
   ------------------------------------------------------------------ */

/* Return a static human-readable name for the given v-tag operation,
   for use in disassembly/debug output.  Panics on an unknown value. */
Char* VG_(nameOfTagOp) ( VgTagOp h )
{
   switch (h) {
      case VgT_PCast40:        return "PCast40";
      case VgT_PCast20:        return "PCast20";
      case VgT_PCast10:        return "PCast10";
      case VgT_PCast01:        return "PCast01";
      case VgT_PCast02:        return "PCast02";
      case VgT_PCast04:        return "PCast04";
      case VgT_PCast14:        return "PCast14";
      case VgT_PCast12:        return "PCast12";
      case VgT_PCast11:        return "PCast11";
      case VgT_Left4:          return "Left4";
      case VgT_Left2:          return "Left2";
      case VgT_Left1:          return "Left1";
      case VgT_SWiden14:       return "SWiden14";
      case VgT_SWiden24:       return "SWiden24";
      case VgT_SWiden12:       return "SWiden12";
      case VgT_ZWiden14:       return "ZWiden14";
      case VgT_ZWiden24:       return "ZWiden24";
      case VgT_ZWiden12:       return "ZWiden12";
      case VgT_UifU4:          return "UifU4";
      case VgT_UifU2:          return "UifU2";
      case VgT_UifU1:          return "UifU1";
      case VgT_UifU0:          return "UifU0";
      case VgT_DifD4:          return "DifD4";
      case VgT_DifD2:          return "DifD2";
      case VgT_DifD1:          return "DifD1";
      case VgT_ImproveAND4_TQ: return "ImproveAND4_TQ";
      case VgT_ImproveAND2_TQ: return "ImproveAND2_TQ";
      case VgT_ImproveAND1_TQ: return "ImproveAND1_TQ";
      case VgT_ImproveOR4_TQ:  return "ImproveOR4_TQ";
      case VgT_ImproveOR2_TQ:  return "ImproveOR2_TQ";
      case VgT_ImproveOR1_TQ:  return "ImproveOR1_TQ";
      case VgT_DebugFn:        return "DebugFn";
      default: VG_(panic)("vg_nameOfTagOp");
   }
}


/* ---------------------------------------------------------------------
   Debugging stuff.
   ------------------------------------------------------------------ */

/* Implementation for checking tag values. */

/* NOTE(review): vg_assert(2+2 == 5) always fails, so any call to this
   aborts immediately — it appears to be a deliberate
   not-yet-implemented trap rather than a typo; confirm before use. */
UInt VG_(DebugFn) ( UInt a1, UInt a2 )
{
   vg_assert(2+2 == 5);
   return 0;
}


/*--------------------------------------------------------------------*/
/*--- end                                             vg_vtagops.c ---*/
/*--------------------------------------------------------------------*/