/*--------------------------------------------------------------------*/
/*--- The core dispatch loop, for jumping to a code address.       ---*/
/*---                                       dispatch-amd64-linux.S ---*/
/*--------------------------------------------------------------------*/
/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2009 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
#if defined(VGP_amd64_linux)
#include "pub_core_basics_asm.h"
#include "pub_core_dispatch_asm.h"
#include "pub_core_transtab_asm.h"
#include "libvex_guest_offsets.h" /* for OFFSET_amd64_RIP */
/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- The dispatch loop.  VG_(run_innerloop) is used to    ---*/
/*--- run all translations except no-redir ones.           ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/*----------------------------------------------------*/
/*--- Preamble (set everything up)                 ---*/
/*----------------------------------------------------*/
/* signature:
   UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
*/
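/* Illustrative only -- not part of the original file.  A hedged C
   sketch of how a caller might use this entry point; the variable
   names are hypothetical, but the VG_TRC_* codes are the ones set at
   the exit points below, and a changed guest-state pointer is returned
   as the value itself (see gsp_changed):

      // UWord trc = VG_(run_innerloop)(guest_state, do_profiling);
      // if (trc == VG_TRC_INNER_COUNTERZERO) { ...timeslice used up... }
      // else if (trc == VG_TRC_INNER_FASTMISS) { ...translate %RIP... }
      // else if (trc == VG_TRC_INVARIANT_FAILED) { ...FPU/SSE cw changed... }
      // else { ...trc is the new guest-state ptr / thread return code... }
*/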
.text
.globl VG_(run_innerloop)
.type  VG_(run_innerloop), @function
VG_(run_innerloop):
        /* %rdi holds guest_state */
        /* %rsi holds do_profiling */

        /* ----- entry point to VG_(run_innerloop) ----- */
        pushq   %rbx
        pushq   %rcx
        pushq   %rdx
        pushq   %rsi
        pushq   %rbp
        pushq   %r8
        pushq   %r9
        pushq   %r10
        pushq   %r11
        pushq   %r12
        pushq   %r13
        pushq   %r14
        pushq   %r15
        pushq   %rdi            /* guest_state */

        movq    VG_(dispatch_ctr)@GOTPCREL(%rip), %r15
        movl    (%r15), %r15d
        pushq   %r15

        /* 8(%rsp) holds cached copy of guest_state ptr */
        /* 0(%rsp) holds cached copy of VG_(dispatch_ctr) */
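/* Orientation note (added; derived from the fifteen pushes above):
        0(%rsp)     cached VG_(dispatch_ctr)
        8(%rsp)     guest_state ptr (saved %rdi)
       16(%rsp)..   saved %r15, %r14, ..., %rbx, in reverse push order
      120(%rsp)     return address to the caller
*/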
        /* Set up the guest state pointer */
        movq    %rdi, %rbp

        /* fetch %RIP into %rax */
        movq    OFFSET_amd64_RIP(%rbp), %rax

        /* set host FPU control word to the default mode expected
           by VEX-generated code.  See comments in libvex.h for
           more info. */
        finit
        pushq   $0x027F
        fldcw   (%rsp)
        addq    $8, %rsp

        /* set host SSE control word to the default mode expected
           by VEX-generated code. */
        pushq   $0x1F80
        ldmxcsr (%rsp)
        addq    $8, %rsp
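        /* Added note on the two magic values (standard x87 FPCW / MXCSR
           field encodings, not defined in this file):
              0x027F (FPCW):  all exceptions masked, 53-bit precision,
                              round-to-nearest.
              0x1F80 (MXCSR): all exceptions masked, round-to-nearest,
                              DAZ/FZ off -- the processor's power-on
                              default. */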
        /* set dir flag to known value */
        cld

        /* fall into main loop (the right one) */
        cmpq    $0, %rsi
        je      VG_(run_innerloop__dispatch_unprofiled)
        jmp     VG_(run_innerloop__dispatch_profiled)
        /*NOTREACHED*/
/*----------------------------------------------------*/
/*--- NO-PROFILING (standard) dispatcher           ---*/
/*----------------------------------------------------*/

.align  16
.global VG_(run_innerloop__dispatch_unprofiled)
VG_(run_innerloop__dispatch_unprofiled):
        /* AT ENTRY: %rax is next guest addr, %rbp is possibly
           modified guest state ptr */

        /* Has the guest state pointer been messed with?  If yes, exit.
           Also, set %rcx to be &VG_(tt_fast), some insns before it is
           used, in the hope of getting it off the critical path.  This
           location seems to be optimal on 2.2GHz Athlon64. */
        cmpq    8(%rsp), %rbp
        movq    VG_(tt_fast)@GOTPCREL(%rip), %rcx
        jnz     gsp_changed

        /* save the jump address in the guest state */
        movq    %rax, OFFSET_amd64_RIP(%rbp)

        /* Are we out of timeslice?  If yes, defer to scheduler. */
        subl    $1, 0(%rsp)
        jz      counter_is_zero

        /* try a fast lookup in the translation cache */
        movq    %rax, %rbx              /* next guest addr */
        andq    $VG_TT_FAST_MASK, %rbx  /* entry# */
        shlq    $4, %rbx                /* entry# * sizeof(FastCacheEntry) */
        movq    0(%rcx,%rbx,1), %r10    /* .guest */
        movq    8(%rcx,%rbx,1), %r11    /* .host */
        cmpq    %rax, %r10
        jnz     fast_lookup_failed

        /* Found a match.  Jump to .host. */
        jmp     *%r11
        ud2     /* persuade insn decoders not to speculate past here */
        /* generated code should run, then jump back to
           VG_(run_innerloop__dispatch_unprofiled). */
        /*NOTREACHED*/
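/* Illustrative only -- not part of the original file.  A C sketch of
   the fast lookup above, assuming the 16-byte {guest, host} entry
   layout implied by the shlq $4 and the 0/8 displacements (the field
   names come from the inline comments):

      // typedef struct { Addr guest; Addr host; } FastCacheEntry;
      // UWord           entry = next_guest & VG_TT_FAST_MASK;
      // FastCacheEntry* fce   = &VG_(tt_fast)[entry];
      // if (fce->guest == next_guest)
      //    jump_to(fce->host);              // jmp *%r11
      // else
      //    return VG_TRC_INNER_FASTMISS;    // fast_lookup_failed
*/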
/*----------------------------------------------------*/
/*--- PROFILING dispatcher (can be much slower)    ---*/
/*----------------------------------------------------*/

.align  16
.global VG_(run_innerloop__dispatch_profiled)
VG_(run_innerloop__dispatch_profiled):
        /* AT ENTRY: %rax is next guest addr, %rbp is possibly
           modified guest state ptr */

        /* Has the guest state pointer been messed with?  If yes, exit.
           Also, set %rcx to be &VG_(tt_fast), some insns before it is
           used, in the hope of getting it off the critical path.  This
           location seems to be optimal on 2.2GHz Athlon64. */
        cmpq    8(%rsp), %rbp
        movq    VG_(tt_fast)@GOTPCREL(%rip), %rcx
        jnz     gsp_changed

        /* save the jump address in the guest state */
        movq    %rax, OFFSET_amd64_RIP(%rbp)

        /* Are we out of timeslice?  If yes, defer to scheduler. */
        subl    $1, 0(%rsp)
        jz      counter_is_zero

        /* try a fast lookup in the translation cache */
        movq    %rax, %rbx              /* next guest addr */
        andq    $VG_TT_FAST_MASK, %rbx  /* entry# */
        shlq    $4, %rbx                /* entry# * sizeof(FastCacheEntry) */
        movq    0(%rcx,%rbx,1), %r10    /* .guest */
        movq    8(%rcx,%rbx,1), %r11    /* .host */
        cmpq    %rax, %r10
        jnz     fast_lookup_failed

        /* increment bb profile counter */
        movq    VG_(tt_fastN)@GOTPCREL(%rip), %rdx
        shrq    $1, %rbx                /* entry# * sizeof(UInt*) */
        movq    (%rdx,%rbx,1), %rdx
        addl    $1, (%rdx)

        /* Found a match.  Jump to .host. */
        jmp     *%r11
        ud2     /* persuade insn decoders not to speculate past here */
        /* generated code should run, then jump back to
           VG_(run_innerloop__dispatch_profiled). */
        /*NOTREACHED*/
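/* Illustrative only -- not part of the original file.  The extra
   profiling step in C, assuming VG_(tt_fastN) is an array of pointers
   to 32-bit counters, parallel to VG_(tt_fast): the shrq $1 turns the
   byte offset entry#*16 into entry#*8, i.e. an index into an array of
   8-byte pointers, and the addl bumps the 32-bit counter it points at:

      // UInt* counter = VG_(tt_fastN)[entry];
      // (*counter)++;
*/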
/*----------------------------------------------------*/
/*--- exit points                                  ---*/
/*----------------------------------------------------*/

gsp_changed:
        /* Someone messed with the gsp.  Have to
           defer to scheduler to resolve this.  dispatch ctr
           is not yet decremented, so no need to increment. */
        /* %RIP is NOT up to date here.  First, need to write
           %rax back to %RIP, but without trashing %rbp since
           that holds the value we want to return to the scheduler.
           Hence use %r15 transiently for the guest state pointer. */
        movq    8(%rsp), %r15
        movq    %rax, OFFSET_amd64_RIP(%r15)
        movq    %rbp, %rax
        jmp     run_innerloop_exit
        /*NOTREACHED*/

counter_is_zero:
        /* %RIP is up to date here */
        /* back out decrement of the dispatch counter */
        addl    $1, 0(%rsp)
        movq    $VG_TRC_INNER_COUNTERZERO, %rax
        jmp     run_innerloop_exit

fast_lookup_failed:
        /* %RIP is up to date here */
        /* back out decrement of the dispatch counter */
        addl    $1, 0(%rsp)
        movq    $VG_TRC_INNER_FASTMISS, %rax
        jmp     run_innerloop_exit

/* All exits from the dispatcher go through here.  %rax holds
   the return value.
*/
run_innerloop_exit:
        /* We're leaving.  Check that nobody messed with
           %mxcsr or %fpucw.  We can't mess with %rax here as it
           holds the tentative return value, but any other is OK. */
#if !defined(ENABLE_INNER)
        /* This check fails for self-hosting, so skip in that case */
        pushq   $0
        fstcw   (%rsp)
        cmpl    $0x027F, (%rsp)
        popq    %r15    /* get rid of the word without trashing %eflags */
        jnz     invariant_violation
#endif
        pushq   $0
        stmxcsr (%rsp)
        andl    $0xFFFFFFC0, (%rsp)     /* mask out status flags */
        cmpl    $0x1F80, (%rsp)
        popq    %r15
        jnz     invariant_violation
        /* otherwise we're OK */
        jmp     run_innerloop_exit_REALLY

invariant_violation:
        movq    $VG_TRC_INVARIANT_FAILED, %rax
        jmp     run_innerloop_exit_REALLY

run_innerloop_exit_REALLY:
        /* restore VG_(dispatch_ctr) */
        popq    %r14
        movq    VG_(dispatch_ctr)@GOTPCREL(%rip), %r15
        movl    %r14d, (%r15)

        popq    %rdi
        popq    %r15
        popq    %r14
        popq    %r13
        popq    %r12
        popq    %r11
        popq    %r10
        popq    %r9
        popq    %r8
        popq    %rbp
        popq    %rsi
        popq    %rdx
        popq    %rcx
        popq    %rbx
        ret
.size VG_(run_innerloop), .-VG_(run_innerloop)
/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- A special dispatcher, for running no-redir           ---*/
/*--- translations.  Just runs the given translation once. ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* signature:
   void VG_(run_a_noredir_translation) ( UWord* argblock );
*/

/* Run a no-redir translation.  argblock points to 4 UWords, 2 to carry
   args and 2 to carry results:
      0: input:  ptr to translation
      1: input:  ptr to guest state
      2: output: next guest PC
      3: output: guest state pointer afterwards (== thread return code)
*/
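/* Illustrative only -- not part of the original file.  A hypothetical
   C caller matching the argblock layout above; the register traffic is
   taken from the code below (argblock[1] -> %rbp, jump via argblock[0],
   and the return point stores %rax/%rbp to argblock[2]/argblock[3]):

      // UWord argblock[4];
      // argblock[0] = (UWord)host_code;     // translation to run
      // argblock[1] = (UWord)guest_state;
      // VG_(run_a_noredir_translation)(argblock);
      // UWord next_guest_pc = argblock[2];  // from %rax
      // UWord new_gsp       = argblock[3];  // from %rbp
*/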
.align 16
.global VG_(run_a_noredir_translation)
.type   VG_(run_a_noredir_translation), @function
VG_(run_a_noredir_translation):
        /* Save callee-saves regs */
        pushq   %rbx
        pushq   %rbp
        pushq   %r12
        pushq   %r13
        pushq   %r14
        pushq   %r15

        pushq   %rdi    /* we will need it after running the translation */

        movq    8(%rdi), %rbp
        jmp     *0(%rdi)
        /*NOTREACHED*/
        ud2
        /* If the translation has been correctly constructed, we
           should resume at the following label. */

.global VG_(run_a_noredir_translation__return_point)
VG_(run_a_noredir_translation__return_point):
        popq    %rdi
        movq    %rax, 16(%rdi)
        movq    %rbp, 24(%rdi)

        popq    %r15
        popq    %r14
        popq    %r13
        popq    %r12
        popq    %rbp
        popq    %rbx
        ret
.size VG_(run_a_noredir_translation), .-VG_(run_a_noredir_translation)

/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits
#endif // defined(VGP_amd64_linux)

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/