mirror of
https://github.com/Zenithsiz/ftmemsim-valgrind.git
synced 2026-02-04 10:21:20 +00:00
506 lines
15 KiB
ArmAsm
506 lines
15 KiB
ArmAsm
|
|
/*--------------------------------------------------------------------*/
|
|
/*--- The core dispatch loop, for jumping to a code address. ---*/
|
|
/*--- dispatch-ppc32-linux.S ---*/
|
|
/*--------------------------------------------------------------------*/
|
|
|
|
/*
|
|
This file is part of Valgrind, a dynamic binary instrumentation
|
|
framework.
|
|
|
|
Copyright (C) 2005-2012 Cerion Armour-Brown <cerion@open-works.co.uk>
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License as
|
|
published by the Free Software Foundation; either version 2 of the
|
|
License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
02111-1307, USA.
|
|
|
|
The GNU General Public License is contained in the file COPYING.
|
|
*/
|
|
|
|
#if defined(VGP_ppc32_linux)
|
|
|
|
#include "config.h"
|
|
#include "pub_core_basics_asm.h"
|
|
#include "pub_core_dispatch_asm.h"
|
|
#include "pub_core_transtab_asm.h"
|
|
#include "libvex_guest_offsets.h" /* for OFFSET_ppc32_CIA */
|
|
|
|
|
|
/*------------------------------------------------------------*/
|
|
/*--- ---*/
|
|
/*--- The dispatch loop. VG_(disp_run_translations) is ---*/
|
|
/*--- used to run all translations, ---*/
|
|
/*--- including no-redir ones. ---*/
|
|
/*--- ---*/
|
|
/*------------------------------------------------------------*/
|
|
|
|
/*----------------------------------------------------*/
|
|
/*--- Entry and preamble (set everything up) ---*/
|
|
/*----------------------------------------------------*/
|
|
|
|
/* signature:
     void VG_(disp_run_translations)( UWord* two_words,
                                      void*  guest_state,
                                      Addr   host_addr );

   Entry half of the dispatcher: builds a 496-byte frame, saves the
   callee-saved state into it, puts FPSCR/VSCR into a known (zero)
   state, pushes a further 16-byte frame for the translated code and
   jumps to host_addr.  Control comes back through one of the
   VG_(disp_cp_*) continuation points, which all end up in 'postamble'.

   Frame layout (offsets from r1 after the 496-byte frame is pushed):
      352..495   f14..f31           (only if has_FP)
      276..348   r13..r31
      272        two_words (incoming r3)
      244        VRSAVE             (only if has_VMX && HAS_ALTIVEC)
       48..239   v20..v31           (only if has_VMX && HAS_ALTIVEC)
       44        CR
       20        scratch word used to load zero into FPSCR
        4        LR slot of this frame (reserved)
        0        back-chain
*/
.text
.globl  VG_(disp_run_translations)
.type   VG_(disp_run_translations), @function
VG_(disp_run_translations):
        /* r3 holds two_words   */
        /* r4 holds guest_state */
        /* r5 holds host_addr   */

        /* ----- entry point to VG_(disp_run_translations) ----- */
        /* For Linux/ppc32 we need the SysV ABI, which uses
           LR->4(parent_sp), CR->anywhere.
           (The AIX ABI, used on Darwin,
           uses LR->8(prt_sp), CR->4(prt_sp))
        */

        /* Save lr in the caller's frame (4(parent_sp)) */
        mflr    6
        stw     6,4(1)

        /* New stack frame */
        stwu    1,-496(1)  /* sp should maintain 16-byte alignment */

        /* Save callee-saved registers... */
        /* r3, r4, r5 are live here, so use r6 */
        lis     6,VG_(machine_ppc32_has_FP)@ha
        lwz     6,VG_(machine_ppc32_has_FP)@l(6)
        cmplwi  6,0
        beq     LafterFP1          /* no FPU: skip the FP save area */

        /* Floating-point reg save area : 144 bytes (f14..f31) */
        stfd    31,488(1)
        stfd    30,480(1)
        stfd    29,472(1)
        stfd    28,464(1)
        stfd    27,456(1)
        stfd    26,448(1)
        stfd    25,440(1)
        stfd    24,432(1)
        stfd    23,424(1)
        stfd    22,416(1)
        stfd    21,408(1)
        stfd    20,400(1)
        stfd    19,392(1)
        stfd    18,384(1)
        stfd    17,376(1)
        stfd    16,368(1)
        stfd    15,360(1)
        stfd    14,352(1)
LafterFP1:

        /* General reg save area : 76 bytes (r13..r31) */
        stw     31,348(1)
        stw     30,344(1)
        stw     29,340(1)
        stw     28,336(1)
        stw     27,332(1)
        stw     26,328(1)
        stw     25,324(1)
        stw     24,320(1)
        stw     23,316(1)
        stw     22,312(1)
        stw     21,308(1)
        stw     20,304(1)
        stw     19,300(1)
        stw     18,296(1)
        stw     17,292(1)
        stw     16,288(1)
        stw     15,284(1)
        stw     14,280(1)
        stw     13,276(1)
        stw     3,272(1)  /* save two_words for later */

        /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
           The Linux kernel might not actually use VRSAVE for its intended
           purpose, but it should be harmless to preserve anyway. */
        /* r3, r4, r5 are live here, so use r6 */
        lis     6,VG_(machine_ppc32_has_VMX)@ha
        lwz     6,VG_(machine_ppc32_has_VMX)@l(6)
        cmplwi  6,0
        beq     LafterVMX1         /* no AltiVec: skip the vector save area */

#ifdef HAS_ALTIVEC
        /* VRSAVE save word : 4 bytes (one 32-bit word) */
        mfspr   6,256         /* vrsave reg is spr number 256 */
        stw     6,244(1)

        /* Alignment padding : 4 bytes */

        /* Vector reg save area (quadword aligned) : 192 bytes (v20..v31).
           stvx has no displacement form, so the offset goes through r6. */
        li      6,224
        stvx    31,6,1
        li      6,208
        stvx    30,6,1
        li      6,192
        stvx    29,6,1
        li      6,176
        stvx    28,6,1
        li      6,160
        stvx    27,6,1
        li      6,144
        stvx    26,6,1
        li      6,128
        stvx    25,6,1
        li      6,112
        stvx    24,6,1        /* slot 112 belongs to v24; the exit path
                                 reloads v24 from here */
        li      6,96
        stvx    23,6,1
        li      6,80
        stvx    22,6,1
        li      6,64
        stvx    21,6,1
        li      6,48
        stvx    20,6,1
#endif

LafterVMX1:

        /* Save cr.  Only cr0 has been modified so far (by the cmplwi
           instructions above). */
        mfcr    6
        stw     6,44(1)

        /* Local variable space... */

        /* 32(sp) used later to check FPSCR[RM] */
        /* 24(sp) used later to stop ctr reg being clobbered */
        /* 20(sp) used later to load fpscr with zero */
        /* 8:16(sp) free */
        /* NOTE(review): of the slots above, only 20(sp) is referenced by
           the code visible in this file; the 24(sp)/32(sp) notes may be
           stale -- confirm before relying on them. */

        /* r3 holds two_words   */
        /* r4 holds guest_state */
        /* r5 holds host_addr   */

        /* Linkage Area (reserved)
           4(sp)  : LR
           0(sp)  : back-chain
        */

        /* set host FPU control word to the default mode expected
           by VEX-generated code.  See comments in libvex.h for
           more info. */
        lis     6,VG_(machine_ppc32_has_FP)@ha
        lwz     6,VG_(machine_ppc32_has_FP)@l(6)
        cmplwi  6,0
        beq     LafterFP2

        /* get zero into f3 (tedious) */
        /* note: fsub 3,3,3 is not a reliable way to do this,
           since if f3 holds a NaN or similar then we don't necessarily
           wind up with zero. */
        li      6,0
        stw     6,20(1)       /* bounce the zero through memory ... */
        lfs     3,20(1)       /* ... because there is no GPR->FPR move here */
        mtfsf   0xFF,3        /* fpscr = f3 */
LafterFP2:

        /* set host AltiVec control word to the default mode expected
           by VEX-generated code. */
        lis     6,VG_(machine_ppc32_has_VMX)@ha
        lwz     6,VG_(machine_ppc32_has_VMX)@l(6)
        cmplwi  6,0
        beq     LafterVMX2

#ifdef HAS_ALTIVEC
        vspltisw 3,0x0        /* generate zero */
        mtvscr  3             /* vscr = 0 */
#endif

LafterVMX2:

        /* make a stack frame for the code we are calling; the exit
           path must pop these 16 bytes before using the save-area
           offsets above */
        stwu    1,-16(1)

        /* Set up the guest state ptr */
        mr      31,4          /* r31 (generated code gsp) = r4 */

        /* and jump into the code cache.  Chained translations in
           the code cache run, until for whatever reason, they can't
           continue.  When that happens, the translation in question
           will jump (or call) to one of the continuation points
           VG_(cp_...) below. */
        mtctr   5
        bctr
        /*NOTREACHED*/
|
|
|
|
/*----------------------------------------------------*/
|
|
/*--- Postamble and exit. ---*/
|
|
/*----------------------------------------------------*/
|
|
|
|
postamble:
        /* Common exit path, reached by a branch from every
           continuation point.

           At this point, r6 and r7 contain two
           words to be returned to the caller.  r6
           holds a TRC value, and r7 optionally may
           hold another word (for CHAIN_ME exits, the
           address of the place to patch.)

           r1 still points at the 16-byte frame that the preamble
           pushed for the translated code, i.e. 16 bytes below the
           496-byte save frame. */

        /* We're leaving.  Check that nobody messed with
           VSCR or FPSCR in ways we don't expect. */
        /* Using r10 - value used again further on, so don't trash! */
        lis     10,VG_(machine_ppc32_has_FP)@ha
        lwz     10,VG_(machine_ppc32_has_FP)@l(10)

        /* Using r11 - value used again further on, so don't trash! */
        lis     11,VG_(machine_ppc32_has_VMX)@ha
        lwz     11,VG_(machine_ppc32_has_VMX)@l(11)

        cmplwi  10,0    /* Do we have FP ? */
        beq     LafterFP8

        /* Set fpscr back to a known state, since vex-generated code
           may have messed with fpscr[rm]. */
        li      5,0
        addi    1,1,-16       /* temporary scratch word below sp */
        stw     5,0(1)
        lfs     3,0(1)        /* f3 = 0.0 */
        addi    1,1,16
        mtfsf   0xFF,3        /* fpscr = f3 */
LafterFP8:

        cmplwi  11,0    /* Do we have altivec? */
        beq     LafterVMX8

#ifdef HAS_ALTIVEC
        /* Invariant: VSCR[NJ] must still be clear (the preamble wrote
           zero to VSCR).  Fail if it is set. */
        /* first generate 4x 0x00010000 */
        vspltisw  4,0x1                   /* 4x 0x00000001 */
        vspltisw  5,0x0                   /* zero */
        vsldoi    6,4,5,0x2               /* <<2*8 => 4x 0x00010000 */
        /* retrieve VSCR and mask wanted bits */
        mfvscr    7
        vand      7,7,6                   /* gives NJ flag */
        vspltw    7,7,0x3                 /* flags-word to all lanes */
        vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
        bt        24,invariant_violation  /* branch if all_equal, i.e. NJ set */
#endif

LafterVMX8:
        /* otherwise we're OK */
        b       remove_frame

invariant_violation:
        /* Replace whatever the continuation point wanted to return. */
        li      6,VG_TRC_INVARIANT_FAILED
        li      7,0
        /* fall through */

remove_frame:
        /* Pop the 16-byte frame made for the translated code FIRST, so
           that every offset used below is relative to the same r1 the
           preamble used when it filled the save areas. */
        addi    1,1,16

        /* Restore FP regs */
        /* r10 already holds VG_(machine_ppc32_has_FP) value */
        cmplwi  10,0
        beq     LafterFP9

        /* Floating-point regs (f14..f31) */
        lfd     31,488(1)
        lfd     30,480(1)
        lfd     29,472(1)
        lfd     28,464(1)
        lfd     27,456(1)
        lfd     26,448(1)
        lfd     25,440(1)
        lfd     24,432(1)
        lfd     23,424(1)
        lfd     22,416(1)
        lfd     21,408(1)
        lfd     20,400(1)
        lfd     19,392(1)
        lfd     18,384(1)
        lfd     17,376(1)
        lfd     16,368(1)
        lfd     15,360(1)
        lfd     14,352(1)
LafterFP9:

        /* r11 already holds VG_(machine_ppc32_has_VMX) value */
        cmplwi  11,0
        beq     LafterVMX9

        /* Restore Altivec regs */
#ifdef HAS_ALTIVEC
        /* VRSAVE: write the saved word back into the SPR */
        lwz     4,244(1)
        mtspr   256,4         /* VRSAVE reg is spr number 256 */

        /* Vector regs (v20..v31); lvx needs the offset in a register */
        li      4,224
        lvx     31,4,1
        li      4,208
        lvx     30,4,1
        li      4,192
        lvx     29,4,1
        li      4,176
        lvx     28,4,1
        li      4,160
        lvx     27,4,1
        li      4,144
        lvx     26,4,1
        li      4,128
        lvx     25,4,1
        li      4,112
        lvx     24,4,1
        li      4,96
        lvx     23,4,1
        li      4,80
        lvx     22,4,1
        li      4,64
        lvx     21,4,1
        li      4,48
        lvx     20,4,1
#endif
LafterVMX9:

        /* restore int regs, including importantly r3 (two_words) */
        lwz     31,348(1)
        lwz     30,344(1)
        lwz     29,340(1)
        lwz     28,336(1)
        lwz     27,332(1)
        lwz     26,328(1)
        lwz     25,324(1)
        lwz     24,320(1)
        lwz     23,316(1)
        lwz     22,312(1)
        lwz     21,308(1)
        lwz     20,304(1)
        lwz     19,300(1)
        lwz     18,296(1)
        lwz     17,292(1)
        lwz     16,288(1)
        lwz     15,284(1)
        lwz     14,280(1)
        lwz     13,276(1)
        lwz     3,272(1)
        /* Stash return values: two_words[0] = TRC, two_words[1] = extra */
        stw     6,0(3)
        stw     7,4(3)

        /* restore cr from the word the preamble stored at 44(sp);
           nothing below this point depends on the condition register */
        lwz     0,44(1)
        mtcr    0

        /* restore lr & sp, and leave */
        lwz     0,500(1)  /* stack_size + 4 */
        mtlr    0
        addi    1,1,496   /* stack_size */
        blr
|
|
|
|
|
|
/*----------------------------------------------------*/
|
|
/*--- Continuation points ---*/
|
|
/*----------------------------------------------------*/
|
|
|
|
/* ------ Chain me to slow entry point ------ */
.global VG_(disp_cp_chain_me_to_slowEP)
VG_(disp_cp_chain_me_to_slowEP):
        /* Reached by a call, so LR is the address just past the
           calling sequence.  Hand (VG_TRC_CHAIN_ME_TO_SLOW_EP,
           start-of-calling-sequence) back to C land via postamble. */
        mflr    7
        /* Step r7 back over the calling sequence:
              8 = imm32-fixed2 r30, disp_cp_chain_me_to_slowEP
              4 = mtctr r30
              4 = the call itself (branch-and-link through CTR)
        */
        addi    7,7,-(8+4+4)
        li      6,VG_TRC_CHAIN_ME_TO_SLOW_EP
        b       postamble
|
|
|
|
/* ------ Chain me to fast entry point ------ */
.global VG_(disp_cp_chain_me_to_fastEP)
VG_(disp_cp_chain_me_to_fastEP):
        /* Reached by a call, so LR is the address just past the
           calling sequence.  Hand (VG_TRC_CHAIN_ME_TO_FAST_EP,
           start-of-calling-sequence) back to C land via postamble. */
        mflr    7
        /* Step r7 back over the calling sequence:
              8 = imm32-fixed2 r30, disp_cp_chain_me_to_fastEP
              4 = mtctr r30
              4 = the call itself (branch-and-link through CTR)
        */
        addi    7,7,-(8+4+4)
        li      6,VG_TRC_CHAIN_ME_TO_FAST_EP
        b       postamble
|
|
|
|
/* ------ Indirect but boring jump ------ */
.global VG_(disp_cp_xindir)
VG_(disp_cp_xindir):
        /* r3 = guest address we want to continue at, read from the
           guest state (r31 is the guest state pointer) */
        lwz     3,OFFSET_ppc32_CIA(31)

        /* stats only: VG_(stats__n_xindirs_32)++ */
        lis     5,VG_(stats__n_xindirs_32)@ha
        addi    5,5,VG_(stats__n_xindirs_32)@l
        lwz     6,0(5)
        addi    6,6,1
        stw     6,0(5)

        /* Fast lookup in the translation cache.
           r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK) << 3
           The ">>2 then <<3" collapses to a rotate-left by 1 plus a
           mask that keeps VG_TT_FAST_BITS bits ending at bit 28. */
        rlwinm  4,3,1, 29-VG_TT_FAST_BITS, 28   /* entry# * 8 */

        /* r5 = &VG_(tt_fast)[entry#] */
        lis     5,VG_(tt_fast)@ha
        addi    5,5,VG_(tt_fast)@l
        add     5,5,4

        lwz     7,4(5)        /* .host  */
        lwz     6,0(5)        /* .guest */
        cmpw    3,6
        bne     fast_lookup_failed

        /* Hit: the entry is for this guest address, go to its .host */
        mtctr   7
        bctr

fast_lookup_failed:
        /* stats only: VG_(stats__n_xindir_misses_32)++ */
        lis     5,VG_(stats__n_xindir_misses_32)@ha
        addi    5,5,VG_(stats__n_xindir_misses_32)@l
        lwz     6,0(5)
        addi    6,6,1
        stw     6,0(5)

        /* Miss: return (VG_TRC_INNER_FASTMISS, 0) through postamble */
        li      7,0
        li      6,VG_TRC_INNER_FASTMISS
        b       postamble
        /*NOTREACHED*/
|
|
|
|
/* ------ Assisted jump ------ */
.global VG_(disp_cp_xassisted)
VG_(disp_cp_xassisted):
        /* On arrival r31 carries the TRC; return (r31, 0) through
           postamble. */
        li      7,0           /* no second word */
        or      6,31,31       /* r6 = r31 (same operation as mr 6,31) */
        b       postamble
|
|
|
|
/* ------ Event check failed ------ */
.global VG_(disp_cp_evcheck_fail)
VG_(disp_cp_evcheck_fail):
        /* Return (VG_TRC_INNER_COUNTERZERO, 0) through postamble. */
        li      7,0           /* no second word */
        li      6,VG_TRC_INNER_COUNTERZERO
        b       postamble
|
|
|
|
|
|
.size VG_(disp_run_translations), .-VG_(disp_run_translations)
|
|
|
|
/* Let the linker know we don't need an executable stack */
|
|
.section .note.GNU-stack,"",@progbits
|
|
|
|
#endif // defined(VGP_ppc32_linux)
|
|
|
|
/*--------------------------------------------------------------------*/
|
|
/*--- end ---*/
|
|
/*--------------------------------------------------------------------*/
|