mirror of
https://github.com/Zenithsiz/ftmemsim-valgrind.git
synced 2026-02-04 02:18:37 +00:00
684 lines
13 KiB
ArmAsm
684 lines
13 KiB
ArmAsm
|
|
##--------------------------------------------------------------------##
|
|
##--- Support routines for the JITter output. ---##
|
|
##--- vg_helpers.S ---##
|
|
##--------------------------------------------------------------------##
|
|
|
|
/*
|
|
This file is part of Valgrind, an x86 protected-mode emulator
|
|
designed for debugging and profiling binaries on x86-Unixes.
|
|
|
|
Copyright (C) 2000-2002 Julian Seward
|
|
jseward@acm.org
|
|
Julian_Seward@muraroa.demon.co.uk
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License as
|
|
published by the Free Software Foundation; either version 2 of the
|
|
License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
02111-1307, USA.
|
|
|
|
The GNU General Public License is contained in the file LICENSE.
|
|
*/
|
|
|
|
#include "vg_constants.h"
|
|
|
|
/* ------------------ SIMULATED CPU HELPERS ------------------ */
|
|
/* A couple of stubs for returns which we want to catch: signal
|
|
returns and pthread returns. In the latter case, the thread's
|
|
return value is in %EAX, so we pass this as the first argument
|
|
to the request. In both cases we use the user request mechanism.
|
|
You need to to read the definition of VALGRIND_MAGIC_SEQUENCE
|
|
in valgrind.h to make sense of this.
|
|
*/
|
|
/* Bogus return address planted for signal-handler returns.  Builds a
   5-word arg block on the stack and issues VG_USERREQ__SIGNAL_RETURNS
   via the client-request magic sequence; the simulated CPU is expected
   to intercept it, so falling through to the panic is a fatal error. */
.global VG_(signalreturn_bogusRA)
VG_(signalreturn_bogusRA):
	subl	$20, %esp	# allocate arg block (5 words)
	movl	%esp, %edx	# %edx == &_zzq_args[0]
	movl	$VG_USERREQ__SIGNAL_RETURNS, 0(%edx)	# request
	movl	$0, 4(%edx)	# arg1
	movl	$0, 8(%edx)	# arg2
	movl	$0, 12(%edx)	# arg3
	movl	$0, 16(%edx)	# arg4
	movl	%edx, %eax	# %eax = &_zzq_args[0], as the sequence requires
	# and now the magic sequence itself (a no-op rotate chain on a
	# real CPU; recognised specially by the JITter):
	roll	$29, %eax
	roll	$3, %eax
	rorl	$27, %eax
	rorl	$5, %eax
	roll	$13, %eax
	roll	$19, %eax
	# should never get here -- the request must have been missed
	pushl	$signalreturn_bogusRA_panic_msg
	call	VG_(panic)

.data
signalreturn_bogusRA_panic_msg:
.ascii	"vg_signalreturn_bogusRA: VG_USERREQ__SIGNAL_RETURNS was missed"
.byte	0
.text
|
|
|
|
|
|
|
|
/* Bogus return address planted for pthread returns.  Same scheme as
   the signal-return stub above, except the thread's return value
   (arriving in %eax) is passed as arg1 of the request. */
.global VG_(pthreadreturn_bogusRA)
VG_(pthreadreturn_bogusRA):
	subl	$20, %esp	# allocate arg block (5 words)
	movl	%esp, %edx	# %edx == &_zzq_args[0]
	movl	$VG_USERREQ__PTHREAD_RETURNS, 0(%edx)	# request
	movl	%eax, 4(%edx)	# arg1 == thread return value
	movl	$0, 8(%edx)	# arg2
	movl	$0, 12(%edx)	# arg3
	movl	$0, 16(%edx)	# arg4
	movl	%edx, %eax	# %eax = &_zzq_args[0]
	# and now the magic sequence itself (no-op on a real CPU):
	roll	$29, %eax
	roll	$3, %eax
	rorl	$27, %eax
	rorl	$5, %eax
	roll	$13, %eax
	roll	$19, %eax
	# should never get here -- the request must have been missed
	pushl	$pthreadreturn_bogusRA_panic_msg
	call	VG_(panic)

.data
pthreadreturn_bogusRA_panic_msg:
.ascii	"vg_pthreadreturn_bogusRA: VG_USERREQ__PTHREAD_RETURNS was missed"
.byte	0
.text
|
|
|
|
|
|
|
|
|
|
|
|
/* ------------------ REAL CPU HELPERS ------------------ */
|
|
/* The rest of this lot run on the real CPU. */
|
|
|
|
/* Various helper routines, for instructions which are just too
|
|
darn tedious for the JITter to output code in-line:
|
|
|
|
* integer division
|
|
* integer multiplication
|
|
* setting and getting obscure eflags
|
|
* double-length shifts
|
|
|
|
All routines use a standard calling convention designed for
|
|
calling from translations, in which the incoming args are
|
|
underneath the return address, the callee saves _all_ registers,
|
|
and the incoming parameters can be modified, to return results.
|
|
*/
|
|
|
|
|
|
/* Trampoline: report a definedness error on a 0-byte (flag) access.
   Saves/restores all integer registers around the C reporting fn. */
.global VG_(helper_value_check0_fail)
VG_(helper_value_check0_fail):
	pushal				# callee saves _all_ registers
	call	VG_(helperc_value_check0_fail)
	popal
	ret
|
|
|
|
/* Trampoline: report a definedness error on a 1-byte access. */
.global VG_(helper_value_check1_fail)
VG_(helper_value_check1_fail):
	pushal				# callee saves _all_ registers
	call	VG_(helperc_value_check1_fail)
	popal
	ret
|
|
|
|
/* Trampoline: report a definedness error on a 2-byte access. */
.global VG_(helper_value_check2_fail)
VG_(helper_value_check2_fail):
	pushal				# callee saves _all_ registers
	call	VG_(helperc_value_check2_fail)
	popal
	ret
|
|
|
|
/* Trampoline: report a definedness error on a 4-byte access. */
.global VG_(helper_value_check4_fail)
VG_(helper_value_check4_fail):
	pushal				# callee saves _all_ registers
	call	VG_(helperc_value_check4_fail)
	popal
	ret
|
|
|
|
|
|
/* Do an original-code-write check for the address in %ebp
   (self-modifying-code detection). */
.global VG_(helper_smc_check4)
VG_(helper_smc_check4):
#if VG_SMC_FASTCHECK_IN_C

	# The check is done entirely in C: save the live regs,
	# pass the address, and let VG_(smc_check4) do the work.
	pushl	%eax
	pushl	%ebx
	pushl	%ecx
	pushl	%edx
	pushl	%esi
	pushl	%edi

	pushl	%ebp			# arg: the address being checked
	call	VG_(smc_check4)
	addl	$4, %esp		# discard the arg

	popl	%edi
	popl	%esi
	popl	%edx
	popl	%ecx
	popl	%ebx
	popl	%eax

	ret
#else
	# Assembly fast path: consult the one-byte-per-line SMC cache;
	# only fall into C on a cache hit ("failure" == possibly written).
	incl	VG_(smc_total_check4s)
	pushl	%ebp
	shrl	$VG_SMC_CACHE_SHIFT, %ebp	# address -> cache index
	andl	$VG_SMC_CACHE_MASK, %ebp
	cmpb	$0, VG_(smc_cache)(%ebp)
	jnz	vg_smc_cache_failure
	# NOTE(review): this discards the saved %ebp instead of popping it,
	# leaving %ebp holding the cache index on the fast path -- looks
	# like a bug, but this arm is dead when VG_SMC_FASTCHECK_IN_C is
	# set; confirm before enabling it.
	addl	$4, %esp
	ret
vg_smc_cache_failure:
	popl	%ebp			# restore the address
	pushal
	pushl	%ebp			# arg: the address being checked
	call	VG_(smc_check4)
	addl	$4, %esp		# discard the arg
	popal
	ret
#endif
|
|
|
|
|
|
/* Fetch the time-stamp-ctr reg.
   On entry (args under the return address, per the helper convention):
      dummy, replaced by %EAX value
      dummy, replaced by %EDX value
      RA <- %esp
*/
.global VG_(helper_RDTSC)
VG_(helper_RDTSC):
	pushl	%eax
	pushl	%edx
	rdtsc				# %edx:%eax = 64-bit TSC
	movl	%edx, 12(%esp)		# high half -> %EDX dummy slot
	movl	%eax, 16(%esp)		# low half  -> %EAX dummy slot
	popl	%edx
	popl	%eax
	ret
|
|
|
|
|
|
/* Do the CPUID instruction.
   On entry (args under the return address):
      dummy, replaced by %EAX value
      dummy, replaced by %EBX value
      dummy, replaced by %ECX value
      dummy, replaced by %EDX value
      RA <- %esp

   As emulating a real CPUID is kinda hard, as it
   has to return different values depending on EAX,
   we just pretend to not support CPUID at all until
   it becomes a problem.  This will for sure disable
   all MMX / 3dnow checks so they don't bother us
   with code we don't understand.  (Dirk <dirk@kde.org>)

   http://www.sandpile.org/ia32/cpuid.htm

   (Later: we instead pretend to be like Werner's P54C P133, that is
   an original pre-MMX Pentium).
   <werner> cpuid words (0): 0x1 0x756e6547 0x6c65746e 0x49656e69
   <werner> cpuid words (1): 0x52b 0x0 0x0 0x1bf
*/
.global VG_(helper_CPUID)
VG_(helper_CPUID):
	pushl	%eax
	pushl	%ebx
	pushl	%ecx
	pushl	%edx
	movl	32(%esp), %eax		# leaf number requested by client
	/* previously: execute the real instruction
	cpuid
	*/
	/* previously: claim no CPUID support at all
	xor %eax,%eax
	xor %ebx,%ebx
	xor %ecx,%ecx
	xor %edx,%edx
	*/
	cmpl	$0, %eax
	jz	cpuid__0
	# leaf != 0: P54C family/model/stepping + feature flags
	movl	$0x52b, %eax
	movl	$0x0, %ebx
	movl	$0x0, %ecx
	movl	$0x1bf, %edx
	jmp	cpuid__99
cpuid__0:
	# leaf 0: max leaf = 1, vendor string "GenuineIntel"
	movl	$0x1, %eax
	movl	$0x756e6547, %ebx
	movl	$0x6c65746e, %ecx
	movl	$0x49656e69, %edx
cpuid__99:

	movl	%edx, 20(%esp)		# write results back into the
	movl	%ecx, 24(%esp)		# four dummy arg slots
	movl	%ebx, 28(%esp)
	movl	%eax, 32(%esp)
	popl	%edx
	popl	%ecx
	popl	%ebx
	popl	%eax
	ret
|
|
|
|
|
|
/* Fetch the FPU status register.
   On entry:
      dummy, replaced by result
      RA <- %esp
*/
.global VG_(helper_fstsw_AX)
VG_(helper_fstsw_AX):
	pushl	%eax
	pushl	%esi
	movl	VGOFF_(m_fpustate), %esi
	# load the simulated FPU state; %ebp is presumably the
	# simulated-state base pointer here -- confirm against callers
	frstor	(%ebp, %esi, 4)
	fstsw	%ax			# %ax = FPU status word
	popl	%esi
	movw	%ax, 8(%esp)		# result -> dummy slot (above RA)
	popl	%eax
	ret
|
|
|
|
|
|
/* Copy %ah into %eflags.
   On entry:
      value of %eax
      RA <- %esp
*/
.global VG_(helper_SAHF)
VG_(helper_SAHF):
	pushl	%eax
	movl	8(%esp), %eax		# fetch the client's %eax value
	sahf				# %ah -> SF,ZF,AF,PF,CF
	popl	%eax
	ret
|
|
|
|
|
|
/* Do %al = DAS(%al).  Note that the passed param has %AL as the least
   significant 8 bits, since it was generated with GETB %AL,
   some-temp.  Fortunately %al is the least significant 8 bits of
   %eax anyway, which is why it's safe to work with %eax as a
   whole.

   On entry:
      value of %eax
      RA <- %esp
*/
.global VG_(helper_DAS)
VG_(helper_DAS):
	pushl	%eax
	movl	8(%esp), %eax		# fetch the client's %eax value
	das				# decimal-adjust %al after subtract
	movl	%eax, 8(%esp)		# write adjusted value back
	popl	%eax
	ret
|
|
|
|
|
|
/* Similarly, do %al = DAA(%al): decimal-adjust %al after addition. */
.global VG_(helper_DAA)
VG_(helper_DAA):
	pushl	%eax
	movl	8(%esp), %eax		# fetch the client's %eax value
	daa				# decimal-adjust %al after add
	movl	%eax, 8(%esp)		# write adjusted value back
	popl	%eax
	ret
|
|
|
|
|
|
/* Bit scan forwards/reverse.  Sets flags (??).
   On entry:
      value, replaced by result
      RA <- %esp
*/
.global VG_(helper_bsr)
VG_(helper_bsr):
	pushl	%eax
	bsrl	8(%esp), %eax		# index of highest set bit
	movl	%eax, 8(%esp)		# result replaces the value slot
	popl	%eax
	ret
|
|
|
|
/* Bit scan forward: like bsr above, but lowest set bit. */
.global VG_(helper_bsf)
VG_(helper_bsf):
	pushl	%eax
	bsfl	8(%esp), %eax		# index of lowest set bit
	movl	%eax, 8(%esp)		# result replaces the value slot
	popl	%eax
	ret
|
|
|
|
|
|
/* Bit test and set/reset/complement.  Sets flags.
   On entry:
      src
      dst
      RA <- %esp

   NOTE all these are basically misimplemented, since for memory
   operands it appears the index value can be arbitrary, and the
   address should be calculated accordingly.  Here, we assume (by
   forcing the register- and memory- versions to be handled by
   the same helper) that the offset is always in the range
   0 .. word-size-1, or to be more precise by implementing the
   client's memory- version of this using the register- version,
   we impose the condition that the offset is used
   modulo-wordsize.  This is just plain wrong and should be
   fixed.
*/
.global VG_(helper_bt)
VG_(helper_bt):
	pushl	%eax
	movl	12(%esp), %eax		# %eax = src (bit index)
	btl	%eax, 8(%esp)		# test bit in dst; sets CF
	popl	%eax
	ret
|
|
/* Bit test-and-set; same stack layout as helper_bt. */
.global VG_(helper_bts)
VG_(helper_bts):
	pushl	%eax
	movl	12(%esp), %eax		# %eax = src (bit index)
	btsl	%eax, 8(%esp)		# test bit in dst, then set it
	popl	%eax
	ret
|
|
/* Bit test-and-reset; same stack layout as helper_bt. */
.global VG_(helper_btr)
VG_(helper_btr):
	pushl	%eax
	movl	12(%esp), %eax		# %eax = src (bit index)
	btrl	%eax, 8(%esp)		# test bit in dst, then clear it
	popl	%eax
	ret
|
|
/* Bit test-and-complement; same stack layout as helper_bt. */
.global VG_(helper_btc)
VG_(helper_btc):
	pushl	%eax
	movl	12(%esp), %eax		# %eax = src (bit index)
	btcl	%eax, 8(%esp)		# test bit in dst, then flip it
	popl	%eax
	ret
|
|
|
|
|
|
/* 32-bit double-length shift left/right.
   On entry:
      amount
      src
      dst
      RA <- %esp
*/
.global VG_(helper_shldl)
VG_(helper_shldl):
	pushl	%eax
	pushl	%ebx
	pushl	%ecx

	movb	24(%esp), %cl		# %cl  = shift amount
	movl	20(%esp), %ebx		# %ebx = src (bits shifted in)
	movl	16(%esp), %eax		# %eax = dst
	shldl	%cl, %ebx, %eax
	movl	%eax, 16(%esp)		# write result back to dst slot

	popl	%ecx
	popl	%ebx
	popl	%eax
	ret
|
|
|
|
/* 16-bit double-length shift left; layout as helper_shldl. */
.global VG_(helper_shldw)
VG_(helper_shldw):
	pushl	%eax
	pushl	%ebx
	pushl	%ecx

	movb	24(%esp), %cl		# %cl = shift amount
	movw	20(%esp), %bx		# %bx = src (bits shifted in)
	movw	16(%esp), %ax		# %ax = dst
	shldw	%cl, %bx, %ax
	movw	%ax, 16(%esp)		# write result back to dst slot

	popl	%ecx
	popl	%ebx
	popl	%eax
	ret
|
|
|
|
/* 32-bit double-length shift right; layout as helper_shldl. */
.global VG_(helper_shrdl)
VG_(helper_shrdl):
	pushl	%eax
	pushl	%ebx
	pushl	%ecx

	movb	24(%esp), %cl		# %cl  = shift amount
	movl	20(%esp), %ebx		# %ebx = src (bits shifted in)
	movl	16(%esp), %eax		# %eax = dst
	shrdl	%cl, %ebx, %eax
	movl	%eax, 16(%esp)		# write result back to dst slot

	popl	%ecx
	popl	%ebx
	popl	%eax
	ret
|
|
|
|
/* 16-bit double-length shift right; layout as helper_shldl. */
.global VG_(helper_shrdw)
VG_(helper_shrdw):
	pushl	%eax
	pushl	%ebx
	pushl	%ecx

	movb	24(%esp), %cl		# %cl = shift amount
	movw	20(%esp), %bx		# %bx = src (bits shifted in)
	movw	16(%esp), %ax		# %ax = dst
	shrdw	%cl, %bx, %ax
	movw	%ax, 16(%esp)		# write result back to dst slot

	popl	%ecx
	popl	%ebx
	popl	%eax
	ret
|
|
|
|
|
|
/* Get the direction flag, and return either 1 (DF clear, strings go
   forward) or -1 (DF set, strings go backward). */
.global VG_(helper_get_dirflag)
VG_(helper_get_dirflag):
	pushfl				# preserve the real flags
	pushl	%eax

	pushfl
	popl	%eax			# %eax = EFLAGS
	shrl	$10, %eax		# DF is EFLAGS bit 10
	andl	$1, %eax
	jnz	L1
	movl	$1, %eax		# DF clear -> +1
	jmp	L2
L1:	movl	$-1, %eax		# DF set   -> -1
L2:	movl	%eax, 12(%esp)		# result -> slot above RA

	popl	%eax
	popfl
	ret
|
|
|
|
|
|
/* Clear/set the direction flag. */
.global VG_(helper_CLD)
VG_(helper_CLD):
	cld				# DF := 0
	ret
|
|
|
|
/* Set the direction flag. */
.global VG_(helper_STD)
VG_(helper_STD):
	std				# DF := 1
	ret
|
|
|
|
|
|
|
|
/* Signed 32-to-64 multiply.
   Stack on entry: [RA][hi-result slot][lo operand/result slot];
   writes %edx:%eax (product) back into the two arg slots. */
.globl VG_(helper_imul_32_64)
VG_(helper_imul_32_64):
	pushl	%eax
	pushl	%edx
	movl	16(%esp), %eax		# multiplicand
	imull	12(%esp)		# %edx:%eax = %eax * operand
	movl	%eax, 16(%esp)		# low 32 bits
	movl	%edx, 12(%esp)		# high 32 bits
	popl	%edx
	popl	%eax
	ret
|
|
|
|
/* Signed 16-to-32 multiply: %dx:%ax split across the two arg slots. */
.globl VG_(helper_imul_16_32)
VG_(helper_imul_16_32):
	pushl	%eax
	pushl	%edx
	movw	16(%esp), %ax		# multiplicand
	imulw	12(%esp)		# %dx:%ax = %ax * operand
	movw	%ax, 16(%esp)		# low 16 bits
	movw	%dx, 12(%esp)		# high 16 bits
	popl	%edx
	popl	%eax
	ret
|
|
|
|
/* Signed 8-to-16 multiply: 16-bit product lands in one slot. */
.globl VG_(helper_imul_8_16)
VG_(helper_imul_8_16):
	pushl	%eax
	pushl	%edx
	movb	16(%esp), %al		# multiplicand
	imulb	12(%esp)		# %ax = %al * operand
	movw	%ax, 16(%esp)		# 16-bit product
	popl	%edx
	popl	%eax
	ret
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Unsigned 32-to-64 multiply; layout as helper_imul_32_64. */
.globl VG_(helper_mul_32_64)
VG_(helper_mul_32_64):
	pushl	%eax
	pushl	%edx
	movl	16(%esp), %eax		# multiplicand
	mull	12(%esp)		# %edx:%eax = %eax * operand
	movl	%eax, 16(%esp)		# low 32 bits
	movl	%edx, 12(%esp)		# high 32 bits
	popl	%edx
	popl	%eax
	ret
|
|
|
|
/* Unsigned 16-to-32 multiply; layout as helper_imul_16_32. */
.globl VG_(helper_mul_16_32)
VG_(helper_mul_16_32):
	pushl	%eax
	pushl	%edx
	movw	16(%esp), %ax		# multiplicand
	mulw	12(%esp)		# %dx:%ax = %ax * operand
	movw	%ax, 16(%esp)		# low 16 bits
	movw	%dx, 12(%esp)		# high 16 bits
	popl	%edx
	popl	%eax
	ret
|
|
|
|
/* Unsigned 8-to-16 multiply; layout as helper_imul_8_16. */
.globl VG_(helper_mul_8_16)
VG_(helper_mul_8_16):
	pushl	%eax
	pushl	%edx
	movb	16(%esp), %al		# multiplicand
	mulb	12(%esp)		# %ax = %al * operand
	movw	%ax, 16(%esp)		# 16-bit product
	popl	%edx
	popl	%eax
	ret
|
|
|
|
|
|
|
|
|
|
/* Unsigned 64-into-32 divide.
   Stack on entry: [RA][dividend-hi][dividend-lo][divisor];
   quotient replaces dividend-lo, remainder replaces dividend-hi. */
.globl VG_(helper_div_64_32)
VG_(helper_div_64_32):
	pushl	%eax
	pushl	%edx
	movl	16(%esp),%eax		# dividend low 32 bits
	movl	12(%esp),%edx		# dividend high 32 bits
	divl	20(%esp)		# %edx:%eax / divisor
	movl	%eax,16(%esp)		# quotient
	movl	%edx,12(%esp)		# remainder
	popl	%edx
	popl	%eax
	ret
|
|
|
|
/* Signed 64-into-32 divide; layout as helper_div_64_32. */
.globl VG_(helper_idiv_64_32)
VG_(helper_idiv_64_32):
	pushl	%eax
	pushl	%edx
	movl	16(%esp),%eax		# dividend low 32 bits
	movl	12(%esp),%edx		# dividend high 32 bits
	idivl	20(%esp)		# signed %edx:%eax / divisor
	movl	%eax,16(%esp)		# quotient
	movl	%edx,12(%esp)		# remainder
	popl	%edx
	popl	%eax
	ret
|
|
|
|
/* Unsigned 32-into-16 divide; 16-bit analogue of helper_div_64_32. */
.globl VG_(helper_div_32_16)
VG_(helper_div_32_16):
	pushl	%eax
	pushl	%edx
	movw	16(%esp),%ax		# dividend low 16 bits
	movw	12(%esp),%dx		# dividend high 16 bits
	divw	20(%esp)		# %dx:%ax / divisor
	movw	%ax,16(%esp)		# quotient
	movw	%dx,12(%esp)		# remainder
	popl	%edx
	popl	%eax
	ret
|
|
|
|
/* Signed 32-into-16 divide; layout as helper_div_32_16. */
.globl VG_(helper_idiv_32_16)
VG_(helper_idiv_32_16):
	pushl	%eax
	pushl	%edx
	movw	16(%esp),%ax		# dividend low 16 bits
	movw	12(%esp),%dx		# dividend high 16 bits
	idivw	20(%esp)		# signed %dx:%ax / divisor
	movw	%ax,16(%esp)		# quotient
	movw	%dx,12(%esp)		# remainder
	popl	%edx
	popl	%eax
	ret
|
|
|
|
/* Unsigned 16-into-8 divide.
   Stack on entry: [RA][result slot][dividend][divisor];
   divb leaves quotient in %al, remainder in %ah. */
.globl VG_(helper_div_16_8)
VG_(helper_div_16_8):
	pushl	%eax
	movw	12(%esp),%ax		# 16-bit dividend
	divb	16(%esp)		# %ax / 8-bit divisor
	movb	%ah,12(%esp)		# remainder
	movb	%al,8(%esp)		# quotient
	popl	%eax
	ret
|
|
|
|
/* Signed 16-into-8 divide; layout as helper_div_16_8. */
.globl VG_(helper_idiv_16_8)
VG_(helper_idiv_16_8):
	pushl	%eax
	movw	12(%esp),%ax		# 16-bit dividend
	idivb	16(%esp)		# signed %ax / 8-bit divisor
	movb	%ah,12(%esp)		# remainder
	movb	%al,8(%esp)		# quotient
	popl	%eax
	ret
|
|
|
|
|
|
##--------------------------------------------------------------------##
|
|
##--- end vg_helpers.S ---##
|
|
##--------------------------------------------------------------------##
|