Implement command line option --valgrind-stacksize=<number>

This allows decreasing memory usage when using many threads,
if Valgrind does not need a big stack.
If needed (e.g. for demangling big C++ symbols), the Valgrind stack size
can be increased.



git-svn-id: svn://svn.valgrind.org/valgrind/trunk@15004
This commit is contained in:
Philippe Waroquiers 2015-03-12 20:43:46 +00:00
parent 95d7b1d054
commit 826502e89a
11 changed files with 98 additions and 46 deletions

View File

@ -353,6 +353,9 @@ Bool ML_(am_resolve_filename) ( Int fd, /*OUT*/HChar* buf, Int nbuf )
/*--- Manage stacks for Valgrind itself. ---*/
/*--- ---*/
/*-----------------------------------------------------------------*/
/* A Valgrind stack.  The byte array is declared with zero length because
   the extent of a stack is only known at run time: each stack is mapped
   as a guard area, VG_(clo_valgrind_stacksize) active bytes and a second
   guard area, and is addressed through 'bytes'. */
struct _VgStack {
HChar bytes[0];
};
/* Allocate and initialise a VgStack (anonymous valgrind space).
Protect the stack active area and the guard areas appropriately.
@ -370,7 +373,7 @@ VgStack* VG_(am_alloc_VgStack)( /*OUT*/Addr* initial_sp )
/* Allocate the stack. */
szB = VG_STACK_GUARD_SZB
+ VG_STACK_ACTIVE_SZB + VG_STACK_GUARD_SZB;
+ VG_(clo_valgrind_stacksize) + VG_STACK_GUARD_SZB;
sres = VG_(am_mmap_anon_float_valgrind)( szB );
if (sr_isError(sres))
@ -393,12 +396,12 @@ VgStack* VG_(am_alloc_VgStack)( /*OUT*/Addr* initial_sp )
);
sres = local_do_mprotect_NO_NOTIFY(
(Addr) &stack->bytes[VG_STACK_GUARD_SZB + VG_STACK_ACTIVE_SZB],
(Addr) &stack->bytes[VG_STACK_GUARD_SZB + VG_(clo_valgrind_stacksize)],
VG_STACK_GUARD_SZB, VKI_PROT_NONE
);
if (sr_isError(sres)) goto protect_failed;
VG_(am_notify_mprotect)(
(Addr) &stack->bytes[VG_STACK_GUARD_SZB + VG_STACK_ACTIVE_SZB],
(Addr) &stack->bytes[VG_STACK_GUARD_SZB + VG_(clo_valgrind_stacksize)],
VG_STACK_GUARD_SZB, VKI_PROT_NONE
);
@ -406,14 +409,15 @@ VgStack* VG_(am_alloc_VgStack)( /*OUT*/Addr* initial_sp )
tell how much got used. */
p = (UInt*)&stack->bytes[VG_STACK_GUARD_SZB];
for (i = 0; i < VG_STACK_ACTIVE_SZB/sizeof(UInt); i++)
for (i = 0; i < VG_(clo_valgrind_stacksize)/sizeof(UInt); i++)
p[i] = 0xDEADBEEF;
*initial_sp = (Addr)&stack->bytes[VG_STACK_GUARD_SZB + VG_STACK_ACTIVE_SZB];
*initial_sp = (Addr)&stack->bytes[VG_STACK_GUARD_SZB + VG_(clo_valgrind_stacksize)];
*initial_sp -= 8;
*initial_sp &= ~((Addr)0x1F); /* 32-align it */
VG_(debugLog)( 1,"aspacem","allocated thread stack at 0x%llx size %d\n",
VG_(debugLog)( 1,"aspacem",
"allocated valgrind thread stack at 0x%llx size %d\n",
(ULong)(Addr)stack, szB);
ML_(am_do_sanity_check)();
return stack;
@ -436,7 +440,7 @@ SizeT VG_(am_get_VgStack_unused_szB)( const VgStack* stack, SizeT limit )
const UInt* p;
p = (const UInt*)&stack->bytes[VG_STACK_GUARD_SZB];
for (i = 0; i < VG_STACK_ACTIVE_SZB/sizeof(UInt); i++) {
for (i = 0; i < VG_(clo_valgrind_stacksize)/sizeof(UInt); i++) {
if (p[i] != 0xDEADBEEF)
break;
if (i * sizeof(UInt) >= limit)

View File

@ -365,10 +365,10 @@ static void show_sched_status_wrk ( Bool host_stacktrace,
}
if (stack_usage && stack != 0)
VG_(printf)("valgrind stack top usage: %ld of %ld\n",
VG_STACK_ACTIVE_SZB
VG_(clo_valgrind_stacksize)
- VG_(am_get_VgStack_unused_szB)(stack,
VG_STACK_ACTIVE_SZB),
(SizeT) VG_STACK_ACTIVE_SZB);
VG_(clo_valgrind_stacksize)),
(SizeT) VG_(clo_valgrind_stacksize));
}
VG_(printf)("\n");
}

View File

@ -195,6 +195,10 @@ static void usage_NORETURN ( Bool debug_help )
" --num-transtab-sectors=<number> size of translated code cache [%d]\n"
" more sectors may increase performance, but use more memory.\n"
" --aspace-minaddr=0xPP avoid mapping memory below 0xPP [guessed]\n"
" --valgrind-stacksize=<number> size of valgrind (host) thread's stack\n"
" (in bytes) ["
VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)
"]\n"
" --show-emwarns=no|yes show warnings about emulation limits? [no]\n"
" --require-text-symbol=:sonamepattern:symbolpattern abort run if the\n"
" stated shared object doesn't have the stated\n"
@ -556,6 +560,12 @@ void main_process_cmd_line_options ( /*OUT*/Bool* logging_to_fd,
else if VG_STREQN(15, arg, "--redzone-size=") {}
else if VG_STREQN(17, arg, "--aspace-minaddr=") {}
else if VG_BINT_CLO(arg, "--valgrind-stacksize",
VG_(clo_valgrind_stacksize),
2*VKI_PAGE_SIZE, 10*VG_DEFAULT_STACK_ACTIVE_SZB)
{VG_(clo_valgrind_stacksize)
= VG_PGROUNDUP(VG_(clo_valgrind_stacksize));}
/* Obsolete options. Report an error and exit */
else if VG_STREQN(34, arg, "--vex-iropt-precise-memory-exns=no") {
VG_(fmsg_bad_option)
@ -1551,7 +1561,9 @@ static void setup_file_descriptors(void)
marked global even though it isn't, because assembly code below
needs to reference the name. */
/*static*/ VgStack VG_(interim_stack);
/* Sized for two guard areas plus the default-sized active area.  The
   default size is used because this interim stack is needed at startup,
   before any command line option can be processed. */
/*static*/ struct {
HChar bytes [VG_STACK_GUARD_SZB + VG_DEFAULT_STACK_ACTIVE_SZB + VG_STACK_GUARD_SZB];
} VG_(interim_stack);
/* These are the structures used to hold info for creating the initial
client image.
@ -2506,7 +2518,6 @@ void shutdown_actions_NORETURN( ThreadId tid,
VG_(am_show_nsegments)(1,"Memory layout at client shutdown");
vg_assert(VG_(is_running_thread)(tid));
vg_assert(tids_schedretcode == VgSrc_ExitThread
|| tids_schedretcode == VgSrc_ExitProcess
|| tids_schedretcode == VgSrc_FatalSig );
@ -2863,7 +2874,7 @@ asm("\n"
/* set up the new stack in %eax */
"\tmovl $vgPlain_interim_stack, %eax\n"
"\taddl $"VG_STRINGIFY(VG_STACK_GUARD_SZB)", %eax\n"
"\taddl $"VG_STRINGIFY(VG_STACK_ACTIVE_SZB)", %eax\n"
"\taddl $"VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)", %eax\n"
"\tsubl $16, %eax\n"
"\tandl $~15, %eax\n"
/* install it, and collect the original one */
@ -2883,7 +2894,7 @@ asm("\n"
/* set up the new stack in %rdi */
"\tmovq $vgPlain_interim_stack, %rdi\n"
"\taddq $"VG_STRINGIFY(VG_STACK_GUARD_SZB)", %rdi\n"
"\taddq $"VG_STRINGIFY(VG_STACK_ACTIVE_SZB)", %rdi\n"
"\taddq $"VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)", %rdi\n"
"\tandq $~15, %rdi\n"
/* install it, and collect the original one */
"\txchgq %rdi, %rsp\n"
@ -2903,13 +2914,13 @@ asm("\n"
"\tla 16,vgPlain_interim_stack@l(16)\n"
"\tlis 17,("VG_STRINGIFY(VG_STACK_GUARD_SZB)" >> 16)\n"
"\tori 17,17,("VG_STRINGIFY(VG_STACK_GUARD_SZB)" & 0xFFFF)\n"
"\tlis 18,("VG_STRINGIFY(VG_STACK_ACTIVE_SZB)" >> 16)\n"
"\tori 18,18,("VG_STRINGIFY(VG_STACK_ACTIVE_SZB)" & 0xFFFF)\n"
"\tlis 18,("VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)" >> 16)\n"
"\tori 18,18,("VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)" & 0xFFFF)\n"
"\tadd 16,17,16\n"
"\tadd 16,18,16\n"
"\trlwinm 16,16,0,0,27\n"
/* now r16 = &vgPlain_interim_stack + VG_STACK_GUARD_SZB +
VG_STACK_ACTIVE_SZB rounded down to the nearest 16-byte
VG_DEFAULT_STACK_ACTIVE_SZB rounded down to the nearest 16-byte
boundary. And r1 is the original SP. Set the SP to r16 and
call _start_in_C_linux, passing it the initial SP. */
"\tmr 3,1\n"
@ -2942,13 +2953,13 @@ asm("\n"
"\tlis 17,("VG_STRINGIFY(VG_STACK_GUARD_SZB)" >> 16)\n"
"\tori 17,17,("VG_STRINGIFY(VG_STACK_GUARD_SZB)" & 0xFFFF)\n"
"\txor 18,18,18\n"
"\tlis 18,("VG_STRINGIFY(VG_STACK_ACTIVE_SZB)" >> 16)\n"
"\tori 18,18,("VG_STRINGIFY(VG_STACK_ACTIVE_SZB)" & 0xFFFF)\n"
"\tlis 18,("VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)" >> 16)\n"
"\tori 18,18,("VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)" & 0xFFFF)\n"
"\tadd 16,17,16\n"
"\tadd 16,18,16\n"
"\trldicr 16,16,0,59\n"
/* now r16 = &vgPlain_interim_stack + VG_STACK_GUARD_SZB +
VG_STACK_ACTIVE_SZB rounded down to the nearest 16-byte
VG_DEFAULT_STACK_ACTIVE_SZB rounded down to the nearest 16-byte
boundary. And r1 is the original SP. Set the SP to r16 and
call _start_in_C_linux, passing it the initial SP. */
"\tmr 3,1\n"
@ -2988,13 +2999,13 @@ asm("\n"
"\tlis 17,("VG_STRINGIFY(VG_STACK_GUARD_SZB)" >> 16)\n"
"\tori 17,17,("VG_STRINGIFY(VG_STACK_GUARD_SZB)" & 0xFFFF)\n"
"\txor 18,18,18\n"
"\tlis 18,("VG_STRINGIFY(VG_STACK_ACTIVE_SZB)" >> 16)\n"
"\tori 18,18,("VG_STRINGIFY(VG_STACK_ACTIVE_SZB)" & 0xFFFF)\n"
"\tlis 18,("VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)" >> 16)\n"
"\tori 18,18,("VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)" & 0xFFFF)\n"
"\tadd 16,17,16\n"
"\tadd 16,18,16\n"
"\trldicr 16,16,0,59\n"
/* now r16 = &vgPlain_interim_stack + VG_STACK_GUARD_SZB +
VG_STACK_ACTIVE_SZB rounded down to the nearest 16-byte
VG_DEFAULT_STACK_ACTIVE_SZB rounded down to the nearest 16-byte
boundary. And r1 is the original SP. Set the SP to r16 and
call _start_in_C_linux, passing it the initial SP. */
"\tmr 3,1\n"
@ -3047,7 +3058,7 @@ asm("\n\t"
/* trigger execution of an invalid opcode -> halt machine */
"j .+2\n\t"
"1: .quad "VG_STRINGIFY(VG_STACK_GUARD_SZB)"\n\t"
"2: .quad "VG_STRINGIFY(VG_STACK_ACTIVE_SZB)"\n\t"
"2: .quad "VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)"\n\t"
".previous\n"
);
#elif defined(VGP_arm_linux)
@ -3070,7 +3081,7 @@ asm("\n"
"\tb _start_in_C_linux\n"
"\t.word vgPlain_interim_stack\n"
"\t.word "VG_STRINGIFY(VG_STACK_GUARD_SZB)"\n"
"\t.word "VG_STRINGIFY(VG_STACK_ACTIVE_SZB)"\n"
"\t.word "VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)"\n"
);
#elif defined(VGP_arm64_linux)
asm("\n"
@ -3086,9 +3097,9 @@ asm("\n"
"\tmovk x1, (("VG_STRINGIFY(VG_STACK_GUARD_SZB)") >> 16) & 0xFFFF,"
" lsl 16\n"
"\tadd x0, x0, x1\n"
// The next 2 assume that VG_STACK_ACTIVE_SZB fits in 32 bits
"\tmov x1, (("VG_STRINGIFY(VG_STACK_ACTIVE_SZB)") >> 0) & 0xFFFF\n"
"\tmovk x1, (("VG_STRINGIFY(VG_STACK_ACTIVE_SZB)") >> 16) & 0xFFFF,"
// The next 2 assume that VG_DEFAULT_STACK_ACTIVE_SZB fits in 32 bits
"\tmov x1, (("VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)") >> 0) & 0xFFFF\n"
"\tmovk x1, (("VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)") >> 16) & 0xFFFF,"
" lsl 16\n"
"\tadd x0, x0, x1\n"
"\tand x0, x0, -16\n"
@ -3120,14 +3131,14 @@ asm("\n"
"\tli $10, "VG_STRINGIFY(VG_STACK_GUARD_SZB)"\n"
"\tli $11, "VG_STRINGIFY(VG_STACK_ACTIVE_SZB)"\n"
"\tli $11, "VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)"\n"
"\taddu $9, $9, $10\n"
"\taddu $9, $9, $11\n"
"\tli $12, 0xFFFFFFF0\n"
"\tand $9, $9, $12\n"
/* now t1/$9 = &vgPlain_interim_stack + VG_STACK_GUARD_SZB +
VG_STACK_ACTIVE_SZB rounded down to the nearest 16-byte
VG_DEFAULT_STACK_ACTIVE_SZB rounded down to the nearest 16-byte
boundary. And $29 is the original SP. Set the SP to t1 and
call _start_in_C, passing it the initial SP. */
@ -3156,14 +3167,14 @@ asm(
"\tdaddiu $9, %lo(vgPlain_interim_stack)\n"
"\tli $10, "VG_STRINGIFY(VG_STACK_GUARD_SZB)"\n"
"\tli $11, "VG_STRINGIFY(VG_STACK_ACTIVE_SZB)"\n"
"\tli $11, "VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)"\n"
"\tdaddu $9, $9, $10\n"
"\tdaddu $9, $9, $11\n"
"\tli $12, 0xFFFFFF00\n"
"\tand $9, $9, $12\n"
/* now t1/$9 = &vgPlain_interim_stack + VG_STACK_GUARD_SZB +
VG_STACK_ACTIVE_SZB rounded down to the nearest 16-byte
VG_DEFAULT_STACK_ACTIVE_SZB rounded down to the nearest 16-byte
boundary. And $29 is the original SP. Set the SP to t1 and
call _start_in_C, passing it the initial SP. */
@ -3285,7 +3296,7 @@ asm("\n"
/* set up the new stack in %eax */
"\tmovl $_vgPlain_interim_stack, %eax\n"
"\taddl $"VG_STRINGIFY(VG_STACK_GUARD_SZB)", %eax\n"
"\taddl $"VG_STRINGIFY(VG_STACK_ACTIVE_SZB)", %eax\n"
"\taddl $"VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)", %eax\n"
"\tsubl $16, %eax\n"
"\tandl $~15, %eax\n"
/* install it, and collect the original one */
@ -3306,7 +3317,7 @@ asm("\n"
/* set up the new stack in %rdi */
"\tmovabsq $_vgPlain_interim_stack, %rdi\n"
"\taddq $"VG_STRINGIFY(VG_STACK_GUARD_SZB)", %rdi\n"
"\taddq $"VG_STRINGIFY(VG_STACK_ACTIVE_SZB)", %rdi\n"
"\taddq $"VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)", %rdi\n"
"\tandq $~15, %rdi\n"
/* install it, and collect the original one */
"\txchgq %rdi, %rsp\n"

View File

@ -38,6 +38,7 @@
#include "pub_core_libcproc.h"
#include "pub_core_mallocfree.h"
#include "pub_core_seqmatch.h" // VG_(string_match)
#include "pub_core_aspacemgr.h"
// See pub_{core,tool}_options.h for explanations of all these.
@ -128,6 +129,7 @@ Bool VG_(clo_show_emwarns) = False;
Word VG_(clo_max_stackframe) = 2000000;
UInt VG_(clo_max_threads) = MAX_THREADS_DEFAULT;
Word VG_(clo_main_stacksize) = 0; /* use client's rlimit.stack */
Word VG_(clo_valgrind_stacksize) = VG_DEFAULT_STACK_ACTIVE_SZB;
Bool VG_(clo_wait_for_gdb) = False;
VgSmc VG_(clo_smc_check) = Vg_SmcStack;
UInt VG_(clo_kernel_variant) = 0;

View File

@ -2236,7 +2236,9 @@ void VG_(sanity_check_general) ( Bool force_expensive )
if (remains < limit)
VG_(message)(Vg_DebugMsg,
"WARNING: Thread %d is within %ld bytes "
"of running out of stack!\n",
"of running out of valgrind stack!\n"
"Valgrind stack size can be increased "
"using --valgrind-stacksize=....\n",
tid, remains);
}
}

View File

@ -305,24 +305,23 @@ extern Bool VG_(am_relocate_nooverlap_client)( /*OUT*/Bool* need_discard,
// stacks. The address space manager provides and suitably
// protects such stacks.
// VG_DEFAULT_STACK_ACTIVE_SZB is the default size of a Valgrind stack.
// The size actually used is controlled by the command line option
// --valgrind-stacksize=xxxx (the value is rounded up to a page boundary).
// Note that m_main.c needs an interim stack (just to start up), before
// any command line option can be processed. This interim stack
// (declared in m_main.c) uses the size VG_DEFAULT_STACK_ACTIVE_SZB.
#if defined(VGP_ppc32_linux) \
|| defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux) \
|| defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
|| defined(VGP_arm64_linux)
# define VG_STACK_GUARD_SZB 65536 // 1 or 16 pages
# define VG_STACK_ACTIVE_SZB (4096 * 256) // 1Mb
#else
# define VG_STACK_GUARD_SZB 8192 // 2 pages
# define VG_STACK_ACTIVE_SZB (4096 * 256) // 1Mb
#endif
# define VG_DEFAULT_STACK_ACTIVE_SZB 1048576 // (4096 * 256) = 1Mb
typedef
struct {
HChar bytes[VG_STACK_GUARD_SZB
+ VG_STACK_ACTIVE_SZB
+ VG_STACK_GUARD_SZB];
}
VgStack;
typedef struct _VgStack VgStack;
/* Allocate and initialise a VgStack (anonymous valgrind space).

View File

@ -314,6 +314,10 @@ extern UInt VG_(clo_num_transtab_sectors);
VG_(clo_aspacem_minAddr). */
extern Addr VG_(clo_aspacem_minAddr);
/* How large the Valgrind thread stacks should be.
   Will be rounded up to a page. */
extern Word VG_(clo_valgrind_stacksize);
/* Delay startup to allow GDB to be attached? Default: NO */
extern Bool VG_(clo_wait_for_gdb);

View File

@ -2249,12 +2249,37 @@ need to use them.</para>
provided <computeroutput>address</computeroutput> must be page
aligned and must be greater than or equal to 0x1000 (4KB). To find the
default value on your platform, do something such as
<computeroutput>valgrind -d -d date 2&gt;&amp;1 | grep -i minaddr</computeroutput>. Values lower than 0x10000 (64KB) are known to create problems
<computeroutput>valgrind -d -d date 2&gt;&amp;1 | grep -i minaddr</computeroutput>.
Values lower than 0x10000 (64KB) are known to create problems
on some distributions.
</para>
</listitem>
</varlistentry>
<varlistentry id="opt.valgrind-stacksize" xreflabel="--valgrind-stacksize">
<term>
<option><![CDATA[--valgrind-stacksize=<number> [default: 1MB] ]]></option>
</term>
<listitem>
<para>For each thread, Valgrind needs its own 'private' stack.
The default size for these stacks is generously dimensioned, and so
should be sufficient in most cases. If the stack is too small,
Valgrind will segfault. Before segfaulting, Valgrind might produce
a warning when approaching the limit.
</para>
<para>
Use the option <option>--valgrind-stacksize</option> if such an (unlikely)
warning is produced, or Valgrind dies due to a segmentation violation.
Such segmentation violations have been seen when demangling huge C++
symbols.
</para>
<para>If your application uses many threads and needs a lot of memory, you can
save some memory by reducing the size of these Valgrind stacks using
the option <option>--valgrind-stacksize</option>.
</para>
</listitem>
</varlistentry>
<varlistentry id="opt.show-emwarns" xreflabel="--show-emwarns">
<term>
<option><![CDATA[--show-emwarns=<yes|no> [default: no] ]]></option>

View File

@ -1,5 +1,6 @@
prog: err_disable4
vgopts: -q --num-callers=3
vgopts: -q --num-callers=3 --valgrind-stacksize=131072
## 131072 is the minimum value needed on ppc64 (2 pages of 64K)
stderr_filter: ../../helgrind/tests/filter_stderr
## This is so as to get rid of the "Thread #" lines, which
## otherwise perturb the output due to differences in

View File

@ -108,6 +108,8 @@ usage: valgrind [options] prog-and-args
--num-transtab-sectors=<number> size of translated code cache [16]
more sectors may increase performance, but use more memory.
--aspace-minaddr=0xPP avoid mapping memory below 0xPP [guessed]
--valgrind-stacksize=<number> size of valgrind (host) thread's stack
(in bytes) [1048576]
--show-emwarns=no|yes show warnings about emulation limits? [no]
--require-text-symbol=:sonamepattern:symbolpattern abort run if the
stated shared object doesn't have the stated

View File

@ -108,6 +108,8 @@ usage: valgrind [options] prog-and-args
--num-transtab-sectors=<number> size of translated code cache [16]
more sectors may increase performance, but use more memory.
--aspace-minaddr=0xPP avoid mapping memory below 0xPP [guessed]
--valgrind-stacksize=<number> size of valgrind (host) thread's stack
(in bytes) [1048576]
--show-emwarns=no|yes show warnings about emulation limits? [no]
--require-text-symbol=:sonamepattern:symbolpattern abort run if the
stated shared object doesn't have the stated