When creating a child thread, initially set its os_state.threadgroup

to have the same value as the parent.  This avoids exit races leading
to hangs and strange behaviour in heavily multithreaded apps, in the
situation where threads are rapidly being created, and at the same
time an existing thread does sys_exit_group so as to terminate the
entire process.  Thanks to Konstantin S for chasing this down to a
small test case.  Fixes #226116.



git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11053
This commit is contained in:
Julian Seward 2010-02-22 11:03:10 +00:00
parent d0bb010178
commit e6c318c531
6 changed files with 58 additions and 0 deletions

View File

@ -251,6 +251,17 @@ static SysRes do_clone ( ThreadId ptid,
ctst->sig_mask = ptst->sig_mask;
ctst->tmp_sig_mask = ptst->sig_mask;
/* Start the child with its threadgroup being the same as the
parent's. This is so that any exit_group calls that happen
after the child is created but before it sets its
os_state.threadgroup field for real (in thread_wrapper in
syswrap-linux.c), really kill the new thread. In other words, this avoids
a race condition in which the thread is unkillable (via
exit_group) because its threadgroup is not set. The race window
is probably only a few hundred or a few thousand cycles long.
See #226116. */
ctst->os_state.threadgroup = ptst->os_state.threadgroup;
/* We don't really know where the client stack is, because it's
allocated by the client. The best we can do is look at the
memory mappings and try to derive some useful information. We

View File

@ -201,6 +201,17 @@ static SysRes do_clone ( ThreadId ptid,
ctst->sig_mask = ptst->sig_mask;
ctst->tmp_sig_mask = ptst->sig_mask;
/* Start the child with its threadgroup being the same as the
parent's. This is so that any exit_group calls that happen
after the child is created but before it sets its
os_state.threadgroup field for real (in thread_wrapper in
syswrap-linux.c), really kill the new thread. In other words, this avoids
a race condition in which the thread is unkillable (via
exit_group) because its threadgroup is not set. The race window
is probably only a few hundred or a few thousand cycles long.
See #226116. */
ctst->os_state.threadgroup = ptst->os_state.threadgroup;
seg = VG_(am_find_nsegment)((Addr)sp);
if (seg && seg->kind != SkResvn) {
ctst->client_stack_highest_word = (Addr)VG_PGROUNDUP(sp);

View File

@ -83,6 +83,9 @@ static VgSchedReturnCode thread_wrapper(Word /*ThreadId*/ tidW)
VG_TRACK(pre_thread_first_insn, tid);
tst->os_state.lwpid = VG_(gettid)();
/* Set the threadgroup for real. This overwrites the provisional
value set in do_clone() in syswrap-*-linux.c. See comments in
do_clone for background, also #226116. */
tst->os_state.threadgroup = VG_(getpid)();
/* Thread created with all signals blocked; scheduler will set the

View File

@ -297,6 +297,17 @@ static SysRes do_clone ( ThreadId ptid,
ctst->sig_mask = ptst->sig_mask;
ctst->tmp_sig_mask = ptst->sig_mask;
/* Start the child with its threadgroup being the same as the
parent's. This is so that any exit_group calls that happen
after the child is created but before it sets its
os_state.threadgroup field for real (in thread_wrapper in
syswrap-linux.c), really kill the new thread. In other words, this avoids
a race condition in which the thread is unkillable (via
exit_group) because its threadgroup is not set. The race window
is probably only a few hundred or a few thousand cycles long.
See #226116. */
ctst->os_state.threadgroup = ptst->os_state.threadgroup;
/* We don't really know where the client stack is, because it's
allocated by the client. The best we can do is look at the
memory mappings and try to derive some useful information. We

View File

@ -325,6 +325,17 @@ static SysRes do_clone ( ThreadId ptid,
ctst->sig_mask = ptst->sig_mask;
ctst->tmp_sig_mask = ptst->sig_mask;
/* Start the child with its threadgroup being the same as the
parent's. This is so that any exit_group calls that happen
after the child is created but before it sets its
os_state.threadgroup field for real (in thread_wrapper in
syswrap-linux.c), really kill the new thread. In other words, this avoids
a race condition in which the thread is unkillable (via
exit_group) because its threadgroup is not set. The race window
is probably only a few hundred or a few thousand cycles long.
See #226116. */
ctst->os_state.threadgroup = ptst->os_state.threadgroup;
/* We don't really know where the client stack is, because it's
allocated by the client. The best we can do is look at the
memory mappings and try to derive some useful information. We

View File

@ -262,6 +262,17 @@ static SysRes do_clone ( ThreadId ptid,
ctst->sig_mask = ptst->sig_mask;
ctst->tmp_sig_mask = ptst->sig_mask;
/* Start the child with its threadgroup being the same as the
parent's. This is so that any exit_group calls that happen
after the child is created but before it sets its
os_state.threadgroup field for real (in thread_wrapper in
syswrap-linux.c), really kill the new thread. In other words, this avoids
a race condition in which the thread is unkillable (via
exit_group) because its threadgroup is not set. The race window
is probably only a few hundred or a few thousand cycles long.
See #226116. */
ctst->os_state.threadgroup = ptst->os_state.threadgroup;
/* We don't really know where the client stack is, because it's
allocated by the client. The best we can do is look at the
memory mappings and try to derive some useful information. We