mirror of
https://github.com/Zenithsiz/ftmemsim-valgrind.git
synced 2026-02-04 02:18:37 +00:00
matching, in the function VG_(generic_match). Patterns to be matched against may contain only '*'-style wildcards (matches any number of elements, we don't care what they are), '?' wildcards (matches exactly one element, we don't care what it is) and literal elements. It is totally abstractified, in the sense that the pattern and input arrays may be arrays of anything. The caller provides enough information so that VG_(generic_match) can step along both arrays, and can ask the questions "is this pattern element a '*' ?", "is this pattern element a '?' ?", and "does this pattern element match an input element ?". The existing function VG_(string_match) is reimplemented using VG_(generic_match), although the ability to escape metacharacters in the pattern string is removed -- I don't think it was ever used. In m_errormgr, matching of suppression stacks (including wildcard "..." lines) against error stacks is re-implemented using VG_(generic_match). Further detailed comments are in m_seqmatch.h and pub_tool_seqmatch.h. A negative side effect is that VG_(string_match) will be much slower than before, due to the abstractification. It may be necessary to reimplement a specialised version later. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@8816
234 lines
8.0 KiB
C
234 lines
8.0 KiB
C
|
|
/*--------------------------------------------------------------------*/
|
|
/*--- A simple sequence matching facility. ---*/
|
|
/*--- m_seqmatch.c ---*/
|
|
/*--------------------------------------------------------------------*/
|
|
|
|
/*
|
|
This file is part of Valgrind, a dynamic binary instrumentation
|
|
framework.
|
|
|
|
Copyright (C) 2008-2008 OpenWorks Ltd
|
|
info@open-works.co.uk
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License as
|
|
published by the Free Software Foundation; either version 2 of the
|
|
License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
02111-1307, USA.
|
|
|
|
The GNU General Public License is contained in the file COPYING.
|
|
*/
|
|
|
|
#include "pub_core_basics.h"
|
|
#include "pub_core_libcassert.h"
|
|
#include "pub_core_libcbase.h" // VG_(strlen)
|
|
#include "pub_core_seqmatch.h" // self
|
|
|
|
/* ---------------------------------------------------------------------
|
|
A simple sequence matching facility
|
|
------------------------------------------------------------------ */
|
|
|
|
/* See detailed comment in include/pub_tool_seqmatch.h about this. */
|
|
Bool VG_(generic_match) (
|
|
Bool matchAll,
|
|
void* patt, SizeT szbPatt, UWord nPatt, UWord ixPatt,
|
|
void* input, SizeT szbInput, UWord nInput, UWord ixInput,
|
|
Bool (*pIsStar)(void*),
|
|
Bool (*pIsQuery)(void*),
|
|
Bool (*pattEQinp)(void*,void*)
|
|
)
|
|
{
|
|
/* This is the spec, written in my favourite formal specification
|
|
language. It specifies non-greedy matching of '*'s.
|
|
|
|
ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
|
|
ma ('*':ps) [] = ma ps []
|
|
|
|
ma ('?':ps) (i:is) = ma ps is
|
|
ma ('?':ps) [] = False
|
|
|
|
ma (p:ps) (i:is) = p == i && ma ps is
|
|
|
|
ma (p:ps) [] = False
|
|
ma [] (i:is) = False -- m-all, True for m-prefix
|
|
ma [] [] = True
|
|
*/
|
|
Bool havePatt, haveInput;
|
|
void *currPatt, *currInput;
|
|
tailcall:
|
|
vg_assert(nPatt >= 0 && nPatt < 1000000); /* arbitrary */
|
|
vg_assert(nInput >= 0 && nInput < 1000000); /* arbitrary */
|
|
vg_assert(ixPatt >= 0 && ixPatt <= nPatt);
|
|
vg_assert(ixInput >= 0 && ixInput <= nInput);
|
|
|
|
havePatt = ixPatt < nPatt;
|
|
haveInput = ixInput < nInput;
|
|
|
|
/* No specific need to set NULL when !have{Patt,Input}, but guards
|
|
against inadvertantly dereferencing an out of range pointer to
|
|
the pattern or input arrays. */
|
|
currPatt = havePatt ? ((Char*)patt) + szbPatt * ixPatt : NULL;
|
|
currInput = haveInput ? ((Char*)input) + szbInput * ixInput : NULL;
|
|
|
|
// Deal with the complex case first: wildcards. Do frugal
|
|
// matching. When encountering a '*', first skip no characters
|
|
// at all, and see if the rest of the match still works. Only if
|
|
// that fails do we then skip a character, and retry at the next
|
|
// position.
|
|
//
|
|
// ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
|
|
//
|
|
// If we're out of input, check the rest of the pattern matches
|
|
// the empty input. This really means can only be be empty or
|
|
// composed entirely of '*'s.
|
|
//
|
|
// ma ('*':ps) [] = ma ps []
|
|
//
|
|
if (havePatt && pIsStar(currPatt)) {
|
|
if (haveInput) {
|
|
// ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
|
|
// we unavoidably have to make a real recursive call for the
|
|
// first half of the OR, since this isn't straight tail-recursion.
|
|
if (VG_(generic_match)( matchAll,
|
|
patt, szbPatt, nPatt, ixPatt+1,
|
|
input,szbInput,nInput, ixInput+0,
|
|
pIsStar,pIsQuery,pattEQinp) ) {
|
|
return True;
|
|
}
|
|
// but we can tail-recurse for the second call
|
|
ixInput++; goto tailcall;
|
|
} else {
|
|
// ma ('*':ps) [] = ma ps []
|
|
ixPatt++; goto tailcall;
|
|
}
|
|
}
|
|
|
|
// simpler cases now. Deal with '?' wildcards.
|
|
//
|
|
// ma ('?':ps) (i:is) = ma ps is
|
|
// ma ('?':ps) [] = False
|
|
if (havePatt && pIsQuery(currPatt)) {
|
|
if (haveInput) {
|
|
ixPatt++; ixInput++; goto tailcall;
|
|
} else {
|
|
return False;
|
|
}
|
|
}
|
|
|
|
// obvious case with literal chars in the pattern
|
|
//
|
|
// ma (p:ps) (i:is) = p == i && ma ps is
|
|
if (havePatt && haveInput) {
|
|
if (!pattEQinp(currPatt,currInput)) return False;
|
|
ixPatt++; ixInput++; goto tailcall;
|
|
}
|
|
|
|
// if we run out of input before we run out of pattern, we must fail
|
|
// ma (p:ps) [] = False
|
|
if (havePatt && !haveInput) return False;
|
|
|
|
// if we run out of pattern before we run out of input, the
|
|
// verdict depends on the matching mode. If we are trying to
|
|
// match exactly (the pattern must consume the entire input)
|
|
// then the outcome is failure. However, if we're merely attempting
|
|
// to match some prefix of the input, then we have been successful.
|
|
//
|
|
// ma [] (i:is) = False -- m-all, True for m-prefix
|
|
if (!havePatt && haveInput) {
|
|
return matchAll ? False // match-all
|
|
: True; // match-prefix
|
|
}
|
|
|
|
// finally, if both sequence and input are both completely
|
|
// consumed, then we were successful, regardless of matching mode.
|
|
if (!havePatt && !haveInput) return True;
|
|
|
|
// end of cases
|
|
vg_assert(0);
|
|
}
|
|
|
|
|
|
/* And a parameterization of the above, to make it do
|
|
string matching.
|
|
*/
|
|
static Bool charIsStar ( void* pV ) { return *(Char*)pV == '*'; }
|
|
static Bool charIsQuery ( void* pV ) { return *(Char*)pV == '?'; }
|
|
static Bool char_p_EQ_i ( void* pV, void* cV ) {
|
|
Char p = *(Char*)pV;
|
|
Char c = *(Char*)cV;
|
|
vg_assert(p != '*' && p != '?');
|
|
return p == c;
|
|
}
|
|
Bool VG_(string_match) ( const Char* patt, const Char* input )
|
|
{
|
|
return VG_(generic_match)(
|
|
True/* match-all */,
|
|
(void*)patt, sizeof(UChar), VG_(strlen)(patt), 0,
|
|
(void*)input, sizeof(UChar), VG_(strlen)(input), 0,
|
|
charIsStar, charIsQuery, char_p_EQ_i
|
|
);
|
|
}
|
|
|
|
|
|
// test cases for the matcher (in match-all mode)
|
|
// typedef struct { char* patt; char* input; Bool xres; } Test;
|
|
//
|
|
//static Test tests[] =
|
|
// {
|
|
// { "" ,"" , True },
|
|
// { "a" ,"" , False },
|
|
// { "a" ,"b" , False },
|
|
// { "a" ,"a" , True },
|
|
// { "a" ,"aa" , False },
|
|
// { "*" ,"" , True },
|
|
// { "**" ,"" , True },
|
|
// { "*" ,"abc", True },
|
|
// { "*a" ,"abc", False },
|
|
// { "*b" ,"abc", False },
|
|
// { "*bc" ,"abc", True },
|
|
// { "a*b" ,"abc", False },
|
|
// { "a*c" ,"abc", True },
|
|
// { "*c" ,"abc", True },
|
|
// { "c*c" ,"abc", False },
|
|
// { "abc*" ,"abc", True },
|
|
// { "abc**" ,"abc", True },
|
|
// { "**abc" ,"abc", True },
|
|
// { "**a*b*c**" ,"abc", True },
|
|
// { "**a*b*d**" ,"abc", False },
|
|
// { "a?b" ,"abc", False },
|
|
// { "a?c" ,"abc", True },
|
|
// { "?" ,"" , False },
|
|
// { "?" ,"a" , True },
|
|
// { "?" ,"ab" , False },
|
|
// { "abcd" ,"abc", False },
|
|
// { "ab" ,"abc", False },
|
|
// { NULL ,NULL , False }
|
|
// };
|
|
//
|
|
//int main ( void )
|
|
//{
|
|
// Test* t;
|
|
// for (t = tests; t->patt; t++) {
|
|
// printf("%10s %6s %s\n",
|
|
// t->patt, t->input,
|
|
// match_string_all((UChar*)t->patt,(UChar*)t->input,True)
|
|
// == t->xres
|
|
// ? "pass" : "FAIL" );
|
|
// }
|
|
// return 0;
|
|
//}
|
|
|
|
/*--------------------------------------------------------------------*/
|
|
/*--- end m_seqmatch.c ---*/
|
|
/*--------------------------------------------------------------------*/
|