regexp_parser rxp( "(^wee|^week)(knights|night)" );
If the regexp pattern comes from user input, it is possible to check whether the pattern is valid:
if( !rxp.is_pattern_valid() ) { cerr << "invalid pattern" << endl; }
To parse the input (find the matching part), simply call the parse() method:
string input = "helloweeknights";
rxp.parse( input.c_str(), input.length() );
if( rxp.is_parsed() )
{
    cout << "input part recognized by (^wee|^week)(knights|night) on position ";
    cout << rxp.recognized_position() << " with size=" << rxp.recognized_size() << endl;
    // output is:
    // input part recognized by (^wee|^week)(knights|night) on position 5 with size=10
}
else
{
    cout << "input not recognized by (^wee|^week)(knights|night)" << endl;
}
The parser is also capable of returning the matches of the regexp and of addressing all subexpression matches. Each subexpression is addressed by a key in dotted form. In the match key, each number specifies the position of a subexpression. Levels are separated by '.' when subexpressions are nested, i.e. "(a)" can be addressed by 1.1, "((a))" by 1.1.1, etc.
Each number specifies the position of the subexpression on its level. Suppose we have the pattern "a(a(b){1,3})" and the input "aaabbbb". Now we are interested in subexpression 1.1.2, i.e. the second b:
regexp_parser rxp("a(a(b){1,3})");
rxp.parse( "aaabbbb", 7 );
matches m;
rxp.assign_matches(m);
match subexpr = m.get("1.1.2");
if( subexpr.is_valid() )
{
    ulong pos = subexpr.get_pos();   // == 4
    ulong size = subexpr.get_size(); // == 1
    // do some work
}
To iterate through all matches, use the get_all() method:
matches::matches_coll mc; m.get_all(mc); matches::matches_coll::iterator it = mc.begin(); while( it != mc.end() ) { cout << " match (subexpr) key = " << (*it).first << " position="; cout << (*it).second.get_pos() << " size=" << (*it).second.get_size(); cout << "; " << endl; it++; }
Note for developers:
To test regular pattern recognition (not finding the matches, at least not in this version), it is possible to use the tracer in the ./src/common directory. But to be honest, I have never used it (it produces too many traces).
// when using this directive, ./src/common/tracer.h must be included
#define TRACE
. . .
// switch the trace on
parsers_common::trace_context::get().trace_on();
// will trace regexp recognition (or, in other words, building the parsing engine).
regexp_parser rxp("a(a(b){1,3})");
// switch the trace off
parsers_common::trace_context::get().trace_off();
Using the matches, it is possible to manipulate the input string. See also regexp_test.cpp in the test directory.
The following are the results of the regexp_parser tests:
[--8z--] parsed succesfully input = - recognized succesfully [[.-.]] parsed succesfully input = - recognized succesfully [a-bc] parsed succesfully input = ch recognized succesfully [[:print:]] parsed succesfully input = 1 recognized succesfully [8--] parsed succesfully input = - recognized succesfully [a-c] parsed succesfully input = a recognized succesfully [^a-f] parsed succesfully input = g recognized succesfully [^a-f] parsed succesfully input = c not recognized succesfully [[:alnum:]] parsed succesfully input = a recognized succesfully [--z] parsed succesfully input = - recognized succesfully [aa-b] parsed succesfully input = b recognized succesfully [a-bc-h] parsed succesfully input = g recognized succesfully [a-bc-de-f] parsed succesfully input = a recognized succesfully [[.a.]] parsed succesfully input = a recognized succesfully [[=a=]] parsed succesfully input = a recognized succesfully [[.ch.]] parsed succesfully input = ch not recognized wrongly "abc "def" "ghi" jkl" succesfully recognized by "".*"" ,size=11 position=4 match (subexpr) key = 1 position=4 size=11; "abc "def" "ghi" jkl" succesfully recognized by "".*?"" ,size=5 position=4 match (subexpr) key = 1 position=4 size=5; "aaabbbb" succesfully recognized by "a(a(b){1,3})" ,size=6 position=0 match (subexpr) key = 1 position=0 size=6; match (subexpr) key = 1.1 position=1 size=5; match (subexpr) key = 1.1.1 position=3 size=1; match (subexpr) key = 1.1.2 position=4 size=1; match (subexpr) key = 1.1.3 position=5 size=1; "abab" succesfully recognized by "(ab){2,5}" ,size=4 position=0 match (subexpr) key = 1 position=0 size=4; match (subexpr) key = 1.1 position=0 size=2; match (subexpr) key = 1.2 position=2 size=2; "ab" succesfully not recognized by "(ab){2,5}" "aa" succesfully recognized by "(a){2}" ,size=2 position=0 match (subexpr) key = 1 position=0 size=2; match (subexpr) key = 1.1 position=0 size=1; match (subexpr) key = 1.2 position=1 size=1; "a" succesfully recognized by "(a){1}" ,size=1 
position=0 match (subexpr) key = 1 position=0 size=1; match (subexpr) key = 1.1 position=0 size=1; "aa" succesfully recognized by "((a){1,2}){2}" ,size=2 position=0 match (subexpr) key = 1 position=0 size=2; match (subexpr) key = 1.1 position=0 size=1; match (subexpr) key = 1.1.1 position=0 size=1; match (subexpr) key = 1.2 position=1 size=1; match (subexpr) key = 1.2.1 position=1 size=1; "abcabcabc" succesfully recognized by "((abc){1,3}){2,5}" ,size=9 position=0 match (subexpr) key = 1 position=0 size=9; match (subexpr) key = 1.1 position=0 size=6; match (subexpr) key = 1.1.1 position=0 size=3; match (subexpr) key = 1.1.2 position=3 size=3; match (subexpr) key = 1.2 position=6 size=3; match (subexpr) key = 1.2.1 position=6 size=3; "abcabcabcabcabcabcabcabc" succesfully recognized by "(((abc){2,5}){2,5}){2,5}" ,size=24 position=0 match (subexpr) key = 1 position=0 size=24; match (subexpr) key = 1.1 position=0 size=12; match (subexpr) key = 1.1.1 position=0 size=6; match (subexpr) key = 1.1.1.1 position=0 size=3; match (subexpr) key = 1.1.1.2 position=3 size=3; match (subexpr) key = 1.1.2 position=6 size=6; match (subexpr) key = 1.1.2.1 position=6 size=3; match (subexpr) key = 1.1.2.2 position=9 size=3; match (subexpr) key = 1.2 position=12 size=12; match (subexpr) key = 1.2.1 position=12 size=6; match (subexpr) key = 1.2.1.1 position=12 size=3; match (subexpr) key = 1.2.1.2 position=15 size=3; match (subexpr) key = 1.2.2 position=18 size=6; match (subexpr) key = 1.2.2.1 position=18 size=3; match (subexpr) key = 1.2.2.2 position=21 size=3; "abcabcabcabcabcabcabc" succesfully not recognized by "(((abc){2,5}){2,5}){2,5}" "aaaa" succesfully recognized by "(a)*" ,size=4 position=0 match (subexpr) key = 1 position=0 size=4; match (subexpr) key = 1.1 position=0 size=1; match (subexpr) key = 1.2 position=1 size=1; match (subexpr) key = 1.3 position=2 size=1; match (subexpr) key = 1.4 position=3 size=1; "abbbc" succesfully recognized by "bb*" ,size=3 position=1 match 
(subexpr) key = 1 position=1 size=3; "cdabababb" succesfully recognized by "[ab]*" ,size=7 position=2 match (subexpr) key = 1 position=2 size=7; "abcabcabc" succesfully recognized by "(abc){1,3}" ,size=9 position=0 match (subexpr) key = 1 position=0 size=9; match (subexpr) key = 1.1 position=0 size=3; match (subexpr) key = 1.2 position=3 size=3; match (subexpr) key = 1.3 position=6 size=3; "cabc" succesfully recognized by "(abc){1,3}" ,size=4 position=0 match (subexpr) key = 1 position=0 size=4; match (subexpr) key = 1.1 position=1 size=3; "cabcddcabd" succesfully recognized by "(abc)(cab)d{1,3}" ,size=10 position=0 match (subexpr) key = 1 position=0 size=10; match (subexpr) key = 1.1 position=1 size=3; match (subexpr) key = 1.2 position=6 size=3; "cabcddcabddddd" succesfully recognized by "(abc)(cab)d{1,3}" ,size=12 position=0 match (subexpr) key = 1 position=0 size=12; match (subexpr) key = 1.1 position=1 size=3; match (subexpr) key = 1.2 position=6 size=3; "ddddd" succesfully recognized by "d{1,3}d{2,5}" ,size=5 position=0 match (subexpr) key = 1 position=0 size=5; "ddddd" succesfully recognized by "d{1,3}" ,size=3 position=0 match (subexpr) key = 1 position=0 size=3; "abcabcabcabc" succesfully recognized by "((abc){1,2}){2,5}" ,size=12 position=0 match (subexpr) key = 1 position=0 size=12; match (subexpr) key = 1.1 position=0 size=6; match (subexpr) key = 1.1.1 position=0 size=3; match (subexpr) key = 1.1.2 position=3 size=3; match (subexpr) key = 1.2 position=6 size=6; match (subexpr) key = 1.2.1 position=6 size=3; match (subexpr) key = 1.2.2 position=9 size=3; "abcabcabcabc" succesfully recognized by "(abc){1,2}{2}" ,size=12 position=0 match (subexpr) key = 1 position=0 size=12; match (subexpr) key = 1.1 position=0 size=3; match (subexpr) key = 1.2 position=3 size=3; match (subexpr) key = 1.3 position=6 size=3; match (subexpr) key = 1.4 position=9 size=3; "caabcbdcb" succesfully recognized by "a(a(b){3})" ,size=8 position=1 match (subexpr) key = 1 position=1 
size=8; match (subexpr) key = 1.1 position=2 size=7; match (subexpr) key = 1.1.1 position=3 size=1; match (subexpr) key = 1.1.2 position=5 size=1; match (subexpr) key = 1.1.3 position=8 size=1; "ba" succesfully not recognized by "^a$" "a" succesfully recognized by "^a$" ,size=1 position=0 match (subexpr) key = 1 position=0 size=1; "bac" succesfully not recognized by "^a$" "abc" succesfully not recognized by "^a$" "aaa" succesfully recognized by "^aaa$" ,size=3 position=0 match (subexpr) key = 1 position=0 size=3; "aba" succesfully recognized by "^a(a)$" ,size=3 position=0 match (subexpr) key = 1 position=0 size=3; match (subexpr) key = 1.1 position=2 size=1; "abbbaa" succesfully recognized by "^a(a)a$" ,size=6 position=0 match (subexpr) key = 1 position=0 size=6; match (subexpr) key = 1.1 position=4 size=1; "abbaccb" succesfully recognized by "^a(a(b))$" ,size=7 position=0 match (subexpr) key = 1 position=0 size=7; match (subexpr) key = 1.1 position=3 size=4; match (subexpr) key = 1.1.1 position=6 size=1; "b" succesfully recognized by "a|b" ,size=1 position=0 match (subexpr) key = 1 position=0 size=1; "ab" succesfully recognized by "(a)(b)" ,size=2 position=0 match (subexpr) key = 1 position=0 size=2; match (subexpr) key = 1.1 position=0 size=1; match (subexpr) key = 1.2 position=1 size=1; "a" succesfully recognized by "(a)|(b)" ,size=1 position=0 match (subexpr) key = 1 position=0 size=1; match (subexpr) key = 1.1 position=0 size=1; "add" succesfully recognized by "(a)|(b)|c|dd" ,size=2 position=1 match (subexpr) key = 1 position=1 size=2; "ac" succesfully recognized by "(a)(b|c)" ,size=2 position=0 match (subexpr) key = 1 position=0 size=2; match (subexpr) key = 1.1 position=0 size=1; match (subexpr) key = 1.2 position=1 size=1; "ac" succesfully recognized by "(a|b)(c|d)" ,size=2 position=0 match (subexpr) key = 1 position=0 size=2; match (subexpr) key = 1.1 position=0 size=1; match (subexpr) key = 1.2 position=1 size=1; "aic" succesfully recognized by 
"(ai|b)(c|d)" ,size=3 position=0 match (subexpr) key = 1 position=0 size=3; match (subexpr) key = 1.1 position=0 size=2; match (subexpr) key = 1.2 position=2 size=1; "abbaba" succesfully recognized by "^a(a(^b)a$)" ,size=6 position=0 match (subexpr) key = 1 position=0 size=6; match (subexpr) key = 1.1 position=3 size=3; match (subexpr) key = 1.1.1 position=4 size=1; "weeknights" succesfully recognized by "(wee|week)(knights|night)" ,size=10 position=0 match (subexpr) key = 1 position=0 size=10; match (subexpr) key = 1.1 position=0 size=3; match (subexpr) key = 1.2 position=3 size=7; "aabcbdcb" succesfully recognized by "^a(a(b){3})$" ,size=8 position=0 match (subexpr) key = 1 position=0 size=8; match (subexpr) key = 1.1 position=1 size=7; match (subexpr) key = 1.1.1 position=2 size=1; match (subexpr) key = 1.1.2 position=4 size=1; match (subexpr) key = 1.1.3 position=7 size=1; "abab" succesfully recognized by "(ab){2,5}" ,size=4 position=0 match (subexpr) key = 1 position=0 size=4; match (subexpr) key = 1.1 position=0 size=2; match (subexpr) key = 1.2 position=2 size=2; "ab" succesfully not recognized by "(ab){2,5}" "aa" succesfully recognized by "(a){2}" ,size=2 position=0 match (subexpr) key = 1 position=0 size=2; match (subexpr) key = 1.1 position=0 size=1; match (subexpr) key = 1.2 position=1 size=1; "a" succesfully recognized by "(a){1}" ,size=1 position=0 match (subexpr) key = 1 position=0 size=1; match (subexpr) key = 1.1 position=0 size=1; "aa" succesfully recognized by "((a){1,2}){2}" ,size=2 position=0 match (subexpr) key = 1 position=0 size=2; match (subexpr) key = 1.1 position=0 size=1; match (subexpr) key = 1.1.1 position=0 size=1; match (subexpr) key = 1.2 position=1 size=1; match (subexpr) key = 1.2.1 position=1 size=1; "abcabcabc" succesfully recognized by "((abc){1,3}){2,5}" ,size=9 position=0 match (subexpr) key = 1 position=0 size=9; match (subexpr) key = 1.1 position=0 size=6; match (subexpr) key = 1.1.1 position=0 size=3; match (subexpr) key = 
1.1.2 position=3 size=3; match (subexpr) key = 1.2 position=6 size=3; match (subexpr) key = 1.2.1 position=6 size=3; "abcabcabcabcabcabcabcabc" succesfully recognized by "(((abc){2,5}){2,5}){2,5}" ,size=24 position=0 match (subexpr) key = 1 position=0 size=24; match (subexpr) key = 1.1 position=0 size=12; match (subexpr) key = 1.1.1 position=0 size=6; match (subexpr) key = 1.1.1.1 position=0 size=3; match (subexpr) key = 1.1.1.2 position=3 size=3; match (subexpr) key = 1.1.2 position=6 size=6; match (subexpr) key = 1.1.2.1 position=6 size=3; match (subexpr) key = 1.1.2.2 position=9 size=3; match (subexpr) key = 1.2 position=12 size=12; match (subexpr) key = 1.2.1 position=12 size=6; match (subexpr) key = 1.2.1.1 position=12 size=3; match (subexpr) key = 1.2.1.2 position=15 size=3; match (subexpr) key = 1.2.2 position=18 size=6; match (subexpr) key = 1.2.2.1 position=18 size=3; match (subexpr) key = 1.2.2.2 position=21 size=3; "abcabcabcabcabcabcabc" succesfully not recognized by "(((abc){2,5}){2,5}){2,5}" "aaaa" succesfully recognized by "(a)*" ,size=4 position=0 match (subexpr) key = 1 position=0 size=4; match (subexpr) key = 1.1 position=0 size=1; match (subexpr) key = 1.2 position=1 size=1; match (subexpr) key = 1.3 position=2 size=1; match (subexpr) key = 1.4 position=3 size=1; "abbbc" succesfully recognized by "bb*" ,size=3 position=1 match (subexpr) key = 1 position=1 size=3; "cdabababb" succesfully recognized by "[ab]*" ,size=7 position=2 match (subexpr) key = 1 position=2 size=7; "abcabcabc" succesfully recognized by "(abc){1,3}" ,size=9 position=0 match (subexpr) key = 1 position=0 size=9; match (subexpr) key = 1.1 position=0 size=3; match (subexpr) key = 1.2 position=3 size=3; match (subexpr) key = 1.3 position=6 size=3; "cabc" succesfully recognized by "(abc){1,3}" ,size=4 position=0 match (subexpr) key = 1 position=0 size=4; match (subexpr) key = 1.1 position=1 size=3; "cabcddcabd" succesfully recognized by "(abc)(cab)d{1,3}" ,size=10 position=0 match 
(subexpr) key = 1 position=0 size=10; match (subexpr) key = 1.1 position=1 size=3; match (subexpr) key = 1.2 position=6 size=3; "cabcddcabddddd" succesfully recognized by "(abc)(cab)d{1,3}" ,size=12 position=0 match (subexpr) key = 1 position=0 size=12; match (subexpr) key = 1.1 position=1 size=3; match (subexpr) key = 1.2 position=6 size=3; "ddddd" succesfully recognized by "d{1,3}d{2,5}" ,size=5 position=0 match (subexpr) key = 1 position=0 size=5; "ddddd" succesfully recognized by "d{1,3}" ,size=3 position=0 match (subexpr) key = 1 position=0 size=3; "abcabcabcabc" succesfully recognized by "((abc){1,2}){2,5}" ,size=12 position=0 match (subexpr) key = 1 position=0 size=12; match (subexpr) key = 1.1 position=0 size=6; match (subexpr) key = 1.1.1 position=0 size=3; match (subexpr) key = 1.1.2 position=3 size=3; match (subexpr) key = 1.2 position=6 size=6; match (subexpr) key = 1.2.1 position=6 size=3; match (subexpr) key = 1.2.2 position=9 size=3; "abcabcabcabc" succesfully recognized by "(abc){1,2}{2}" ,size=12 position=0 match (subexpr) key = 1 position=0 size=12; match (subexpr) key = 1.1 position=0 size=3; match (subexpr) key = 1.2 position=3 size=3; match (subexpr) key = 1.3 position=6 size=3; match (subexpr) key = 1.4 position=9 size=3; "caabcbdcb" succesfully recognized by "a(a(b){3})" ,size=8 position=1 match (subexpr) key = 1 position=1 size=8; match (subexpr) key = 1.1 position=2 size=7; match (subexpr) key = 1.1.1 position=3 size=1; match (subexpr) key = 1.1.2 position=5 size=1; match (subexpr) key = 1.1.3 position=8 size=1; "ba" succesfully not recognized by "^a$" "a" succesfully recognized by "^a$" ,size=1 position=0 match (subexpr) key = 1 position=0 size=1; "bac" succesfully not recognized by "^a$" "abc" succesfully not recognized by "^a$" "aaa" succesfully recognized by "^aaa$" ,size=3 position=0 match (subexpr) key = 1 position=0 size=3; "aba" succesfully recognized by "^a(a)$" ,size=3 position=0 match (subexpr) key = 1 position=0 size=3; match 
(subexpr) key = 1.1 position=2 size=1; "abbbaa" succesfully recognized by "^a(a)a$" ,size=6 position=0 match (subexpr) key = 1 position=0 size=6; match (subexpr) key = 1.1 position=4 size=1; "abbaccb" succesfully recognized by "^a(a(b))$" ,size=7 position=0 match (subexpr) key = 1 position=0 size=7; match (subexpr) key = 1.1 position=3 size=4; match (subexpr) key = 1.1.1 position=6 size=1; "b" succesfully recognized by "a|b" ,size=1 position=0 match (subexpr) key = 1 position=0 size=1; "ab" succesfully recognized by "(a)(b)" ,size=2 position=0 match (subexpr) key = 1 position=0 size=2; match (subexpr) key = 1.1 position=0 size=1; match (subexpr) key = 1.2 position=1 size=1; "a" succesfully recognized by "(a)|(b)" ,size=1 position=0 match (subexpr) key = 1 position=0 size=1; match (subexpr) key = 1.1 position=0 size=1; "ca" succesfully recognized by "a|b" ,size=1 position=1 match (subexpr) key = 1 position=1 size=1; "ca" succesfully recognized by "(a)|(b)" ,size=2 position=0 match (subexpr) key = 1 position=0 size=2; match (subexpr) key = 1.1 position=1 size=1; "ca" succesfully recognized by "(^a)|(b)" ,size=1 position=1 match (subexpr) key = 1 position=1 size=1; match (subexpr) key = 1.1 position=1 size=1; "ca" succesfully recognized by "(a)" ,size=2 position=0 match (subexpr) key = 1 position=0 size=2; match (subexpr) key = 1.1 position=1 size=1; "ca" succesfully recognized by "(^a)" ,size=1 position=1 match (subexpr) key = 1 position=1 size=1; match (subexpr) key = 1.1 position=1 size=1; "add" succesfully recognized by "(a)|(b)|c|dd" ,size=2 position=1 match (subexpr) key = 1 position=1 size=2; "ac" succesfully recognized by "(a)(b|c)" ,size=2 position=0 match (subexpr) key = 1 position=0 size=2; match (subexpr) key = 1.1 position=0 size=1; match (subexpr) key = 1.2 position=1 size=1; "ac" succesfully recognized by "(a|b)(c|d)" ,size=2 position=0 match (subexpr) key = 1 position=0 size=2; match (subexpr) key = 1.1 position=0 size=1; match (subexpr) key = 1.2 
position=1 size=1; "aic" succesfully recognized by "(ai|b)(c|d)" ,size=3 position=0 match (subexpr) key = 1 position=0 size=3; match (subexpr) key = 1.1 position=0 size=2; match (subexpr) key = 1.2 position=2 size=1; "abbaba" succesfully recognized by "^a(a(^b)a$)" ,size=6 position=0 match (subexpr) key = 1 position=0 size=6; match (subexpr) key = 1.1 position=3 size=3; match (subexpr) key = 1.1.1 position=4 size=1; "weeknights" succesfully recognized by "(wee|week)(knights|night)" ,size=10 position=0 match (subexpr) key = 1 position=0 size=10; match (subexpr) key = 1.1 position=0 size=3; match (subexpr) key = 1.2 position=3 size=7; "helloweeknights" succesfully recognized by "o(wee|week)(knights|night)" ,size=11 position=4 match (subexpr) key = 1 position=4 size=11; match (subexpr) key = 1.1 position=5 size=3; match (subexpr) key = 1.2 position=8 size=7; "helloweeknights" succesfully recognized by "(wee|week)(knights|night)" ,size=15 position=0 match (subexpr) key = 1 position=0 size=15; match (subexpr) key = 1.1 position=5 size=3; match (subexpr) key = 1.2 position=8 size=7; "helloweeknights" succesfully recognized by "(^wee|^week)(knights|night)" ,size=10 position=5 match (subexpr) key = 1 position=5 size=10; match (subexpr) key = 1.1 position=5 size=3; match (subexpr) key = 1.2 position=8 size=7; "aabcbdcb" succesfully recognized by "^a(a(b){3})$" ,size=8 position=0 match (subexpr) key = 1 position=0 size=8; match (subexpr) key = 1.1 position=1 size=7; match (subexpr) key = 1.1.1 position=2 size=1; match (subexpr) key = 1.1.2 position=4 size=1; match (subexpr) key = 1.1.3 position=7 size=1;