Main Page | Class List | Directories | File List | Class Members

regexp_parser.h

00001 /***************************************************************************
00002  *   Copyright (C) 2004-2006 by Radko Mihal                                *
00003  *   rmihal@pobox.sk                                                       *
00004  *                                                                         *
00005  *   This program is free software; you can redistribute it and/or modify  *
00006  *   it under the terms of the GNU General Public License as published by  *
00007  *   the Free Software Foundation; either version 2 of the License, or     *
00008  *   (at your option) any later version.                                   *
00009  *                                                                         *
00010  *   This program is distributed in the hope that it will be useful,       *
00011  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00012  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
00013  *   GNU General Public License for more details.                          *
00014  *                                                                         *
00015  *   You should have received a copy of the GNU General Public License     *
00016  *   along with this program; if not, write to the                         *
00017  *   Free Software Foundation, Inc.,                                       *
00018  *   51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.              *
00019  ***************************************************************************/
00020  
00021 // The following comment block is used as the main page of the API
00022 // documentation built by Doxygen.
00023 
00549 #ifndef GRAMMAR_TO_PARSERREGEXP_PARSER_H
00550 #define GRAMMAR_TO_PARSERREGEXP_PARSER_H
00551 
00552 #include "extended_regular_expression.h"
00553 
00554 namespace base_parsers {
00555 
/// Parser name that basic_regexp_parser passes to its basic_object_parser base.
///
/// The pointer itself is const (not only the characters it points to) so that
/// this header-defined constant has internal linkage; the header can then be
/// included from several translation units without multiple-definition
/// link errors.
const char* const REGEXP_PARSER = "regexpParser";
00557 
00567 template< typename E >
00568 class basic_regexp_parser : public basic_object_parser< E, std::basic_string<E> >
00569 {
00570 public:
00571         basic_regexp_parser( std::basic_string<E> regular_expression ) : 
00572                 basic_object_parser< E, std::basic_string<E> >(REGEXP_PARSER),
00573                 m_regular_expression( regular_expression ) {};
00574 
00575         ~basic_regexp_parser() {};
00576         
00578         bool is_pattern_valid()
00579         {
00580                 if( !this->m_regexp.is_parsed() )
00581                 {
00582                         this->m_regexp.parse( this->m_regular_expression.c_str(), 
00583                                                         this->m_regular_expression.length() );
00584                 }
00585                 return this->m_regexp.is_parsed();
00586         }
00587                 
00589 
00595         virtual unsigned long parse( const E *buf, const unsigned long buf_length )
00596         {
00597                 this->prepare_for_parsing();
00598                 
00599                 if( this->is_valid() )
00600                 {
00601                          this->m_parsed_size = this->m_regexp->recognize( buf, buf_length );
00602                 }
00603                 else
00604                 {
00605                         return 0;
00606                 }
00607                 if( this->m_regexp->is_recognized() )
00608                 {
00609                         this->m_parsed_object = 
00610                                 new std::basic_string<E>( buf, this->m_parsed_size );
00611                         this->m_is_parsed = true;
00612                 }
00613                 return this->m_parsed_size;
00614         }
00615         
00617 
00622         virtual std::basic_istream<E>& parse( std::basic_istream<E>& is )
00623         {
00624                 this->prepare_for_parsing();
00625 
00626                 // TODO - implement to pass istream to regexp - now limited to input in one line
00627                 std::basic_stringbuf<E> buf;
00628                 std::streampos pos = is.tellg();
00629                 if( is.good() )
00630                 {
00631                         is.get( buf );
00632                         unsigned long length = buf.str().length();
00633                         if( this->is_valid() )
00634                         {
00635                                  this->m_regexp->recognize( buf.str().c_str(), length );
00636                         }
00637                         else
00638                         {
00639                                 return is;
00640                         }
00641                         if( this->m_regexp->is_recognized() )
00642                         {
00643                                 this->m_parsed_size = this->m_regexp->recognized_position() + this->m_regexp->recognized_size();
00644                                 this->m_parsed_object = 
00645                                         new std::basic_string<E>( buf.str().c_str(), this->m_parsed_size );
00646                                 this->m_is_parsed = true;
00647                                 pos += this->m_parsed_size;
00648                                 is.clear();
00649                                 is.seekg(pos);
00650                         }
00651                         else
00652                         {
00653                                 is.clear();
00654                                 is.seekg(pos);
00655                         }
00656                 }
00657                 return is;
00658         }
00659         
00661         unsigned long recognized_position()
00662         {
00663                 if( is_pattern_valid() && this->m_regexp->is_recognized() )
00664                 {
00665                         return this->m_regexp->recognized_position();
00666                 }
00667                 else
00668                 {
00669                         return 0;
00670                 }
00671         }
00672 
00674         unsigned long recognized_size()
00675         {
00676                 if( is_pattern_valid() && this->m_regexp->is_recognized() )
00677                 {
00678                         return this->m_regexp->recognized_size();
00679                 }
00680                 else
00681                 {
00682                         return 0;
00683                 }
00684         }
00685         
00687 
00710         void assign_matches( extended_regular_expression::matches& m )
00711         {
00712                 if( is_pattern_valid() && this->m_regexp->is_recognized() )
00713                 {
00714                         this->m_regexp->assign_matches(m);
00715                 }
00716         }
00717         
00719         virtual std::basic_ostream<E>& format( std::basic_ostream<E> &os )
00720         {
00721                 os << this->get_valid();
00722                 return os;
00723         }
00724         
00726         operator std::basic_string<E>()
00727         {
00728         return this->get_valid();
00729         }       
00730         
00731 protected:
00732         std::basic_string<E>                                                            m_regular_expression;
00733         basic_non_terminal< E, extended_regular_expression::ere<E> >    m_regexp;
00734 
00735 protected:
00736     void prepare_for_parsing()
00737         {
00738                 basic_object_parser< E,std::basic_string<E> >::prepare_for_parsing();
00739                 if( !this->m_regexp.is_parsed() )
00740                 {
00741                         this->m_regexp.parse( this->m_regular_expression.c_str(), 
00742                                                         this->m_regular_expression.length() );
00743                 }
00744         }
00745         
00746 };
00747 
00748 typedef basic_regexp_parser<char> regexp_parser;      ///< basic_regexp_parser instantiated for char
00749 typedef basic_regexp_parser<wchar_t> wregexp_parser;  ///< basic_regexp_parser instantiated for wchar_t
00750 
00751 };
00752 
00753 #endif

Generated on Tue Nov 14 21:19:55 2006 for regexp_parser.kdevelop by  doxygen 1.4.4