Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members

FXRex.h

Go to the documentation of this file.
00001 /********************************************************************************
00002 *                                                                               *
00003 *                 R e g u l a r   E x p r e s s i o n   C l a s s               *
00004 *                                                                               *
00005 *********************************************************************************
00006 * Copyright (C) 1999,2006 by Jeroen van der Zijp.   All Rights Reserved.        *
00007 *********************************************************************************
00008 * This library is free software; you can redistribute it and/or                 *
00009 * modify it under the terms of the GNU Lesser General Public                    *
00010 * License as published by the Free Software Foundation; either                  *
00011 * version 2.1 of the License, or (at your option) any later version.            *
00012 *                                                                               *
00013 * This library is distributed in the hope that it will be useful,               *
00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of                *
00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU             *
00016 * Lesser General Public License for more details.                               *
00017 *                                                                               *
00018 * You should have received a copy of the GNU Lesser General Public              *
00019 * License along with this library; if not, write to the Free Software           *
00020 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.    *
00021 *********************************************************************************
00022 * $Id: FXRex.h,v 1.53 2006/01/22 17:58:09 fox Exp $                             *
00023 ********************************************************************************/
00024 #ifndef FXREX_H
00025 #define FXREX_H
00026 
00027 
00028 namespace FX {
00029 
00030 
00031 /// Regular expression error codes
00032 enum FXRexError {
00033   REGERR_OK,
00034   REGERR_EMPTY,             /// Empty pattern
00035   REGERR_PAREN,             /// Unmatched parenthesis
00036   REGERR_BRACK,             /// Unmatched bracket
00037   REGERR_BRACE,             /// Unmatched brace
00038   REGERR_RANGE,             /// Bad character range
00039   REGERR_ESC,               /// Bad escape sequence
00040   REGERR_COUNT,             /// Bad counted repeat
00041   REGERR_NOATOM,            /// No atom preceding repetition
00042   REGERR_REPEAT,            /// Repeat following repeat
00043   REGERR_BACKREF,           /// Bad backward reference
00044   REGERR_CLASS,             /// Bad character class
00045   REGERR_COMPLEX,           /// Expression too complex
00046   REGERR_MEMORY,            /// Out of memory
00047   REGERR_TOKEN              /// Illegal token
00048   };
00049 
00050 
00051 /// Regular expression parse flags
00052 enum {
00053   REX_NORMAL    = 0,        /// Normal mode
00054   REX_CAPTURE   = 1,        /// Perform capturing parentheses
00055   REX_ICASE     = 2,        /// Case independent matching
00056   REX_NEWLINE   = 4,        /// Match-any operators match newline too
00057   REX_VERBATIM  = 8,        /// Disable interpretation of magic characters
00058   REX_SYNTAX    = 16        /// Perform syntax check only
00059   };
00060 
00061 
00062 /// Regular expression match flags
00063 enum {
00064   REX_FORWARD   = 0,        /// Match scanning forward from offset
00065   REX_BACKWARD  = 32,       /// Match scanning backward from offset
00066   REX_NOT_BOL   = 64,       /// Start of string is NOT begin of line
00067   REX_NOT_EOL   = 128,      /// End of string is NOT end of line
00068   REX_NOT_EMPTY = 256       /// Do not match empty
00069   };
00070 
00071 
00072 /**
00073 * FXRex is a regular expression class implementing a NFA matcher.
00074 * It supports capturing parentheses, non-capturing parentheses,
00075 * positive or negative lookahead, backreferences, case-insensitive
00076 * matching, counted repetitions, lazy or greedy matches, and
00077 * PERL-like matching operators.
00078 * The subject string may be scanned forwards or backwards, and may
00079 * contain any of 256 possible character values.
00080 *
00081 * When parsing a regular expression pattern, the mode parameter is
00082 * the bitwise OR of a set of flags and affects the match algorithm.
00083 * Passing the flag REX_CAPTURE enables capturing parentheses
00084 * and back references. The flag REX_ICASE enables case-insensitive
00085 * matching. When the flag REX_NEWLINE is passed, newlines are treated
00086 * like normal characters; otherwise, newline is NOT matched
00087 * except when explicitly part of a character class. The flag
00088 * REX_VERBATIM disables all special character interpretation.
00089 *
00090 * When matching a compiled pattern, the mode parameter is the
00091 * bitwise OR of a set of flags that affects how the match is
00092 * performed.  Passing the flag REX_BACKWARD causes the match
00093 * to proceed backwards through the subject string.  Passing the
00094 * flags REX_NOT_BOL and/or REX_NOT_EOL causes the begin and
00095 * end of the subject string NOT to be considered a line start
00096 * or line end. The flag REX_NOT_EMPTY causes a match to fail if
00097 * the empty string was matched.
00098 */
00099 class FXAPI FXRex {
00100 private:
00101   FXint *code;
00102 private:
00103   static const FXchar *const errors[];
00104   static const FXint fallback[];
00105 public:
00106 
00107   /// Construct empty regular expression object
00108   FXRex():code((FXint*)fallback){}
00109 
00110   /// Copy regular expression object
00111   FXRex(const FXRex& orig);
00112 
00113   /// Compile expression from pattern; if error is not NULL, error code is returned
00114   FXRex(const FXchar* pattern,FXint mode=REX_NORMAL,FXRexError* error=NULL);
00115 
00116   /// Compile expression from pattern; if error is not NULL, error code is returned
00117   FXRex(const FXString& pattern,FXint mode=REX_NORMAL,FXRexError* error=NULL);
00118 
00119   /// Assign another regular expression to this one
00120   FXRex& operator=(const FXRex& orig);
00121 
00122   /**
00123   * See if regular expression is empty; the regular expression
00124   * will be empty when it is unable to parse a pattern due to
00125   * a syntax error.
00126   */
00127   bool empty() const { return (code==fallback); }
00128 
00129   /// Parse pattern, return error code if syntax error is found
00130   FXRexError parse(const FXchar* pattern,FXint mode=REX_NORMAL);
00131 
00132   /// Parse pattern, return error code if syntax error is found
00133   FXRexError parse(const FXString& pattern,FXint mode=REX_NORMAL);
00134 
00135   /**
00136   * Match a subject string of length len, returning TRUE if a match is found
00137   * and FALSE otherwise.  The entire pattern is captured in beg[0] and end[0],
00138   * where beg[0] refers to the position of the first matched character and end[0]
00139   * refers to the position after the last matched character.
00140   * Sub expressions from capturing parenthesis i are returned in beg[i] and end[i].
00141   */
00142   bool match(const FXchar* string,FXint len,FXint* beg=NULL,FXint* end=NULL,FXint mode=REX_FORWARD,FXint npar=1,FXint fm=0,FXint to=2147483647) const;
00143 
00144   /// Search for match in a string
00145   bool match(const FXString& string,FXint* beg=NULL,FXint* end=NULL,FXint mode=REX_FORWARD,FXint npar=1,FXint fm=0,FXint to=2147483647) const;
00146 
00147   /**
00148   * After performing a regular expression match with capturing parentheses,
00149   * a substitution string is build from the replace string, where where "&"
00150   * is replaced by the entire matched pattern, and "\1" through "\9" are
00151   * replaced by captured expressions.  The original source string and its
00152   * length, and the match arrays beg and end must be passed.
00153   */
00154   static FXString substitute(const FXchar* string,FXint len,FXint* beg,FXint* end,const FXString& replace,FXint npar=1);
00155 
00156   /// Return substitution string
00157   static FXString substitute(const FXString& string,FXint* beg,FXint* end,const FXString& replace,FXint npar=1);
00158 
00159   /// Returns error code for given error
00160   static const FXchar* getError(FXRexError err){ return errors[err]; }
00161 
00162   /// Comparison operators
00163   bool operator==(const FXRex& rex) const;
00164   bool operator!=(const FXRex& rex) const;
00165 
00166   /// Saving and loading
00167   friend FXAPI FXStream& operator<<(FXStream& store,const FXRex& s);
00168   friend FXAPI FXStream& operator>>(FXStream& store,FXRex& s);
00169 
00170   /// Delete
00171  ~FXRex();
00172   };
00173 
00174 
00175 extern FXAPI FXStream& operator<<(FXStream& store,const FXRex& s);
00176 extern FXAPI FXStream& operator>>(FXStream& store,FXRex& s);
00177 
00178 }
00179 
00180 #endif

Copyright © 1997-2005 Jeroen van der Zijp