Main Page Class Hierarchy Alphabetical List Compound List File List Compound Members
|
00001 /******************************************************************************** 00002 * * 00003 * R e g u l a r E x p r e s s i o n C l a s s * 00004 * * 00005 ********************************************************************************* 00006 * Copyright (C) 1999,2006 by Jeroen van der Zijp. All Rights Reserved. * 00007 ********************************************************************************* 00008 * This library is free software; you can redistribute it and/or * 00009 * modify it under the terms of the GNU Lesser General Public * 00010 * License as published by the Free Software Foundation; either * 00011 * version 2.1 of the License, or (at your option) any later version. * 00012 * * 00013 * This library is distributed in the hope that it will be useful, * 00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * 00016 * Lesser General Public License for more details. * 00017 * * 00018 * You should have received a copy of the GNU Lesser General Public * 00019 * License along with this library; if not, write to the Free Software * 00020 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. * 00021 ********************************************************************************* 00022 * $Id: FXRex.h,v 1.53 2006/01/22 17:58:09 fox Exp $ * 00023 ********************************************************************************/ 00024 #ifndef FXREX_H 00025 #define FXREX_H 00026 00027 00028 namespace FX { 00029 00030 00031 /// Regular expression error codes 00032 enum FXRexError { 00033 REGERR_OK, 00034 REGERR_EMPTY, /// Empty pattern 00035 REGERR_PAREN, /// Unmatched parenthesis 00036 REGERR_BRACK, /// Unmatched bracket 00037 REGERR_BRACE, /// Unmatched brace 00038 REGERR_RANGE, /// Bad character range 00039 REGERR_ESC, /// Bad escape sequence 00040 REGERR_COUNT, /// Bad counted repeat 00041 REGERR_NOATOM, /// No atom preceding repetition 00042 REGERR_REPEAT, /// Repeat following repeat 00043 REGERR_BACKREF, /// Bad backward reference 00044 REGERR_CLASS, /// Bad character class 00045 REGERR_COMPLEX, /// Expression too complex 00046 REGERR_MEMORY, /// Out of memory 00047 REGERR_TOKEN /// Illegal token 00048 }; 00049 00050 00051 /// Regular expression parse flags 00052 enum { 00053 REX_NORMAL = 0, /// Normal mode 00054 REX_CAPTURE = 1, /// Perform capturing parentheses 00055 REX_ICASE = 2, /// Case independent matching 00056 REX_NEWLINE = 4, /// Match-any operators match newline too 00057 REX_VERBATIM = 8, /// Disable interpretation of magic characters 00058 REX_SYNTAX = 16 /// Perform syntax check only 00059 }; 00060 00061 00062 /// Regular expression match flags 00063 enum { 00064 REX_FORWARD = 0, /// Match scanning forward from offset 00065 REX_BACKWARD = 32, /// Match scanning backward from offset 00066 REX_NOT_BOL = 64, /// Start of string is NOT begin of line 00067 REX_NOT_EOL = 128, /// End of string is NOT end of line 00068 REX_NOT_EMPTY = 256 /// Do not match empty 00069 }; 00070 00071 00072 /** 00073 * FXRex is a regular expression class implementing a NFA matcher. 00074 * It supports capturing parentheses, non-capturing parentheses, 00075 * positive or negative lookahead, backreferences, case-insensitive 00076 * matching, counted repetitions, lazy or greedy matches, and 00077 * PERL-like matching operators. 00078 * The subject string may be scanned forwards or backwards, and may 00079 * contain any of 256 possible character values. 00080 * 00081 * When parsing a regular expression pattern, the mode parameter is 00082 * the bitwise OR of a set of flags and affects the match algorithm. 00083 * Passing the flag REX_CAPTURE enables capturing parentheses 00084 * and back references. The flag REX_ICASE enables case-insensitive 00085 * matching. When the flag REX_NEWLINE is passed, newlines are treated 00086 * like normal characters; otherwise, newline is NOT matched 00087 * except when explicitly part of a character class. The flag 00088 * REX_VERBATIM disables all special character interpretation. 00089 * 00090 * When matching a compiled pattern, the mode parameter is the 00091 * bitwise OR of a set of flags that affects how the match is 00092 * performed. Passing the flag REX_BACKWARD causes the match 00093 * to proceed backwards through the subject string. Passing the 00094 * flags REX_NOT_BOL and/or REX_NOT_EOL causes the begin and 00095 * end of the subject string NOT to be considered a line start 00096 * or line end. The flag REX_NOT_EMPTY causes a match to fail if 00097 * the empty string was matched. 00098 */ 00099 class FXAPI FXRex { 00100 private: 00101 FXint *code; 00102 private: 00103 static const FXchar *const errors[]; 00104 static const FXint fallback[]; 00105 public: 00106 00107 /// Construct empty regular expression object 00108 FXRex():code((FXint*)fallback){} 00109 00110 /// Copy regular expression object 00111 FXRex(const FXRex& orig); 00112 00113 /// Compile expression from pattern; if error is not NULL, error code is returned 00114 FXRex(const FXchar* pattern,FXint mode=REX_NORMAL,FXRexError* error=NULL); 00115 00116 /// Compile expression from pattern; if error is not NULL, error code is returned 00117 FXRex(const FXString& pattern,FXint mode=REX_NORMAL,FXRexError* error=NULL); 00118 00119 /// Assign another regular expression to this one 00120 FXRex& operator=(const FXRex& orig); 00121 00122 /** 00123 * See if regular expression is empty; the regular expression 00124 * will be empty when it is unable to parse a pattern due to 00125 * a syntax error. 00126 */ 00127 bool empty() const { return (code==fallback); } 00128 00129 /// Parse pattern, return error code if syntax error is found 00130 FXRexError parse(const FXchar* pattern,FXint mode=REX_NORMAL); 00131 00132 /// Parse pattern, return error code if syntax error is found 00133 FXRexError parse(const FXString& pattern,FXint mode=REX_NORMAL); 00134 00135 /** 00136 * Match a subject string of length len, returning TRUE if a match is found 00137 * and FALSE otherwise. The entire pattern is captured in beg[0] and end[0], 00138 * where beg[0] refers to the position of the first matched character and end[0] 00139 * refers to the position after the last matched character. 00140 * Sub expressions from capturing parenthesis i are returned in beg[i] and end[i]. 00141 */ 00142 bool match(const FXchar* string,FXint len,FXint* beg=NULL,FXint* end=NULL,FXint mode=REX_FORWARD,FXint npar=1,FXint fm=0,FXint to=2147483647) const; 00143 00144 /// Search for match in a string 00145 bool match(const FXString& string,FXint* beg=NULL,FXint* end=NULL,FXint mode=REX_FORWARD,FXint npar=1,FXint fm=0,FXint to=2147483647) const; 00146 00147 /** 00148 * After performing a regular expression match with capturing parentheses, 00149 * a substitution string is build from the replace string, where where "&" 00150 * is replaced by the entire matched pattern, and "\1" through "\9" are 00151 * replaced by captured expressions. The original source string and its 00152 * length, and the match arrays beg and end must be passed. 00153 */ 00154 static FXString substitute(const FXchar* string,FXint len,FXint* beg,FXint* end,const FXString& replace,FXint npar=1); 00155 00156 /// Return substitution string 00157 static FXString substitute(const FXString& string,FXint* beg,FXint* end,const FXString& replace,FXint npar=1); 00158 00159 /// Returns error code for given error 00160 static const FXchar* getError(FXRexError err){ return errors[err]; } 00161 00162 /// Comparison operators 00163 bool operator==(const FXRex& rex) const; 00164 bool operator!=(const FXRex& rex) const; 00165 00166 /// Saving and loading 00167 friend FXAPI FXStream& operator<<(FXStream& store,const FXRex& s); 00168 friend FXAPI FXStream& operator>>(FXStream& store,FXRex& s); 00169 00170 /// Delete 00171 ~FXRex(); 00172 }; 00173 00174 00175 extern FXAPI FXStream& operator<<(FXStream& store,const FXRex& s); 00176 extern FXAPI FXStream& operator>>(FXStream& store,FXRex& s); 00177 00178 } 00179 00180 #endif
Copyright © 1997-2005 Jeroen van der Zijp |