![]() |
Main Page Class Hierarchy Alphabetical List Compound List File List Compound Members
![]() |
00001 /******************************************************************************** 00002 * * 00003 * R e g u l a r E x p r e s s i o n C l a s s * 00004 * * 00005 ********************************************************************************* 00006 * Copyright (C) 1999,2002 by Jeroen van der Zijp. All Rights Reserved. * 00007 ********************************************************************************* 00008 * This library is free software; you can redistribute it and/or * 00009 * modify it under the terms of the GNU Lesser General Public * 00010 * License as published by the Free Software Foundation; either * 00011 * version 2.1 of the License, or (at your option) any later version. * 00012 * * 00013 * This library is distributed in the hope that it will be useful, * 00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * 00016 * Lesser General Public License for more details. * 00017 * * 00018 * You should have received a copy of the GNU Lesser General Public * 00019 * License along with this library; if not, write to the Free Software * 00020 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. * 00021 ********************************************************************************* 00022 * $Id: FXRex.h,v 1.44 2002/01/18 22:42:54 jeroen Exp $ * 00023 ********************************************************************************/ 00024 #ifndef FXREX_H 00025 #define FXREX_H 00026 00027 00028 00029 00030 /// Regular expression error codes 00031 enum FXRexError { 00032 REGERR_OK, 00033 REGERR_EMPTY, /// Empty pattern 00034 REGERR_PAREN, /// Unmatched parenthesis 00035 REGERR_BRACK, /// Unmatched bracket 00036 REGERR_BRACE, /// Unmatched brace 00037 REGERR_RANGE, /// Bad character range 00038 REGERR_ESC, /// Bad escape sequence 00039 REGERR_COUNT, /// Bad counted repeat 00040 REGERR_NOATOM, /// No atom preceding repetition 00041 REGERR_REPEAT, /// Repeat following repeat 00042 REGERR_BACKREF, /// Bad backward reference 00043 REGERR_CLASS, /// Bad character class 00044 REGERR_COMPLEX, /// Expression too complex 00045 REGERR_MEMORY, /// Out of memory 00046 REGERR_TOKEN /// Illegal token 00047 }; 00048 00049 00050 /// Regular expression parse flags 00051 enum { 00052 REX_NORMAL = 0, /// Normal mode 00053 REX_CAPTURE = 1, /// Perform capturing parentheses 00054 REX_ICASE = 2, /// Case independent matching 00055 REX_NEWLINE = 4, /// Match-any operators match newline too 00056 REX_VERBATIM = 8, /// Disable interpretation of magic characters 00057 REX_SYNTAX = 16 /// Perform syntax check only 00058 }; 00059 00060 00061 /// Regular expression match flags 00062 enum { 00063 REX_FORWARD = 0, /// Match scanning forward from offset 00064 REX_BACKWARD = 32, /// Match scanning backward from offset 00065 REX_NOT_BOL = 64, /// Start of string is NOT begin of line 00066 REX_NOT_EOL = 128, /// End of string is NOT end of line 00067 REX_NOT_EMPTY = 256 /// Do not match empty 00068 }; 00069 00070 00071 /** 00072 * FXRex is a regular expression class implementing a NFA matcher. 00073 * It supports capturing parentheses, non-capturing parentheses, 00074 * positive or negative lookahead, backreferences, case-insensitive 00075 * matching, counted repetitions, lazy or greedy matches, and 00076 * PERL-like matching operators. 00077 * The subject string may be scanned forwards or backwards, and may 00078 * contain any of 256 possible character values. 00079 * 00080 * When parsing a regular expression pattern, the mode parameter is 00081 * the bitwise OR of a set of flags and affects the match algorithm. 00082 * Passing the flag REX_CAPTURE enables capturing parentheses 00083 * and back references. The flag REX_ICASE enables case-insensitive 00084 * matching. When the flag REX_NEWLINE is passed, newlines are treated 00085 * like normal characters; otherwise, newline is NOT matched 00086 * except when explicitly part of a character class. The flag 00087 * REX_VERBATIM disables all special character interpretation. 00088 * 00089 * When matching a compiled pattern, the mode parameter is the 00090 * bitwise OR of a set of flags that affects how the match is 00091 * performed. Passing the flag REX_BACKWARD causes the match 00092 * to proceed backwards through the subject string. Passing the 00093 * flags REX_NOT_BOL and/or REX_NOT_EOL causes the begin and 00094 * end of the subject string NOT to be considered a line start 00095 * or line end. The flag REX_NOT_EMPTY causes a match to fail if 00096 * the empty string was matched. 00097 */ 00098 class FXAPI FXRex { 00099 private: 00100 FXint *code; 00101 private: 00102 static const FXchar *const errors[]; 00103 static const FXint fallback[]; 00104 public: 00105 00106 /// Construct empty regular expression object 00107 FXRex():code((FXint*)fallback){} 00108 00109 /// Copy regular expression object 00110 FXRex(const FXRex& orig); 00111 00112 /// Compile expression from pattern; if error is not NULL, error code is returned 00113 FXRex(const FXchar* pattern,FXint mode=REX_NORMAL,FXRexError* error=NULL); 00114 00115 /// Compile expression from pattern; if error is not NULL, error code is returned 00116 FXRex(const FXString& pattern,FXint mode=REX_NORMAL,FXRexError* error=NULL); 00117 00118 /// Assign another regular expression to this one 00119 FXRex& operator=(const FXRex& orig); 00120 00121 /** 00122 * See if regular expression is empty; the regular expression 00123 * will be empty when it is unable to parse a pattern due to 00124 * a syntax error. 00125 */ 00126 FXbool empty() const { return (code==fallback); } 00127 00128 /// Parse pattern, return error code if syntax error is found 00129 FXRexError parse(const FXchar* pattern,FXint mode=REX_NORMAL); 00130 00131 /// Parse pattern, return error code if syntax error is found 00132 FXRexError parse(const FXString& pattern,FXint mode=REX_NORMAL); 00133 00134 /** 00135 * Match a subject string of length len, returning TRUE if a match is found 00136 * and FALSE otherwise. The entire pattern is captured in beg[0] and end[0], 00137 * where beg[0] refers to the position of the first matched character and end[0] 00138 * refers to the position after the last matched character. 00139 * Sub expressions from capturing parenthesis i are returned in beg[i] and end[i]. 00140 */ 00141 FXbool match(const FXchar* string,FXint len,FXint* beg=NULL,FXint* end=NULL,FXint mode=REX_FORWARD,FXint npar=1,FXint fm=0,FXint to=2147483647) const; 00142 00143 /// Search for match in a string 00144 FXbool match(const FXString& string,FXint* beg=NULL,FXint* end=NULL,FXint mode=REX_FORWARD,FXint npar=1,FXint fm=0,FXint to=2147483647) const; 00145 00146 /** 00147 * After performing a regular expression match with capturing parentheses, 00148 * a substitution string is build from the replace string, where where "&" 00149 * is replaced by the entire matched pattern, and "\1" through "\9" are 00150 * replaced by captured expressions. The original source string and its 00151 * length, and the match arrays beg and end must be passed. 00152 */ 00153 static FXString substitute(const FXchar* string,FXint len,FXint* beg,FXint* end,const FXString& replace,FXint npar=1); 00154 00155 /// Return substitution string 00156 static FXString substitute(const FXString& string,FXint* beg,FXint* end,const FXString& replace,FXint npar=1); 00157 00158 /// Returns error code for given error 00159 static const FXchar* getError(FXRexError err){ return errors[err]; } 00160 00161 /// Comparison operators 00162 friend FXAPI FXbool operator==(const FXRex &r1,const FXRex &r2); 00163 friend FXAPI FXbool operator!=(const FXRex &r1,const FXRex &r2); 00164 00165 /// Saving and loading 00166 friend FXAPI FXStream& operator<<(FXStream& store,const FXRex& s); 00167 friend FXAPI FXStream& operator>>(FXStream& store,FXRex& s); 00168 00169 /// Delete 00170 ~FXRex(); 00171 }; 00172 00173 #endif