Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members

FXRex.h

00001 /********************************************************************************
00002 *                                                                               *
00003 *                 R e g u l a r   E x p r e s s i o n   C l a s s               *
00004 *                                                                               *
00005 *********************************************************************************
00006 * Copyright (C) 1999,2002 by Jeroen van der Zijp.   All Rights Reserved.        *
00007 *********************************************************************************
00008 * This library is free software; you can redistribute it and/or                 *
00009 * modify it under the terms of the GNU Lesser General Public                    *
00010 * License as published by the Free Software Foundation; either                  *
00011 * version 2.1 of the License, or (at your option) any later version.            *
00012 *                                                                               *
00013 * This library is distributed in the hope that it will be useful,               *
00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of                *
00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU             *
00016 * Lesser General Public License for more details.                               *
00017 *                                                                               *
00018 * You should have received a copy of the GNU Lesser General Public              *
00019 * License along with this library; if not, write to the Free Software           *
00020 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.    *
00021 *********************************************************************************
00022 * $Id: FXRex.h,v 1.44 2002/01/18 22:42:54 jeroen Exp $                          *
00023 ********************************************************************************/
00024 #ifndef FXREX_H
00025 #define FXREX_H
00026 
00027 
00028 
00029 
00030 /// Regular expression error codes, as returned by FXRex::parse() and by the compiling constructors
00031 enum FXRexError {
00032   REGERR_OK,                ///< No error (first enumerator, value 0)
00033   REGERR_EMPTY,             ///< Empty pattern
00034   REGERR_PAREN,             ///< Unmatched parenthesis
00035   REGERR_BRACK,             ///< Unmatched bracket
00036   REGERR_BRACE,             ///< Unmatched brace
00037   REGERR_RANGE,             ///< Bad character range
00038   REGERR_ESC,               ///< Bad escape sequence
00039   REGERR_COUNT,             ///< Bad counted repeat
00040   REGERR_NOATOM,            ///< No atom preceding repetition
00041   REGERR_REPEAT,            ///< Repeat following repeat
00042   REGERR_BACKREF,           ///< Bad backward reference
00043   REGERR_CLASS,             ///< Bad character class
00044   REGERR_COMPLEX,           ///< Expression too complex
00045   REGERR_MEMORY,            ///< Out of memory
00046   REGERR_TOKEN              ///< Illegal token (last enumerator)
00047   };
00048 
00049 
00050 /// Regular expression parse flags; OR these together to form the mode passed when parsing a pattern
00051 enum {
00052   REX_NORMAL    = 0,        ///< Normal mode (no flag bits set)
00053   REX_CAPTURE   = 1,        ///< Perform capturing parentheses
00054   REX_ICASE     = 2,        ///< Case independent matching
00055   REX_NEWLINE   = 4,        ///< Match-any operators match newline too
00056   REX_VERBATIM  = 8,        ///< Disable interpretation of magic characters
00057   REX_SYNTAX    = 16        ///< Perform syntax check only
00058   };
00059 
00060 
00061 /// Regular expression match flags; OR these together to form the mode passed when matching a compiled pattern
00062 enum {
00063   REX_FORWARD   = 0,        ///< Match scanning forward from offset (no flag bits set)
00064   REX_BACKWARD  = 32,       ///< Match scanning backward from offset
00065   REX_NOT_BOL   = 64,       ///< Start of string is NOT begin of line
00066   REX_NOT_EOL   = 128,      ///< End of string is NOT end of line
00067   REX_NOT_EMPTY = 256       ///< Do not match empty
00068   };
00069 
00070 
00071 /**
00072 * FXRex is a regular expression class implementing an NFA matcher.
00073 * It supports capturing parentheses, non-capturing parentheses,
00074 * positive or negative lookahead, backreferences, case-insensitive
00075 * matching, counted repetitions, lazy or greedy matches, and
00076 * PERL-like matching operators.
00077 * The subject string may be scanned forwards or backwards, and may
00078 * contain any of 256 possible character values.
00079 *
00080 * When parsing a regular expression pattern, the mode parameter is
00081 * the bitwise OR of a set of flags and affects the match algorithm.
00082 * Passing the flag REX_CAPTURE enables capturing parentheses
00083 * and back references. The flag REX_ICASE enables case-insensitive
00084 * matching. When the flag REX_NEWLINE is passed, newlines are treated
00085 * like normal characters; otherwise, newline is NOT matched
00086 * except when explicitly part of a character class. The flag
00087 * REX_VERBATIM disables all special character interpretation. The flag REX_SYNTAX performs a syntax check only.
00088 *
00089 * When matching a compiled pattern, the mode parameter is the
00090 * bitwise OR of a set of flags that affects how the match is
00091 * performed.  Passing the flag REX_BACKWARD causes the match
00092 * to proceed backwards through the subject string.  Passing the
00093 * flags REX_NOT_BOL and/or REX_NOT_EOL causes the begin and
00094 * end of the subject string NOT to be considered a line start
00095 * or line end. The flag REX_NOT_EMPTY causes a match to fail if
00096 * the empty string was matched.
00097 */
00098 class FXAPI FXRex {
00099 private:
00100   FXint *code;    // Equals fallback while the expression is empty (see empty())
00101 private:
00102   static const FXchar *const errors[];    // Strings indexed by FXRexError (see getError())
00103   static const FXint fallback[];          // Shared table that code points at for an empty expression
00104 public:
00105 
00106   /// Construct empty regular expression object; code is pointed at the shared fallback table
00107   FXRex():code((FXint*)fallback){}
00108 
00109   /// Copy regular expression object
00110   FXRex(const FXRex& orig);
00111 
00112   /// Compile expression from C-string pattern; if error is not NULL, error code is returned through it
00113   FXRex(const FXchar* pattern,FXint mode=REX_NORMAL,FXRexError* error=NULL);
00114 
00115   /// Compile expression from FXString pattern; if error is not NULL, error code is returned through it
00116   FXRex(const FXString& pattern,FXint mode=REX_NORMAL,FXRexError* error=NULL);
00117 
00118   /// Assign another regular expression to this one
00119   FXRex& operator=(const FXRex& orig);
00120 
00121   /**
00122   * See if regular expression is empty, i.e. code still equals fallback;
00123   * the regular expression will be empty when it is unable to parse
00124   * a pattern due to a syntax error.
00125   */
00126   FXbool empty() const { return (code==fallback); }
00127 
00128   /// Parse C-string pattern, return error code if syntax error is found
00129   FXRexError parse(const FXchar* pattern,FXint mode=REX_NORMAL);
00130 
00131   /// Parse FXString pattern, return error code if syntax error is found
00132   FXRexError parse(const FXString& pattern,FXint mode=REX_NORMAL);
00133 
00134   /**
00135   * Match a subject string of length len, returning TRUE if a match is found
00136   * and FALSE otherwise.  The entire pattern is captured in beg[0] and end[0],
00137   * where beg[0] refers to the position of the first matched character and end[0]
00138   * refers to the position after the last matched character.
00139   * Sub expressions from capturing parenthesis i are returned in beg[i] and end[i].
00140   * NOTE(review): npar presumably is the number of beg/end entries, and fm/to the offset range to search -- confirm. */
00141   FXbool match(const FXchar* string,FXint len,FXint* beg=NULL,FXint* end=NULL,FXint mode=REX_FORWARD,FXint npar=1,FXint fm=0,FXint to=2147483647) const;
00142 
00143   /// Search for match in an FXString; takes the same optional arguments as the overload above
00144   FXbool match(const FXString& string,FXint* beg=NULL,FXint* end=NULL,FXint mode=REX_FORWARD,FXint npar=1,FXint fm=0,FXint to=2147483647) const;
00145 
00146   /**
00147   * After performing a regular expression match with capturing parentheses,
00148   * a substitution string is built from the replace string, where "&"
00149   * is replaced by the entire matched pattern, and "\1" through "\9" are
00150   * replaced by captured expressions.  The original source string and its
00151   * length, and the match arrays beg and end must be passed.
00152   * NOTE(review): npar presumably is the number of valid beg/end entries -- confirm. */
00153   static FXString substitute(const FXchar* string,FXint len,FXint* beg,FXint* end,const FXString& replace,FXint npar=1);
00154 
00155   /// Return substitution string; FXString variant, so no separate source length is passed
00156   static FXString substitute(const FXString& string,FXint* beg,FXint* end,const FXString& replace,FXint npar=1);
00157 
00158   /// Returns the error string for the given error code; err indexes the errors table without a range check
00159   static const FXchar* getError(FXRexError err){ return errors[err]; }
00160 
00161   /// Comparison operators
00162   friend FXAPI FXbool operator==(const FXRex &r1,const FXRex &r2);
00163   friend FXAPI FXbool operator!=(const FXRex &r1,const FXRex &r2);
00164 
00165   /// Saving to and loading from an FXStream
00166   friend FXAPI FXStream& operator<<(FXStream& store,const FXRex& s);
00167   friend FXAPI FXStream& operator>>(FXStream& store,FXRex& s);
00168 
00169   /// Destroy regular expression object
00170  ~FXRex();
00171   };
00172 
00173 #endif