00001 #ifndef __PATTERN_H__
00002 #define __PATTERN_H__
00003
00004 #include <vector>
00005 #include <string>
00006 #include <map>
00007
00008 class Matcher;
00009 class NFANode;
00010 class NFAQuantifierNode;
00011
00028 class Pattern
00029 {
00030 friend class Matcher;
00031 friend class NFANode;
00032 friend class NFAQuantifierNode;
00033 private:
00041 Pattern(const std::string & rhs);
00042 protected:
00047 static std::map<std::string, Pattern *> compiledPatterns;
00053 static std::map<std::string, std::pair<std::string, unsigned long> > registeredPatterns;
00054 protected:
00059 std::map<NFANode*, bool> nodes;
00065 Matcher * matcher;
00069 NFANode * head;
00073 std::string pattern;
00078 bool error;
00084 int curInd;
00088 int groupCount;
00092 int nonCapGroupCount;
00096 unsigned long flags;
00097 protected:
00102 void raiseError();
00108 NFANode * registerNode(NFANode * node);
00109
00119 std::string classUnion (std::string s1, std::string s2) const;
00129 std::string classIntersect (std::string s1, std::string s2) const;
00139 std::string classNegate (std::string s1) const;
00149 std::string classCreateRange(char low, char hi) const;
00150
00159 int getInt(int start, int end);
00171 bool quantifyCurly(int & sNum, int & eNum);
00183 NFANode * quantifyGroup(NFANode * start, NFANode * stop, const int gn);
00184
00192 NFANode * quantify(NFANode * newNode);
00199 std::string parseClass();
00206 std::string parsePosix();
00211 std::string parseOctal();
00216 std::string parseHex();
00221 NFANode * parseBackref();
00231 std::string parseEscape(bool & inv, bool & quo);
00240 NFANode * parseRegisteredPattern(NFANode ** end);
00248 NFANode * parseBehind(const bool pos, NFANode ** end);
00253 NFANode * parseQuote();
00262 NFANode * parse(const bool inParen = 0, const bool inOr = 0, NFANode ** end = NULL);
00263 public:
00265 const static unsigned long CASE_INSENSITIVE;
00267 const static unsigned long LITERAL;
00269 const static unsigned long DOT_MATCHES_ALL;
00273 const static unsigned long MULTILINE_MATCHING;
00277 const static unsigned long UNIX_LINE_MODE;
00279 const static int MIN_QMATCH;
00281 const static int MAX_QMATCH;
00282 public:
00295 static Pattern * compile (const std::string & pattern,
00296 const unsigned long mode = 0);
00310 static Pattern * compileAndKeep (const std::string & pattern,
00311 const unsigned long mode = 0);
00312
00332 static std::string replace (const std::string & pattern,
00333 const std::string & str,
00334 const std::string & replaceWith,
00335 const unsigned long mode = 0);
00336
00360 static std::vector<std::string> split (const std::string & pattern,
00361 const std::string & str,
00362 const bool keepEmptys = 0,
00363 const unsigned long limit = 0,
00364 const unsigned long mode = 0);
00365
00384 static std::vector<std::string> findAll (const std::string & pattern,
00385 const std::string & str,
00386 const unsigned long mode = 0);
00387
00397 static bool matches (const std::string & pattern,
00398 const std::string & str,
00399 const unsigned long mode = 0);
00400
00420 static bool registerPattern(const std::string & name,
00421 const std::string & pattern,
00422 const unsigned long mode = 0);
00423
00427 static void unregisterPatterns();
00431 static void clearPatternCache();
00432
00454 static std::pair<std::string, int> findNthMatch (const std::string & pattern,
00455 const std::string & str,
00456 const int matchNum,
00457 const unsigned long mode = 0);
00458 public:
00462 ~Pattern();
00463
00464 std::string replace (const std::string & str,
00465 const std::string & replaceWith);
00466 std::vector<std::string> split (const std::string & str, const bool keepEmptys = 0,
00467 const unsigned long limit = 0);
00468 std::vector<std::string> findAll (const std::string & str);
00469 bool matches (const std::string & str);
00474 unsigned long getFlags () const;
00479 std::string getPattern () const;
00486 Matcher * createMatcher (const std::string & str);
00487 };
00488
00489 class NFANode
00490 {
00491 friend class Matcher;
00492 public:
00493 NFANode * next;
00494 NFANode();
00495 virtual ~NFANode();
00496 virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
00497 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const = 0;
00498 };
00499 class NFACharNode : public NFANode
00500 {
00501 protected:
00502 char ch;
00503 public:
00504 NFACharNode(const char c);
00505 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00506 };
00507 class NFACICharNode : public NFANode
00508 {
00509 protected:
00510 char ch;
00511 public:
00512 NFACICharNode(const char c);
00513 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00514 };
00515 class NFAStartNode : public NFANode
00516 {
00517 public:
00518 NFAStartNode();
00519 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00520 };
00521 class NFAEndNode : public NFANode
00522 {
00523 public:
00524 NFAEndNode();
00525 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00526 };
00527 class NFAQuantifierNode : public NFANode
00528 {
00529 public:
00530 int min, max;
00531 NFANode * inner;
00532 virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
00533 NFAQuantifierNode(Pattern * pat, NFANode * internal,
00534 const int minMatch = Pattern::MIN_QMATCH,
00535 const int maxMatch = Pattern::MAX_QMATCH);
00536 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00537 };
00538 class NFAGreedyQuantifierNode : public NFAQuantifierNode
00539 {
00540 public:
00541 NFAGreedyQuantifierNode(Pattern * pat, NFANode * internal,
00542 const int minMatch = Pattern::MIN_QMATCH,
00543 const int maxMatch = Pattern::MAX_QMATCH);
00544 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00545 virtual int matchInternal(const std::string & str, Matcher * matcher, const int curInd, const int soFar) const;
00546 };
00547 class NFALazyQuantifierNode : public NFAQuantifierNode
00548 {
00549 public:
00550 NFALazyQuantifierNode(Pattern * pat, NFANode * internal,
00551 const int minMatch = Pattern::MIN_QMATCH,
00552 const int maxMatch = Pattern::MAX_QMATCH);
00553 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00554 };
00555 class NFAPossessiveQuantifierNode : public NFAQuantifierNode
00556 {
00557 public:
00558 NFAPossessiveQuantifierNode(Pattern * pat, NFANode * internal,
00559 const int minMatch = Pattern::MIN_QMATCH,
00560 const int maxMatch = Pattern::MAX_QMATCH);
00561 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00562 };
00563 class NFAAcceptNode : public NFANode
00564 {
00565 public:
00566 NFAAcceptNode();
00567 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00568 };
00569 class NFAClassNode : public NFANode
00570 {
00571 public:
00572 bool inv;
00573 std::map<char, bool> vals;
00574 NFAClassNode(const bool invert = 0);
00575 NFAClassNode(const std::string & clazz, const bool invert);
00576 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00577 };
00578 class NFACIClassNode : public NFANode
00579 {
00580 public:
00581 bool inv;
00582 std::map<char, bool> vals;
00583 NFACIClassNode(const bool invert = 0);
00584 NFACIClassNode(const std::string & clazz, const bool invert);
00585 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00586 };
00587 class NFASubStartNode : public NFANode
00588 {
00589 public:
00590 NFASubStartNode();
00591 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00592 };
00593 class NFAOrNode : public NFANode
00594 {
00595 public:
00596 NFANode * one;
00597 NFANode * two;
00598 NFAOrNode(NFANode * first, NFANode * second);
00599 virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
00600 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00601 };
00602 class NFAQuoteNode : public NFANode
00603 {
00604 public:
00605 std::string qStr;
00606 NFAQuoteNode(const std::string & quoted);
00607 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00608 };
00609 class NFACIQuoteNode : public NFANode
00610 {
00611 public:
00612 std::string qStr;
00613 NFACIQuoteNode(const std::string & quoted);
00614 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00615 };
00616 class NFALookAheadNode : public NFANode
00617 {
00618 public:
00619 bool pos;
00620 NFANode * inner;
00621 NFALookAheadNode(NFANode * internal, const bool positive);
00622 virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
00623 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00624 };
00625 class NFALookBehindNode : public NFANode
00626 {
00627 public:
00628 bool pos;
00629 std::string mStr;
00630 NFALookBehindNode(const std::string & str, const bool positive);
00631 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00632 };
00633 class NFAStartOfLineNode : public NFANode
00634 {
00635 public:
00636 NFAStartOfLineNode();
00637 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00638 };
00639 class NFAEndOfLineNode : public NFANode
00640 {
00641 public:
00642 NFAEndOfLineNode();
00643 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00644 };
00645 class NFAReferenceNode : public NFANode
00646 {
00647 public:
00648 int gi;
00649 NFAReferenceNode(const int groupIndex);
00650 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00651 };
00652 class NFAStartOfInputNode : public NFANode
00653 {
00654 public:
00655 NFAStartOfInputNode();
00656 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00657 };
00658 class NFAEndOfInputNode : public NFANode
00659 {
00660 public:
00661 bool term;
00662 NFAEndOfInputNode(const bool lookForTerm);
00663 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00664 };
00665 class NFAWordBoundaryNode : public NFANode
00666 {
00667 public:
00668 bool pos;
00669 NFAWordBoundaryNode(const bool positive);
00670 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00671 };
00672 class NFAEndOfMatchNode : public NFANode
00673 {
00674 public:
00675 NFAEndOfMatchNode();
00676 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00677 };
00678 class NFAGroupHeadNode : public NFANode
00679 {
00680 public:
00681 int gi;
00682 NFAGroupHeadNode(const int groupIndex);
00683 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00684 };
00685 class NFAGroupTailNode : public NFANode
00686 {
00687 public:
00688 int gi;
00689 NFAGroupTailNode(const int groupIndex);
00690 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00691 };
00692 class NFAGroupLoopPrologueNode : public NFANode
00693 {
00694 public:
00695 int gi;
00696 NFAGroupLoopPrologueNode(const int groupIndex);
00697 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00698 };
00699 class NFAGroupLoopNode : public NFANode
00700 {
00701 public:
00702 int gi, min, max, type;
00703 NFANode * inner;
00704 NFAGroupLoopNode(NFANode * internal, const int minMatch,
00705 const int maxMatch, const int groupIndex, const int matchType);
00706 virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
00707 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00708 int matchGreedy(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00709 int matchLazy(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00710 int matchPossessive(const std::string & str, Matcher * matcher, const int curInd = 0) const;
00711 };
00712
00713 #endif
00714