449 lines
14 KiB
C++
449 lines
14 KiB
C++
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
|
|
|
// Copyright 2012 the V8 project authors. All rights reserved.
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above
|
|
// copyright notice, this list of conditions and the following
|
|
// disclaimer in the documentation and/or other materials provided
|
|
// with the distribution.
|
|
// * Neither the name of Google Inc. nor the names of its
|
|
// contributors may be used to endorse or promote products derived
|
|
// from this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
#ifndef V8_REGEXP_AST_H_
|
|
#define V8_REGEXP_AST_H_
|
|
|
|
// Prevent msvc build failures as indicated in bug 1205328
|
|
#ifdef min
|
|
# undef min
|
|
#endif
|
|
#ifdef max
|
|
# undef max
|
|
#endif
|
|
|
|
#include "irregexp/RegExpEngine.h"
|
|
|
|
namespace js {
|
|
namespace irregexp {
|
|
|
|
class RegExpCompiler;
|
|
class RegExpNode;
|
|
|
|
class RegExpVisitor
|
|
{
|
|
public:
|
|
virtual ~RegExpVisitor() { }
|
|
#define MAKE_CASE(Name) \
|
|
virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
|
|
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
|
|
#undef MAKE_CASE
|
|
};
|
|
|
|
class RegExpTree
|
|
{
|
|
public:
|
|
static const int kInfinity = INT32_MAX;
|
|
virtual ~RegExpTree() {}
|
|
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
|
RegExpNode* on_success) = 0;
|
|
virtual bool IsTextElement() { return false; }
|
|
virtual bool IsAnchoredAtStart() { return false; }
|
|
virtual bool IsAnchoredAtEnd() { return false; }
|
|
virtual int min_match() = 0;
|
|
virtual int max_match() = 0;
|
|
// Returns the interval of registers used for captures within this
|
|
// expression.
|
|
virtual Interval CaptureRegisters() { return Interval::Empty(); }
|
|
virtual void AppendToText(RegExpText* text) {
|
|
MOZ_CRASH("Bad call");
|
|
}
|
|
#define MAKE_ASTYPE(Name) \
|
|
virtual RegExp##Name* As##Name(); \
|
|
virtual bool Is##Name();
|
|
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
|
|
#undef MAKE_ASTYPE
|
|
};
|
|
|
|
typedef InfallibleVector<RegExpTree*, 1> RegExpTreeVector;
|
|
|
|
class RegExpDisjunction : public RegExpTree
|
|
{
|
|
public:
|
|
explicit RegExpDisjunction(RegExpTreeVector* alternatives);
|
|
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
|
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
|
RegExpNode* on_success);
|
|
virtual RegExpDisjunction* AsDisjunction();
|
|
virtual Interval CaptureRegisters();
|
|
virtual bool IsDisjunction();
|
|
virtual bool IsAnchoredAtStart();
|
|
virtual bool IsAnchoredAtEnd();
|
|
virtual int min_match() { return min_match_; }
|
|
virtual int max_match() { return max_match_; }
|
|
|
|
const RegExpTreeVector& alternatives() { return *alternatives_; }
|
|
|
|
private:
|
|
RegExpTreeVector* alternatives_;
|
|
int min_match_;
|
|
int max_match_;
|
|
};
|
|
|
|
class RegExpAlternative : public RegExpTree
|
|
{
|
|
public:
|
|
explicit RegExpAlternative(RegExpTreeVector* nodes);
|
|
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
|
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
|
RegExpNode* on_success);
|
|
virtual RegExpAlternative* AsAlternative();
|
|
virtual Interval CaptureRegisters();
|
|
virtual bool IsAlternative();
|
|
virtual bool IsAnchoredAtStart();
|
|
virtual bool IsAnchoredAtEnd();
|
|
virtual int min_match() { return min_match_; }
|
|
virtual int max_match() { return max_match_; }
|
|
|
|
const RegExpTreeVector& nodes() { return *nodes_; }
|
|
|
|
private:
|
|
RegExpTreeVector* nodes_;
|
|
int min_match_;
|
|
int max_match_;
|
|
};
|
|
|
|
class RegExpAssertion : public RegExpTree {
|
|
public:
|
|
enum AssertionType {
|
|
START_OF_LINE,
|
|
START_OF_INPUT,
|
|
END_OF_LINE,
|
|
END_OF_INPUT,
|
|
BOUNDARY,
|
|
NON_BOUNDARY,
|
|
NOT_AFTER_LEAD_SURROGATE,
|
|
NOT_IN_SURROGATE_PAIR
|
|
};
|
|
explicit RegExpAssertion(AssertionType type) : assertion_type_(type) { }
|
|
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
|
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
|
RegExpNode* on_success);
|
|
virtual RegExpAssertion* AsAssertion();
|
|
virtual bool IsAssertion();
|
|
virtual bool IsAnchoredAtStart();
|
|
virtual bool IsAnchoredAtEnd();
|
|
virtual int min_match() { return 0; }
|
|
virtual int max_match() { return 0; }
|
|
AssertionType assertion_type() { return assertion_type_; }
|
|
private:
|
|
AssertionType assertion_type_;
|
|
};
|
|
|
|
class CharacterSet
|
|
{
|
|
public:
|
|
explicit CharacterSet(char16_t standard_set_type)
|
|
: ranges_(nullptr),
|
|
standard_set_type_(standard_set_type)
|
|
{}
|
|
explicit CharacterSet(CharacterRangeVector* ranges)
|
|
: ranges_(ranges),
|
|
standard_set_type_(0)
|
|
{}
|
|
|
|
CharacterRangeVector& ranges(LifoAlloc* alloc);
|
|
char16_t standard_set_type() { return standard_set_type_; }
|
|
void set_standard_set_type(char16_t special_set_type) {
|
|
standard_set_type_ = special_set_type;
|
|
}
|
|
bool is_standard() { return standard_set_type_ != 0; }
|
|
void Canonicalize();
|
|
|
|
private:
|
|
CharacterRangeVector* ranges_;
|
|
|
|
// If non-zero, the value represents a standard set (e.g., all whitespace
|
|
// characters) without having to expand the ranges.
|
|
char16_t standard_set_type_;
|
|
};
|
|
|
|
class RegExpCharacterClass : public RegExpTree
|
|
{
|
|
public:
|
|
RegExpCharacterClass(CharacterRangeVector* ranges, bool is_negated)
|
|
: set_(ranges),
|
|
is_negated_(is_negated)
|
|
{}
|
|
|
|
explicit RegExpCharacterClass(char16_t type)
|
|
: set_(type),
|
|
is_negated_(false)
|
|
{}
|
|
|
|
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
|
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
|
RegExpNode* on_success);
|
|
virtual RegExpCharacterClass* AsCharacterClass();
|
|
virtual bool IsCharacterClass();
|
|
virtual bool IsTextElement() { return true; }
|
|
virtual int min_match() { return 1; }
|
|
virtual int max_match() { return 1; }
|
|
virtual void AppendToText(RegExpText* text);
|
|
|
|
CharacterSet character_set() { return set_; }
|
|
|
|
// TODO(lrn): Remove need for complex version if is_standard that
|
|
// recognizes a mangled standard set and just do { return set_.is_special(); }
|
|
bool is_standard(LifoAlloc* alloc);
|
|
|
|
// Returns a value representing the standard character set if is_standard()
|
|
// returns true.
|
|
// Currently used values are:
|
|
// s : unicode whitespace
|
|
// S : unicode non-whitespace
|
|
// w : ASCII word character (digit, letter, underscore)
|
|
// W : non-ASCII word character
|
|
// d : ASCII digit
|
|
// D : non-ASCII digit
|
|
// . : non-unicode non-newline
|
|
// * : All characters
|
|
char16_t standard_type() { return set_.standard_set_type(); }
|
|
|
|
CharacterRangeVector& ranges(LifoAlloc* alloc) { return set_.ranges(alloc); }
|
|
bool is_negated() { return is_negated_; }
|
|
|
|
private:
|
|
CharacterSet set_;
|
|
bool is_negated_;
|
|
};
|
|
|
|
typedef InfallibleVector<char16_t, 10> CharacterVector;
|
|
|
|
class RegExpAtom : public RegExpTree
|
|
{
|
|
public:
|
|
explicit RegExpAtom(CharacterVector* data)
|
|
: data_(data)
|
|
{}
|
|
|
|
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
|
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
|
RegExpNode* on_success);
|
|
virtual RegExpAtom* AsAtom();
|
|
virtual bool IsAtom();
|
|
virtual bool IsTextElement() { return true; }
|
|
virtual int min_match() { return data_->length(); }
|
|
virtual int max_match() { return data_->length(); }
|
|
virtual void AppendToText(RegExpText* text);
|
|
|
|
const CharacterVector& data() { return *data_; }
|
|
int length() { return data_->length(); }
|
|
|
|
private:
|
|
CharacterVector* data_;
|
|
};
|
|
|
|
class RegExpText : public RegExpTree
|
|
{
|
|
public:
|
|
explicit RegExpText(LifoAlloc* alloc)
|
|
: elements_(*alloc), length_(0)
|
|
{}
|
|
|
|
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
|
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
|
RegExpNode* on_success);
|
|
virtual RegExpText* AsText();
|
|
virtual bool IsText();
|
|
virtual bool IsTextElement() { return true; }
|
|
virtual int min_match() { return length_; }
|
|
virtual int max_match() { return length_; }
|
|
virtual void AppendToText(RegExpText* text);
|
|
|
|
void AddElement(TextElement elm) {
|
|
elements_.append(elm);
|
|
length_ += elm.length();
|
|
}
|
|
const TextElementVector& elements() { return elements_; }
|
|
|
|
private:
|
|
TextElementVector elements_;
|
|
int length_;
|
|
};
|
|
|
|
class RegExpQuantifier : public RegExpTree
|
|
{
|
|
public:
|
|
enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE };
|
|
RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body)
|
|
: body_(body),
|
|
min_(min),
|
|
max_(max),
|
|
min_match_(min * body->min_match()),
|
|
quantifier_type_(type)
|
|
{
|
|
if (max > 0 && body->max_match() > kInfinity / max) {
|
|
max_match_ = kInfinity;
|
|
} else {
|
|
max_match_ = max * body->max_match();
|
|
}
|
|
}
|
|
|
|
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
|
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
|
RegExpNode* on_success);
|
|
static RegExpNode* ToNode(int min,
|
|
int max,
|
|
bool is_greedy,
|
|
RegExpTree* body,
|
|
RegExpCompiler* compiler,
|
|
RegExpNode* on_success,
|
|
bool not_at_start = false);
|
|
virtual RegExpQuantifier* AsQuantifier();
|
|
virtual Interval CaptureRegisters();
|
|
virtual bool IsQuantifier();
|
|
virtual int min_match() { return min_match_; }
|
|
virtual int max_match() { return max_match_; }
|
|
int min() { return min_; }
|
|
int max() { return max_; }
|
|
bool is_possessive() { return quantifier_type_ == POSSESSIVE; }
|
|
bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; }
|
|
bool is_greedy() { return quantifier_type_ == GREEDY; }
|
|
RegExpTree* body() { return body_; }
|
|
|
|
private:
|
|
RegExpTree* body_;
|
|
int min_;
|
|
int max_;
|
|
int min_match_;
|
|
int max_match_;
|
|
QuantifierType quantifier_type_;
|
|
};
|
|
|
|
class RegExpCapture : public RegExpTree
|
|
{
|
|
public:
|
|
explicit RegExpCapture(RegExpTree* body, int index)
|
|
: body_(body), index_(index)
|
|
{}
|
|
|
|
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
|
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
|
RegExpNode* on_success);
|
|
static RegExpNode* ToNode(RegExpTree* body,
|
|
int index,
|
|
RegExpCompiler* compiler,
|
|
RegExpNode* on_success);
|
|
virtual RegExpCapture* AsCapture();
|
|
virtual bool IsAnchoredAtStart();
|
|
virtual bool IsAnchoredAtEnd();
|
|
virtual Interval CaptureRegisters();
|
|
virtual bool IsCapture();
|
|
virtual int min_match() { return body_->min_match(); }
|
|
virtual int max_match() { return body_->max_match(); }
|
|
RegExpTree* body() { return body_; }
|
|
int index() { return index_; }
|
|
static int StartRegister(int index) { return index * 2; }
|
|
static int EndRegister(int index) { return index * 2 + 1; }
|
|
|
|
private:
|
|
RegExpTree* body_;
|
|
int index_;
|
|
};
|
|
|
|
class RegExpLookahead : public RegExpTree
|
|
{
|
|
public:
|
|
RegExpLookahead(RegExpTree* body,
|
|
bool is_positive,
|
|
int capture_count,
|
|
int capture_from)
|
|
: body_(body),
|
|
is_positive_(is_positive),
|
|
capture_count_(capture_count),
|
|
capture_from_(capture_from)
|
|
{}
|
|
|
|
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
|
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
|
RegExpNode* on_success);
|
|
virtual RegExpLookahead* AsLookahead();
|
|
virtual Interval CaptureRegisters();
|
|
virtual bool IsLookahead();
|
|
virtual bool IsAnchoredAtStart();
|
|
virtual int min_match() { return 0; }
|
|
virtual int max_match() { return 0; }
|
|
RegExpTree* body() { return body_; }
|
|
bool is_positive() { return is_positive_; }
|
|
int capture_count() { return capture_count_; }
|
|
int capture_from() { return capture_from_; }
|
|
|
|
private:
|
|
RegExpTree* body_;
|
|
bool is_positive_;
|
|
int capture_count_;
|
|
int capture_from_;
|
|
};
|
|
|
|
typedef InfallibleVector<RegExpCapture*, 1> RegExpCaptureVector;
|
|
|
|
class RegExpBackReference : public RegExpTree
|
|
{
|
|
public:
|
|
explicit RegExpBackReference(RegExpCapture* capture)
|
|
: capture_(capture)
|
|
{}
|
|
|
|
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
|
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
|
RegExpNode* on_success);
|
|
virtual RegExpBackReference* AsBackReference();
|
|
virtual bool IsBackReference();
|
|
virtual int min_match() { return 0; }
|
|
virtual int max_match() { return capture_->max_match(); }
|
|
int index() { return capture_->index(); }
|
|
RegExpCapture* capture() { return capture_; }
|
|
private:
|
|
RegExpCapture* capture_;
|
|
};
|
|
|
|
class RegExpEmpty : public RegExpTree
|
|
{
|
|
public:
|
|
RegExpEmpty()
|
|
{}
|
|
|
|
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
|
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
|
RegExpNode* on_success);
|
|
virtual RegExpEmpty* AsEmpty();
|
|
virtual bool IsEmpty();
|
|
virtual int min_match() { return 0; }
|
|
virtual int max_match() { return 0; }
|
|
static RegExpEmpty* GetInstance() {
|
|
static RegExpEmpty instance;
|
|
return &instance;
|
|
}
|
|
};
|
|
|
|
} } // namespace js::irregexp
|
|
|
|
#endif // V8_REGEXP_AST_H_
|