Implement regex lookaround.
This commit is contained in:
parent
45cfa8084f
commit
b8c8d932f2
|
@ -3900,10 +3900,10 @@ ConvertRegExpTreeToObject(JSContext* cx, irregexp::RegExpTree* tree)
|
|||
return nullptr;
|
||||
return obj;
|
||||
}
|
||||
if (tree->IsLookahead()) {
|
||||
if (!StringProp(cx, obj, "type", "Lookahead"))
|
||||
if (tree->IsLookaround()) {
|
||||
if (!StringProp(cx, obj, "type", "Lookaround"))
|
||||
return nullptr;
|
||||
irregexp::RegExpLookahead* t = tree->AsLookahead();
|
||||
irregexp::RegExpLookaround* t = tree->AsLookaround();
|
||||
if (!BooleanProp(cx, obj, "is_positive", t->is_positive()))
|
||||
return nullptr;
|
||||
if (!TreeProp(cx, obj, "body", t->body()))
|
||||
|
|
|
@ -582,7 +582,7 @@ NativeRegExpMacroAssembler::CheckAtStart(Label* on_at_start)
|
|||
}
|
||||
|
||||
void
|
||||
NativeRegExpMacroAssembler::CheckNotAtStart(Label* on_not_at_start)
|
||||
NativeRegExpMacroAssembler::CheckNotAtStart(int cp_offset, Label* on_not_at_start)
|
||||
{
|
||||
JitSpew(SPEW_PREFIX "CheckNotAtStart");
|
||||
|
||||
|
@ -673,7 +673,7 @@ NativeRegExpMacroAssembler::CheckGreedyLoop(Label* on_tos_equals_current_positio
|
|||
}
|
||||
|
||||
void
|
||||
NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, Label* on_no_match)
|
||||
NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match)
|
||||
{
|
||||
JitSpew(SPEW_PREFIX "CheckNotBackReference(%d)", start_reg);
|
||||
|
||||
|
@ -744,8 +744,8 @@ NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, Label* on_no_ma
|
|||
}
|
||||
|
||||
void
|
||||
NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label* on_no_match,
|
||||
bool unicode)
|
||||
NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
|
||||
Label* on_no_match, bool unicode)
|
||||
{
|
||||
JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode);
|
||||
|
||||
|
|
|
@ -105,9 +105,10 @@ class MOZ_STACK_CLASS NativeRegExpMacroAssembler final : public RegExpMacroAssem
|
|||
void CheckCharacterGT(char16_t limit, jit::Label* on_greater);
|
||||
void CheckCharacterLT(char16_t limit, jit::Label* on_less);
|
||||
void CheckGreedyLoop(jit::Label* on_tos_equals_current_position);
|
||||
void CheckNotAtStart(jit::Label* on_not_at_start);
|
||||
void CheckNotBackReference(int start_reg, jit::Label* on_no_match);
|
||||
void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, bool unicode);
|
||||
void CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start);
|
||||
void CheckNotBackReference(int start_reg, bool read_backward, jit::Label* on_no_match);
|
||||
void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
|
||||
jit::Label* on_no_match, bool unicode);
|
||||
void CheckNotCharacter(unsigned c, jit::Label* on_not_equal);
|
||||
void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_not_equal);
|
||||
void CheckNotCharacterAfterMinusAnd(char16_t c, char16_t minus, char16_t and_with,
|
||||
|
|
|
@ -250,16 +250,16 @@ RegExpCapture::CaptureRegisters()
|
|||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RegExpLookahead
|
||||
// RegExpLookaround
|
||||
|
||||
Interval
|
||||
RegExpLookahead::CaptureRegisters()
|
||||
RegExpLookaround::CaptureRegisters()
|
||||
{
|
||||
return body()->CaptureRegisters();
|
||||
}
|
||||
|
||||
bool
|
||||
RegExpLookahead::IsAnchoredAtStart()
|
||||
RegExpLookaround::IsAnchoredAtStart()
|
||||
{
|
||||
return is_positive() && body()->IsAnchoredAtStart();
|
||||
return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart();
|
||||
}
|
||||
|
|
|
@ -360,6 +360,7 @@ class RegExpCapture : public RegExpTree
|
|||
virtual int min_match() { return body_->min_match(); }
|
||||
virtual int max_match() { return body_->max_match(); }
|
||||
RegExpTree* body() { return body_; }
|
||||
void set_body(RegExpTree* body) { body_ = body; }
|
||||
int index() { return index_; }
|
||||
static int StartRegister(int index) { return index * 2; }
|
||||
static int EndRegister(int index) { return index * 2 + 1; }
|
||||
|
@ -369,25 +370,29 @@ class RegExpCapture : public RegExpTree
|
|||
int index_;
|
||||
};
|
||||
|
||||
class RegExpLookahead : public RegExpTree
|
||||
class RegExpLookaround : public RegExpTree
|
||||
{
|
||||
public:
|
||||
RegExpLookahead(RegExpTree* body,
|
||||
bool is_positive,
|
||||
int capture_count,
|
||||
int capture_from)
|
||||
enum Type { LOOKAHEAD, LOOKBEHIND };
|
||||
|
||||
RegExpLookaround(RegExpTree* body,
|
||||
bool is_positive,
|
||||
int capture_count,
|
||||
int capture_from,
|
||||
Type type)
|
||||
: body_(body),
|
||||
is_positive_(is_positive),
|
||||
capture_count_(capture_count),
|
||||
capture_from_(capture_from)
|
||||
capture_from_(capture_from),
|
||||
type_(type)
|
||||
{}
|
||||
|
||||
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
||||
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success);
|
||||
virtual RegExpLookahead* AsLookahead();
|
||||
virtual RegExpLookaround* AsLookaround();
|
||||
virtual Interval CaptureRegisters();
|
||||
virtual bool IsLookahead();
|
||||
virtual bool IsLookaround();
|
||||
virtual bool IsAnchoredAtStart();
|
||||
virtual int min_match() { return 0; }
|
||||
virtual int max_match() { return 0; }
|
||||
|
@ -395,12 +400,14 @@ class RegExpLookahead : public RegExpTree
|
|||
bool is_positive() { return is_positive_; }
|
||||
int capture_count() { return capture_count_; }
|
||||
int capture_from() { return capture_from_; }
|
||||
Type type() { return type_; }
|
||||
|
||||
private:
|
||||
RegExpTree* body_;
|
||||
bool is_positive_;
|
||||
int capture_count_;
|
||||
int capture_from_;
|
||||
Type type_;
|
||||
};
|
||||
|
||||
typedef InfallibleVector<RegExpCapture*, 1> RegExpCaptureVector;
|
||||
|
@ -417,8 +424,14 @@ class RegExpBackReference : public RegExpTree
|
|||
RegExpNode* on_success);
|
||||
virtual RegExpBackReference* AsBackReference();
|
||||
virtual bool IsBackReference();
|
||||
virtual int min_match() { return 0; }
|
||||
virtual int max_match() { return capture_->max_match(); }
|
||||
virtual int min_match() override { return 0; }
|
||||
// The capture may not be completely parsed yet, if the reference occurs
|
||||
// before the capture. In the ordinary case, nothing has been captured yet,
|
||||
// so the back reference must have the length 0. If the back reference is
|
||||
// inside a lookbehind, effectively making it a forward reference, we return 0 whenever the capture's body has not been set yet.
|
||||
virtual int max_match() override {
|
||||
return capture_->body() ? capture_->max_match() : 0;
|
||||
}
|
||||
int index() { return capture_->index(); }
|
||||
RegExpCapture* capture() { return capture_; }
|
||||
private:
|
||||
|
|
|
@ -82,16 +82,19 @@ V(CHECK_LT, 35, 8) /* bc8 pad8 uc16 addr32 */ \
|
|||
V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32 */ \
|
||||
V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \
|
||||
V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \
|
||||
V(CHECK_NOT_REGS_EQUAL, 39, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
|
||||
V(CHECK_REGISTER_LT, 40, 12) /* bc8 reg_idx24 value32 addr32 */ \
|
||||
V(CHECK_REGISTER_GE, 41, 12) /* bc8 reg_idx24 value32 addr32 */ \
|
||||
V(CHECK_REGISTER_EQ_POS, 42, 8) /* bc8 reg_idx24 addr32 */ \
|
||||
V(CHECK_AT_START, 43, 8) /* bc8 pad24 addr32 */ \
|
||||
V(CHECK_NOT_AT_START, 44, 8) /* bc8 pad24 addr32 */ \
|
||||
V(CHECK_GREEDY, 45, 8) /* bc8 pad24 addr32 */ \
|
||||
V(ADVANCE_CP_AND_GOTO, 46, 8) /* bc8 offset24 addr32 */ \
|
||||
V(SET_CURRENT_POSITION_FROM_END, 47, 4) /* bc8 idx24 */ \
|
||||
V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 48, 8) /* bc8 reg_idx24 addr32 */
|
||||
V(CHECK_NOT_BACK_REF_BACKWARD, 39, 8) /* bc8 reg_idx24 addr32 */ \
|
||||
V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \
|
||||
V(CHECK_NOT_REGS_EQUAL, 41, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
|
||||
V(CHECK_REGISTER_LT, 42, 12) /* bc8 reg_idx24 value32 addr32 */ \
|
||||
V(CHECK_REGISTER_GE, 43, 12) /* bc8 reg_idx24 value32 addr32 */ \
|
||||
V(CHECK_REGISTER_EQ_POS, 44, 8) /* bc8 reg_idx24 addr32 */ \
|
||||
V(CHECK_AT_START, 45, 8) /* bc8 pad24 addr32 */ \
|
||||
V(CHECK_NOT_AT_START, 46, 8) /* bc8 pad24 addr32 */ \
|
||||
V(CHECK_GREEDY, 47, 8) /* bc8 pad24 addr32 */ \
|
||||
V(ADVANCE_CP_AND_GOTO, 48, 8) /* bc8 offset24 addr32 */ \
|
||||
V(SET_CURRENT_POSITION_FROM_END, 49, 4) /* bc8 idx24 */ \
|
||||
V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 50, 8) /* bc8 reg_idx24 addr32 */ \
|
||||
V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE, 51, 8) /* bc8 reg_idx24 addr32 */
|
||||
|
||||
#define DECLARE_BYTECODES(name, code, length) \
|
||||
static const int BC_##name = code;
|
||||
|
|
|
@ -721,6 +721,8 @@ ActionNode::EmptyMatchCheck(int start_register,
|
|||
int
|
||||
TextNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
|
||||
{
|
||||
if (read_backward())
|
||||
return 0;
|
||||
int answer = Length();
|
||||
if (answer >= still_to_find)
|
||||
return answer;
|
||||
|
@ -736,8 +738,7 @@ TextNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
|
|||
int
|
||||
TextNode::GreedyLoopTextLength()
|
||||
{
|
||||
TextElement elm = elements()[elements().length() - 1];
|
||||
return elm.cp_offset() + elm.length();
|
||||
return Length();
|
||||
}
|
||||
|
||||
RegExpNode*
|
||||
|
@ -887,6 +888,8 @@ AssertionNode::FillInBMInfo(int offset, int budget, BoyerMooreLookahead* bm, boo
|
|||
int
|
||||
BackReferenceNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
|
||||
{
|
||||
if (read_backward())
|
||||
return 0;
|
||||
if (budget <= 0)
|
||||
return 0;
|
||||
return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start);
|
||||
|
@ -1578,6 +1581,9 @@ class irregexp::RegExpCompiler
|
|||
current_expansion_factor_ = value;
|
||||
}
|
||||
|
||||
bool read_backward() { return read_backward_; }
|
||||
void set_read_backward(bool value) { read_backward_ = value; }
|
||||
|
||||
JSContext* cx() const { return cx_; }
|
||||
LifoAlloc* alloc() const { return alloc_; }
|
||||
|
||||
|
@ -1595,6 +1601,7 @@ class irregexp::RegExpCompiler
|
|||
bool unicode_;
|
||||
bool reg_exp_too_big_;
|
||||
int current_expansion_factor_;
|
||||
bool read_backward_;
|
||||
FrequencyCollator frequency_collator_;
|
||||
JSContext* cx_;
|
||||
LifoAlloc* alloc_;
|
||||
|
@ -1624,6 +1631,7 @@ RegExpCompiler::RegExpCompiler(JSContext* cx, LifoAlloc* alloc, int capture_coun
|
|||
unicode_(unicode),
|
||||
reg_exp_too_big_(false),
|
||||
current_expansion_factor_(1),
|
||||
read_backward_(false),
|
||||
frequency_collator_(),
|
||||
cx_(cx),
|
||||
alloc_(alloc)
|
||||
|
@ -1747,7 +1755,7 @@ irregexp::CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData*
|
|||
// at the start of input.
|
||||
ChoiceNode* first_step_node = alloc.newInfallible<ChoiceNode>(&alloc, 2);
|
||||
RegExpNode* char_class =
|
||||
alloc.newInfallible<TextNode>(alloc.newInfallible<RegExpCharacterClass>('*'), loop_node);
|
||||
alloc.newInfallible<TextNode>(alloc.newInfallible<RegExpCharacterClass>('*'), false, loop_node);
|
||||
first_step_node->AddAlternative(GuardedAlternative(captured_body));
|
||||
first_step_node->AddAlternative(GuardedAlternative(char_class));
|
||||
node = first_step_node;
|
||||
|
@ -1850,19 +1858,19 @@ RegExpAtom::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
|
|||
TextElementVector* elms =
|
||||
compiler->alloc()->newInfallible<TextElementVector>(*compiler->alloc());
|
||||
elms->append(TextElement::Atom(this));
|
||||
return compiler->alloc()->newInfallible<TextNode>(elms, on_success);
|
||||
return compiler->alloc()->newInfallible<TextNode>(elms, compiler->read_backward(), on_success);
|
||||
}
|
||||
|
||||
RegExpNode*
|
||||
RegExpText::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
|
||||
{
|
||||
return compiler->alloc()->newInfallible<TextNode>(&elements_, on_success);
|
||||
return compiler->alloc()->newInfallible<TextNode>(&elements_, compiler->read_backward(), on_success);
|
||||
}
|
||||
|
||||
RegExpNode*
|
||||
RegExpCharacterClass::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
|
||||
{
|
||||
return compiler->alloc()->newInfallible<TextNode>(this, on_success);
|
||||
return compiler->alloc()->newInfallible<TextNode>(this, compiler->read_backward(), on_success);
|
||||
}
|
||||
|
||||
RegExpNode*
|
||||
|
@ -2003,7 +2011,8 @@ RegExpQuantifier::ToNode(int min,
|
|||
alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, answer)));
|
||||
}
|
||||
answer = alternation;
|
||||
if (not_at_start) alternation->set_not_at_start();
|
||||
if (not_at_start && !compiler->read_backward())
|
||||
alternation->set_not_at_start();
|
||||
}
|
||||
return answer;
|
||||
}
|
||||
|
@ -2015,8 +2024,9 @@ RegExpQuantifier::ToNode(int min,
|
|||
int reg_ctr = needs_counter
|
||||
? compiler->AllocateRegister()
|
||||
: RegExpCompiler::kNoRegister;
|
||||
LoopChoiceNode* center = alloc->newInfallible<LoopChoiceNode>(alloc, body->min_match() == 0);
|
||||
if (not_at_start)
|
||||
LoopChoiceNode* center = alloc->newInfallible<LoopChoiceNode>(alloc, body->min_match() == 0,
|
||||
compiler->read_backward());
|
||||
if (not_at_start && !compiler->read_backward())
|
||||
center->set_not_at_start();
|
||||
RegExpNode* loop_return = needs_counter
|
||||
? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
|
||||
|
@ -2092,7 +2102,7 @@ RegExpAssertion::ToNode(RegExpCompiler* compiler,
|
|||
CharacterRange::AddClassEscape(alloc, 'n', newline_ranges);
|
||||
RegExpCharacterClass* newline_atom = alloc->newInfallible<RegExpCharacterClass>('n');
|
||||
TextNode* newline_matcher =
|
||||
alloc->newInfallible<TextNode>(newline_atom,
|
||||
alloc->newInfallible<TextNode>(newline_atom, false,
|
||||
ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
|
||||
position_register,
|
||||
0, // No captures inside.
|
||||
|
@ -2124,6 +2134,7 @@ RegExpBackReference::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
|
|||
{
|
||||
return compiler->alloc()->newInfallible<BackReferenceNode>(RegExpCapture::StartRegister(index()),
|
||||
RegExpCapture::EndRegister(index()),
|
||||
compiler->read_backward(),
|
||||
on_success);
|
||||
}
|
||||
|
||||
|
@ -2134,7 +2145,7 @@ RegExpEmpty::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
|
|||
}
|
||||
|
||||
RegExpNode*
|
||||
RegExpLookahead::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
|
||||
RegExpLookaround::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
|
||||
{
|
||||
int stack_pointer_register = compiler->AllocateRegister();
|
||||
int position_register = compiler->AllocateRegister();
|
||||
|
@ -2145,6 +2156,10 @@ RegExpLookahead::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
|
|||
int register_start =
|
||||
register_of_first_capture + capture_from_ * registers_per_capture;
|
||||
|
||||
RegExpNode* result;
|
||||
bool was_reading_backward = compiler->read_backward();
|
||||
compiler->set_read_backward(type() == LOOKBEHIND);
|
||||
|
||||
if (is_positive()) {
|
||||
RegExpNode* bodyNode =
|
||||
body()->ToNode(compiler,
|
||||
|
@ -2153,37 +2168,39 @@ RegExpLookahead::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
|
|||
register_count,
|
||||
register_start,
|
||||
on_success));
|
||||
return ActionNode::BeginSubmatch(stack_pointer_register,
|
||||
result = ActionNode::BeginSubmatch(stack_pointer_register,
|
||||
position_register,
|
||||
bodyNode);
|
||||
} else {
|
||||
// We use a ChoiceNode for a negative lookaround because it has most of
|
||||
// the characteristics we need. It has the body of the lookahead as its
|
||||
// first alternative and the expression after the lookahead of the second
|
||||
// alternative. If the first alternative succeeds then the
|
||||
// NegativeSubmatchSuccess will unwind the stack including everything the
|
||||
// choice node set up and backtrack. If the first alternative fails then
|
||||
// the second alternative is tried, which is exactly the desired result
|
||||
// for a negative lookaround. The NegativeLookaheadChoiceNode is a special
|
||||
// ChoiceNode that knows to ignore the first exit when calculating quick
|
||||
// checks.
|
||||
LifoAlloc* alloc = compiler->alloc();
|
||||
|
||||
RegExpNode* success =
|
||||
alloc->newInfallible<NegativeSubmatchSuccess>(alloc,
|
||||
stack_pointer_register,
|
||||
position_register,
|
||||
register_count,
|
||||
register_start);
|
||||
GuardedAlternative body_alt(body()->ToNode(compiler, success));
|
||||
|
||||
ChoiceNode* choice_node =
|
||||
alloc->newInfallible<NegativeLookaheadChoiceNode>(alloc, body_alt, GuardedAlternative(on_success));
|
||||
|
||||
result = ActionNode::BeginSubmatch(stack_pointer_register,
|
||||
position_register,
|
||||
bodyNode);
|
||||
choice_node);
|
||||
}
|
||||
|
||||
// We use a ChoiceNode for a negative lookahead because it has most of
|
||||
// the characteristics we need. It has the body of the lookahead as its
|
||||
// first alternative and the expression after the lookahead of the second
|
||||
// alternative. If the first alternative succeeds then the
|
||||
// NegativeSubmatchSuccess will unwind the stack including everything the
|
||||
// choice node set up and backtrack. If the first alternative fails then
|
||||
// the second alternative is tried, which is exactly the desired result
|
||||
// for a negative lookahead. The NegativeLookaheadChoiceNode is a special
|
||||
// ChoiceNode that knows to ignore the first exit when calculating quick
|
||||
// checks.
|
||||
LifoAlloc* alloc = compiler->alloc();
|
||||
|
||||
RegExpNode* success =
|
||||
alloc->newInfallible<NegativeSubmatchSuccess>(alloc,
|
||||
stack_pointer_register,
|
||||
position_register,
|
||||
register_count,
|
||||
register_start);
|
||||
GuardedAlternative body_alt(body()->ToNode(compiler, success));
|
||||
|
||||
ChoiceNode* choice_node =
|
||||
alloc->newInfallible<NegativeLookaheadChoiceNode>(alloc, body_alt, GuardedAlternative(on_success));
|
||||
|
||||
return ActionNode::BeginSubmatch(stack_pointer_register,
|
||||
position_register,
|
||||
choice_node);
|
||||
compiler->set_read_backward(was_reading_backward);
|
||||
return result;
|
||||
}
|
||||
|
||||
RegExpNode*
|
||||
|
@ -2198,8 +2215,14 @@ RegExpCapture::ToNode(RegExpTree* body,
|
|||
RegExpCompiler* compiler,
|
||||
RegExpNode* on_success)
|
||||
{
|
||||
MOZ_ASSERT(body);
|
||||
int start_reg = RegExpCapture::StartRegister(index);
|
||||
int end_reg = RegExpCapture::EndRegister(index);
|
||||
if (compiler->read_backward()) {
|
||||
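// Equivalent to std::swap(start_reg, end_reg): when reading backward, the position recorded
// before the body goes into the capture's end register and the one recorded after it into the start register.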
|
||||
start_reg = RegExpCapture::EndRegister(index);
|
||||
end_reg = RegExpCapture::StartRegister(index);
|
||||
}
|
||||
RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
|
||||
RegExpNode* body_node = body->ToNode(compiler, store_end);
|
||||
return ActionNode::StorePosition(start_reg, true, body_node);
|
||||
|
@ -2210,8 +2233,15 @@ RegExpAlternative::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
|
|||
{
|
||||
const RegExpTreeVector& children = nodes();
|
||||
RegExpNode* current = on_success;
|
||||
for (int i = children.length() - 1; i >= 0; i--)
|
||||
current = children[i]->ToNode(compiler, current);
|
||||
if (compiler->read_backward()) {
|
||||
for (int i = 0; i < children.length(); i++) {
|
||||
current = children[i]->ToNode(compiler, current);
|
||||
}
|
||||
} else {
|
||||
for (int i = children.length() - 1; i >= 0; i--) {
|
||||
current = children[i]->ToNode(compiler, current);
|
||||
}
|
||||
}
|
||||
return current;
|
||||
}
|
||||
|
||||
|
@ -2764,7 +2794,6 @@ Trace::InvalidateCurrentCharacter()
|
|||
void
|
||||
Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler)
|
||||
{
|
||||
MOZ_ASSERT(by > 0);
|
||||
// We don't have an instruction for shifting the current character register
|
||||
// down or for using a shifted value for anything so lets just forget that
|
||||
// we preloaded any characters into it.
|
||||
|
@ -3109,9 +3138,9 @@ AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace)
|
|||
return;
|
||||
}
|
||||
if (trace->at_start() == Trace::UNKNOWN) {
|
||||
assembler->CheckNotAtStart(trace->backtrack());
|
||||
assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack());
|
||||
Trace at_start_trace = *trace;
|
||||
at_start_trace.set_at_start(true);
|
||||
at_start_trace.set_at_start(Trace::TRUE_VALUE);
|
||||
on_success()->Emit(compiler, &at_start_trace);
|
||||
return;
|
||||
}
|
||||
|
@ -3814,9 +3843,10 @@ TextNode::TextEmitPass(RegExpCompiler* compiler,
|
|||
jit::Label* backtrack = trace->backtrack();
|
||||
QuickCheckDetails* quick_check = trace->quick_check_performed();
|
||||
int element_count = elements().length();
|
||||
int backward_offset = read_backward() ? -Length() : 0;
|
||||
for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
|
||||
TextElement elm = elements()[i];
|
||||
int cp_offset = trace->cp_offset() + elm.cp_offset();
|
||||
int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
|
||||
if (elm.text_type() == TextElement::ATOM) {
|
||||
const CharacterVector& quarks = elm.atom()->data();
|
||||
for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
|
||||
|
@ -3844,11 +3874,12 @@ TextNode::TextEmitPass(RegExpCompiler* compiler,
|
|||
break;
|
||||
}
|
||||
if (emit_function != nullptr) {
|
||||
bool bounds_check = *checked_up_to < cp_offset + j || read_backward();
|
||||
bool bound_checked = emit_function(compiler,
|
||||
quarks[j],
|
||||
backtrack,
|
||||
cp_offset + j,
|
||||
*checked_up_to < cp_offset + j,
|
||||
bounds_check,
|
||||
preloaded);
|
||||
if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
|
||||
}
|
||||
|
@ -3859,13 +3890,14 @@ TextNode::TextEmitPass(RegExpCompiler* compiler,
|
|||
if (first_element_checked && i == 0) continue;
|
||||
if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
|
||||
RegExpCharacterClass* cc = elm.char_class();
|
||||
bool bounds_check = *checked_up_to < cp_offset || read_backward();
|
||||
EmitCharClass(alloc(),
|
||||
assembler,
|
||||
cc,
|
||||
ascii,
|
||||
backtrack,
|
||||
cp_offset,
|
||||
*checked_up_to < cp_offset,
|
||||
bounds_check,
|
||||
preloaded);
|
||||
UpdateBoundsCheck(cp_offset, checked_up_to);
|
||||
}
|
||||
|
@ -3945,8 +3977,11 @@ TextNode::Emit(RegExpCompiler* compiler, Trace* trace)
|
|||
}
|
||||
|
||||
Trace successor_trace(*trace);
|
||||
successor_trace.set_at_start(false);
|
||||
successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
|
||||
// If we advance backward, we may end up at the start.
|
||||
successor_trace.AdvanceCurrentPositionInTrace(
|
||||
read_backward() ? -Length() : Length(), compiler);
|
||||
successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN
|
||||
: Trace::FALSE_VALUE);
|
||||
RecursionCheck rc(compiler);
|
||||
on_success()->Emit(compiler, &successor_trace);
|
||||
}
|
||||
|
@ -4118,6 +4153,8 @@ ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_lea
|
|||
RegExpNode*
|
||||
TextNode::GetSuccessorOfOmnivorousTextNode(RegExpCompiler* compiler)
|
||||
{
|
||||
if (read_backward()) return NULL;
|
||||
|
||||
if (elements().length() != 1)
|
||||
return nullptr;
|
||||
|
||||
|
@ -4165,7 +4202,7 @@ ChoiceNode::GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative)
|
|||
SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
|
||||
node = seq_node->on_success();
|
||||
}
|
||||
return length;
|
||||
return read_backward() ? -length : length;
|
||||
}
|
||||
|
||||
// Creates a list of AlternativeGenerations. If the list has a reasonable
|
||||
|
@ -4240,7 +4277,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
|
|||
jit::Label greedy_loop_label;
|
||||
Trace counter_backtrack_trace;
|
||||
counter_backtrack_trace.set_backtrack(&greedy_loop_label);
|
||||
if (not_at_start()) counter_backtrack_trace.set_at_start(false);
|
||||
if (not_at_start()) counter_backtrack_trace.set_at_start(Trace::FALSE_VALUE);
|
||||
|
||||
if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
|
||||
// Here we have special handling for greedy loops containing only text nodes
|
||||
|
@ -4256,7 +4293,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
|
|||
current_trace = &counter_backtrack_trace;
|
||||
jit::Label greedy_match_failed;
|
||||
Trace greedy_match_trace;
|
||||
if (not_at_start()) greedy_match_trace.set_at_start(false);
|
||||
if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE);
|
||||
greedy_match_trace.set_backtrack(&greedy_match_failed);
|
||||
jit::Label loop_label;
|
||||
macro_assembler->Bind(&loop_label);
|
||||
|
@ -4605,11 +4642,14 @@ BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace)
|
|||
MOZ_ASSERT(start_reg_ + 1 == end_reg_);
|
||||
if (compiler->ignore_case()) {
|
||||
assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
|
||||
read_backward(),
|
||||
trace->backtrack(),
|
||||
compiler->unicode());
|
||||
} else {
|
||||
assembler->CheckNotBackReference(start_reg_, trace->backtrack());
|
||||
assembler->CheckNotBackReference(start_reg_, read_backward(), trace->backtrack());
|
||||
}
|
||||
// We are going to advance backward, so we may end up at the start.
|
||||
if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
|
||||
on_success()->Emit(compiler, trace);
|
||||
}
|
||||
|
||||
|
@ -4977,7 +5017,6 @@ QuickCheckDetails::Clear()
|
|||
void
|
||||
QuickCheckDetails::Advance(int by, bool ascii)
|
||||
{
|
||||
MOZ_ASSERT(by >= 0);
|
||||
if (by >= characters_) {
|
||||
Clear();
|
||||
return;
|
||||
|
|
|
@ -119,7 +119,7 @@ InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* chars, size_t
|
|||
VISIT(Atom) \
|
||||
VISIT(Quantifier) \
|
||||
VISIT(Capture) \
|
||||
VISIT(Lookahead) \
|
||||
VISIT(Lookaround) \
|
||||
VISIT(BackReference) \
|
||||
VISIT(Empty) \
|
||||
VISIT(Text)
|
||||
|
@ -763,15 +763,19 @@ class TextNode : public SeqRegExpNode
|
|||
{
|
||||
public:
|
||||
TextNode(TextElementVector* elements,
|
||||
bool read_backward,
|
||||
RegExpNode* on_success)
|
||||
: SeqRegExpNode(on_success),
|
||||
elements_(elements)
|
||||
elements_(elements),
|
||||
read_backward_(read_backward)
|
||||
{}
|
||||
|
||||
TextNode(RegExpCharacterClass* that,
|
||||
bool read_backward,
|
||||
RegExpNode* on_success)
|
||||
: SeqRegExpNode(on_success),
|
||||
elements_(alloc()->newInfallible<TextElementVector>(*alloc()))
|
||||
elements_(alloc()->newInfallible<TextElementVector>(*alloc())),
|
||||
read_backward_(read_backward)
|
||||
{
|
||||
elements_->append(TextElement::CharClass(that));
|
||||
}
|
||||
|
@ -784,6 +788,7 @@ class TextNode : public SeqRegExpNode
|
|||
int characters_filled_in,
|
||||
bool not_at_start);
|
||||
TextElementVector& elements() { return *elements_; }
|
||||
bool read_backward() { return read_backward_; }
|
||||
void MakeCaseIndependent(bool is_ascii, bool unicode);
|
||||
virtual int GreedyLoopTextLength();
|
||||
virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
|
||||
|
@ -814,6 +819,7 @@ class TextNode : public SeqRegExpNode
|
|||
int* checked_up_to);
|
||||
int Length();
|
||||
TextElementVector* elements_;
|
||||
bool read_backward_;
|
||||
};
|
||||
|
||||
class AssertionNode : public SeqRegExpNode
|
||||
|
@ -882,15 +888,18 @@ class BackReferenceNode : public SeqRegExpNode
|
|||
public:
|
||||
BackReferenceNode(int start_reg,
|
||||
int end_reg,
|
||||
bool read_backward,
|
||||
RegExpNode* on_success)
|
||||
: SeqRegExpNode(on_success),
|
||||
start_reg_(start_reg),
|
||||
end_reg_(end_reg)
|
||||
end_reg_(end_reg),
|
||||
read_backward_(read_backward)
|
||||
{}
|
||||
|
||||
virtual void Accept(NodeVisitor* visitor);
|
||||
int start_register() { return start_reg_; }
|
||||
int end_register() { return end_reg_; }
|
||||
bool read_backward() { return read_backward_; }
|
||||
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
|
||||
virtual int EatsAtLeast(int still_to_find,
|
||||
int recursion_depth,
|
||||
|
@ -909,6 +918,7 @@ class BackReferenceNode : public SeqRegExpNode
|
|||
private:
|
||||
int start_reg_;
|
||||
int end_reg_;
|
||||
bool read_backward_;
|
||||
};
|
||||
|
||||
class EndNode : public RegExpNode
|
||||
|
@ -1053,6 +1063,7 @@ class ChoiceNode : public RegExpNode
|
|||
void set_being_calculated(bool b) { being_calculated_ = b; }
|
||||
virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
|
||||
virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode);
|
||||
virtual bool read_backward() { return false; }
|
||||
|
||||
protected:
|
||||
int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative);
|
||||
|
@ -1111,11 +1122,13 @@ class NegativeLookaheadChoiceNode : public ChoiceNode
|
|||
class LoopChoiceNode : public ChoiceNode
|
||||
{
|
||||
public:
|
||||
explicit LoopChoiceNode(LifoAlloc* alloc, bool body_can_be_zero_length)
|
||||
explicit LoopChoiceNode(LifoAlloc* alloc, bool body_can_be_zero_length,
|
||||
bool read_backward)
|
||||
: ChoiceNode(alloc, 2),
|
||||
loop_node_(nullptr),
|
||||
continue_node_(nullptr),
|
||||
body_can_be_zero_length_(body_can_be_zero_length)
|
||||
body_can_be_zero_length_(body_can_be_zero_length),
|
||||
read_backward_(read_backward)
|
||||
{}
|
||||
|
||||
void AddLoopAlternative(GuardedAlternative alt);
|
||||
|
@ -1133,6 +1146,7 @@ class LoopChoiceNode : public ChoiceNode
|
|||
RegExpNode* loop_node() { return loop_node_; }
|
||||
RegExpNode* continue_node() { return continue_node_; }
|
||||
bool body_can_be_zero_length() { return body_can_be_zero_length_; }
|
||||
virtual bool read_backward() { return read_backward_; }
|
||||
virtual void Accept(NodeVisitor* visitor);
|
||||
virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode);
|
||||
|
||||
|
@ -1147,6 +1161,7 @@ class LoopChoiceNode : public ChoiceNode
|
|||
RegExpNode* loop_node_;
|
||||
RegExpNode* continue_node_;
|
||||
bool body_can_be_zero_length_;
|
||||
bool read_backward_;
|
||||
};
|
||||
|
||||
// Improve the speed that we scan for an initial point where a non-anchored
|
||||
|
@ -1422,8 +1437,8 @@ class Trace
|
|||
}
|
||||
|
||||
TriBool at_start() { return at_start_; }
|
||||
void set_at_start(bool at_start) {
|
||||
at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE;
|
||||
void set_at_start(TriBool at_start) {
|
||||
at_start_ = at_start;
|
||||
}
|
||||
jit::Label* backtrack() { return backtrack_; }
|
||||
jit::Label* loop_label() { return loop_label_; }
|
||||
|
|
|
@ -222,8 +222,8 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
|
|||
}
|
||||
break;
|
||||
BYTECODE(LOAD_CURRENT_CHAR) {
|
||||
size_t pos = current + (insn >> BYTECODE_SHIFT);
|
||||
if (pos >= length) {
|
||||
int pos = current + (insn >> BYTECODE_SHIFT);
|
||||
if (pos >= (int)length || pos < 0) {
|
||||
pc = byteCode + Load32Aligned(pc + 4);
|
||||
} else {
|
||||
current_char = chars[pos];
|
||||
|
@ -238,8 +238,8 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
|
|||
break;
|
||||
}
|
||||
BYTECODE(LOAD_2_CURRENT_CHARS) {
|
||||
size_t pos = current + (insn >> BYTECODE_SHIFT);
|
||||
if (pos + 2 > length) {
|
||||
int pos = current + (insn >> BYTECODE_SHIFT);
|
||||
if (pos + 2 > (int)length || pos < 0) {
|
||||
pc = byteCode + Load32Aligned(pc + 4);
|
||||
} else {
|
||||
CharT next = chars[pos + 1];
|
||||
|
@ -425,6 +425,30 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
|
|||
pc += BC_CHECK_NOT_BACK_REF_LENGTH;
|
||||
break;
|
||||
}
|
||||
BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
    // Backward back reference: compare the captured text against the
    // characters that end at the current position.
    const auto start_reg = insn >> BYTECODE_SHIFT;
    const int capture_start = registers[start_reg];
    const int capture_len = registers[start_reg + 1] - capture_start;
    if (capture_start >= 0 && capture_len > 0) {
        const int match_start = int(current) - capture_len;
        // Not enough input before the current position counts as a mismatch.
        bool matched = match_start >= 0;
        for (int offset = 0; matched && offset < capture_len; offset++)
            matched = chars[capture_start + offset] == chars[match_start + offset];
        if (!matched) {
            // Branch to the target stored after the instruction word.
            pc = byteCode + Load32Aligned(pc + 4);
            break;
        }
        // Reading backward: step the position back over the matched text.
        current -= capture_len;
    }
    // Negative start or non-positive length succeeds without moving.
    pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
    break;
}
|
||||
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
|
||||
int from = registers[insn >> BYTECODE_SHIFT];
|
||||
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
|
||||
|
@ -465,6 +489,46 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
|
|||
}
|
||||
break;
|
||||
}
|
||||
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
    // Backward, case-insensitive back reference: compare the captured text
    // against the characters that end at the current position.
    const auto start_reg = insn >> BYTECODE_SHIFT;
    const int capture_start = registers[start_reg];
    const int capture_len = registers[start_reg + 1] - capture_start;
    if (capture_start >= 0 && capture_len > 0) {
        const int match_start = int(current) - capture_len;
        // Too little input before the current position, or a failed
        // comparison, both take the branch stored after the instruction word.
        if (match_start < 0 ||
            !CaseInsensitiveCompareStrings(chars + capture_start, chars + match_start,
                                           capture_len * sizeof(CharT)))
        {
            pc = byteCode + Load32Aligned(pc + 4);
            break;
        }
        // Reading backward: step the position back over the matched text.
        current -= capture_len;
    }
    // Negative start or non-positive length succeeds without moving.
    pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
    break;
}
|
||||
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE) {
    // Backward, case-insensitive back reference using the Unicode string
    // comparison: the captured text is compared against the characters that
    // end at the current position.
    int from = registers[insn >> BYTECODE_SHIFT];
    int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
    if (from < 0 || len <= 0) {
        // Negative start or non-positive length: succeed without moving.
        // Advance by this opcode's own length constant rather than the
        // non-Unicode sibling's, matching how the other cases pair
        // BYTECODE(X) with BC_X_LENGTH.
        pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE_LENGTH;
        break;
    }
    if (int(current) - len < 0) {
        // Not enough input before the current position: take the branch
        // stored after the instruction word.
        pc = byteCode + Load32Aligned(pc + 4);
        break;
    }
    if (CaseInsensitiveCompareUCStrings(chars + from, chars + int(current) - len, len * sizeof(CharT))) {
        // Reading backward: step the position back over the matched text.
        current -= len;
        pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE_LENGTH;
    } else {
        pc = byteCode + Load32Aligned(pc + 4);
    }
    break;
}
|
||||
BYTECODE(CHECK_AT_START)
|
||||
if (current == 0)
|
||||
pc = byteCode + Load32Aligned(pc + 4);
|
||||
|
@ -472,7 +536,7 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
|
|||
pc += BC_CHECK_AT_START_LENGTH;
|
||||
break;
|
||||
BYTECODE(CHECK_NOT_AT_START)
|
||||
if (current == 0)
|
||||
if (current + (insn >> BYTECODE_SHIFT) == 0)
|
||||
pc += BC_CHECK_NOT_AT_START_LENGTH;
|
||||
else
|
||||
pc = byteCode + Load32Aligned(pc + 4);
|
||||
|
|
|
@ -226,32 +226,37 @@ InterpretedRegExpMacroAssembler::CheckGreedyLoop(jit::Label* on_tos_equals_curre
|
|||
}
|
||||
|
||||
void
|
||||
InterpretedRegExpMacroAssembler::CheckNotAtStart(jit::Label* on_not_at_start)
|
||||
InterpretedRegExpMacroAssembler::CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start)
|
||||
{
|
||||
Emit(BC_CHECK_NOT_AT_START, 0);
|
||||
Emit(BC_CHECK_NOT_AT_START, cp_offset);
|
||||
EmitOrLink(on_not_at_start);
|
||||
}
|
||||
|
||||
void
|
||||
InterpretedRegExpMacroAssembler::CheckNotBackReference(int start_reg, jit::Label* on_no_match)
|
||||
InterpretedRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backward,
|
||||
jit::Label* on_no_match)
|
||||
{
|
||||
MOZ_ASSERT(start_reg >= 0);
|
||||
MOZ_ASSERT(start_reg <= kMaxRegister);
|
||||
Emit(BC_CHECK_NOT_BACK_REF, start_reg);
|
||||
Emit(read_backward ? BC_CHECK_NOT_BACK_REF_BACKWARD : BC_CHECK_NOT_BACK_REF,
|
||||
start_reg);
|
||||
EmitOrLink(on_no_match);
|
||||
}
|
||||
|
||||
void
|
||||
InterpretedRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg,
|
||||
bool read_backward,
|
||||
jit::Label* on_no_match,
|
||||
bool unicode)
|
||||
{
|
||||
MOZ_ASSERT(start_reg >= 0);
|
||||
MOZ_ASSERT(start_reg <= kMaxRegister);
|
||||
if (unicode)
|
||||
Emit(BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE, start_reg);
|
||||
Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE : BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE,
|
||||
start_reg);
|
||||
else
|
||||
Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg);
|
||||
Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD : BC_CHECK_NOT_BACK_REF_NO_CASE,
|
||||
start_reg);
|
||||
EmitOrLink(on_no_match);
|
||||
}
|
||||
|
||||
|
|
|
@ -110,10 +110,10 @@ class MOZ_STACK_CLASS RegExpMacroAssembler
|
|||
virtual void CheckCharacterGT(char16_t limit, jit::Label* on_greater) = 0;
|
||||
virtual void CheckCharacterLT(char16_t limit, jit::Label* on_less) = 0;
|
||||
virtual void CheckGreedyLoop(jit::Label* on_tos_equals_current_position) = 0;
|
||||
virtual void CheckNotAtStart(jit::Label* on_not_at_start) = 0;
|
||||
virtual void CheckNotBackReference(int start_reg, jit::Label* on_no_match) = 0;
|
||||
virtual void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match,
|
||||
bool unicode) = 0;
|
||||
virtual void CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start) = 0;
|
||||
virtual void CheckNotBackReference(int start_reg, bool read_backward, jit::Label* on_no_match) = 0;
|
||||
virtual void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
|
||||
jit::Label* on_no_match, bool unicode) = 0;
|
||||
|
||||
// Check the current character for a match with a literal character. If we
|
||||
// fail to match then goto the on_failure label. End of input always
|
||||
|
@ -245,9 +245,10 @@ class MOZ_STACK_CLASS InterpretedRegExpMacroAssembler final : public RegExpMacro
|
|||
void CheckCharacterGT(char16_t limit, jit::Label* on_greater);
|
||||
void CheckCharacterLT(char16_t limit, jit::Label* on_less);
|
||||
void CheckGreedyLoop(jit::Label* on_tos_equals_current_position);
|
||||
void CheckNotAtStart(jit::Label* on_not_at_start);
|
||||
void CheckNotBackReference(int start_reg, jit::Label* on_no_match);
|
||||
void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, bool unicode);
|
||||
void CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start);
|
||||
void CheckNotBackReference(int start_reg, bool read_backward, jit::Label* on_no_match);
|
||||
void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
|
||||
jit::Label* on_no_match, bool unicode);
|
||||
void CheckNotCharacter(unsigned c, jit::Label* on_not_equal);
|
||||
void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_not_equal);
|
||||
void CheckNotCharacterAfterMinusAnd(char16_t c, char16_t minus, char16_t and_with,
|
||||
|
|
|
@ -227,6 +227,7 @@ RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
|
|||
alloc(alloc),
|
||||
captures_(nullptr),
|
||||
next_pos_(chars),
|
||||
captures_started_(0),
|
||||
end_(end),
|
||||
current_(kEndMarker),
|
||||
capture_count_(0),
|
||||
|
@ -418,7 +419,8 @@ RangeAtom(LifoAlloc* alloc, char16_t from, char16_t to)
|
|||
static inline RegExpTree*
|
||||
NegativeLookahead(LifoAlloc* alloc, char16_t from, char16_t to)
|
||||
{
|
||||
return alloc->newInfallible<RegExpLookahead>(RangeAtom(alloc, from, to), false, 0, 0);
|
||||
return alloc->newInfallible<RegExpLookaround>(RangeAtom(alloc, from, to), false,
|
||||
0, 0, RegExpLookaround::LOOKAHEAD);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
@ -1213,6 +1215,38 @@ RegExpParser<CharT>::ParseBackReferenceIndex(int* index_out)
|
|||
return true;
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
RegExpCapture*
|
||||
RegExpParser<CharT>::GetCapture(int index) {
|
||||
// The index for the capture groups are one-based. Its index in the list is
|
||||
// zero-based.
|
||||
int known_captures =
|
||||
is_scanned_for_captures_ ? capture_count_ : captures_started_;
|
||||
MOZ_ASSERT(index <= known_captures);
|
||||
if (captures_ == NULL) {
|
||||
captures_ = alloc->newInfallible<RegExpCaptureVector>(*alloc);
|
||||
}
|
||||
while ((int)captures_->length() < known_captures) {
|
||||
RegExpCapture* capture = alloc->newInfallible<RegExpCapture>(nullptr, captures_->length() + 1);
|
||||
captures_->append(capture);
|
||||
}
|
||||
return (*captures_)[index - 1];
|
||||
}
|
||||
|
||||
|
||||
template <typename CharT>
|
||||
bool
|
||||
RegExpParser<CharT>::RegExpParserState::IsInsideCaptureGroup(int index) {
|
||||
for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {
|
||||
if (s->group_type() != CAPTURE) continue;
|
||||
// Return true if we found the matching capture index.
|
||||
if (index == s->capture_index()) return true;
|
||||
// Abort if index is larger than what has been parsed up till this state.
|
||||
if (index > s->capture_index()) return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// QuantifierPrefix ::
|
||||
// { DecimalDigits }
|
||||
// { DecimalDigits , }
|
||||
|
@ -1423,24 +1457,24 @@ RegExpTree*
|
|||
RegExpParser<CharT>::ParseDisjunction()
|
||||
{
|
||||
// Used to store current state while parsing subexpressions.
|
||||
RegExpParserState initial_state(alloc, nullptr, INITIAL, 0);
|
||||
RegExpParserState* stored_state = &initial_state;
|
||||
RegExpParserState initial_state(alloc, nullptr, INITIAL, RegExpLookaround::LOOKAHEAD, 0);
|
||||
RegExpParserState* state = &initial_state;
|
||||
// Cache the builder in a local variable for quick access.
|
||||
RegExpBuilder* builder = initial_state.builder();
|
||||
while (true) {
|
||||
switch (current()) {
|
||||
case kEndMarker:
|
||||
if (stored_state->IsSubexpression()) {
|
||||
if (state->IsSubexpression()) {
|
||||
// Inside a parenthesized group when hitting end of input.
|
||||
return ReportError(JSMSG_MISSING_PAREN);
|
||||
}
|
||||
MOZ_ASSERT(INITIAL == stored_state->group_type());
|
||||
MOZ_ASSERT(INITIAL == state->group_type());
|
||||
// Parsing completed successfully.
|
||||
return builder->ToRegExp();
|
||||
case ')': {
|
||||
if (!stored_state->IsSubexpression())
|
||||
if (!state->IsSubexpression())
|
||||
return ReportError(JSMSG_UNMATCHED_RIGHT_PAREN);
|
||||
MOZ_ASSERT(INITIAL != stored_state->group_type());
|
||||
MOZ_ASSERT(INITIAL != state->group_type());
|
||||
|
||||
Advance();
|
||||
// End disjunction parsing and convert builder content to new single
|
||||
|
@ -1449,29 +1483,30 @@ RegExpParser<CharT>::ParseDisjunction()
|
|||
|
||||
int end_capture_index = captures_started();
|
||||
|
||||
int capture_index = stored_state->capture_index();
|
||||
SubexpressionType group_type = stored_state->group_type();
|
||||
|
||||
// Restore previous state.
|
||||
stored_state = stored_state->previous_state();
|
||||
builder = stored_state->builder();
|
||||
int capture_index = state->capture_index();
|
||||
SubexpressionType group_type = state->group_type();
|
||||
|
||||
// Build result of subexpression.
|
||||
if (group_type == CAPTURE) {
|
||||
RegExpCapture* capture = alloc->newInfallible<RegExpCapture>(body, capture_index);
|
||||
(*captures_)[capture_index - 1] = capture;
|
||||
RegExpCapture* capture = GetCapture(capture_index);
|
||||
capture->set_body(body);
|
||||
body = capture;
|
||||
} else if (group_type != GROUPING) {
|
||||
MOZ_ASSERT(group_type == POSITIVE_LOOKAHEAD ||
|
||||
group_type == NEGATIVE_LOOKAHEAD);
|
||||
bool is_positive = (group_type == POSITIVE_LOOKAHEAD);
|
||||
body = alloc->newInfallible<RegExpLookahead>(body,
|
||||
MOZ_ASSERT(group_type == POSITIVE_LOOKAROUND ||
|
||||
group_type == NEGATIVE_LOOKAROUND);
|
||||
bool is_positive = (group_type == POSITIVE_LOOKAROUND);
|
||||
body = alloc->newInfallible<RegExpLookaround>(body,
|
||||
is_positive,
|
||||
end_capture_index - capture_index,
|
||||
capture_index);
|
||||
capture_index,
|
||||
state->lookaround_type());
|
||||
}
|
||||
|
||||
// Restore previous state.
|
||||
state = state->previous_state();
|
||||
builder = state->builder();
|
||||
builder->AddAtom(body);
|
||||
if (unicode_ && (group_type == POSITIVE_LOOKAHEAD || group_type == NEGATIVE_LOOKAHEAD))
|
||||
if (unicode_ && (group_type == POSITIVE_LOOKAROUND || group_type == NEGATIVE_LOOKAROUND))
|
||||
continue;
|
||||
// For compatibility with JSC and ES3, we allow quantifiers after
|
||||
// lookaheads, and break in all cases.
|
||||
|
@ -1519,6 +1554,7 @@ RegExpParser<CharT>::ParseDisjunction()
|
|||
}
|
||||
case '(': {
|
||||
SubexpressionType subexpr_type = CAPTURE;
|
||||
RegExpLookaround::Type lookaround_type = state->lookaround_type();
|
||||
Advance();
|
||||
if (current() == '?') {
|
||||
switch (Next()) {
|
||||
|
@ -1526,26 +1562,39 @@ RegExpParser<CharT>::ParseDisjunction()
|
|||
subexpr_type = GROUPING;
|
||||
break;
|
||||
case '=':
|
||||
subexpr_type = POSITIVE_LOOKAHEAD;
|
||||
lookaround_type = RegExpLookaround::LOOKAHEAD;
|
||||
subexpr_type = POSITIVE_LOOKAROUND;
|
||||
break;
|
||||
case '!':
|
||||
subexpr_type = NEGATIVE_LOOKAHEAD;
|
||||
lookaround_type = RegExpLookaround::LOOKAHEAD;
|
||||
subexpr_type = NEGATIVE_LOOKAROUND;
|
||||
break;
|
||||
case '<':
|
||||
Advance();
|
||||
lookaround_type = RegExpLookaround::LOOKBEHIND;
|
||||
if (Next() == '=') {
|
||||
subexpr_type = POSITIVE_LOOKAROUND;
|
||||
break;
|
||||
} else if (Next() == '!') {
|
||||
subexpr_type = NEGATIVE_LOOKAROUND;
|
||||
break;
|
||||
}
|
||||
// We didn't get a positive or negative after '<'.
|
||||
// That's an error.
|
||||
return ReportError(JSMSG_INVALID_GROUP);
|
||||
default:
|
||||
return ReportError(JSMSG_INVALID_GROUP);
|
||||
}
|
||||
Advance(2);
|
||||
} else {
|
||||
if (captures_ == nullptr)
|
||||
captures_ = alloc->newInfallible<RegExpCaptureVector>(*alloc);
|
||||
if (captures_started() >= kMaxCaptures)
|
||||
return ReportError(JSMSG_TOO_MANY_PARENS);
|
||||
captures_->append((RegExpCapture*) nullptr);
|
||||
captures_started_++;
|
||||
}
|
||||
// Store current state and begin new disjunction parsing.
|
||||
stored_state = alloc->newInfallible<RegExpParserState>(alloc, stored_state, subexpr_type,
|
||||
captures_started());
|
||||
builder = stored_state->builder();
|
||||
state = alloc->newInfallible<RegExpParserState>(alloc, state, subexpr_type,
|
||||
lookaround_type, captures_started_);
|
||||
builder = state->builder();
|
||||
continue;
|
||||
}
|
||||
case '[': {
|
||||
|
@ -1600,19 +1649,18 @@ RegExpParser<CharT>::ParseDisjunction()
|
|||
case '7': case '8': case '9': {
|
||||
int index = 0;
|
||||
if (ParseBackReferenceIndex(&index)) {
|
||||
RegExpCapture* capture = nullptr;
|
||||
if (captures_ != nullptr && index <= (int) captures_->length()) {
|
||||
capture = (*captures_)[index - 1];
|
||||
if (state->IsInsideCaptureGroup(index)) {
|
||||
// The backreference is inside the capture group it refers to.
|
||||
// Nothing can possibly have been captured yet.
|
||||
builder->AddEmpty();
|
||||
} else {
|
||||
RegExpCapture* capture = GetCapture(index);
|
||||
RegExpTree* atom = alloc->newInfallible<RegExpBackReference>(capture);
|
||||
if (unicode_)
|
||||
builder->AddAtom(UnicodeBackReferenceAtom(alloc, atom));
|
||||
else
|
||||
builder->AddAtom(atom);
|
||||
}
|
||||
if (capture == nullptr) {
|
||||
builder->AddEmpty();
|
||||
break;
|
||||
}
|
||||
RegExpTree* atom = alloc->newInfallible<RegExpBackReference>(capture);
|
||||
if (unicode_)
|
||||
builder->AddAtom(UnicodeBackReferenceAtom(alloc, atom));
|
||||
else
|
||||
builder->AddAtom(atom);
|
||||
break;
|
||||
}
|
||||
if (unicode_)
|
||||
|
|
|
@ -229,7 +229,7 @@ class RegExpParser
|
|||
bool simple() { return simple_; }
|
||||
bool contains_anchor() { return contains_anchor_; }
|
||||
void set_contains_anchor() { contains_anchor_ = true; }
|
||||
int captures_started() { return captures_ == nullptr ? 0 : captures_->length(); }
|
||||
int captures_started() { return captures_started_; }
|
||||
const CharT* position() { return next_pos_ - 1; }
|
||||
|
||||
static const int kMaxCaptures = 1 << 16;
|
||||
|
@ -239,8 +239,8 @@ class RegExpParser
|
|||
enum SubexpressionType {
|
||||
INITIAL,
|
||||
CAPTURE, // All positive values represent captures.
|
||||
POSITIVE_LOOKAHEAD,
|
||||
NEGATIVE_LOOKAHEAD,
|
||||
POSITIVE_LOOKAROUND,
|
||||
NEGATIVE_LOOKAROUND,
|
||||
GROUPING
|
||||
};
|
||||
|
||||
|
@ -249,10 +249,12 @@ class RegExpParser
|
|||
RegExpParserState(LifoAlloc* alloc,
|
||||
RegExpParserState* previous_state,
|
||||
SubexpressionType group_type,
|
||||
RegExpLookaround::Type lookaround_type,
|
||||
int disjunction_capture_index)
|
||||
: previous_state_(previous_state),
|
||||
builder_(alloc->newInfallible<RegExpBuilder>(alloc)),
|
||||
group_type_(group_type),
|
||||
lookaround_type_(lookaround_type),
|
||||
disjunction_capture_index_(disjunction_capture_index)
|
||||
{}
|
||||
// Parser state of containing expression, if any.
|
||||
|
@ -262,11 +264,16 @@ class RegExpParser
|
|||
RegExpBuilder* builder() { return builder_; }
|
||||
// Type of regexp being parsed (parenthesized group or entire regexp).
|
||||
SubexpressionType group_type() { return group_type_; }
|
||||
// Lookahead or Lookbehind.
|
||||
RegExpLookaround::Type lookaround_type() { return lookaround_type_; }
|
||||
// Index in captures array of first capture in this sub-expression, if any.
|
||||
// Also the capture index of this sub-expression itself, if group_type
|
||||
// is CAPTURE.
|
||||
int capture_index() { return disjunction_capture_index_; }
|
||||
|
||||
// Check whether the parser is inside a capture group with the given index.
|
||||
bool IsInsideCaptureGroup(int index);
|
||||
|
||||
private:
|
||||
// Linked list implementation of stack of states.
|
||||
RegExpParserState* previous_state_;
|
||||
|
@ -274,10 +281,15 @@ class RegExpParser
|
|||
RegExpBuilder* builder_;
|
||||
// Stored disjunction type (capture, look-ahead or grouping), if any.
|
||||
SubexpressionType group_type_;
|
||||
// Stored read direction.
|
||||
RegExpLookaround::Type lookaround_type_;
|
||||
// Stored disjunction's capture index (if any).
|
||||
int disjunction_capture_index_;
|
||||
};
|
||||
|
||||
// Return the 1-indexed RegExpCapture object, allocate if necessary.
|
||||
RegExpCapture* GetCapture(int index);
|
||||
|
||||
widechar current() { return current_; }
|
||||
bool has_more() { return has_more_; }
|
||||
bool has_next() { return next_pos_ < end_; }
|
||||
|
@ -294,6 +306,7 @@ class RegExpParser
|
|||
const CharT* next_pos_;
|
||||
const CharT* end_;
|
||||
widechar current_;
|
||||
int captures_started_;
|
||||
// The capture count is only valid after we have scanned for captures.
|
||||
int capture_count_;
|
||||
bool has_more_;
|
||||
|
|
Loading…
Reference in New Issue