From 86d370c5c8eefe9719ff416f0538b30ca88397c3 Mon Sep 17 00:00:00 2001 From: Fedor Date: Sat, 8 Feb 2020 01:24:07 +0300 Subject: [PATCH] Revert "Implement regex lookaround." This reverts commit ca60b3854f21a1ef7f86314c0fa1854c3fb7330a. --- js/src/builtin/TestingFunctions.cpp | 6 +- .../irregexp/NativeRegExpMacroAssembler.cpp | 8 +- js/src/irregexp/NativeRegExpMacroAssembler.h | 7 +- js/src/irregexp/RegExpAST.cpp | 8 +- js/src/irregexp/RegExpAST.h | 33 ++-- js/src/irregexp/RegExpBytecode.h | 23 ++- js/src/irregexp/RegExpEngine.cpp | 149 +++++++----------- js/src/irregexp/RegExpEngine.h | 31 +--- js/src/irregexp/RegExpInterpreter.cpp | 74 +-------- js/src/irregexp/RegExpMacroAssembler.cpp | 17 +- js/src/irregexp/RegExpMacroAssembler.h | 15 +- js/src/irregexp/RegExpParser.cpp | 130 +++++---------- js/src/irregexp/RegExpParser.h | 19 +-- 13 files changed, 159 insertions(+), 361 deletions(-) diff --git a/js/src/builtin/TestingFunctions.cpp b/js/src/builtin/TestingFunctions.cpp index a9a307da7..025620766 100644 --- a/js/src/builtin/TestingFunctions.cpp +++ b/js/src/builtin/TestingFunctions.cpp @@ -3862,10 +3862,10 @@ ConvertRegExpTreeToObject(JSContext* cx, irregexp::RegExpTree* tree) return nullptr; return obj; } - if (tree->IsLookaround()) { - if (!StringProp(cx, obj, "type", "Lookaround")) + if (tree->IsLookahead()) { + if (!StringProp(cx, obj, "type", "Lookahead")) return nullptr; - irregexp::RegExpLookaround* t = tree->AsLookaround(); + irregexp::RegExpLookahead* t = tree->AsLookahead(); if (!BooleanProp(cx, obj, "is_positive", t->is_positive())) return nullptr; if (!TreeProp(cx, obj, "body", t->body())) diff --git a/js/src/irregexp/NativeRegExpMacroAssembler.cpp b/js/src/irregexp/NativeRegExpMacroAssembler.cpp index e17eecb9b..0fb507297 100644 --- a/js/src/irregexp/NativeRegExpMacroAssembler.cpp +++ b/js/src/irregexp/NativeRegExpMacroAssembler.cpp @@ -582,7 +582,7 @@ NativeRegExpMacroAssembler::CheckAtStart(Label* on_at_start) } void -NativeRegExpMacroAssembler::CheckNotAtStart(int cp_offset, Label* on_not_at_start) +NativeRegExpMacroAssembler::CheckNotAtStart(Label* on_not_at_start) { JitSpew(SPEW_PREFIX "CheckNotAtStart"); @@ -673,7 +673,7 @@ NativeRegExpMacroAssembler::CheckGreedyLoop(Label* on_tos_equals_current_positio } void -NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match) +NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, Label* on_no_match) { JitSpew(SPEW_PREFIX "CheckNotBackReference(%d)", start_reg); @@ -744,8 +744,8 @@ NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backw } void -NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward, - Label* on_no_match, bool unicode) +NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label* on_no_match, + bool unicode) { JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode); diff --git a/js/src/irregexp/NativeRegExpMacroAssembler.h b/js/src/irregexp/NativeRegExpMacroAssembler.h index fc582dccf..7a72e252f 100644 --- a/js/src/irregexp/NativeRegExpMacroAssembler.h +++ b/js/src/irregexp/NativeRegExpMacroAssembler.h @@ -105,10 +105,9 @@ class MOZ_STACK_CLASS NativeRegExpMacroAssembler final : public RegExpMacroAssem void CheckCharacterGT(char16_t limit, jit::Label* on_greater); void CheckCharacterLT(char16_t limit, jit::Label* on_less); void CheckGreedyLoop(jit::Label* on_tos_equals_current_position); - void CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start); - void CheckNotBackReference(int start_reg, bool read_backward, jit::Label* on_no_match); - void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward, - jit::Label* on_no_match, bool unicode); + void CheckNotAtStart(jit::Label* on_not_at_start); + void CheckNotBackReference(int start_reg, jit::Label* on_no_match); + void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, bool unicode); void CheckNotCharacter(unsigned c, jit::Label* on_not_equal); void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_not_equal); void CheckNotCharacterAfterMinusAnd(char16_t c, char16_t minus, char16_t and_with, diff --git a/js/src/irregexp/RegExpAST.cpp b/js/src/irregexp/RegExpAST.cpp index 43867c312..8dfd99057 100644 --- a/js/src/irregexp/RegExpAST.cpp +++ b/js/src/irregexp/RegExpAST.cpp @@ -250,16 +250,16 @@ RegExpCapture::CaptureRegisters() } // ---------------------------------------------------------------------------- -// RegExpLookaround +// RegExpLookahead Interval -RegExpLookaround::CaptureRegisters() +RegExpLookahead::CaptureRegisters() { return body()->CaptureRegisters(); } bool -RegExpLookaround::IsAnchoredAtStart() +RegExpLookahead::IsAnchoredAtStart() { - return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart(); + return is_positive() && body()->IsAnchoredAtStart(); } diff --git a/js/src/irregexp/RegExpAST.h b/js/src/irregexp/RegExpAST.h index 6f59842bc..7bda6fc7e 100644 --- a/js/src/irregexp/RegExpAST.h +++ b/js/src/irregexp/RegExpAST.h @@ -360,7 +360,6 @@ class RegExpCapture : public RegExpTree virtual int min_match() { return body_->min_match(); } virtual int max_match() { return body_->max_match(); } RegExpTree* body() { return body_; } - void set_body(RegExpTree* body) { body_ = body; } int index() { return index_; } static int StartRegister(int index) { return index * 2; } static int EndRegister(int index) { return index * 2 + 1; } @@ -370,29 +369,25 @@ class RegExpCapture : public RegExpTree int index_; }; -class RegExpLookaround : public RegExpTree +class RegExpLookahead : public RegExpTree { public: - enum Type { LOOKAHEAD, LOOKBEHIND }; - - RegExpLookaround(RegExpTree* body, - bool is_positive, - int capture_count, - int capture_from, - Type type) + RegExpLookahead(RegExpTree* body, + bool is_positive, + int capture_count, + int capture_from) : body_(body), is_positive_(is_positive), capture_count_(capture_count), - capture_from_(capture_from), - type_(type) + capture_from_(capture_from) {} virtual void* Accept(RegExpVisitor* visitor, void* data); virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success); - virtual RegExpLookaround* AsLookaround(); + virtual RegExpLookahead* AsLookahead(); virtual Interval CaptureRegisters(); - virtual bool IsLookaround(); + virtual bool IsLookahead(); virtual bool IsAnchoredAtStart(); virtual int min_match() { return 0; } virtual int max_match() { return 0; } @@ -400,14 +395,12 @@ class RegExpLookaround : public RegExpTree bool is_positive() { return is_positive_; } int capture_count() { return capture_count_; } int capture_from() { return capture_from_; } - Type type() { return type_; } private: RegExpTree* body_; bool is_positive_; int capture_count_; int capture_from_; - Type type_; }; typedef InfallibleVector RegExpCaptureVector; @@ -424,14 +417,8 @@ class RegExpBackReference : public RegExpTree RegExpNode* on_success); virtual RegExpBackReference* AsBackReference(); virtual bool IsBackReference(); - virtual int min_match() override { return 0; } - // The capture may not be completely parsed yet, if the reference occurs - // before the capture. In the ordinary case, nothing has been captured yet, - // so the back reference must have the length 0. If the back reference is - // inside a lookbehind, effectively making it a forward reference, we return - virtual int max_match() override { - return capture_->body() ? capture_->max_match() : 0; - } + virtual int min_match() { return 0; } + virtual int max_match() { return capture_->max_match(); } int index() { return capture_->index(); } RegExpCapture* capture() { return capture_; } private: diff --git a/js/src/irregexp/RegExpBytecode.h b/js/src/irregexp/RegExpBytecode.h index ea3f80b4f..f31b78c59 100644 --- a/js/src/irregexp/RegExpBytecode.h +++ b/js/src/irregexp/RegExpBytecode.h @@ -82,19 +82,16 @@ V(CHECK_LT, 35, 8) /* bc8 pad8 uc16 addr32 */ \ V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32 */ \ V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \ V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \ -V(CHECK_NOT_BACK_REF_BACKWARD, 39, 8) /* bc8 reg_idx24 addr32 */ \ -V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \ -V(CHECK_NOT_REGS_EQUAL, 41, 12) /* bc8 regidx24 reg_idx32 addr32 */ \ -V(CHECK_REGISTER_LT, 42, 12) /* bc8 reg_idx24 value32 addr32 */ \ -V(CHECK_REGISTER_GE, 43, 12) /* bc8 reg_idx24 value32 addr32 */ \ -V(CHECK_REGISTER_EQ_POS, 44, 8) /* bc8 reg_idx24 addr32 */ \ -V(CHECK_AT_START, 45, 8) /* bc8 pad24 addr32 */ \ -V(CHECK_NOT_AT_START, 46, 8) /* bc8 pad24 addr32 */ \ -V(CHECK_GREEDY, 47, 8) /* bc8 pad24 addr32 */ \ -V(ADVANCE_CP_AND_GOTO, 48, 8) /* bc8 offset24 addr32 */ \ -V(SET_CURRENT_POSITION_FROM_END, 49, 4) /* bc8 idx24 */ \ -V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 50, 8) /* bc8 reg_idx24 addr32 */ \ -V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE, 51, 8) /* bc8 reg_idx24 addr32 */ +V(CHECK_NOT_REGS_EQUAL, 39, 12) /* bc8 regidx24 reg_idx32 addr32 */ \ +V(CHECK_REGISTER_LT, 40, 12) /* bc8 reg_idx24 value32 addr32 */ \ +V(CHECK_REGISTER_GE, 41, 12) /* bc8 reg_idx24 value32 addr32 */ \ +V(CHECK_REGISTER_EQ_POS, 42, 8) /* bc8 reg_idx24 addr32 */ \ +V(CHECK_AT_START, 43, 8) /* bc8 pad24 addr32 */ \ +V(CHECK_NOT_AT_START, 44, 8) /* bc8 pad24 addr32 */ \ +V(CHECK_GREEDY, 45, 8) /* bc8 pad24 addr32 */ \ +V(ADVANCE_CP_AND_GOTO, 46, 8) /* bc8 offset24 addr32 */ \ +V(SET_CURRENT_POSITION_FROM_END, 47, 4) /* bc8 idx24 */ \ +V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 48, 8) /* bc8 reg_idx24 addr32 */ #define DECLARE_BYTECODES(name, code, length) \ static const int BC_##name = code; diff --git a/js/src/irregexp/RegExpEngine.cpp b/js/src/irregexp/RegExpEngine.cpp index 62f94c3e7..4d691a5dc 100644 --- a/js/src/irregexp/RegExpEngine.cpp +++ b/js/src/irregexp/RegExpEngine.cpp @@ -721,8 +721,6 @@ ActionNode::EmptyMatchCheck(int start_register, int TextNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start) { - if (read_backward()) - return 0; int answer = Length(); if (answer >= still_to_find) return answer; @@ -738,7 +736,8 @@ TextNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start) int TextNode::GreedyLoopTextLength() { - return Length(); + TextElement elm = elements()[elements().length() - 1]; + return elm.cp_offset() + elm.length(); } RegExpNode* @@ -888,8 +887,6 @@ AssertionNode::FillInBMInfo(int offset, int budget, BoyerMooreLookahead* bm, boo int BackReferenceNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start) { - if (read_backward()) - return 0; if (budget <= 0) return 0; return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); @@ -1581,9 +1578,6 @@ class irregexp::RegExpCompiler current_expansion_factor_ = value; } - bool read_backward() { return read_backward_; } - void set_read_backward(bool value) { read_backward_ = value; } - JSContext* cx() const { return cx_; } LifoAlloc* alloc() const { return alloc_; } @@ -1601,7 +1595,6 @@ class irregexp::RegExpCompiler bool unicode_; bool reg_exp_too_big_; int current_expansion_factor_; - bool read_backward_; FrequencyCollator frequency_collator_; JSContext* cx_; LifoAlloc* alloc_; @@ -1631,7 +1624,6 @@ RegExpCompiler::RegExpCompiler(JSContext* cx, LifoAlloc* alloc, int capture_coun unicode_(unicode), reg_exp_too_big_(false), current_expansion_factor_(1), - read_backward_(false), frequency_collator_(), cx_(cx), alloc_(alloc) @@ -1755,7 +1747,7 @@ irregexp::CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData* // at the start of input. ChoiceNode* first_step_node = alloc.newInfallible(&alloc, 2); RegExpNode* char_class = - alloc.newInfallible(alloc.newInfallible('*'), false, loop_node); + alloc.newInfallible(alloc.newInfallible('*'), loop_node); first_step_node->AddAlternative(GuardedAlternative(captured_body)); first_step_node->AddAlternative(GuardedAlternative(char_class)); node = first_step_node; @@ -1858,19 +1850,19 @@ RegExpAtom::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) TextElementVector* elms = compiler->alloc()->newInfallible(*compiler->alloc()); elms->append(TextElement::Atom(this)); - return compiler->alloc()->newInfallible(elms, compiler->read_backward(), on_success); + return compiler->alloc()->newInfallible(elms, on_success); } RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) { - return compiler->alloc()->newInfallible(&elements_, compiler->read_backward(), on_success); + return compiler->alloc()->newInfallible(&elements_, on_success); } RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) { - return compiler->alloc()->newInfallible(this, compiler->read_backward(), on_success); + return compiler->alloc()->newInfallible(this, on_success); } RegExpNode* @@ -2011,8 +2003,7 @@ RegExpQuantifier::ToNode(int min, alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, answer))); } answer = alternation; - if (not_at_start && !compiler->read_backward()) - alternation->set_not_at_start(); + if (not_at_start) alternation->set_not_at_start(); } return answer; } @@ -2024,9 +2015,8 @@ RegExpQuantifier::ToNode(int min, int reg_ctr = needs_counter ? compiler->AllocateRegister() : RegExpCompiler::kNoRegister; - LoopChoiceNode* center = alloc->newInfallible(alloc, body->min_match() == 0, - compiler->read_backward()); - if (not_at_start && !compiler->read_backward()) + LoopChoiceNode* center = alloc->newInfallible(alloc, body->min_match() == 0); + if (not_at_start) center->set_not_at_start(); RegExpNode* loop_return = needs_counter ? static_cast(ActionNode::IncrementRegister(reg_ctr, center)) @@ -2102,7 +2092,7 @@ RegExpAssertion::ToNode(RegExpCompiler* compiler, CharacterRange::AddClassEscape(alloc, 'n', newline_ranges); RegExpCharacterClass* newline_atom = alloc->newInfallible('n'); TextNode* newline_matcher = - alloc->newInfallible(newline_atom, false, + alloc->newInfallible(newline_atom, ActionNode::PositiveSubmatchSuccess(stack_pointer_register, position_register, 0, // No captures inside. @@ -2134,7 +2124,6 @@ RegExpBackReference::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) { return compiler->alloc()->newInfallible(RegExpCapture::StartRegister(index()), RegExpCapture::EndRegister(index()), - compiler->read_backward(), on_success); } @@ -2145,7 +2134,7 @@ RegExpEmpty::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) } RegExpNode* -RegExpLookaround::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) +RegExpLookahead::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) { int stack_pointer_register = compiler->AllocateRegister(); int position_register = compiler->AllocateRegister(); @@ -2156,10 +2145,6 @@ RegExpLookaround::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) int register_start = register_of_first_capture + capture_from_ * registers_per_capture; - RegExpNode* result; - bool was_reading_backward = compiler->read_backward(); - compiler->set_read_backward(type() == LOOKBEHIND); - if (is_positive()) { RegExpNode* bodyNode = body()->ToNode(compiler, @@ -2168,39 +2153,37 @@ RegExpLookaround::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) register_count, register_start, on_success)); - result = ActionNode::BeginSubmatch(stack_pointer_register, - position_register, - bodyNode); - } else { - // We use a ChoiceNode for a negative lookahead because it has most of - // the characteristics we need. It has the body of the lookahead as its - // first alternative and the expression after the lookahead of the second - // alternative. If the first alternative succeeds then the - // NegativeSubmatchSuccess will unwind the stack including everything the - // choice node set up and backtrack. If the first alternative fails then - // the second alternative is tried, which is exactly the desired result - // for a negative lookahead. The NegativeLookaheadChoiceNode is a special - // ChoiceNode that knows to ignore the first exit when calculating quick - // checks. - LifoAlloc* alloc = compiler->alloc(); - - RegExpNode* success = - alloc->newInfallible(alloc, - stack_pointer_register, - position_register, - register_count, - register_start); - GuardedAlternative body_alt(body()->ToNode(compiler, success)); - - ChoiceNode* choice_node = - alloc->newInfallible(alloc, body_alt, GuardedAlternative(on_success)); - - result = ActionNode::BeginSubmatch(stack_pointer_register, + return ActionNode::BeginSubmatch(stack_pointer_register, position_register, - choice_node); + bodyNode); } - compiler->set_read_backward(was_reading_backward); - return result; + + // We use a ChoiceNode for a negative lookahead because it has most of + // the characteristics we need. It has the body of the lookahead as its + // first alternative and the expression after the lookahead of the second + // alternative. If the first alternative succeeds then the + // NegativeSubmatchSuccess will unwind the stack including everything the + // choice node set up and backtrack. If the first alternative fails then + // the second alternative is tried, which is exactly the desired result + // for a negative lookahead. The NegativeLookaheadChoiceNode is a special + // ChoiceNode that knows to ignore the first exit when calculating quick + // checks. + LifoAlloc* alloc = compiler->alloc(); + + RegExpNode* success = + alloc->newInfallible(alloc, + stack_pointer_register, + position_register, + register_count, + register_start); + GuardedAlternative body_alt(body()->ToNode(compiler, success)); + + ChoiceNode* choice_node = + alloc->newInfallible(alloc, body_alt, GuardedAlternative(on_success)); + + return ActionNode::BeginSubmatch(stack_pointer_register, + position_register, + choice_node); } RegExpNode* @@ -2215,14 +2198,8 @@ RegExpCapture::ToNode(RegExpTree* body, RegExpCompiler* compiler, RegExpNode* on_success) { - MOZ_ASSERT(body); int start_reg = RegExpCapture::StartRegister(index); int end_reg = RegExpCapture::EndRegister(index); - if (compiler->read_backward()) { - // std::swap(start_reg, end_reg); - start_reg = RegExpCapture::EndRegister(index); - end_reg = RegExpCapture::StartRegister(index); - } RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); RegExpNode* body_node = body->ToNode(compiler, store_end); return ActionNode::StorePosition(start_reg, true, body_node); @@ -2233,15 +2210,8 @@ RegExpAlternative::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) { const RegExpTreeVector& children = nodes(); RegExpNode* current = on_success; - if (compiler->read_backward()) { - for (int i = 0; i < children.length(); i++) { - current = children[i]->ToNode(compiler, current); - } - } else { - for (int i = children.length() - 1; i >= 0; i--) { - current = children[i]->ToNode(compiler, current); - } - } + for (int i = children.length() - 1; i >= 0; i--) + current = children[i]->ToNode(compiler, current); return current; } @@ -2794,6 +2764,7 @@ Trace::InvalidateCurrentCharacter() void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) { + MOZ_ASSERT(by > 0); // We don't have an instruction for shifting the current character register // down or for using a shifted value for anything so lets just forget that // we preloaded any characters into it. @@ -3138,9 +3109,9 @@ AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) return; } if (trace->at_start() == Trace::UNKNOWN) { - assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack()); + assembler->CheckNotAtStart(trace->backtrack()); Trace at_start_trace = *trace; - at_start_trace.set_at_start(Trace::TRUE_VALUE); + at_start_trace.set_at_start(true); on_success()->Emit(compiler, &at_start_trace); return; } @@ -3843,10 +3814,9 @@ TextNode::TextEmitPass(RegExpCompiler* compiler, jit::Label* backtrack = trace->backtrack(); QuickCheckDetails* quick_check = trace->quick_check_performed(); int element_count = elements().length(); - int backward_offset = read_backward() ? -Length() : 0; for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { TextElement elm = elements()[i]; - int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset; + int cp_offset = trace->cp_offset() + elm.cp_offset(); if (elm.text_type() == TextElement::ATOM) { const CharacterVector& quarks = elm.atom()->data(); for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { @@ -3874,12 +3844,11 @@ TextNode::TextEmitPass(RegExpCompiler* compiler, break; } if (emit_function != nullptr) { - bool bounds_check = *checked_up_to < cp_offset + j || read_backward(); bool bound_checked = emit_function(compiler, quarks[j], backtrack, cp_offset + j, - bounds_check, + *checked_up_to < cp_offset + j, preloaded); if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); } @@ -3890,14 +3859,13 @@ TextNode::TextEmitPass(RegExpCompiler* compiler, if (first_element_checked && i == 0) continue; if (DeterminedAlready(quick_check, elm.cp_offset())) continue; RegExpCharacterClass* cc = elm.char_class(); - bool bounds_check = *checked_up_to < cp_offset || read_backward(); EmitCharClass(alloc(), assembler, cc, ascii, backtrack, cp_offset, - bounds_check, + *checked_up_to < cp_offset, preloaded); UpdateBoundsCheck(cp_offset, checked_up_to); } @@ -3977,11 +3945,8 @@ TextNode::Emit(RegExpCompiler* compiler, Trace* trace) } Trace successor_trace(*trace); - // If we advance backward, we may end up at the start. - successor_trace.AdvanceCurrentPositionInTrace( - read_backward() ? -Length() : Length(), compiler); - successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN - : Trace::FALSE_VALUE); + successor_trace.set_at_start(false); + successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); RecursionCheck rc(compiler); on_success()->Emit(compiler, &successor_trace); } @@ -4153,8 +4118,6 @@ ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_lea RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(RegExpCompiler* compiler) { - if (read_backward()) return NULL; - if (elements().length() != 1) return nullptr; @@ -4202,7 +4165,7 @@ ChoiceNode::GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative) SeqRegExpNode* seq_node = static_cast(node); node = seq_node->on_success(); } - return read_backward() ? -length : length; + return length; } // Creates a list of AlternativeGenerations. If the list has a reasonable @@ -4277,7 +4240,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) jit::Label greedy_loop_label; Trace counter_backtrack_trace; counter_backtrack_trace.set_backtrack(&greedy_loop_label); - if (not_at_start()) counter_backtrack_trace.set_at_start(Trace::FALSE_VALUE); + if (not_at_start()) counter_backtrack_trace.set_at_start(false); if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { // Here we have special handling for greedy loops containing only text nodes @@ -4293,7 +4256,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) current_trace = &counter_backtrack_trace; jit::Label greedy_match_failed; Trace greedy_match_trace; - if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE); + if (not_at_start()) greedy_match_trace.set_at_start(false); greedy_match_trace.set_backtrack(&greedy_match_failed); jit::Label loop_label; macro_assembler->Bind(&loop_label); @@ -4642,14 +4605,11 @@ BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) MOZ_ASSERT(start_reg_ + 1 == end_reg_); if (compiler->ignore_case()) { assembler->CheckNotBackReferenceIgnoreCase(start_reg_, - read_backward(), trace->backtrack(), compiler->unicode()); } else { - assembler->CheckNotBackReference(start_reg_, read_backward(), trace->backtrack()); + assembler->CheckNotBackReference(start_reg_, trace->backtrack()); } - // We are going to advance backward, so we may end up at the start. - if (read_backward()) trace->set_at_start(Trace::UNKNOWN); on_success()->Emit(compiler, trace); } @@ -5017,6 +4977,7 @@ QuickCheckDetails::Clear() void QuickCheckDetails::Advance(int by, bool ascii) { + MOZ_ASSERT(by >= 0); if (by >= characters_) { Clear(); return; diff --git a/js/src/irregexp/RegExpEngine.h b/js/src/irregexp/RegExpEngine.h index c4409dcca..1a8fd4b22 100644 --- a/js/src/irregexp/RegExpEngine.h +++ b/js/src/irregexp/RegExpEngine.h @@ -119,7 +119,7 @@ InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* chars, size_t VISIT(Atom) \ VISIT(Quantifier) \ VISIT(Capture) \ - VISIT(Lookaround) \ + VISIT(Lookahead) \ VISIT(BackReference) \ VISIT(Empty) \ VISIT(Text) @@ -763,19 +763,15 @@ class TextNode : public SeqRegExpNode { public: TextNode(TextElementVector* elements, - bool read_backward, RegExpNode* on_success) : SeqRegExpNode(on_success), - elements_(elements), - read_backward_(read_backward) + elements_(elements) {} TextNode(RegExpCharacterClass* that, - bool read_backward, RegExpNode* on_success) : SeqRegExpNode(on_success), - elements_(alloc()->newInfallible(*alloc())), - read_backward_(read_backward) + elements_(alloc()->newInfallible(*alloc())) { elements_->append(TextElement::CharClass(that)); } @@ -788,7 +784,6 @@ class TextNode : public SeqRegExpNode int characters_filled_in, bool not_at_start); TextElementVector& elements() { return *elements_; } - bool read_backward() { return read_backward_; } void MakeCaseIndependent(bool is_ascii, bool unicode); virtual int GreedyLoopTextLength(); virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( @@ -819,7 +814,6 @@ class TextNode : public SeqRegExpNode int* checked_up_to); int Length(); TextElementVector* elements_; - bool read_backward_; }; class AssertionNode : public SeqRegExpNode @@ -888,18 +882,15 @@ class BackReferenceNode : public SeqRegExpNode public: BackReferenceNode(int start_reg, int end_reg, - bool read_backward, RegExpNode* on_success) : SeqRegExpNode(on_success), start_reg_(start_reg), - end_reg_(end_reg), - read_backward_(read_backward) + end_reg_(end_reg) {} virtual void Accept(NodeVisitor* visitor); int start_register() { return start_reg_; } int end_register() { return end_reg_; } - bool read_backward() { return read_backward_; } virtual void Emit(RegExpCompiler* compiler, Trace* trace); virtual int EatsAtLeast(int still_to_find, int recursion_depth, @@ -918,7 +909,6 @@ class BackReferenceNode : public SeqRegExpNode private: int start_reg_; int end_reg_; - bool read_backward_; }; class EndNode : public RegExpNode @@ -1063,7 +1053,6 @@ class ChoiceNode : public RegExpNode void set_being_calculated(bool b) { being_calculated_ = b; } virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode); - virtual bool read_backward() { return false; } protected: int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); @@ -1122,13 +1111,11 @@ class NegativeLookaheadChoiceNode : public ChoiceNode class LoopChoiceNode : public ChoiceNode { public: - explicit LoopChoiceNode(LifoAlloc* alloc, bool body_can_be_zero_length, - bool read_backward) + explicit LoopChoiceNode(LifoAlloc* alloc, bool body_can_be_zero_length) : ChoiceNode(alloc, 2), loop_node_(nullptr), continue_node_(nullptr), - body_can_be_zero_length_(body_can_be_zero_length), - read_backward_(read_backward) + body_can_be_zero_length_(body_can_be_zero_length) {} void AddLoopAlternative(GuardedAlternative alt); @@ -1146,7 +1133,6 @@ class LoopChoiceNode : public ChoiceNode RegExpNode* loop_node() { return loop_node_; } RegExpNode* continue_node() { return continue_node_; } bool body_can_be_zero_length() { return body_can_be_zero_length_; } - virtual bool read_backward() { return read_backward_; } virtual void Accept(NodeVisitor* visitor); virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode); @@ -1161,7 +1147,6 @@ class LoopChoiceNode : public ChoiceNode RegExpNode* loop_node_; RegExpNode* continue_node_; bool body_can_be_zero_length_; - bool read_backward_; }; // Improve the speed that we scan for an initial point where a non-anchored @@ -1437,8 +1422,8 @@ class Trace } TriBool at_start() { return at_start_; } - void set_at_start(TriBool at_start) { - at_start_ = at_start; + void set_at_start(bool at_start) { + at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE; } jit::Label* backtrack() { return backtrack_; } jit::Label* loop_label() { return loop_label_; } diff --git a/js/src/irregexp/RegExpInterpreter.cpp b/js/src/irregexp/RegExpInterpreter.cpp index d09b4671e..7fd2d983a 100644 --- a/js/src/irregexp/RegExpInterpreter.cpp +++ b/js/src/irregexp/RegExpInterpreter.cpp @@ -222,8 +222,8 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha } break; BYTECODE(LOAD_CURRENT_CHAR) { - int pos = current + (insn >> BYTECODE_SHIFT); - if (pos >= (int)length || pos < 0) { + size_t pos = current + (insn >> BYTECODE_SHIFT); + if (pos >= length) { pc = byteCode + Load32Aligned(pc + 4); } else { current_char = chars[pos]; @@ -238,8 +238,8 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha break; } BYTECODE(LOAD_2_CURRENT_CHARS) { - int pos = current + (insn >> BYTECODE_SHIFT); - if (pos + 2 > (int)length || pos < 0) { + size_t pos = current + (insn >> BYTECODE_SHIFT); + if (pos + 2 > length) { pc = byteCode + Load32Aligned(pc + 4); } else { CharT next = chars[pos + 1]; @@ -425,30 +425,6 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha pc += BC_CHECK_NOT_BACK_REF_LENGTH; break; } - BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) { - int from = registers[insn >> BYTECODE_SHIFT]; - int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; - if (from < 0 || len <= 0) { - pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH; - break; - } - if (int(current) - len < 0) { - pc = byteCode + Load32Aligned(pc + 4); - break; - } else { - int i; - for (i = 0; i < len; i++) { - if (chars[from + i] != chars[int(current) - len + i]) { - pc = byteCode + Load32Aligned(pc + 4); - break; - } - } - if (i < len) break; - current -= len; - } - pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH; - break; - } BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) { int from = registers[insn >> BYTECODE_SHIFT]; int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; @@ -489,46 +465,6 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha } break; } - BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) { - int from = registers[insn >> BYTECODE_SHIFT]; - int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; - if (from < 0 || len <= 0) { - pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; - break; - } - if (int(current) - len < 0) { - pc = byteCode + Load32Aligned(pc + 4); - break; - } - if (CaseInsensitiveCompareStrings(chars + from, chars + int(current) - len, len * sizeof(CharT))) { - current -= len; - pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; - } else { - pc = byteCode + Load32Aligned(pc + 4); - } - break; - - } - BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE) { - int from = registers[insn >> BYTECODE_SHIFT]; - int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; - if (from < 0 || len <= 0) { - pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; - break; - } - if (int(current) - len < 0) { - pc = byteCode + Load32Aligned(pc + 4); - break; - } - if (CaseInsensitiveCompareUCStrings(chars + from, chars + int(current) - len, len * sizeof(CharT))) { - current -= len; - pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; - } else { - pc = byteCode + Load32Aligned(pc + 4); - } - break; - - } BYTECODE(CHECK_AT_START) if (current == 0) pc = byteCode + Load32Aligned(pc + 4); @@ -536,7 +472,7 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha pc += BC_CHECK_AT_START_LENGTH; break; BYTECODE(CHECK_NOT_AT_START) - if (current + (insn >> BYTECODE_SHIFT) == 0) + if (current == 0) pc += BC_CHECK_NOT_AT_START_LENGTH; else pc = byteCode + Load32Aligned(pc + 4); diff --git a/js/src/irregexp/RegExpMacroAssembler.cpp b/js/src/irregexp/RegExpMacroAssembler.cpp index 6b1ceba8a..d66d0d204 100644 --- a/js/src/irregexp/RegExpMacroAssembler.cpp +++ b/js/src/irregexp/RegExpMacroAssembler.cpp @@ -226,37 +226,32 @@ InterpretedRegExpMacroAssembler::CheckGreedyLoop(jit::Label* on_tos_equals_curre } void -InterpretedRegExpMacroAssembler::CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start) +InterpretedRegExpMacroAssembler::CheckNotAtStart(jit::Label* on_not_at_start) { - Emit(BC_CHECK_NOT_AT_START, cp_offset); + Emit(BC_CHECK_NOT_AT_START, 0); EmitOrLink(on_not_at_start); } void -InterpretedRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backward, - jit::Label* on_no_match) +InterpretedRegExpMacroAssembler::CheckNotBackReference(int start_reg, jit::Label* on_no_match) { MOZ_ASSERT(start_reg >= 0); MOZ_ASSERT(start_reg <= kMaxRegister); - Emit(read_backward ? BC_CHECK_NOT_BACK_REF_BACKWARD : BC_CHECK_NOT_BACK_REF, - start_reg); + Emit(BC_CHECK_NOT_BACK_REF, start_reg); EmitOrLink(on_no_match); } void InterpretedRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, - bool read_backward, jit::Label* on_no_match, bool unicode) { MOZ_ASSERT(start_reg >= 0); MOZ_ASSERT(start_reg <= kMaxRegister); if (unicode) - Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE : BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE, - start_reg); + Emit(BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE, start_reg); else - Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD : BC_CHECK_NOT_BACK_REF_NO_CASE, - start_reg); + Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg); EmitOrLink(on_no_match); } diff --git a/js/src/irregexp/RegExpMacroAssembler.h b/js/src/irregexp/RegExpMacroAssembler.h index c5def92f2..dca2edf90 100644 --- a/js/src/irregexp/RegExpMacroAssembler.h +++ b/js/src/irregexp/RegExpMacroAssembler.h @@ -110,10 +110,10 @@ class MOZ_STACK_CLASS RegExpMacroAssembler virtual void CheckCharacterGT(char16_t limit, jit::Label* on_greater) = 0; virtual void CheckCharacterLT(char16_t limit, jit::Label* on_less) = 0; virtual void CheckGreedyLoop(jit::Label* on_tos_equals_current_position) = 0; - virtual void CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start) = 0; - virtual void CheckNotBackReference(int start_reg, bool read_backward, jit::Label* on_no_match) = 0; - virtual void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward, - jit::Label* on_no_match, bool unicode) = 0; + virtual void CheckNotAtStart(jit::Label* on_not_at_start) = 0; + virtual void CheckNotBackReference(int start_reg, jit::Label* on_no_match) = 0; + virtual void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, + bool unicode) = 0; // Check the current character for a match with a literal character. If we // fail to match then goto the on_failure label. End of input always @@ -245,10 +245,9 @@ class MOZ_STACK_CLASS InterpretedRegExpMacroAssembler final : public RegExpMacro void CheckCharacterGT(char16_t limit, jit::Label* on_greater); void CheckCharacterLT(char16_t limit, jit::Label* on_less); void CheckGreedyLoop(jit::Label* on_tos_equals_current_position); - void CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start); - void CheckNotBackReference(int start_reg, bool read_backward, jit::Label* on_no_match); - void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward, - jit::Label* on_no_match, bool unicode); + void CheckNotAtStart(jit::Label* on_not_at_start); + void CheckNotBackReference(int start_reg, jit::Label* on_no_match); + void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, bool unicode); void CheckNotCharacter(unsigned c, jit::Label* on_not_equal); void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_not_equal); void CheckNotCharacterAfterMinusAnd(char16_t c, char16_t minus, char16_t and_with, diff --git a/js/src/irregexp/RegExpParser.cpp b/js/src/irregexp/RegExpParser.cpp index 1ad044e8e..d4308d0d8 100644 --- a/js/src/irregexp/RegExpParser.cpp +++ b/js/src/irregexp/RegExpParser.cpp @@ -227,7 +227,6 @@ RegExpParser::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc, alloc(alloc), captures_(nullptr), next_pos_(chars), - captures_started_(0), end_(end), current_(kEndMarker), capture_count_(0), @@ -420,8 +419,7 @@ RangeAtom(LifoAlloc* alloc, char16_t from, char16_t to) static inline RegExpTree* NegativeLookahead(LifoAlloc* alloc, char16_t from, char16_t to) { - return alloc->newInfallible(RangeAtom(alloc, from, to), false, - 0, 0, RegExpLookaround::LOOKAHEAD); + return alloc->newInfallible(RangeAtom(alloc, from, to), false, 0, 0); } static bool @@ -1216,38 +1214,6 @@ RegExpParser::ParseBackReferenceIndex(int* index_out) return true; } -template -RegExpCapture* -RegExpParser::GetCapture(int index) { - // The index for the capture groups are one-based. Its index in the list is - // zero-based. - int known_captures = - is_scanned_for_captures_ ? capture_count_ : captures_started_; - MOZ_ASSERT(index <= known_captures); - if (captures_ == NULL) { - captures_ = alloc->newInfallible(*alloc); - } - while ((int)captures_->length() < known_captures) { - RegExpCapture* capture = alloc->newInfallible(nullptr, captures_->length() + 1); - captures_->append(capture); - } - return (*captures_)[index - 1]; -} - - -template -bool -RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) { - for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { - if (s->group_type() != CAPTURE) continue; - // Return true if we found the matching capture index. - if (index == s->capture_index()) return true; - // Abort if index is larger than what has been parsed up till this state. - if (index > s->capture_index()) return false; - } - return false; -} - // QuantifierPrefix :: // { DecimalDigits } // { DecimalDigits , } @@ -1490,24 +1456,24 @@ RegExpTree* RegExpParser::ParseDisjunction() { // Used to store current state while parsing subexpressions. - RegExpParserState initial_state(alloc, nullptr, INITIAL, RegExpLookaround::LOOKAHEAD, 0); - RegExpParserState* state = &initial_state; + RegExpParserState initial_state(alloc, nullptr, INITIAL, 0); + RegExpParserState* stored_state = &initial_state; // Cache the builder in a local variable for quick access. RegExpBuilder* builder = initial_state.builder(); while (true) { switch (current()) { case kEndMarker: - if (state->IsSubexpression()) { + if (stored_state->IsSubexpression()) { // Inside a parenthesized group when hitting end of input. return ReportError(JSMSG_MISSING_PAREN); } - MOZ_ASSERT(INITIAL == state->group_type()); + MOZ_ASSERT(INITIAL == stored_state->group_type()); // Parsing completed successfully. return builder->ToRegExp(); case ')': { - if (!state->IsSubexpression()) + if (!stored_state->IsSubexpression()) return ReportError(JSMSG_UNMATCHED_RIGHT_PAREN); - MOZ_ASSERT(INITIAL != state->group_type()); + MOZ_ASSERT(INITIAL != stored_state->group_type()); Advance(); // End disjunction parsing and convert builder content to new single @@ -1516,30 +1482,29 @@ RegExpParser::ParseDisjunction() int end_capture_index = captures_started(); - int capture_index = state->capture_index(); - SubexpressionType group_type = state->group_type(); + int capture_index = stored_state->capture_index(); + SubexpressionType group_type = stored_state->group_type(); + + // Restore previous state. + stored_state = stored_state->previous_state(); + builder = stored_state->builder(); // Build result of subexpression. if (group_type == CAPTURE) { - RegExpCapture* capture = GetCapture(capture_index); - capture->set_body(body); + RegExpCapture* capture = alloc->newInfallible(body, capture_index); + (*captures_)[capture_index - 1] = capture; body = capture; } else if (group_type != GROUPING) { - MOZ_ASSERT(group_type == POSITIVE_LOOKAROUND || - group_type == NEGATIVE_LOOKAROUND); - bool is_positive = (group_type == POSITIVE_LOOKAROUND); - body = alloc->newInfallible(body, + MOZ_ASSERT(group_type == POSITIVE_LOOKAHEAD || + group_type == NEGATIVE_LOOKAHEAD); + bool is_positive = (group_type == POSITIVE_LOOKAHEAD); + body = alloc->newInfallible(body, is_positive, end_capture_index - capture_index, - capture_index, - state->lookaround_type()); + capture_index); } - - // Restore previous state. - state = state->previous_state(); - builder = state->builder(); builder->AddAtom(body); - if (unicode_ && (group_type == POSITIVE_LOOKAROUND || group_type == NEGATIVE_LOOKAROUND)) + if (unicode_ && (group_type == POSITIVE_LOOKAHEAD || group_type == NEGATIVE_LOOKAHEAD)) continue; // For compatability with JSC and ES3, we allow quantifiers after // lookaheads, and break in all cases. @@ -1599,7 +1564,6 @@ RegExpParser::ParseDisjunction() } case '(': { SubexpressionType subexpr_type = CAPTURE; - RegExpLookaround::Type lookaround_type = state->lookaround_type(); Advance(); if (current() == '?') { switch (Next()) { @@ -1607,39 +1571,26 @@ RegExpParser::ParseDisjunction() subexpr_type = GROUPING; break; case '=': - lookaround_type = RegExpLookaround::LOOKAHEAD; - subexpr_type = POSITIVE_LOOKAROUND; + subexpr_type = POSITIVE_LOOKAHEAD; break; case '!': - lookaround_type = RegExpLookaround::LOOKAHEAD; - subexpr_type = NEGATIVE_LOOKAROUND; + subexpr_type = NEGATIVE_LOOKAHEAD; break; - case '<': - Advance(); - lookaround_type = RegExpLookaround::LOOKBEHIND; - if (Next() == '=') { - subexpr_type = POSITIVE_LOOKAROUND; - break; - } else if (Next() == '!') { - subexpr_type = NEGATIVE_LOOKAROUND; - break; - } - // We didn't get a positive or negative after '<'. - // That's an error. - return ReportError(JSMSG_INVALID_GROUP); default: return ReportError(JSMSG_INVALID_GROUP); } Advance(2); } else { + if (captures_ == nullptr) + captures_ = alloc->newInfallible(*alloc); if (captures_started() >= kMaxCaptures) return ReportError(JSMSG_TOO_MANY_PARENS); - captures_started_++; + captures_->append((RegExpCapture*) nullptr); } // Store current state and begin new disjunction parsing. - state = alloc->newInfallible(alloc, state, subexpr_type, - lookaround_type, captures_started_); - builder = state->builder(); + stored_state = alloc->newInfallible(alloc, stored_state, subexpr_type, + captures_started()); + builder = stored_state->builder(); continue; } case '[': { @@ -1694,18 +1645,19 @@ RegExpParser::ParseDisjunction() case '7': case '8': case '9': { int index = 0; if (ParseBackReferenceIndex(&index)) { - if (state->IsInsideCaptureGroup(index)) { - // The backreference is inside the capture group it refers to. - // Nothing can possibly have been captured yet. - builder->AddEmpty(); - } else { - RegExpCapture* capture = GetCapture(index); - RegExpTree* atom = alloc->newInfallible(capture); - if (unicode_) - builder->AddAtom(UnicodeBackReferenceAtom(alloc, atom)); - else - builder->AddAtom(atom); + RegExpCapture* capture = nullptr; + if (captures_ != nullptr && index <= (int) captures_->length()) { + capture = (*captures_)[index - 1]; } + if (capture == nullptr) { + builder->AddEmpty(); + break; + } + RegExpTree* atom = alloc->newInfallible(capture); + if (unicode_) + builder->AddAtom(UnicodeBackReferenceAtom(alloc, atom)); + else + builder->AddAtom(atom); break; } if (unicode_) diff --git a/js/src/irregexp/RegExpParser.h b/js/src/irregexp/RegExpParser.h index ee57f0436..a58800a91 100644 --- a/js/src/irregexp/RegExpParser.h +++ b/js/src/irregexp/RegExpParser.h @@ -229,7 +229,7 @@ class RegExpParser bool simple() { return simple_; } bool contains_anchor() { return contains_anchor_; } void set_contains_anchor() { contains_anchor_ = true; } - int captures_started() { return captures_started_; } + int captures_started() { return captures_ == nullptr ? 0 : captures_->length(); } const CharT* position() { return next_pos_ - 1; } static const int kMaxCaptures = 1 << 16; @@ -239,8 +239,8 @@ class RegExpParser enum SubexpressionType { INITIAL, CAPTURE, // All positive values represent captures. - POSITIVE_LOOKAROUND, - NEGATIVE_LOOKAROUND, + POSITIVE_LOOKAHEAD, + NEGATIVE_LOOKAHEAD, GROUPING }; @@ -249,12 +249,10 @@ class RegExpParser RegExpParserState(LifoAlloc* alloc, RegExpParserState* previous_state, SubexpressionType group_type, - RegExpLookaround::Type lookaround_type, int disjunction_capture_index) : previous_state_(previous_state), builder_(alloc->newInfallible(alloc)), group_type_(group_type), - lookaround_type_(lookaround_type), disjunction_capture_index_(disjunction_capture_index) {} // Parser state of containing expression, if any. @@ -264,16 +262,11 @@ class RegExpParser RegExpBuilder* builder() { return builder_; } // Type of regexp being parsed (parenthesized group or entire regexp). SubexpressionType group_type() { return group_type_; } - // Lookahead or Lookbehind. - RegExpLookaround::Type lookaround_type() { return lookaround_type_; } // Index in captures array of first capture in this sub-expression, if any. // Also the capture index of this sub-expression itself, if group_type // is CAPTURE. int capture_index() { return disjunction_capture_index_; } - // Check whether the parser is inside a capture group with the given index. - bool IsInsideCaptureGroup(int index); - private: // Linked list implementation of stack of states. RegExpParserState* previous_state_; @@ -281,15 +274,10 @@ class RegExpParser RegExpBuilder* builder_; // Stored disjunction type (capture, look-ahead or grouping), if any. SubexpressionType group_type_; - // Stored read direction. - RegExpLookaround::Type lookaround_type_; // Stored disjunction's capture index (if any). int disjunction_capture_index_; }; - // Return the 1-indexed RegExpCapture object, allocate if necessary. - RegExpCapture* GetCapture(int index); - widechar current() { return current_; } bool has_more() { return has_more_; } bool has_next() { return next_pos_ < end_; } @@ -306,7 +294,6 @@ class RegExpParser const CharT* next_pos_; const CharT* end_; widechar current_; - int captures_started_; // The capture count is only valid after we have scanned for captures. int capture_count_; bool has_more_;