Implement /s (dotAll) flag for Regexes.
This commit is contained in:
parent
d9300b2bb0
commit
85906a8fa5
|
@ -178,7 +178,7 @@ CheckPatternSyntax(JSContext* cx, HandleAtom pattern, RegExpFlag flags)
|
|||
CompileOptions options(cx);
|
||||
frontend::TokenStream dummyTokenStream(cx, options, nullptr, 0, nullptr);
|
||||
return irregexp::ParsePatternSyntax(dummyTokenStream, cx->tempLifoAlloc(), pattern,
|
||||
flags & UnicodeFlag);
|
||||
flags & UnicodeFlag, flags & DotAllFlag);
|
||||
}
|
||||
|
||||
enum RegExpSharedUse {
|
||||
|
@ -664,6 +664,29 @@ js::regexp_multiline(JSContext* cx, unsigned argc, JS::Value* vp)
|
|||
return CallNonGenericMethod<IsRegExpInstanceOrPrototype, regexp_multiline_impl>(cx, args);
|
||||
}
|
||||
|
||||
// ES 2018 dotAll
|
||||
MOZ_ALWAYS_INLINE bool
regexp_dotall_impl(JSContext* cx, const CallArgs& args)
{
    // Implementation half of the dotall getter; it is invoked through
    // CallNonGenericMethod with IsRegExpInstanceOrPrototype as the |this| test.
    MOZ_ASSERT(IsRegExpInstanceOrPrototype(args.thisv()));

    if (IsRegExpObject(args.thisv())) {
        // Real RegExp object: report whether its DotAll flag is set.
        Rooted<RegExpObject*> regexp(cx, &args.thisv().toObject().as<RegExpObject>());
        args.rval().setBoolean(regexp->dotall());
    } else {
        // |this| satisfied the assertion above without being a RegExp object
        // (presumably RegExp.prototype itself): the getter yields undefined.
        args.rval().setUndefined();
    }

    return true;
}
|
||||
|
||||
bool
|
||||
js::regexp_dotall(JSContext* cx, unsigned argc, JS::Value* vp)
|
||||
{
|
||||
CallArgs args = CallArgsFromVp(argc, vp);
|
||||
return CallNonGenericMethod<IsRegExpInstanceOrPrototype, regexp_dotall_impl>(cx, args);
|
||||
}
|
||||
|
||||
// ES 2017 draft rev32 21.2.5.10.
|
||||
MOZ_ALWAYS_INLINE bool
|
||||
regexp_source_impl(JSContext* cx, const CallArgs& args)
|
||||
|
@ -759,6 +782,7 @@ const JSPropertySpec js::regexp_properties[] = {
|
|||
JS_PSG("source", regexp_source, 0),
|
||||
JS_PSG("sticky", regexp_sticky, 0),
|
||||
JS_PSG("unicode", regexp_unicode, 0),
|
||||
JS_PSG("dotall", regexp_dotall, 0),
|
||||
JS_PS_END
|
||||
};
|
||||
|
||||
|
@ -1642,6 +1666,13 @@ js::RegExpPrototypeOptimizableRaw(JSContext* cx, JSObject* proto)
|
|||
if (unicodeGetter != regexp_unicode)
|
||||
return false;
|
||||
|
||||
JSNative dotAllGetter;
|
||||
if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().dotall), &dotAllGetter))
|
||||
return false;
|
||||
|
||||
if (dotAllGetter != regexp_dotall)
|
||||
return false;
|
||||
|
||||
// Check if @@match, @@search, and exec are own data properties,
|
||||
// those values should be tested in selfhosted JS.
|
||||
bool has = false;
|
||||
|
|
|
@ -153,6 +153,8 @@ extern MOZ_MUST_USE bool
|
|||
regexp_sticky(JSContext* cx, unsigned argc, JS::Value* vp);
|
||||
extern MOZ_MUST_USE bool
|
||||
regexp_unicode(JSContext* cx, unsigned argc, JS::Value* vp);
|
||||
extern MOZ_MUST_USE bool
|
||||
regexp_dotall(JSContext* cx, unsigned argc, JS::Value* vp);
|
||||
|
||||
} /* namespace js */
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
// ES6 draft rev34 (2015/02/20) 21.2.5.3 get RegExp.prototype.flags
|
||||
// Updated for ES2018 /s (dotAll)
|
||||
function RegExpFlagsGetter() {
|
||||
// Steps 1-2.
|
||||
var R = this;
|
||||
|
@ -31,6 +32,10 @@ function RegExpFlagsGetter() {
|
|||
// Steps 16-18.
|
||||
if (R.sticky)
|
||||
result += "y";
|
||||
|
||||
// ES2018
|
||||
if (R.dotall)
|
||||
result += "s";
|
||||
|
||||
// Step 19.
|
||||
return result;
|
||||
|
|
|
@ -90,6 +90,7 @@
|
|||
#define REGEXP_MULTILINE_FLAG 0x04
|
||||
#define REGEXP_STICKY_FLAG 0x08
|
||||
#define REGEXP_UNICODE_FLAG 0x10
|
||||
#define REGEXP_DOTALL_FLAG 0x20
|
||||
|
||||
#define MODULE_OBJECT_ENVIRONMENT_SLOT 2
|
||||
|
||||
|
|
|
@ -3975,6 +3975,7 @@ ParseRegExp(JSContext* cx, unsigned argc, Value* vp)
|
|||
flags & MultilineFlag, match_only,
|
||||
flags & UnicodeFlag, flags & IgnoreCaseFlag,
|
||||
flags & GlobalFlag, flags & StickyFlag,
|
||||
flags & DotAllFlag,
|
||||
&data))
|
||||
{
|
||||
return false;
|
||||
|
|
|
@ -1843,6 +1843,8 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
|
|||
reflags = RegExpFlag(reflags | StickyFlag);
|
||||
else if (c == 'u' && !(reflags & UnicodeFlag))
|
||||
reflags = RegExpFlag(reflags | UnicodeFlag);
|
||||
else if (c == 's' && !(reflags & DotAllFlag))
|
||||
reflags = RegExpFlag(reflags | DotAllFlag);
|
||||
else
|
||||
break;
|
||||
getChar();
|
||||
|
|
|
@ -222,7 +222,7 @@ RegExpBuilder::AddQuantifierToAtom(int min, int max,
|
|||
template <typename CharT>
|
||||
RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
|
||||
const CharT* chars, const CharT* end, bool multiline_mode,
|
||||
bool unicode, bool ignore_case)
|
||||
bool unicode, bool ignore_case, bool dotall)
|
||||
: ts(ts),
|
||||
alloc(alloc),
|
||||
captures_(nullptr),
|
||||
|
@ -235,6 +235,7 @@ RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
|
|||
multiline_(multiline_mode),
|
||||
unicode_(unicode),
|
||||
ignore_case_(ignore_case),
|
||||
dotall_(dotall),
|
||||
simple_(false),
|
||||
contains_anchor_(false),
|
||||
is_scanned_for_captures_(false)
|
||||
|
@ -1384,7 +1385,7 @@ UnicodeEverythingAtom(LifoAlloc* alloc)
|
|||
{
|
||||
RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
|
||||
|
||||
// everything except \x0a, \x0d, \u2028 and \u2029
|
||||
// Everything except \x0a, \x0d, \u2028 and \u2029
|
||||
|
||||
CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
|
||||
ranges->append(CharacterRange::Range(0x0, 0x09));
|
||||
|
@ -1414,6 +1415,38 @@ UnicodeEverythingAtom(LifoAlloc* alloc)
|
|||
return builder->ToRegExp();
|
||||
}
|
||||
|
||||
static inline RegExpTree*
UnicodeDotAllAtom(LifoAlloc* alloc)
{
    // Builds the tree used for '.' when the parser is in unicode mode with
    // dotall set: nothing is excluded for being a line terminator, so the
    // only special handling left is for surrogates. The result is a
    // disjunction of four alternatives.
    RegExpBuilder* alternatives = alloc->newInfallible<RegExpBuilder>(alloc);

    // Alternative 1: any single code unit outside the surrogate range.
    CharacterRangeVector* nonSurrogates = alloc->newInfallible<CharacterRangeVector>(*alloc);
    nonSurrogates->append(CharacterRange::Range(0x0, unicode::LeadSurrogateMin - 1));
    nonSurrogates->append(CharacterRange::Range(unicode::TrailSurrogateMax + 1,
                                                unicode::UTF16Max));
    alternatives->AddAtom(alloc->newInfallible<RegExpCharacterClass>(nonSurrogates, false));

    // Alternative 2: a lead surrogate that is not followed by a trail
    // surrogate.
    alternatives->NewAlternative();
    alternatives->AddAtom(RangeAtom(alloc, unicode::LeadSurrogateMin,
                                    unicode::LeadSurrogateMax));
    alternatives->AddAtom(NegativeLookahead(alloc, unicode::TrailSurrogateMin,
                                            unicode::TrailSurrogateMax));

    // Alternative 3: a trail surrogate that does not come right after a lead
    // surrogate.
    alternatives->NewAlternative();
    alternatives->AddAssertion(alloc->newInfallible<RegExpAssertion>(
        RegExpAssertion::NOT_AFTER_LEAD_SURROGATE));
    alternatives->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin,
                                    unicode::TrailSurrogateMax));

    // Alternative 4: a lead surrogate immediately followed by a trail
    // surrogate.
    alternatives->NewAlternative();
    alternatives->AddAtom(RangeAtom(alloc, unicode::LeadSurrogateMin,
                                    unicode::LeadSurrogateMax));
    alternatives->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin,
                                    unicode::TrailSurrogateMax));

    return alternatives->ToRegExp();
}
|
||||
|
||||
RegExpTree*
|
||||
UnicodeCharacterClassEscapeAtom(LifoAlloc* alloc, char16_t char_class, bool ignore_case)
|
||||
{
|
||||
|
@ -1541,13 +1574,25 @@ RegExpParser<CharT>::ParseDisjunction()
|
|||
}
|
||||
case '.': {
|
||||
Advance();
|
||||
// everything except \x0a, \x0d, \u2028 and \u2029
|
||||
|
||||
if (unicode_) {
|
||||
builder->AddAtom(UnicodeEverythingAtom(alloc));
|
||||
if (dotall_) {
|
||||
// Everything
|
||||
builder->AddAtom(UnicodeDotAllAtom(alloc));
|
||||
} else {
|
||||
// Everything except \x0a, \x0d, \u2028 and \u2029
|
||||
builder->AddAtom(UnicodeEverythingAtom(alloc));
|
||||
}
|
||||
break;
|
||||
}
|
||||
CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
|
||||
CharacterRange::AddClassEscape(alloc, '.', ranges);
|
||||
if (dotall_) {
|
||||
// Everything
|
||||
CharacterRange::AddClassEscape(alloc, '*', ranges);
|
||||
} else {
|
||||
// Everything except \x0a, \x0d, \u2028 and \u2029
|
||||
CharacterRange::AddClassEscape(alloc, '.', ranges);
|
||||
}
|
||||
RegExpTree* atom = alloc->newInfallible<RegExpCharacterClass>(ranges, false);
|
||||
builder->AddAtom(atom);
|
||||
break;
|
||||
|
@ -1880,7 +1925,7 @@ template <typename CharT>
|
|||
static bool
|
||||
ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length,
|
||||
bool multiline, bool match_only, bool unicode, bool ignore_case,
|
||||
bool global, bool sticky, RegExpCompileData* data)
|
||||
bool global, bool sticky, bool dotall, RegExpCompileData* data)
|
||||
{
|
||||
if (match_only) {
|
||||
// Try to strip a leading '.*' from the RegExp, but only if it is not
|
||||
|
@ -1907,7 +1952,7 @@ ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, si
|
|||
}
|
||||
}
|
||||
|
||||
RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, multiline, unicode, ignore_case);
|
||||
RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, multiline, unicode, ignore_case, dotall);
|
||||
data->tree = parser.ParsePattern();
|
||||
if (!data->tree)
|
||||
return false;
|
||||
|
@ -1921,33 +1966,33 @@ ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, si
|
|||
bool
|
||||
irregexp::ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
|
||||
bool multiline, bool match_only, bool unicode, bool ignore_case,
|
||||
bool global, bool sticky, RegExpCompileData* data)
|
||||
bool global, bool sticky, bool dotall, RegExpCompileData* data)
|
||||
{
|
||||
JS::AutoCheckCannotGC nogc;
|
||||
return str->hasLatin1Chars()
|
||||
? ::ParsePattern(ts, alloc, str->latin1Chars(nogc), str->length(),
|
||||
multiline, match_only, unicode, ignore_case, global, sticky, data)
|
||||
multiline, match_only, unicode, ignore_case, global, sticky, dotall, data)
|
||||
: ::ParsePattern(ts, alloc, str->twoByteChars(nogc), str->length(),
|
||||
multiline, match_only, unicode, ignore_case, global, sticky, data);
|
||||
multiline, match_only, unicode, ignore_case, global, sticky, dotall, data);
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
static bool
|
||||
ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length,
|
||||
bool unicode)
|
||||
bool unicode, bool dotall)
|
||||
{
|
||||
LifoAllocScope scope(&alloc);
|
||||
|
||||
RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, false, unicode, false);
|
||||
RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, false, unicode, dotall, false);
|
||||
return parser.ParsePattern() != nullptr;
|
||||
}
|
||||
|
||||
bool
|
||||
irregexp::ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
|
||||
bool unicode)
|
||||
bool unicode, bool dotall)
|
||||
{
|
||||
JS::AutoCheckCannotGC nogc;
|
||||
return str->hasLatin1Chars()
|
||||
? ::ParsePatternSyntax(ts, alloc, str->latin1Chars(nogc), str->length(), unicode)
|
||||
: ::ParsePatternSyntax(ts, alloc, str->twoByteChars(nogc), str->length(), unicode);
|
||||
? ::ParsePatternSyntax(ts, alloc, str->latin1Chars(nogc), str->length(), unicode, dotall)
|
||||
: ::ParsePatternSyntax(ts, alloc, str->twoByteChars(nogc), str->length(), unicode, dotall);
|
||||
}
|
||||
|
|
|
@ -44,11 +44,11 @@ namespace irregexp {
|
|||
bool
|
||||
ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
|
||||
bool multiline, bool match_only, bool unicode, bool ignore_case,
|
||||
bool global, bool sticky, RegExpCompileData* data);
|
||||
bool global, bool sticky, bool dotall, RegExpCompileData* data);
|
||||
|
||||
bool
|
||||
ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
|
||||
bool unicode);
|
||||
bool unicode, bool dotall);
|
||||
|
||||
// A BufferedVector is an automatically growing list, just like (and backed
|
||||
// by) a Vector, that is optimized for the case of adding and removing
|
||||
|
@ -178,7 +178,7 @@ class RegExpParser
|
|||
public:
|
||||
RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
|
||||
const CharT* chars, const CharT* end, bool multiline_mode, bool unicode,
|
||||
bool ignore_case);
|
||||
bool ignore_case, bool dotall);
|
||||
|
||||
RegExpTree* ParsePattern();
|
||||
RegExpTree* ParseDisjunction();
|
||||
|
@ -313,6 +313,7 @@ class RegExpParser
|
|||
bool multiline_;
|
||||
bool unicode_;
|
||||
bool ignore_case_;
|
||||
bool dotall_;
|
||||
bool simple_;
|
||||
bool contains_anchor_;
|
||||
bool is_scanned_for_captures_;
|
||||
|
|
|
@ -5704,6 +5704,7 @@ JS_ObjectIsDate(JSContext* cx, JS::HandleObject obj, bool* isDate);
|
|||
#define JSREG_MULTILINE 0x04u /* treat ^ and $ as begin and end of line */
|
||||
#define JSREG_STICKY 0x08u /* only match starting at lastIndex */
|
||||
#define JSREG_UNICODE 0x10u /* unicode */
|
||||
#define JSREG_DOTALL 0x20u /* match . to everything including newlines */
|
||||
|
||||
extern JS_PUBLIC_API(JSObject*)
|
||||
JS_NewRegExpObject(JSContext* cx, const char* bytes, size_t length, unsigned flags);
|
||||
|
|
|
@ -97,6 +97,7 @@
|
|||
macro(displayURL, displayURL, "displayURL") \
|
||||
macro(do, do_, "do") \
|
||||
macro(done, done, "done") \
|
||||
macro(dotall, dotall, "dotall") \
|
||||
macro(dotGenerator, dotGenerator, ".generator") \
|
||||
macro(dotThis, dotThis, ".this") \
|
||||
macro(each, each, "each") \
|
||||
|
|
|
@ -49,6 +49,7 @@ JS_STATIC_ASSERT(GlobalFlag == JSREG_GLOB);
|
|||
JS_STATIC_ASSERT(MultilineFlag == JSREG_MULTILINE);
|
||||
JS_STATIC_ASSERT(StickyFlag == JSREG_STICKY);
|
||||
JS_STATIC_ASSERT(UnicodeFlag == JSREG_UNICODE);
|
||||
JS_STATIC_ASSERT(DotAllFlag == JSREG_DOTALL);
|
||||
|
||||
RegExpObject*
|
||||
js::RegExpAlloc(ExclusiveContext* cx, HandleObject proto /* = nullptr */)
|
||||
|
@ -267,7 +268,7 @@ RegExpObject::create(ExclusiveContext* cx, HandleAtom source, RegExpFlag flags,
|
|||
tokenStream = dummyTokenStream.ptr();
|
||||
}
|
||||
|
||||
if (!irregexp::ParsePatternSyntax(*tokenStream, alloc, source, flags & UnicodeFlag))
|
||||
if (!irregexp::ParsePatternSyntax(*tokenStream, alloc, source, flags & UnicodeFlag, flags & DotAllFlag))
|
||||
return nullptr;
|
||||
|
||||
Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx));
|
||||
|
@ -1017,7 +1018,7 @@ RegExpShared::compile(JSContext* cx, HandleAtom pattern, HandleLinearString inpu
|
|||
irregexp::RegExpCompileData data;
|
||||
if (!irregexp::ParsePattern(dummyTokenStream, cx->tempLifoAlloc(), pattern,
|
||||
multiline(), mode == MatchOnly, unicode(), ignoreCase(),
|
||||
global(), sticky(), &data))
|
||||
global(), sticky(), dotall(), &data))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -53,16 +53,18 @@ enum RegExpFlag
|
|||
MultilineFlag = 0x04,
|
||||
StickyFlag = 0x08,
|
||||
UnicodeFlag = 0x10,
|
||||
DotAllFlag = 0x20,
|
||||
|
||||
NoFlags = 0x00,
|
||||
AllFlags = 0x1f
|
||||
AllFlags = 0x3f
|
||||
};
|
||||
|
||||
static_assert(IgnoreCaseFlag == REGEXP_IGNORECASE_FLAG &&
|
||||
GlobalFlag == REGEXP_GLOBAL_FLAG &&
|
||||
MultilineFlag == REGEXP_MULTILINE_FLAG &&
|
||||
StickyFlag == REGEXP_STICKY_FLAG &&
|
||||
UnicodeFlag == REGEXP_UNICODE_FLAG,
|
||||
UnicodeFlag == REGEXP_UNICODE_FLAG &&
|
||||
DotAllFlag == REGEXP_DOTALL_FLAG,
|
||||
"Flag values should be in sync with self-hosted JS");
|
||||
|
||||
enum RegExpRunStatus
|
||||
|
@ -193,6 +195,7 @@ class RegExpShared
|
|||
bool multiline() const { return flags & MultilineFlag; }
|
||||
bool sticky() const { return flags & StickyFlag; }
|
||||
bool unicode() const { return flags & UnicodeFlag; }
|
||||
bool dotall() const { return flags & DotAllFlag; }
|
||||
|
||||
bool isCompiled(CompilationMode mode, bool latin1,
|
||||
ForceByteCodeEnum force = DontForceByteCode) const {
|
||||
|
@ -480,6 +483,7 @@ class RegExpObject : public NativeObject
|
|||
bool multiline() const { return getFlags() & MultilineFlag; }
|
||||
bool sticky() const { return getFlags() & StickyFlag; }
|
||||
bool unicode() const { return getFlags() & UnicodeFlag; }
|
||||
bool dotall() const { return getFlags() & DotAllFlag; }
|
||||
|
||||
static bool isOriginalFlagGetter(JSNative native, RegExpFlag* mask);
|
||||
|
||||
|
|
Loading…
Reference in New Issue