Mypal/js/src/frontend/SyntaxParseHandler.h

606 lines
24 KiB
C++

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=8 sts=4 et sw=4 tw=99:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef frontend_SyntaxParseHandler_h
#define frontend_SyntaxParseHandler_h
#include "mozilla/Attributes.h"
#include "frontend/ParseNode.h"
#include "frontend/TokenStream.h"
namespace js {
namespace frontend {
template <typename ParseHandler>
class Parser;
// Parse handler used when processing the syntax in a block of code, to generate
// the minimal information which is required to detect syntax errors and allow
// bytecode to be emitted for outer functions.
//
// When parsing, we start at the top level with a full parse, and when possible
// only check the syntax for inner functions, so that they can be lazily parsed
// into bytecode when/if they first run. Checking the syntax of a function is
// several times faster than doing a full parse/emit, and lazy parsing improves
// both performance and memory usage significantly when pages contain large
// amounts of code that never executes (which happens often).
class SyntaxParseHandler
{
// Remember the last encountered name or string literal during syntax parses.
// Written by newName(), newStringLiteral() and newPropertyAccess(); read back
// by maybeDottedProperty() and isStringExprStatement().
JSAtom* lastAtom;
// Position passed to the most recent newStringLiteral() call; copied out by
// isStringExprStatement(). The constructor does not assign it.
TokenPos lastStringPos;
// Token stream this handler was constructed with; getPosition() reports the
// position of its current token.
TokenStream& tokenStream;
public:
// The syntax parser's stand-in for a parse tree node. No tree is built: every
// new*() method below returns one of these values, which records only the
// category of the construct that was just parsed.
enum Node {
// The zero value; null() returns it.
NodeFailure = 0,
// Anything that needs no special recognition later on.
NodeGeneric,
NodeGetProp,
// An expression statement whose expression was an unparenthesized string
// literal (see newExprStatement() and isStringExprStatement()).
NodeStringExprStatement,
NodeReturn,
NodeBreak,
NodeThrow,
NodeEmptyStatement,
NodeVarDeclaration,
NodeLexicalDeclaration,
NodeFunctionDefinition,
// This is needed for proper assignment-target handling. ES6 formally
// requires function calls *not* pass IsValidSimpleAssignmentTarget,
// but at last check there were still sites with |f() = 5| and similar
// in code not actually executed (or at least not executed enough to be
// noticed).
NodeFunctionCall,
// Nodes representing *parenthesized* IsValidSimpleAssignmentTarget
// nodes. We can't simply treat all such parenthesized nodes
// identically, because in assignment and increment/decrement contexts
// ES6 says that parentheses constitute a syntax error.
//
// var obj = {};
// var val;
// (val) = 3; (obj.prop) = 4; // okay per ES5's little mind
// [(a)] = [3]; [(obj.prop)] = [4]; // invalid ES6 syntax
// // ...and so on for the other IsValidSimpleAssignmentTarget nodes
//
// We don't know in advance in the current parser when we're parsing
// in a place where name parenthesization changes meaning, so we must
// have multiple node values for these cases.
NodeParenthesizedArgumentsName,
NodeParenthesizedEvalName,
NodeParenthesizedName,
NodeDottedProperty,
NodeElement,
// Destructuring target patterns can't be parenthesized: |([a]) = [3];|
// must be a syntax error. (We can't use NodeGeneric instead of these
// because that would trigger invalid-left-hand-side ReferenceError
// semantics when SyntaxError semantics are desired.)
NodeParenthesizedArray,
NodeParenthesizedObject,
// In rare cases a parenthesized |node| doesn't have the same semantics
// as |node|. Each such node has a special Node value, and we use a
// different Node value to represent the parenthesized form. See also
// is{Unp,P}arenthesized*(Node), parenthesize(Node), and the various
// functions that deal in NodeUnparenthesized* below.
// Nodes representing unparenthesized names.
NodeUnparenthesizedArgumentsName,
NodeUnparenthesizedAsyncName,
NodeUnparenthesizedEvalName,
NodeUnparenthesizedName,
// Valuable for recognizing potential destructuring patterns.
NodeUnparenthesizedArray,
NodeUnparenthesizedObject,
// The directive prologue at the start of a FunctionBody or ScriptBody
// is the longest sequence (possibly empty) of string literal
// expression statements at the start of a function. Thus we need this
// to treat |"use strict";| as a possible Use Strict Directive and
// |("use strict");| as a useless statement.
NodeUnparenthesizedString,
// Legacy generator expressions of the form |(expr for (...))| and
// array comprehensions of the form |[expr for (...)]| don't permit
// |expr| to be a comma expression. Thus we need this to treat
// |(a(), b for (x in []))| as a syntax error and
// |((a(), b) for (x in []))| as a generator that calls |a| and then
// yields |b| each time it's resumed.
NodeUnparenthesizedCommaExpr,
// Assignment expressions in condition contexts could be typos for
// equality checks. (Think |if (x = y)| versus |if (x == y)|.) Thus
// we need this to treat |if (x = y)| as a possible typo and
// |if ((x = y))| as a deliberate assignment within a condition.
//
// (Technically this isn't needed, as these are *only* extraWarnings
// warnings, and parsing with that option disables syntax parsing. But
// it seems best to be consistent, and perhaps the syntax parser will
// eventually enforce extraWarnings and will require this then.)
NodeUnparenthesizedAssignment,
// This node is necessary to determine if the base operand in an
// exponentiation operation is an unparenthesized unary expression.
// We want to reject |-2 ** 3|, but still need to allow |(-2) ** 3|.
NodeUnparenthesizedUnary,
// This node is necessary to determine if the LHS of a property access is
// super related.
NodeSuperBase
};
// True for the two property-access categories: |obj.prop|
// (NodeDottedProperty) and |obj[expr]| (NodeElement).
bool isPropertyAccess(Node node) {
    switch (node) {
      case NodeDottedProperty:
      case NodeElement:
        return true;
      default:
        return false;
    }
}
// True only for NodeFunctionCall, the value newCall() returns.
bool isFunctionCall(Node node) {
// Note: super() is a special form, *not* a function call.
return node == NodeFunctionCall;
}
// True when |node| is an array or object literal that has not been wrapped
// in parentheses, i.e. something that could still be a destructuring pattern.
static bool isUnparenthesizedDestructuringPattern(Node node) {
    switch (node) {
      case NodeUnparenthesizedArray:
      case NodeUnparenthesizedObject:
        return true;
      default:
        return false;
    }
}
// True when |node| is an array or object literal wrapped in parentheses.
//
// The grammar does not regard such a node as a destructuring target at all.
// It is still tracked so that the parser can report a SyntaxError instead of
// falling into invalid-left-hand-side ReferenceError handling.
static bool isParenthesizedDestructuringPattern(Node node) {
    switch (node) {
      case NodeParenthesizedArray:
      case NodeParenthesizedObject:
        return true;
      default:
        return false;
    }
}
// True for an array or object literal node, parenthesized or not.
static bool isDestructuringPatternAnyParentheses(Node node) {
    if (isUnparenthesizedDestructuringPattern(node))
        return true;
    return isParenthesizedDestructuringPattern(node);
}
public:
// Bind the handler to |tokenStream|. That reference is the only argument
// retained: |cx|, |alloc|, |syntaxParser| and |lazyOuterFunction| are not
// used by this constructor.
// NOTE(review): presumably those parameters exist so the signature lines up
// with what Parser<ParseHandler> passes to every handler -- confirm.
// |lastAtom| starts out null; |lastStringPos| is left default-initialized.
SyntaxParseHandler(ExclusiveContext* cx, LifoAlloc& alloc,
TokenStream& tokenStream, Parser<SyntaxParseHandler>* syntaxParser,
LazyScript* lazyOuterFunction)
: lastAtom(nullptr),
tokenStream(tokenStream)
{}
// The "no node" value: NodeFailure, the enumerator explicitly set to 0.
static Node null() { return NodeFailure; }
// Tree-maintenance hooks. A Node here is just an enum value, so these three
// have empty bodies: there is nothing to prepare, free, or trace.
void prepareNodeForMutation(Node node) {}
void freeTree(Node node) {}
void trace(JSTracer* trc) {}
// Note |name| as the most recently seen atom, then classify it.
//
// |arguments|, |async| and |eval| (compared against cx->names()) each map to
// a dedicated Node value; every other name is NodeUnparenthesizedName.
// |pos| is not used.
Node newName(PropertyName* name, const TokenPos& pos, ExclusiveContext* cx) {
    lastAtom = name;

    Node kind = NodeUnparenthesizedName;
    if (name == cx->names().arguments)
        kind = NodeUnparenthesizedArgumentsName;
    else if (name == cx->names().async)
        kind = NodeUnparenthesizedAsyncName;
    else if (name == cx->names().eval)
        kind = NodeUnparenthesizedEvalName;
    return kind;
}
// Computed property name (|[expr]|): nothing is recorded, result is generic.
Node newComputedName(Node expr, uint32_t start, uint32_t end) {
return NodeGeneric;
}
// Property name inside an object literal. Reported as an unparenthesized
// name; unlike newName(), this does not update |lastAtom|.
Node newObjectLiteralPropertyName(JSAtom* atom, const TokenPos& pos) {
return NodeUnparenthesizedName;
}
// Numeric and boolean literals: arguments are ignored, result is generic.
Node newNumber(double value, DecimalPoint decimalPoint, const TokenPos& pos) { return NodeGeneric; }
Node newBooleanLiteral(bool cond, const TokenPos& pos) { return NodeGeneric; }
// String literal. Both the atom and its source position are stashed so that
// isStringExprStatement() can hand them back later if this literal turns out
// to form a whole expression statement.
Node newStringLiteral(JSAtom* atom, const TokenPos& pos) {
    lastStringPos = pos;
    lastAtom = atom;
    return NodeUnparenthesizedString;
}
// Template string pieces and call-site objects: nothing is recorded. Note
// that, unlike newStringLiteral(), a template string does not touch
// |lastAtom| or |lastStringPos|.
Node newTemplateStringLiteral(JSAtom* atom, const TokenPos& pos) {
return NodeGeneric;
}
Node newCallSiteObject(uint32_t begin) {
return NodeGeneric;
}
void addToCallSiteObject(Node callSiteObj, Node rawNode, Node cookedNode) {}
// |this|, |null|, regular expression literals, conditional expressions and
// array elisions: all arguments are ignored and the result is generic.
Node newThisLiteral(const TokenPos& pos, Node thisName) { return NodeGeneric; }
Node newNullLiteral(const TokenPos& pos) { return NodeGeneric; }
template <class Boxer>
Node newRegExp(RegExpObject* reobj, const TokenPos& pos, Boxer& boxer) { return NodeGeneric; }
Node newConditional(Node cond, Node thenExpr, Node elseExpr) { return NodeGeneric; }
Node newElision() { return NodeGeneric; }
// |delete|, |typeof| and the other unary operators all yield
// NodeUnparenthesizedUnary, the value isUnparenthesizedUnaryExpression()
// tests for. The operand and position are ignored.
Node newDelete(uint32_t begin, Node expr) {
return NodeUnparenthesizedUnary;
}
Node newTypeof(uint32_t begin, Node kid) {
return NodeUnparenthesizedUnary;
}
Node newUnary(ParseNodeKind kind, JSOp op, uint32_t begin, Node kid) {
return NodeUnparenthesizedUnary;
}
// Update expressions, spread elements and array pushes are *not* reported as
// unary: they return the generic value.
Node newUpdate(ParseNodeKind kind, uint32_t begin, Node kid) {
return NodeGeneric;
}
Node newSpread(uint32_t begin, Node kid) {
return NodeGeneric;
}
Node newArrayPush(uint32_t begin, Node kid) {
return NodeGeneric;
}
// Binary nodes with zero, one or two operands supplied up front. Kind,
// operands and opcode are all ignored; the result is always generic.
Node newBinary(ParseNodeKind kind, JSOp op = JSOP_NOP) { return NodeGeneric; }
Node newBinary(ParseNodeKind kind, Node left, JSOp op = JSOP_NOP) { return NodeGeneric; }
Node newBinary(ParseNodeKind kind, Node left, Node right, JSOp op = JSOP_NOP) {
return NodeGeneric;
}
// Extending (or starting) an operand list for an operator chain: no list is
// kept here, so this just reports a generic node.
Node appendOrCreateList(ParseNodeKind kind, Node left, Node right,
ParseContext* pc, JSOp op = JSOP_NOP) {
return NodeGeneric;
}
// Three-operand node; arguments ignored, result generic.
Node newTernary(ParseNodeKind kind, Node first, Node second, Node third, JSOp op = JSOP_NOP) {
return NodeGeneric;
}
// Expressions
//
// Most expression constructors return NodeGeneric. The exceptions return a
// dedicated value so the construct stays recognizable: array literals
// (NodeUnparenthesizedArray), object literals (NodeUnparenthesizedObject),
// calls (NodeFunctionCall) and |super| bases (NodeSuperBase). Every add*()
// method is a no-op; the bool-returning ones always return true.
Node newArrayComprehension(Node body, const TokenPos& pos) { return NodeGeneric; }
Node newArrayLiteral(uint32_t begin) { return NodeUnparenthesizedArray; }
MOZ_MUST_USE bool addElision(Node literal, const TokenPos& pos) { return true; }
MOZ_MUST_USE bool addSpreadElement(Node literal, uint32_t begin, Node inner) { return true; }
void addArrayElement(Node literal, Node element) { }
Node newCall() { return NodeFunctionCall; }
Node newTaggedTemplate() { return NodeGeneric; }
Node newObjectLiteral(uint32_t begin) { return NodeUnparenthesizedObject; }
Node newClassMethodList(uint32_t begin) { return NodeGeneric; }
Node newClassNames(Node outer, Node inner, const TokenPos& pos) { return NodeGeneric; }
Node newClass(Node name, Node heritage, Node methodBlock) { return NodeGeneric; }
Node newNewTarget(Node newHolder, Node targetHolder) { return NodeGeneric; }
Node newPosHolder(const TokenPos& pos) { return NodeGeneric; }
Node newSuperBase(Node thisName, const TokenPos& pos) { return NodeSuperBase; }
MOZ_MUST_USE bool addPrototypeMutation(Node literal, uint32_t begin, Node expr) { return true; }
MOZ_MUST_USE bool addPropertyDefinition(Node literal, Node name, Node expr) { return true; }
MOZ_MUST_USE bool addShorthand(Node literal, Node name, Node expr) { return true; }
MOZ_MUST_USE bool addObjectMethodDefinition(Node literal, Node name, Node fn, JSOp op) { return true; }
MOZ_MUST_USE bool addClassMethodDefinition(Node literal, Node name, Node fn, JSOp op, bool isStatic) { return true; }
Node newYieldExpression(uint32_t begin, Node value, Node gen) { return NodeGeneric; }
Node newYieldStarExpression(uint32_t begin, Node value, Node gen) { return NodeGeneric; }
Node newAwaitExpression(uint32_t begin, Node value, Node gen) { return NodeGeneric; }
// Statements
//
// Statement lists are not materialized: creating one returns NodeGeneric and
// adding to one does nothing. prependInitialYield() always returns true.
Node newStatementList(const TokenPos& pos) { return NodeGeneric; }
void addStatementToList(Node list, Node stmt) {}
void addCaseStatementToList(Node list, Node stmt) {}
MOZ_MUST_USE bool prependInitialYield(Node stmtList, Node gen) { return true; }
// The empty statement keeps its own value (NodeEmptyStatement).
Node newEmptyStatement(const TokenPos& pos) { return NodeEmptyStatement; }
// Passes |value| straight through; |thisName| is ignored.
Node newSetThis(Node thisName, Node value) { return value; }
// Expression statement. A statement made of a bare, unparenthesized string
// literal gets its own value (NodeStringExprStatement) so it can be
// recognized afterwards; every other expression statement is generic.
// |end| is not used.
Node newExprStatement(Node expr, uint32_t end) {
    if (expr == NodeUnparenthesizedString)
        return NodeStringExprStatement;
    return NodeGeneric;
}
// Remaining statement constructors. All arguments are ignored. The result is
// NodeGeneric except for |break| (NodeBreak), |return| (NodeReturn) and
// |throw| (NodeThrow), which keep distinct values; note that |continue| does
// not.
Node newIfStatement(uint32_t begin, Node cond, Node then, Node else_) { return NodeGeneric; }
Node newDoWhileStatement(Node body, Node cond, const TokenPos& pos) { return NodeGeneric; }
Node newWhileStatement(uint32_t begin, Node cond, Node body) { return NodeGeneric; }
Node newSwitchStatement(uint32_t begin, Node discriminant, Node caseList) { return NodeGeneric; }
Node newCaseOrDefault(uint32_t begin, Node expr, Node body) { return NodeGeneric; }
Node newContinueStatement(PropertyName* label, const TokenPos& pos) { return NodeGeneric; }
Node newBreakStatement(PropertyName* label, const TokenPos& pos) { return NodeBreak; }
Node newReturnStatement(Node expr, const TokenPos& pos) { return NodeReturn; }
Node newWithStatement(uint32_t begin, Node expr, Node body) { return NodeGeneric; }
Node newLabeledStatement(PropertyName* label, Node stmt, uint32_t begin) {
return NodeGeneric;
}
Node newThrowStatement(Node expr, const TokenPos& pos) { return NodeThrow; }
Node newTryStatement(uint32_t begin, Node body, Node catchList, Node finallyBlock) {
return NodeGeneric;
}
Node newDebuggerStatement(const TokenPos& pos) { return NodeGeneric; }
// Dotted property access (|pn.name|). The property name is stored in
// |lastAtom|, which is what maybeDottedProperty() later returns; |pn| and
// |end| are ignored.
Node newPropertyAccess(Node pn, PropertyName* name, uint32_t end) {
lastAtom = name;
return NodeDottedProperty;
}
// Element access (|pn[kid]|). Nothing is recorded beyond the category.
Node newPropertyByValue(Node pn, Node kid, uint32_t end) { return NodeElement; }
// Catch blocks and parameter defaults: nothing is stored, both report success.
MOZ_MUST_USE bool addCatchBlock(Node catchList, Node letBlock, Node catchName,
Node catchGuard, Node catchBody) { return true; }
MOZ_MUST_USE bool setLastFunctionFormalParameterDefault(Node funcpn, Node pn) { return true; }
void checkAndSetIsDirectRHSAnonFunction(Node pn) {}
// Function statements, function expressions and arrow functions are not
// distinguished from one another: all three are NodeFunctionDefinition.
Node newFunctionStatement() { return NodeFunctionDefinition; }
Node newFunctionExpression() { return NodeFunctionDefinition; }
Node newArrowFunction() { return NodeFunctionDefinition; }
// Function body/parameter/box setters: no-ops (setComprehensionLambdaBody
// returns true).
bool setComprehensionLambdaBody(Node pn, Node body) { return true; }
void setFunctionFormalParametersAndBody(Node pn, Node kid) {}
void setFunctionBody(Node pn, Node kid) {}
void setFunctionBox(Node pn, FunctionBox* funbox) {}
void addFunctionFormalParameter(Node pn, Node argpn) {}
// |for| statements and comprehension |for| clauses: arguments ignored,
// result generic.
Node newForStatement(uint32_t begin, Node forHead, Node body, unsigned iflags) {
return NodeGeneric;
}
Node newComprehensionFor(uint32_t begin, Node forHead, Node body) {
return NodeGeneric;
}
// Binding introduced by a comprehension. Asserts that |kid| is one of the
// unparenthesized-name values and returns a generic node.
Node newComprehensionBinding(Node kid) {
// Careful: we're asking this well after the name was parsed, so nothing
// here can recover |kid|'s actual name; all a Node value can tell us is
// that |kid| was some name, which is the only thing asserted.
// NOTE(review): the earlier wording spoke of a returned value being
// "truthy"; isUnparenthesizedName() in this file returns a plain bool, so
// that wording looked stale -- confirm against the file's history.
MOZ_ASSERT(isUnparenthesizedName(kid));
return NodeGeneric;
}
// Loop heads (|for (init; test; update)| and |for (target in/of expr)|):
// arguments ignored, result generic.
Node newForHead(Node init, Node test, Node update, const TokenPos& pos) {
return NodeGeneric;
}
Node newForInOrOfHead(ParseNodeKind kind, Node target, Node iteratedExpr, const TokenPos& pos) {
return NodeGeneric;
}
// Nothing is stored for an initializer; always reports success.
MOZ_MUST_USE bool finishInitializerAssignment(Node pn, Node init) { return true; }
// Position setters. A Node carries no position, so all of these are no-ops.
void setBeginPosition(Node pn, Node oth) {}
void setBeginPosition(Node pn, uint32_t begin) {}
void setEndPosition(Node pn, Node oth) {}
void setEndPosition(Node pn, uint32_t end) {}
void setPosition(Node pn, const TokenPos& pos) {}
// Position lookup. |pn| is ignored: a Node stores no position, so the answer
// is always the position of the token stream's current token.
TokenPos getPosition(Node pn) {
    TokenPos current = tokenStream.currentToken().pos;
    return current;
}
// Generic list node. Declaration lists (var/let/const) must not be created
// through here -- the assertions below enforce that -- because they have
// their own Node values; use newDeclarationList() for those.
Node newList(ParseNodeKind kind, JSOp op = JSOP_NOP) {
MOZ_ASSERT(kind != PNK_VAR);
MOZ_ASSERT(kind != PNK_LET);
MOZ_ASSERT(kind != PNK_CONST);
return NodeGeneric;
}
// Overload with a start offset; |begin| is dropped.
Node newList(ParseNodeKind kind, uint32_t begin, JSOp op = JSOP_NOP) {
return newList(kind, op);
}
// Overload with a first child; |kid| is dropped.
Node newList(ParseNodeKind kind, Node kid, JSOp op = JSOP_NOP) {
return newList(kind, op);
}
// Declaration list. |var| lists become NodeVarDeclaration; anything else is
// asserted to be |let| or |const| and becomes NodeLexicalDeclaration.
// |op| is not used.
Node newDeclarationList(ParseNodeKind kind, JSOp op = JSOP_NOP) {
    if (kind != PNK_VAR) {
        MOZ_ASSERT(kind == PNK_LET || kind == PNK_CONST);
        return NodeLexicalDeclaration;
    }
    return NodeVarDeclaration;
}
// Overload with a first child; |kid| is not recorded.
Node newDeclarationList(ParseNodeKind kind, Node kid, JSOp op = JSOP_NOP) {
    Node list = newDeclarationList(kind, op);
    return list;
}
// True for either value produced by newDeclarationList().
bool isDeclarationList(Node node) {
    switch (node) {
      case NodeVarDeclaration:
      case NodeLexicalDeclaration:
        return true;
      default:
        return false;
    }
}
// Produce "the" binding of declaration list |decl|. |decl| must be a
// declaration list (asserted); the answer is always NodeUnparenthesizedName,
// for the reasons laid out below.
Node singleBindingFromDeclaration(Node decl) {
MOZ_ASSERT(isDeclarationList(decl));
// This is, unfortunately, very dodgy. Obviously NodeVarDeclaration
// and NodeLexicalDeclaration can store no info on the arbitrary
// number of bindings it could contain.
//
// But this method is called only for cloning for-in/of declarations
// as initialization targets. That context simplifies matters. If the
// binding is a single name, it'll always syntax-parse (or it would
// already have been rejected as assigning/binding a forbidden name).
// Otherwise the binding is a destructuring pattern. But syntax
// parsing would *already* have aborted when it saw a destructuring
// pattern. So we can just say any old thing here, because the only
// time we'll be wrong is a case that syntax parsing has already
// rejected. Use NodeUnparenthesizedName so the SyntaxParseHandler
// Parser::cloneLeftHandSide can assert it sees only this.
return NodeUnparenthesizedName;
}
// Catch list: an ordinary generic list of kind PNK_CATCHLIST.
Node newCatchList() {
return newList(PNK_CATCHLIST, JSOP_NOP);
}
// Comma expression. Returns a dedicated value, the one
// isUnparenthesizedCommaExpression() tests for; |kid| is ignored.
Node newCommaExpressionList(Node kid) {
return NodeUnparenthesizedCommaExpr;
}
// Append |kid| to |list|. Nothing is actually stored; the body only asserts
// that |list| is one of the Node values that stand for a list-like construct
// (generic node, array/object literal, comma expression, declaration list,
// or call).
void addList(Node list, Node kid) {
MOZ_ASSERT(list == NodeGeneric ||
list == NodeUnparenthesizedArray ||
list == NodeUnparenthesizedObject ||
list == NodeUnparenthesizedCommaExpr ||
list == NodeVarDeclaration ||
list == NodeLexicalDeclaration ||
list == NodeFunctionCall);
}
// Assignment expression. Only plain assignment (PNK_ASSIGN) gets the
// dedicated NodeUnparenthesizedAssignment value; every other assignment kind
// is handed to newBinary() like an ordinary binary node.
Node newAssignment(ParseNodeKind kind, Node lhs, Node rhs, JSOp op) {
    if (kind != PNK_ASSIGN)
        return newBinary(kind, lhs, rhs, op);
    return NodeUnparenthesizedAssignment;
}
// Single-value category tests. Each compares |node| against exactly one
// enumerator; parenthesize() turns the first three categories into
// NodeGeneric, so they only match the unparenthesized form.
bool isUnparenthesizedCommaExpression(Node node) {
return node == NodeUnparenthesizedCommaExpr;
}
bool isUnparenthesizedAssignment(Node node) {
return node == NodeUnparenthesizedAssignment;
}
bool isUnparenthesizedUnaryExpression(Node node) {
return node == NodeUnparenthesizedUnary;
}
// True only for the value newReturnStatement() produces.
bool isReturnStatement(Node node) {
return node == NodeReturn;
}
// Whether |pn| belongs to the set of statement kinds accepted after a return
// statement: function definitions, |var| declarations, |break|, |throw| and
// the empty statement. Lexical declarations are not in the set.
bool isStatementPermittedAfterReturnStatement(Node pn) {
    switch (pn) {
      case NodeFunctionDefinition:
      case NodeVarDeclaration:
      case NodeBreak:
      case NodeThrow:
      case NodeEmptyStatement:
        return true;
      default:
        return false;
    }
}
// True only for the value newSuperBase() produces.
bool isSuperBase(Node pn) {
return pn == NodeSuperBase;
}
// Opcode and list-flag setters: nothing to store on an enum value, so no-ops.
void setOp(Node pn, JSOp op) {}
void setListFlag(Node pn, unsigned flag) {}
// Map |node| to the value representing |(node)|.
//
// Three groups of inputs:
//  - names and array/object literals, whose parenthesized form must stay
//    recognizable, move to their NodeParenthesized* counterpart (|async|
//    shares NodeParenthesizedName with ordinary names);
//  - strings, comma expressions, assignments and unary expressions, which
//    need no recognition once parenthesized, collapse to NodeGeneric;
//  - everything else means the same with or without parentheses and is
//    returned unchanged.
MOZ_MUST_USE Node parenthesize(Node node) {
    switch (node) {
      case NodeUnparenthesizedArgumentsName:
        return NodeParenthesizedArgumentsName;
      case NodeUnparenthesizedEvalName:
        return NodeParenthesizedEvalName;
      case NodeUnparenthesizedName:
      case NodeUnparenthesizedAsyncName:
        return NodeParenthesizedName;
      case NodeUnparenthesizedArray:
        return NodeParenthesizedArray;
      case NodeUnparenthesizedObject:
        return NodeParenthesizedObject;

      case NodeUnparenthesizedString:
      case NodeUnparenthesizedCommaExpr:
      case NodeUnparenthesizedAssignment:
      case NodeUnparenthesizedUnary:
        return NodeGeneric;

      default:
        return node;
    }
}
// Returns |pn| unchanged.
MOZ_MUST_USE Node setLikelyIIFE(Node pn) {
return pn; // Remain in syntax-parse mode.
}
// No-op: nothing is recorded about directive-prologue membership.
void setInDirectivePrologue(Node pn) {}
// Always false: this handler never reports a node as constant.
bool isConstant(Node pn) { return false; }
// True for any of the four unparenthesized-name values (|arguments|,
// |async|, |eval|, or an ordinary name).
bool isUnparenthesizedName(Node node) {
    switch (node) {
      case NodeUnparenthesizedArgumentsName:
      case NodeUnparenthesizedAsyncName:
      case NodeUnparenthesizedEvalName:
      case NodeUnparenthesizedName:
        return true;
      default:
        return false;
    }
}
// True when |node| is a name, whether or not it is wrapped in parentheses.
bool isNameAnyParentheses(Node node) {
    if (isUnparenthesizedName(node))
        return true;

    switch (node) {
      case NodeParenthesizedArgumentsName:
      case NodeParenthesizedEvalName:
      case NodeParenthesizedName:
        return true;
      default:
        return false;
    }
}
// Whether the name |node| is |eval|, parenthesized or not. |node| must be a
// name (asserted); |cx| is not used.
bool nameIsEvalAnyParentheses(Node node, ExclusiveContext* cx) {
    MOZ_ASSERT(isNameAnyParentheses(node),
               "must only call this function on known names");
    switch (node) {
      case NodeUnparenthesizedEvalName:
      case NodeParenthesizedEvalName:
        return true;
      default:
        return false;
    }
}
// For a name |node| (asserted), return js_eval_str if it is |eval|,
// js_arguments_str if it is |arguments|, and nullptr for any other name.
// Parenthesization makes no difference.
const char* nameIsArgumentsEvalAnyParentheses(Node node, ExclusiveContext* cx) {
    MOZ_ASSERT(isNameAnyParentheses(node),
               "must only call this method on known names");
    if (nameIsEvalAnyParentheses(node, cx))
        return js_eval_str;

    const bool isArguments = node == NodeUnparenthesizedArgumentsName ||
                             node == NodeParenthesizedArgumentsName;
    if (isArguments)
        return js_arguments_str;
    return nullptr;
}
// Whether the name |node| is an unparenthesized |async|. |node| must be a
// name (asserted). A parenthesized |async| never matches, because
// parenthesize() folds it into NodeParenthesizedName. |cx| is not used.
bool nameIsUnparenthesizedAsync(Node node, ExclusiveContext* cx) {
MOZ_ASSERT(isNameAnyParentheses(node),
"must only call this function on known names");
return node == NodeUnparenthesizedAsyncName;
}
// If |node| is a dotted property access, return the remembered atom as a
// PropertyName; otherwise return nullptr.
//
// The result comes from |lastAtom|, i.e. whatever name or string literal the
// handler saw most recently, not from |node| itself.
// NOTE(review): this presumably relies on callers asking right after the
// access was parsed -- confirm at the call sites.
PropertyName* maybeDottedProperty(Node node) {
    // Note: |super.apply(...)| is a special form that calls an "apply"
    // method retrieved from one value, but using a *different* value as
    // |this|. It's not really eligible for the funapply/funcall
    // optimizations as they're currently implemented (assuming a single
    // value is used for both retrieval and |this|).
    if (node == NodeDottedProperty)
        return lastAtom->asPropertyName();
    return nullptr;
}
// If |pn| is a string-literal expression statement, store the remembered
// string position in |*pos| and return the remembered atom. Otherwise return
// nullptr and leave |*pos| untouched.
JSAtom* isStringExprStatement(Node pn, TokenPos* pos) {
    if (pn != NodeStringExprStatement)
        return nullptr;

    *pos = lastStringPos;
    return lastAtom;
}
// Both skipping queries always answer false for this handler.
bool canSkipLazyInnerFunctions() {
return false;
}
bool canSkipLazyClosedOverBindings() {
return false;
}
// Must never be called: canSkipLazyClosedOverBindings() is always false here,
// so reaching this is treated as a fatal error.
JSAtom* nextLazyClosedOverBinding() {
MOZ_CRASH("SyntaxParseHandler::canSkipLazyClosedOverBindings must return false");
}
// No-ops for this handler.
void adjustGetToSet(Node node) {}
void disableSyntaxParser() {
}
};
} // namespace frontend
} // namespace js
#endif /* frontend_SyntaxParseHandler_h */