Mypal/js/src/frontend/SyntaxParseHandler.h

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef frontend_SyntaxParseHandler_h
#define frontend_SyntaxParseHandler_h

#include "mozilla/Attributes.h"

#include "frontend/ParseNode.h"
#include "frontend/TokenStream.h"

namespace js {
namespace frontend {

template <typename ParseHandler>
class Parser;

// Parse handler used when processing the syntax in a block of code, to generate
// the minimal information which is required to detect syntax errors and allow
// bytecode to be emitted for outer functions.
//
// When parsing, we start at the top level with a full parse, and when possible
// only check the syntax for inner functions, so that they can be lazily parsed
// into bytecode when/if they first run. Checking the syntax of a function is
// several times faster than doing a full parse/emit, and lazy parsing improves
// both performance and memory usage significantly when pages contain large
// amounts of code that never executes (which happens often).
class SyntaxParseHandler
{
    // Remember the last encountered name or string literal during syntax parses.
    JSAtom* lastAtom;
    TokenPos lastStringPos;
    TokenStream& tokenStream;

  public:
    enum Node {
        NodeFailure = 0,
        NodeGeneric,
        NodeGetProp,
        NodeStringExprStatement,
        NodeReturn,
        NodeBreak,
        NodeThrow,
        NodeEmptyStatement,

        NodeVarDeclaration,
        NodeLexicalDeclaration,

        NodeFunctionDefinition,

        // This is needed for proper assignment-target handling.  ES6 formally
        // requires function calls *not* pass IsValidSimpleAssignmentTarget,
        // but at last check there were still sites with |f() = 5| and similar
        // in code not actually executed (or at least not executed enough to be
        // noticed).
        NodeFunctionCall,

        // Nodes representing *parenthesized* IsValidSimpleAssignmentTarget
        // nodes.  We can't simply treat all such parenthesized nodes
        // identically, because in assignment and increment/decrement contexts
        // ES6 says that parentheses constitute a syntax error.
        //
        //   var obj = {};
        //   var val;
        //   (val) = 3; (obj.prop) = 4;       // okay per ES5's little mind
        //   [(a)] = [3]; [(obj.prop)] = [4]; // invalid ES6 syntax
        //   // ...and so on for the other IsValidSimpleAssignmentTarget nodes
        //
        // We don't know in advance in the current parser when we're parsing
        // in a place where name parenthesization changes meaning, so we must
        // have multiple node values for these cases.
        NodeParenthesizedArgumentsName,
        NodeParenthesizedEvalName,
        NodeParenthesizedName,

        NodeDottedProperty,
        NodeElement,

        // Destructuring target patterns can't be parenthesized: |([a]) = [3];|
        // must be a syntax error.  (We can't use NodeGeneric instead of these
        // because that would trigger invalid-left-hand-side ReferenceError
        // semantics when SyntaxError semantics are desired.)
        NodeParenthesizedArray,
        NodeParenthesizedObject,

        // In rare cases a parenthesized |node| doesn't have the same semantics
        // as |node|.  Each such node has a special Node value, and we use a
        // different Node value to represent the parenthesized form.  See also
        // is{Unp,P}arenthesized*(Node), parenthesize(Node), and the various
        // functions that deal in NodeUnparenthesized* below.

        // Nodes representing unparenthesized names.
        NodeUnparenthesizedArgumentsName,
        NodeUnparenthesizedAsyncName,
        NodeUnparenthesizedEvalName,
        NodeUnparenthesizedName,

        // Valuable for recognizing potential destructuring patterns.
        NodeUnparenthesizedArray,
        NodeUnparenthesizedObject,

        // The directive prologue at the start of a FunctionBody or ScriptBody
        // is the longest sequence (possibly empty) of string literal
        // expression statements at the start of a function.  Thus we need this
        // to treat |"use strict";| as a possible Use Strict Directive and
        // |("use strict");| as a useless statement.
        NodeUnparenthesizedString,

        // Legacy generator expressions of the form |(expr for (...))| and
        // array comprehensions of the form |[expr for (...)]|) don't permit
        // |expr| to be a comma expression.  Thus we need this to treat
        // |(a(), b for (x in []))| as a syntax error and
        // |((a(), b) for (x in []))| as a generator that calls |a| and then
        // yields |b| each time it's resumed.
        NodeUnparenthesizedCommaExpr,

        // Assignment expressions in condition contexts could be typos for
        // equality checks.  (Think |if (x = y)| versus |if (x == y)|.)  Thus
        // we need this to treat |if (x = y)| as a possible typo and
        // |if ((x = y))| as a deliberate assignment within a condition.
        //
        // (Technically this isn't needed, as these are *only* extraWarnings
        // warnings, and parsing with that option disables syntax parsing.  But
        // it seems best to be consistent, and perhaps the syntax parser will
        // eventually enforce extraWarnings and will require this then.)
        NodeUnparenthesizedAssignment,

        // This node is necessary to determine if the base operand in an
        // exponentiation operation is an unparenthesized unary expression.
        // We want to reject |-2 ** 3|, but still need to allow |(-2) ** 3|.
        NodeUnparenthesizedUnary,

        // This node is necessary to determine if the LHS of a property access is
        // super related.
        NodeSuperBase
    };

    bool isPropertyAccess(Node node) {
        return node == NodeDottedProperty || node == NodeElement;
    }

    bool isFunctionCall(Node node) {
        // Note: super() is a special form, *not* a function call.
        return node == NodeFunctionCall;
    }

    static bool isUnparenthesizedDestructuringPattern(Node node) {
        return node == NodeUnparenthesizedArray || node == NodeUnparenthesizedObject;
    }

    static bool isParenthesizedDestructuringPattern(Node node) {
        // Technically this isn't a destructuring target at all -- the grammar
        // doesn't treat it as such.  But we need to know when this happens to
        // consider it a SyntaxError rather than an invalid-left-hand-side
        // ReferenceError.
        return node == NodeParenthesizedArray || node == NodeParenthesizedObject;
    }

    static bool isDestructuringPatternAnyParentheses(Node node) {
        return isUnparenthesizedDestructuringPattern(node) ||
                isParenthesizedDestructuringPattern(node);
    }

  public:
    SyntaxParseHandler(ExclusiveContext* cx, LifoAlloc& alloc,
                       TokenStream& tokenStream, Parser<SyntaxParseHandler>* syntaxParser,
                       LazyScript* lazyOuterFunction)
      : lastAtom(nullptr),
        tokenStream(tokenStream)
    {}

    static Node null() { return NodeFailure; }

    void prepareNodeForMutation(Node node) {}
    void freeTree(Node node) {}

    void trace(JSTracer* trc) {}

    Node newName(PropertyName* name, const TokenPos& pos, ExclusiveContext* cx) {
        lastAtom = name;
        if (name == cx->names().arguments)
            return NodeUnparenthesizedArgumentsName;
        if (name == cx->names().async)
            return NodeUnparenthesizedAsyncName;
        if (name == cx->names().eval)
            return NodeUnparenthesizedEvalName;
        return NodeUnparenthesizedName;
    }

    Node newComputedName(Node expr, uint32_t start, uint32_t end) {
        return NodeGeneric;
    }

    Node newObjectLiteralPropertyName(JSAtom* atom, const TokenPos& pos) {
        return NodeUnparenthesizedName;
    }

    Node newNumber(double value, DecimalPoint decimalPoint, const TokenPos& pos) { return NodeGeneric; }
    Node newBooleanLiteral(bool cond, const TokenPos& pos) { return NodeGeneric; }

    Node newStringLiteral(JSAtom* atom, const TokenPos& pos) {
        lastAtom = atom;
        lastStringPos = pos;
        return NodeUnparenthesizedString;
    }

    Node newTemplateStringLiteral(JSAtom* atom, const TokenPos& pos) {
        return NodeGeneric;
    }

    Node newCallSiteObject(uint32_t begin) {
        return NodeGeneric;
    }

    void addToCallSiteObject(Node callSiteObj, Node rawNode, Node cookedNode) {}

    Node newThisLiteral(const TokenPos& pos, Node thisName) { return NodeGeneric; }
    Node newNullLiteral(const TokenPos& pos) { return NodeGeneric; }

    template <class Boxer>
    Node newRegExp(RegExpObject* reobj, const TokenPos& pos, Boxer& boxer) { return NodeGeneric; }

    Node newConditional(Node cond, Node thenExpr, Node elseExpr) { return NodeGeneric; }

    Node newElision() { return NodeGeneric; }

    Node newDelete(uint32_t begin, Node expr) {
        return NodeUnparenthesizedUnary;
    }

    Node newTypeof(uint32_t begin, Node kid) {
        return NodeUnparenthesizedUnary;
    }

    Node newUnary(ParseNodeKind kind, JSOp op, uint32_t begin, Node kid) {
        return NodeUnparenthesizedUnary;
    }

    Node newUpdate(ParseNodeKind kind, uint32_t begin, Node kid) {
        return NodeGeneric;
    }

    Node newSpread(uint32_t begin, Node kid) {
        return NodeGeneric;
    }

    Node newArrayPush(uint32_t begin, Node kid) {
        return NodeGeneric;
    }

    Node newBinary(ParseNodeKind kind, JSOp op = JSOP_NOP) { return NodeGeneric; }
    Node newBinary(ParseNodeKind kind, Node left, JSOp op = JSOP_NOP) { return NodeGeneric; }
    Node newBinary(ParseNodeKind kind, Node left, Node right, JSOp op = JSOP_NOP) {
        return NodeGeneric;
    }
    Node appendOrCreateList(ParseNodeKind kind, Node left, Node right,
                            ParseContext* pc, JSOp op = JSOP_NOP) {
        return NodeGeneric;
    }

    Node newTernary(ParseNodeKind kind, Node first, Node second, Node third, JSOp op = JSOP_NOP) {
        return NodeGeneric;
    }

    // Expressions

    Node newArrayComprehension(Node body, const TokenPos& pos) { return NodeGeneric; }
    Node newArrayLiteral(uint32_t begin) { return NodeUnparenthesizedArray; }
    MOZ_MUST_USE bool addElision(Node literal, const TokenPos& pos) { return true; }
    MOZ_MUST_USE bool addSpreadElement(Node literal, uint32_t begin, Node inner) { return true; }
    void addArrayElement(Node literal, Node element) { }

    Node newCall() { return NodeFunctionCall; }
    Node newTaggedTemplate() { return NodeGeneric; }

    Node newObjectLiteral(uint32_t begin) { return NodeUnparenthesizedObject; }
    Node newClassMethodList(uint32_t begin) { return NodeGeneric; }
    Node newClassNames(Node outer, Node inner, const TokenPos& pos) { return NodeGeneric; }
    Node newClass(Node name, Node heritage, Node methodBlock) { return NodeGeneric; }

    Node newNewTarget(Node newHolder, Node targetHolder) { return NodeGeneric; }
    Node newPosHolder(const TokenPos& pos) { return NodeGeneric; }
    Node newSuperBase(Node thisName, const TokenPos& pos) { return NodeSuperBase; }

    MOZ_MUST_USE bool addPrototypeMutation(Node literal, uint32_t begin, Node expr) { return true; }
    MOZ_MUST_USE bool addPropertyDefinition(Node literal, Node name, Node expr) { return true; }
    MOZ_MUST_USE bool addShorthand(Node literal, Node name, Node expr) { return true; }
    MOZ_MUST_USE bool addObjectMethodDefinition(Node literal, Node name, Node fn, JSOp op) { return true; }
    MOZ_MUST_USE bool addClassMethodDefinition(Node literal, Node name, Node fn, JSOp op, bool isStatic) { return true; }
    Node newYieldExpression(uint32_t begin, Node value, Node gen) { return NodeGeneric; }
    Node newYieldStarExpression(uint32_t begin, Node value, Node gen) { return NodeGeneric; }
    Node newAwaitExpression(uint32_t begin, Node value, Node gen) { return NodeGeneric; }

    // Statements

    Node newStatementList(const TokenPos& pos) { return NodeGeneric; }
    void addStatementToList(Node list, Node stmt) {}
    void addCaseStatementToList(Node list, Node stmt) {}
    MOZ_MUST_USE bool prependInitialYield(Node stmtList, Node gen) { return true; }
    Node newEmptyStatement(const TokenPos& pos) { return NodeEmptyStatement; }

    Node newSetThis(Node thisName, Node value) { return value; }

    Node newExprStatement(Node expr, uint32_t end) {
        return expr == NodeUnparenthesizedString ? NodeStringExprStatement : NodeGeneric;
    }

    Node newIfStatement(uint32_t begin, Node cond, Node then, Node else_) { return NodeGeneric; }
    Node newDoWhileStatement(Node body, Node cond, const TokenPos& pos) { return NodeGeneric; }
    Node newWhileStatement(uint32_t begin, Node cond, Node body) { return NodeGeneric; }
    Node newSwitchStatement(uint32_t begin, Node discriminant, Node caseList) { return NodeGeneric; }
    Node newCaseOrDefault(uint32_t begin, Node expr, Node body) { return NodeGeneric; }
    Node newContinueStatement(PropertyName* label, const TokenPos& pos) { return NodeGeneric; }
    Node newBreakStatement(PropertyName* label, const TokenPos& pos) { return NodeBreak; }
    Node newReturnStatement(Node expr, const TokenPos& pos) { return NodeReturn; }
    Node newWithStatement(uint32_t begin, Node expr, Node body) { return NodeGeneric; }

    Node newLabeledStatement(PropertyName* label, Node stmt, uint32_t begin) {
        return NodeGeneric;
    }

    Node newThrowStatement(Node expr, const TokenPos& pos) { return NodeThrow; }
    Node newTryStatement(uint32_t begin, Node body, Node catchList, Node finallyBlock) {
        return NodeGeneric;
    }
    Node newDebuggerStatement(const TokenPos& pos) { return NodeGeneric; }

    Node newPropertyAccess(Node pn, PropertyName* name, uint32_t end) {
        lastAtom = name;
        return NodeDottedProperty;
    }

    Node newPropertyByValue(Node pn, Node kid, uint32_t end) { return NodeElement; }

    MOZ_MUST_USE bool addCatchBlock(Node catchList, Node letBlock, Node catchName,
                                    Node catchGuard, Node catchBody) { return true; }

    MOZ_MUST_USE bool setLastFunctionFormalParameterDefault(Node funcpn, Node pn) { return true; }

    void checkAndSetIsDirectRHSAnonFunction(Node pn) {}

    Node newFunctionStatement() { return NodeFunctionDefinition; }
    Node newFunctionExpression() { return NodeFunctionDefinition; }
    Node newArrowFunction() { return NodeFunctionDefinition; }

    bool setComprehensionLambdaBody(Node pn, Node body) { return true; }
    void setFunctionFormalParametersAndBody(Node pn, Node kid) {}
    void setFunctionBody(Node pn, Node kid) {}
    void setFunctionBox(Node pn, FunctionBox* funbox) {}
    void addFunctionFormalParameter(Node pn, Node argpn) {}

    Node newForStatement(uint32_t begin, Node forHead, Node body, unsigned iflags) {
        return NodeGeneric;
    }

    Node newComprehensionFor(uint32_t begin, Node forHead, Node body) {
        return NodeGeneric;
    }

    Node newComprehensionBinding(Node kid) {
        // Careful: we're asking this well after the name was parsed, so the
        // value returned may not correspond to |kid|'s actual name.  But it
        // *will* be truthy iff |kid| was a name, so we're safe.
        MOZ_ASSERT(isUnparenthesizedName(kid));
        return NodeGeneric;
    }

    Node newForHead(Node init, Node test, Node update, const TokenPos& pos) {
        return NodeGeneric;
    }

    Node newForInOrOfHead(ParseNodeKind kind, Node target, Node iteratedExpr, const TokenPos& pos) {
        return NodeGeneric;
    }

    MOZ_MUST_USE bool finishInitializerAssignment(Node pn, Node init) { return true; }

    void setBeginPosition(Node pn, Node oth) {}
    void setBeginPosition(Node pn, uint32_t begin) {}

    void setEndPosition(Node pn, Node oth) {}
    void setEndPosition(Node pn, uint32_t end) {}

    void setPosition(Node pn, const TokenPos& pos) {}
    TokenPos getPosition(Node pn) {
        return tokenStream.currentToken().pos;
    }

    Node newList(ParseNodeKind kind, JSOp op = JSOP_NOP) {
        MOZ_ASSERT(kind != PNK_VAR);
        MOZ_ASSERT(kind != PNK_LET);
        MOZ_ASSERT(kind != PNK_CONST);
        return NodeGeneric;
    }
    Node newList(ParseNodeKind kind, uint32_t begin, JSOp op = JSOP_NOP) {
        return newList(kind, op);
    }
    Node newList(ParseNodeKind kind, Node kid, JSOp op = JSOP_NOP) {
        return newList(kind, op);
    }

    Node newDeclarationList(ParseNodeKind kind, JSOp op = JSOP_NOP) {
        if (kind == PNK_VAR)
            return NodeVarDeclaration;
        MOZ_ASSERT(kind == PNK_LET || kind == PNK_CONST);
        return NodeLexicalDeclaration;
    }
    Node newDeclarationList(ParseNodeKind kind, Node kid, JSOp op = JSOP_NOP) {
        return newDeclarationList(kind, op);
    }

    bool isDeclarationList(Node node) {
        return node == NodeVarDeclaration || node == NodeLexicalDeclaration;
    }

    Node singleBindingFromDeclaration(Node decl) {
        MOZ_ASSERT(isDeclarationList(decl));

        // This is, unfortunately, very dodgy.  Obviously NodeVarDeclaration
        // and NodeLexicalDeclaration can store no info on the arbitrary
        // number of bindings it could contain.
        //
        // But this method is called only for cloning for-in/of declarations
        // as initialization targets.  That context simplifies matters.  If the
        // binding is a single name, it'll always syntax-parse (or it would
        // already have been rejected as assigning/binding a forbidden name).
        // Otherwise the binding is a destructuring pattern.  But syntax
        // parsing would *already* have aborted when it saw a destructuring
        // pattern.  So we can just say any old thing here, because the only
        // time we'll be wrong is a case that syntax parsing has already
        // rejected.  Use NodeUnparenthesizedName so the SyntaxParseHandler
        // Parser::cloneLeftHandSide can assert it sees only this.
        return NodeUnparenthesizedName;
    }

    Node newCatchList() {
        return newList(PNK_CATCHLIST, JSOP_NOP);
    }

    Node newCommaExpressionList(Node kid) {
        return NodeUnparenthesizedCommaExpr;
    }

    void addList(Node list, Node kid) {
        MOZ_ASSERT(list == NodeGeneric ||
                   list == NodeUnparenthesizedArray ||
                   list == NodeUnparenthesizedObject ||
                   list == NodeUnparenthesizedCommaExpr ||
                   list == NodeVarDeclaration ||
                   list == NodeLexicalDeclaration ||
                   list == NodeFunctionCall);
    }

    Node newAssignment(ParseNodeKind kind, Node lhs, Node rhs, JSOp op) {
        if (kind == PNK_ASSIGN)
            return NodeUnparenthesizedAssignment;
        return newBinary(kind, lhs, rhs, op);
    }

    bool isUnparenthesizedCommaExpression(Node node) {
        return node == NodeUnparenthesizedCommaExpr;
    }

    bool isUnparenthesizedAssignment(Node node) {
        return node == NodeUnparenthesizedAssignment;
    }

    bool isUnparenthesizedUnaryExpression(Node node) {
        return node == NodeUnparenthesizedUnary;
    }

    bool isReturnStatement(Node node) {
        return node == NodeReturn;
    }

    bool isStatementPermittedAfterReturnStatement(Node pn) {
        return pn == NodeFunctionDefinition || pn == NodeVarDeclaration ||
               pn == NodeBreak ||
               pn == NodeThrow ||
               pn == NodeEmptyStatement;
    }

    bool isSuperBase(Node pn) {
        return pn == NodeSuperBase;
    }

    void setOp(Node pn, JSOp op) {}
    void setListFlag(Node pn, unsigned flag) {}
    MOZ_MUST_USE Node parenthesize(Node node) {
        // A number of nodes have different behavior upon parenthesization, but
        // only in some circumstances.  Convert these nodes to special
        // parenthesized forms.
        if (node == NodeUnparenthesizedArgumentsName)
            return NodeParenthesizedArgumentsName;
        if (node == NodeUnparenthesizedEvalName)
            return NodeParenthesizedEvalName;
        if (node == NodeUnparenthesizedName || node == NodeUnparenthesizedAsyncName)
            return NodeParenthesizedName;

        if (node == NodeUnparenthesizedArray)
            return NodeParenthesizedArray;
        if (node == NodeUnparenthesizedObject)
            return NodeParenthesizedObject;

        // Other nodes need not be recognizable after parenthesization; convert
        // them to a generic node.
        if (node == NodeUnparenthesizedString ||
            node == NodeUnparenthesizedCommaExpr ||
            node == NodeUnparenthesizedAssignment ||
            node == NodeUnparenthesizedUnary)
        {
            return NodeGeneric;
        }

        // In all other cases, the parenthesized form of |node| is equivalent
        // to the unparenthesized form: return |node| unchanged.
        return node;
    }
    MOZ_MUST_USE Node setLikelyIIFE(Node pn) {
        return pn; // Remain in syntax-parse mode.
    }
    void setInDirectivePrologue(Node pn) {}

    bool isConstant(Node pn) { return false; }

    bool isUnparenthesizedName(Node node) {
        return node == NodeUnparenthesizedArgumentsName ||
               node == NodeUnparenthesizedAsyncName ||
               node == NodeUnparenthesizedEvalName ||
               node == NodeUnparenthesizedName;
    }

    bool isNameAnyParentheses(Node node) {
        if (isUnparenthesizedName(node))
            return true;
        return node == NodeParenthesizedArgumentsName ||
               node == NodeParenthesizedEvalName ||
               node == NodeParenthesizedName;
    }

    bool nameIsEvalAnyParentheses(Node node, ExclusiveContext* cx) {
        MOZ_ASSERT(isNameAnyParentheses(node),
                   "must only call this function on known names");
        return node == NodeUnparenthesizedEvalName || node == NodeParenthesizedEvalName;
    }

    const char* nameIsArgumentsEvalAnyParentheses(Node node, ExclusiveContext* cx) {
        MOZ_ASSERT(isNameAnyParentheses(node),
                   "must only call this method on known names");

        if (nameIsEvalAnyParentheses(node, cx))
            return js_eval_str;
        if (node == NodeUnparenthesizedArgumentsName || node == NodeParenthesizedArgumentsName)
            return js_arguments_str;
        return nullptr;
    }

    bool nameIsUnparenthesizedAsync(Node node, ExclusiveContext* cx) {
        MOZ_ASSERT(isNameAnyParentheses(node),
                   "must only call this function on known names");
        return node == NodeUnparenthesizedAsyncName;
    }

    PropertyName* maybeDottedProperty(Node node) {
        // Note: |super.apply(...)| is a special form that calls an "apply"
        // method retrieved from one value, but using a *different* value as
        // |this|.  It's not really eligible for the funapply/funcall
        // optimizations as they're currently implemented (assuming a single
        // value is used for both retrieval and |this|).
        if (node != NodeDottedProperty)
            return nullptr;
        return lastAtom->asPropertyName();
    }

    JSAtom* isStringExprStatement(Node pn, TokenPos* pos) {
        if (pn == NodeStringExprStatement) {
            *pos = lastStringPos;
            return lastAtom;
        }
        return nullptr;
    }

    bool canSkipLazyInnerFunctions() {
        return false;
    }
    bool canSkipLazyClosedOverBindings() {
        return false;
    }
    JSAtom* nextLazyClosedOverBinding() {
        MOZ_CRASH("SyntaxParseHandler::canSkipLazyClosedOverBindings must return false");
    }

    void adjustGetToSet(Node node) {}

    void disableSyntaxParser() {
    }
};

} // namespace frontend
} // namespace js

#endif /* frontend_SyntaxParseHandler_h */