Update Readability from mozilla-central release

2019-07-08 13:07:38 +03:00 · 2019-07-08 13:07:38 +03:00 · 4e76e90d29
parent 74f1a97751
commit 4e76e90d29
11 changed files with 417 additions and 332 deletions
--- a/application/basilisk/base/content/tab-content.js
+++ b/application/basilisk/base/content/tab-content.js
@ -23,6 +23,8 @@ XPCOMUtils.defineLazyModuleGetter(this, "AboutReader",
  "resource://gre/modules/AboutReader.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "ReaderMode",
  "resource://gre/modules/ReaderMode.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "Readerable",
+  "resource://gre/modules/Readerable.jsm");
 XPCOMUtils.defineLazyGetter(this, "SimpleServiceDiscovery", function() {
  let ssdp = Cu.import("resource://gre/modules/SimpleServiceDiscovery.jsm", {}).SimpleServiceDiscovery;
  // Register targets
@ -339,7 +341,7 @@ var AboutReaderListener = {
   * painted is not going to work.
   */
  updateReaderButton: function(forceNonArticle) {
-    if (!ReaderMode.isEnabledForParseOnLoad || this.isAboutReader ||
+    if (!Readerable.isEnabledForParseOnLoad || this.isAboutReader ||
        !content || !(content.document instanceof content.HTMLDocument) ||
        content.document.mozSyntheticDocument) {
      return;
@ -378,7 +380,7 @@ var AboutReaderListener = {
    this.cancelPotentialPendingReadabilityCheck();
    // Only send updates when there are articles; there's no point updating with
    // |false| all the time.
-    if (ReaderMode.isProbablyReaderable(content.document)) {
+    if (Readerable.isProbablyReaderable(content.document)) {
      sendAsyncMessage("Reader:UpdateReaderButton", { isArticle: true });
    } else if (forceNonArticle) {
      sendAsyncMessage("Reader:UpdateReaderButton", { isArticle: false });
--- a/toolkit/components/reader/AboutReader.jsm
+++ b/toolkit/components/reader/AboutReader.jsm
@ -58,6 +58,7 @@ var AboutReader = function(win, articlePromise) {

  this._scrollOffset = win.pageYOffset;

+  doc.addEventListener("mousedown", this);
  doc.addEventListener("click", this);

  win.addEventListener("pagehide", this);
@ -119,6 +120,25 @@ var AboutReader = function(win, articlePromise) {
  }

  this._loadArticle();
+
+  let dropdown = this._toolbarElement;
+
+  let elemL10nMap = {
+    ".minus-button": "minus",
+    ".plus-button": "plus",
+    ".content-width-minus-button": "contentwidthminus",
+    ".content-width-plus-button": "contentwidthplus",
+    ".line-height-minus-button": "lineheightminus",
+    ".line-height-plus-button": "lineheightplus",
+    ".light-button": "colorschemelight",
+    ".dark-button": "colorschemedark",
+    ".sepia-button": "colorschemesepia",
+  };
+
+  for (let [selector, stringID] of Object.entries(elemL10nMap)) {
+    dropdown.querySelector(selector).setAttribute("title",
+      gStrings.GetStringFromName("aboutReader.toolbar." + stringID));
+  }
 };

 AboutReader.prototype = {
@ -191,13 +211,16 @@ AboutReader.prototype = {
    if (!aEvent.isTrusted)
      return;

+    let target = aEvent.target;
    switch (aEvent.type) {
+      case "mousedown":
+        if (!target.closest(".dropdown-popup")) {
+          this._closeDropdowns();
+        }
+        break;
      case "click":
-        let target = aEvent.target;
        if (target.classList.contains("dropdown-toggle")) {
          this._toggleDropdownClicked(aEvent);
-        } else if (!target.closest(".dropdown-popup")) {
-          this._closeDropdowns();
        }
        break;
      case "scroll":
@ -276,13 +299,10 @@ AboutReader.prototype = {
  },

  _setFontSize(newFontSize) {
-    let containerClasses = this._containerElement.classList;
-
-    if (this._fontSize > 0)
-      containerClasses.remove("font-size" + this._fontSize);
-
    this._fontSize = newFontSize;
-    containerClasses.add("font-size" + this._fontSize);
+    let size = (10 + 2 * this._fontSize) + "px";
+
+    this._containerElement.style.setProperty("--font-size", size);
    return AsyncPrefs.set("reader.font_size", this._fontSize);
  },

--- a/toolkit/components/reader/JSDOMParser.js
+++ b/toolkit/components/reader/JSDOMParser.js
@ -691,7 +691,7 @@
              // the attribute value will be HTML escaped.
              var val = attr.value;
              var quote = (val.indexOf('"') === -1 ? '"' : "'");
-              arr.push(" " + attr.name + '=' + quote + val + quote);
+              arr.push(" " + attr.name + "=" + quote + val + quote);
            }

            if (child.localName in voidElems && !child.childNodes.length) {
@ -970,7 +970,7 @@
        strBuf.push(c);
        c = this.nextChar();
      }
-      var tag = strBuf.join('');
+      var tag = strBuf.join("");

      if (!tag)
        return false;
@ -981,7 +981,9 @@
      while (c !== "/" && c !== ">") {
        if (c === undefined)
          return false;
-        while (whitespace.indexOf(this.html[this.currentChar++]) != -1);
+        while (whitespace.indexOf(this.html[this.currentChar++]) != -1) {
+          // Advance cursor to first non-whitespace char.
+        }
        this.currentChar--;
        c = this.nextChar();
        if (c !== "/" && c !== ">") {
--- a/toolkit/components/reader/Readability-readerable.js
+++ b/toolkit/components/reader/Readability-readerable.js
@ -0,0 +1,104 @@
+/* eslint-env es6:false */
+/* globals exports */
+/*
+ * DO NOT MODIFY THIS FILE DIRECTLY!
+ *
+ * This is a shared library that is maintained in an external repo:
+ * https://github.com/mozilla/readability
+ */
+
+/*
+ * Copyright (c) 2010 Arc90 Inc
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This code is heavily based on Arc90's readability.js (1.7.1) script
+ * available at: http://code.google.com/p/arc90labs-readability
+ */
+
+var REGEXPS = {
+  // NOTE: These two regular expressions are duplicated in
+  // Readability.js. Please keep both copies in sync.
+  unlikelyCandidates: /-ad-|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,
+  okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
+};
+
+function isNodeVisible(node) {
+  // Have to null-check node.style to deal with SVG and MathML nodes.
+  return (!node.style || node.style.display != "none") && !node.hasAttribute("hidden");
+}
+
+/**
+ * Decides whether or not the document is reader-able without parsing the whole thing.
+ *
+ * @return boolean Whether or not we suspect Readability.parse() will succeed at returning an article object.
+ */
+function isProbablyReaderable(doc, isVisible) {
+  if (!isVisible) {
+    isVisible = isNodeVisible;
+  }
+
+  var nodes = doc.querySelectorAll("p, pre");
+
+  // Get <div> nodes which have <br> node(s) and append them into the `nodes` variable.
+  // Some articles' DOM structures might look like
+  // <div>
+  //   Sentences<br>
+  //   <br>
+  //   Sentences<br>
+  // </div>
+  var brNodes = doc.querySelectorAll("div > br");
+  if (brNodes.length) {
+    var set = new Set(nodes);
+    [].forEach.call(brNodes, function(node) {
+      set.add(node.parentNode);
+    });
+    nodes = Array.from(set);
+  }
+
+  var score = 0;
+  // This is a little cheeky: `some` stops at the first callback that returns true, so we
+  // use the running accumulator 'score' to decide what to return from this callback:
+  return [].some.call(nodes, function(node) {
+    if (!isVisible(node))
+      return false;
+
+    var matchString = node.className + " " + node.id;
+    if (REGEXPS.unlikelyCandidates.test(matchString) &&
+        !REGEXPS.okMaybeItsACandidate.test(matchString)) {
+      return false;
+    }
+
+    if (node.matches("li p")) {
+      return false;
+    }
+
+    var textContentLength = node.textContent.trim().length;
+    if (textContentLength < 140) {
+      return false;
+    }
+
+    score += Math.sqrt(textContentLength - 140);
+
+    if (score > 20) {
+      return true;
+    }
+    return false;
+  });
+}
+
+if (typeof exports === "object") {
+  exports.isProbablyReaderable = isProbablyReaderable;
+}
--- a/toolkit/components/reader/Readability.js
+++ b/toolkit/components/reader/Readability.js
@ -46,6 +46,7 @@ function Readability(doc, options) {
  this._articleTitle = null;
  this._articleByline = null;
  this._articleDir = null;
+  this._articleSiteName = null;
  this._attempts = [];

  // Configurable options
@ -118,15 +119,18 @@ Readability.prototype = {
  // All of the regular expressions in use within readability.
  // Defined up here so we don't instantiate them repeatedly in loops.
  REGEXPS: {
+    // NOTE: These two regular expressions are duplicated in
+    // Readability-readerable.js. Please keep both copies in sync.
    unlikelyCandidates: /-ad-|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,
    okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
+
    positive: /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,
    negative: /hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,
    extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,
    byline: /byline|author|dateline|writtenby|p-author/i,
    replaceFonts: /<(\/?)font[^>]*>/gi,
    normalize: /\s{2,}/g,
-    videos: /\/\/(www\.)?(dailymotion|youtube|youtube-nocookie|player\.vimeo)\.com/i,
+    videos: /\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i,
    nextLink: /(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i,
    prevLink: /(prev|earl|old|new|<|«)/i,
    whitespace: /^\s*$/,
@ -267,7 +271,7 @@ Readability.prototype = {

  _getAllNodesWithTag: function(node, tagNames) {
    if (node.querySelectorAll) {
-      return node.querySelectorAll(tagNames.join(','));
+      return node.querySelectorAll(tagNames.join(","));
    }
    return [].concat.apply([], tagNames.map(function(tag) {
      var collection = node.getElementsByTagName(tag);
@ -327,7 +331,7 @@ Readability.prototype = {
      return uri;
    }

-    var links = articleContent.getElementsByTagName("a");
+    var links = this._getAllNodesWithTag(articleContent, ["a"]);
    this._forEachNode(links, function(link) {
      var href = link.getAttribute("href");
      if (href) {
@ -342,7 +346,7 @@ Readability.prototype = {
      }
    });

-    var imgs = articleContent.getElementsByTagName("img");
+    var imgs = this._getAllNodesWithTag(articleContent, ["img"]);
    this._forEachNode(imgs, function(img) {
      var src = img.getAttribute("src");
      if (src) {
@ -366,7 +370,7 @@ Readability.prototype = {

      // If they had an element with id "title" in their HTML
      if (typeof curTitle !== "string")
-        curTitle = origTitle = this._getInnerText(doc.getElementsByTagName('title')[0]);
+        curTitle = origTitle = this._getInnerText(doc.getElementsByTagName("title")[0]);
    } catch (e) {/* ignore exceptions setting the title. */}

    var titleHadHierarchicalSeparators = false;
@ -377,18 +381,18 @@ Readability.prototype = {
    // If there's a separator in the title, first remove the final part
    if ((/ [\|\-\\\/>»] /).test(curTitle)) {
      titleHadHierarchicalSeparators = / [\\\/>»] /.test(curTitle);
-      curTitle = origTitle.replace(/(.*)[\|\-\\\/>»] .*/gi, '$1');
+      curTitle = origTitle.replace(/(.*)[\|\-\\\/>»] .*/gi, "$1");

      // If the resulting title is too short (3 words or fewer), remove
      // the first part instead:
      if (wordCount(curTitle) < 3)
-        curTitle = origTitle.replace(/[^\|\-\\\/>»]*[\|\-\\\/>»](.*)/gi, '$1');
-    } else if (curTitle.indexOf(': ') !== -1) {
+        curTitle = origTitle.replace(/[^\|\-\\\/>»]*[\|\-\\\/>»](.*)/gi, "$1");
+    } else if (curTitle.indexOf(": ") !== -1) {
      // Check if we have an heading containing this exact string, so we
      // could assume it's the full title.
      var headings = this._concatNodeLists(
-        doc.getElementsByTagName('h1'),
-        doc.getElementsByTagName('h2')
+        doc.getElementsByTagName("h1"),
+        doc.getElementsByTagName("h2")
      );
      var trimmedTitle = curTitle.trim();
      var match = this._someNode(headings, function(heading) {
@ -397,25 +401,25 @@ Readability.prototype = {

      // If we don't, let's extract the title out of the original title string.
      if (!match) {
-        curTitle = origTitle.substring(origTitle.lastIndexOf(':') + 1);
+        curTitle = origTitle.substring(origTitle.lastIndexOf(":") + 1);

        // If the title is now too short, try the first colon instead:
        if (wordCount(curTitle) < 3) {
-          curTitle = origTitle.substring(origTitle.indexOf(':') + 1);
+          curTitle = origTitle.substring(origTitle.indexOf(":") + 1);
          // But if we have too many words before the colon there's something weird
          // with the titles and the H tags so let's just use the original title instead
-        } else if (wordCount(origTitle.substr(0, origTitle.indexOf(':'))) > 5) {
+        } else if (wordCount(origTitle.substr(0, origTitle.indexOf(":"))) > 5) {
          curTitle = origTitle;
        }
      }
    } else if (curTitle.length > 150 || curTitle.length < 15) {
-      var hOnes = doc.getElementsByTagName('h1');
+      var hOnes = doc.getElementsByTagName("h1");

      if (hOnes.length === 1)
        curTitle = this._getInnerText(hOnes[0]);
    }

-    curTitle = curTitle.trim();
+    curTitle = curTitle.trim().replace(this.REGEXPS.normalize, " ");
    // If we now have 4 words or fewer as our title, and either no
    // 'hierarchical' separators (\, /, > or ») were found in the original
    // title or we decreased the number of words by more than 1 word, use
@ -505,7 +509,8 @@ Readability.prototype = {
              break;
          }

-          if (!this._isPhrasingContent(next)) break;
+          if (!this._isPhrasingContent(next))
+            break;

          // Otherwise, make this node a child of the new <p>.
          var sibling = next.nextSibling;
@ -513,9 +518,12 @@ Readability.prototype = {
          next = sibling;
        }

-        while (p.lastChild && this._isWhitespace(p.lastChild)) p.removeChild(p.lastChild);
+        while (p.lastChild && this._isWhitespace(p.lastChild)) {
+          p.removeChild(p.lastChild);
+        }

-        if (p.parentNode.tagName === "P") this._setNodeTag(p.parentNode, "DIV");
+        if (p.parentNode.tagName === "P")
+          this._setNodeTag(p.parentNode, "DIV");
      }
    });
  },
@ -576,7 +584,7 @@ Readability.prototype = {
    // If there is only one h2 and its text content substantially equals article title,
    // they are probably using it as a header and not a subheader,
    // so remove it since we already extract the title separately.
-    var h2 = articleContent.getElementsByTagName('h2');
+    var h2 = articleContent.getElementsByTagName("h2");
    if (h2.length === 1) {
      var lengthSimilarRate = (h2[0].textContent.length - this._articleTitle.length) / this._articleTitle.length;
      if (Math.abs(lengthSimilarRate) < 0.5) {
@ -606,12 +614,12 @@ Readability.prototype = {
    this._cleanConditionally(articleContent, "div");

    // Remove extra paragraphs
-    this._removeNodes(articleContent.getElementsByTagName('p'), function (paragraph) {
-      var imgCount = paragraph.getElementsByTagName('img').length;
-      var embedCount = paragraph.getElementsByTagName('embed').length;
-      var objectCount = paragraph.getElementsByTagName('object').length;
+    this._removeNodes(articleContent.getElementsByTagName("p"), function (paragraph) {
+      var imgCount = paragraph.getElementsByTagName("img").length;
+      var embedCount = paragraph.getElementsByTagName("embed").length;
+      var objectCount = paragraph.getElementsByTagName("object").length;
      // At this point, nasty iframes have been removed, only remain embedded video ones.
-      var iframeCount = paragraph.getElementsByTagName('iframe').length;
+      var iframeCount = paragraph.getElementsByTagName("iframe").length;
      var totalCount = imgCount + embedCount + objectCount + iframeCount;

      return totalCount === 0 && !this._getInnerText(paragraph, false);
@ -648,34 +656,34 @@ Readability.prototype = {
    node.readability = {"contentScore": 0};

    switch (node.tagName) {
-      case 'DIV':
+      case "DIV":
        node.readability.contentScore += 5;
        break;

-      case 'PRE':
-      case 'TD':
-      case 'BLOCKQUOTE':
+      case "PRE":
+      case "TD":
+      case "BLOCKQUOTE":
        node.readability.contentScore += 3;
        break;

-      case 'ADDRESS':
-      case 'OL':
-      case 'UL':
-      case 'DL':
-      case 'DD':
-      case 'DT':
-      case 'LI':
-      case 'FORM':
+      case "ADDRESS":
+      case "OL":
+      case "UL":
+      case "DL":
+      case "DD":
+      case "DT":
+      case "LI":
+      case "FORM":
        node.readability.contentScore -= 3;
        break;

-      case 'H1':
-      case 'H2':
-      case 'H3':
-      case 'H4':
-      case 'H5':
-      case 'H6':
-      case 'TH':
+      case "H1":
+      case "H2":
+      case "H3":
+      case "H4":
+      case "H5":
+      case "H6":
+      case "TH":
        node.readability.contentScore -= 5;
        break;
    }
@ -824,12 +832,14 @@ Readability.prototype = {
              if (p !== null) {
                p.appendChild(childNode);
              } else if (!this._isWhitespace(childNode)) {
-                p = doc.createElement('p');
+                p = doc.createElement("p");
                node.replaceChild(p, childNode);
                p.appendChild(childNode);
              }
            } else if (p !== null) {
-              while (p.lastChild && this._isWhitespace(p.lastChild)) p.removeChild(p.lastChild);
+              while (p.lastChild && this._isWhitespace(p.lastChild)) {
+                p.removeChild(p.lastChild);
+              }
              p = null;
            }
            childNode = nextSibling;
@ -860,7 +870,7 @@ Readability.prototype = {
      **/
      var candidates = [];
      this._forEachNode(elementsToScore, function(elementToScore) {
-        if (!elementToScore.parentNode || typeof(elementToScore.parentNode.tagName) === 'undefined')
+        if (!elementToScore.parentNode || typeof(elementToScore.parentNode.tagName) === "undefined")
          return;

        // If this paragraph is less than 25 characters, don't even count it.
@ -879,17 +889,17 @@ Readability.prototype = {
        contentScore += 1;

        // Add points for any commas within this paragraph.
-        contentScore += innerText.split(',').length;
+        contentScore += innerText.split(",").length;

        // For every 100 characters in this paragraph, add another point. Up to 3 points.
        contentScore += Math.min(Math.floor(innerText.length / 100), 3);

        // Initialize and score ancestors.
        this._forEachNode(ancestors, function(ancestor, level) {
-          if (!ancestor.tagName || !ancestor.parentNode || typeof(ancestor.parentNode.tagName) === 'undefined')
+          if (!ancestor.tagName || !ancestor.parentNode || typeof(ancestor.parentNode.tagName) === "undefined")
            return;

-          if (typeof(ancestor.readability) === 'undefined') {
+          if (typeof(ancestor.readability) === "undefined") {
            this._initializeNode(ancestor);
            candidates.push(ancestor);
          }
@ -920,7 +930,7 @@ Readability.prototype = {
        var candidateScore = candidate.readability.contentScore * (1 - this._getLinkDensity(candidate));
        candidate.readability.contentScore = candidateScore;

-        this.log('Candidate:', candidate, "with score " + candidateScore);
+        this.log("Candidate:", candidate, "with score " + candidateScore);

        for (var t = 0; t < this._nbTopCandidates; t++) {
          var aTopCandidate = topCandidates[t];
@ -1039,8 +1049,8 @@ Readability.prototype = {
        var sibling = siblings[s];
        var append = false;

-        this.log("Looking at sibling node:", sibling, sibling.readability ? ("with score " + sibling.readability.contentScore) : '');
-        this.log("Sibling has score", sibling.readability ? sibling.readability.contentScore : 'Unknown');
+        this.log("Looking at sibling node:", sibling, sibling.readability ? ("with score " + sibling.readability.contentScore) : "");
+        this.log("Sibling has score", sibling.readability ? sibling.readability.contentScore : "Unknown");

        if (sibling === topCandidate) {
          append = true;
@ -1074,7 +1084,7 @@ Readability.prototype = {
          if (this.ALTER_TO_DIV_EXCEPTIONS.indexOf(sibling.nodeName) === -1) {
            // We have a node that isn't a common block level element, like a form or td tag.
            // Turn it into a div so it doesn't get filtered out later by accident.
-            this.log("Altering sibling:", sibling, 'to div.');
+            this.log("Altering sibling:", sibling, "to div.");

            sibling = this._setNodeTag(sibling, "DIV");
          }
@ -1142,7 +1152,7 @@ Readability.prototype = {
          this._attempts.push({articleContent: articleContent, textLength: textLength});
          // No luck after removing flags, just return the longest text we found during the different loops
          this._attempts.sort(function (a, b) {
-            return a.textLength < b.textLength;
+            return b.textLength - a.textLength;
          });

          // But first check if we actually have something
@ -1182,7 +1192,7 @@ Readability.prototype = {
   * @return Boolean - whether the input string is a byline.
   */
  _isValidByline: function(byline) {
-    if (typeof byline == 'string' || byline instanceof String) {
+    if (typeof byline == "string" || byline instanceof String) {
      byline = byline.trim();
      return (byline.length > 0) && (byline.length < 100);
    }
@ -1199,61 +1209,72 @@ Readability.prototype = {
    var values = {};
    var metaElements = this._doc.getElementsByTagName("meta");

-    // Match "description", or Twitter's "twitter:description" (Cards)
-    // in name attribute.
-    var namePattern = /^\s*((twitter)\s*:\s*)?(description|title)\s*$/gi;
+    // property is a space-separated list of values
+    var propertyPattern = /\s*(dc|dcterm|og|twitter)\s*:\s*(author|creator|description|title|site_name)\s*/gi;

-    // Match Facebook's Open Graph title & description properties.
-    var propertyPattern = /^\s*og\s*:\s*(description|title)\s*$/gi;
+    // name is a single value
+    var namePattern = /^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|site_name)\s*$/i;

    // Find description tags.
    this._forEachNode(metaElements, function(element) {
      var elementName = element.getAttribute("name");
      var elementProperty = element.getAttribute("property");
-
-      if ([elementName, elementProperty].indexOf("author") !== -1) {
-        metadata.byline = element.getAttribute("content");
-        return;
-      }
-
+      var content = element.getAttribute("content");
+      var matches = null;
      var name = null;
-      if (namePattern.test(elementName)) {
-        name = elementName;
-      } else if (propertyPattern.test(elementProperty)) {
-        name = elementProperty;
-      }

-      if (name) {
-        var content = element.getAttribute("content");
+      if (elementProperty) {
+        matches = elementProperty.match(propertyPattern);
+        if (matches) {
+          for (var i = matches.length - 1; i >= 0; i--) {
+            // Convert to lowercase, and remove any whitespace
+            // so we can match below.
+            name = matches[i].toLowerCase().replace(/\s/g, "");
+            // multiple authors
+            values[name] = content.trim();
+          }
+        }
+      }
+      if (!matches && elementName && namePattern.test(elementName)) {
+        name = elementName;
        if (content) {
-          // Convert to lowercase and remove any whitespace
-          // so we can match below.
-          name = name.toLowerCase().replace(/\s/g, '');
+          // Convert to lowercase, remove any whitespace, and convert dots
+          // to colons so we can match below.
+          name = name.toLowerCase().replace(/\s/g, "").replace(/\./g, ":");
          values[name] = content.trim();
        }
      }
    });

-    if ("description" in values) {
-      metadata.excerpt = values["description"];
-    } else if ("og:description" in values) {
-      // Use facebook open graph description.
-      metadata.excerpt = values["og:description"];
-    } else if ("twitter:description" in values) {
-      // Use twitter cards description.
-      metadata.excerpt = values["twitter:description"];
+    // get title
+    metadata.title = values["dc:title"] ||
+                     values["dcterm:title"] ||
+                     values["og:title"] ||
+                     values["weibo:article:title"] ||
+                     values["weibo:webpage:title"] ||
+                     values["title"] ||
+                     values["twitter:title"];
+
+    if (!metadata.title) {
+      metadata.title = this._getArticleTitle();
    }

-    metadata.title = this._getArticleTitle();
-    if (!metadata.title) {
-      if ("og:title" in values) {
-        // Use facebook open graph title.
-        metadata.title = values["og:title"];
-      } else if ("twitter:title" in values) {
-        // Use twitter cards title.
-        metadata.title = values["twitter:title"];
-      }
-    }
+    // get author
+    metadata.byline = values["dc:creator"] ||
+                      values["dcterm:creator"] ||
+                      values["author"];
+
+    // get description
+    metadata.excerpt = values["dc:description"] ||
+                       values["dcterm:description"] ||
+                       values["og:description"] ||
+                       values["weibo:article:description"] ||
+                       values["weibo:webpage:description"] ||
+                       values["description"] ||
+                       values["twitter:description"];
+
+    // get site name
+    metadata.siteName = values["og:site_name"];

    return metadata;
  },
@ -1264,12 +1285,12 @@ Readability.prototype = {
   * @param Element
  **/
  _removeScripts: function(doc) {
-    this._removeNodes(doc.getElementsByTagName('script'), function(scriptNode) {
+    this._removeNodes(doc.getElementsByTagName("script"), function(scriptNode) {
      scriptNode.nodeValue = "";
-      scriptNode.removeAttribute('src');
+      scriptNode.removeAttribute("src");
      return true;
    });
-    this._removeNodes(doc.getElementsByTagName('noscript'));
+    this._removeNodes(doc.getElementsByTagName("noscript"));
  },

  /**
@ -1336,7 +1357,7 @@ Readability.prototype = {
   * @return string
  **/
  _getInnerText: function(e, normalizeSpaces) {
-    normalizeSpaces = (typeof normalizeSpaces === 'undefined') ? true : normalizeSpaces;
+    normalizeSpaces = (typeof normalizeSpaces === "undefined") ? true : normalizeSpaces;
    var textContent = e.textContent.trim();

    if (normalizeSpaces) {
@ -1365,7 +1386,7 @@ Readability.prototype = {
   * @return void
  **/
  _cleanStyles: function(e) {
-    if (!e || e.tagName.toLowerCase() === 'svg')
+    if (!e || e.tagName.toLowerCase() === "svg")
      return;

    // Remove `style` and deprecated presentational attributes
@ -1374,8 +1395,8 @@ Readability.prototype = {
    }

    if (this.DEPRECATED_SIZE_ATTRIBUTE_ELEMS.indexOf(e.tagName) !== -1) {
-      e.removeAttribute('width');
-      e.removeAttribute('height');
+      e.removeAttribute("width");
+      e.removeAttribute("height");
    }

    var cur = e.firstElementChild;
@ -1421,7 +1442,7 @@ Readability.prototype = {
    var weight = 0;

    // Look for a special classname
-    if (typeof(e.className) === 'string' && e.className !== '') {
+    if (typeof(e.className) === "string" && e.className !== "") {
      if (this.REGEXPS.negative.test(e.className))
        weight -= 25;

@ -1430,7 +1451,7 @@ Readability.prototype = {
    }

    // Look for a special ID
-    if (typeof(e.id) === 'string' && e.id !== '') {
+    if (typeof(e.id) === "string" && e.id !== "") {
      if (this.REGEXPS.negative.test(e.id))
        weight -= 25;

@ -1619,7 +1640,7 @@ Readability.prototype = {
        return true;
      }

-      if (this._getCharCount(node, ',') < 10) {
+      if (this._getCharCount(node, ",") < 10) {
        // If there are not very many commas, and the number of
        // non-paragraph elements is more than paragraphs or other
        // ominous signs, remove the element.
@ -1679,7 +1700,7 @@ Readability.prototype = {
  **/
  _cleanHeaders: function(e) {
    for (var headerIndex = 1; headerIndex < 3; headerIndex += 1) {
-      this._removeNodes(e.getElementsByTagName('h' + headerIndex), function (header) {
+      this._removeNodes(e.getElementsByTagName("h" + headerIndex), function (header) {
        return this._getClassWeight(header) < 0;
      });
    }
@ -1694,66 +1715,7 @@ Readability.prototype = {
  },

  _isProbablyVisible: function(node) {
-    return node.style.display != "none" && !node.hasAttribute("hidden");
-  },
-
-  /**
-   * Decides whether or not the document is reader-able without parsing the whole thing.
-   *
-   * @return boolean Whether or not we suspect parse() will suceeed at returning an article object.
-   */
-  isProbablyReaderable: function(helperIsVisible) {
-    var nodes = this._getAllNodesWithTag(this._doc, ["p", "pre"]);
-
-    // Get <div> nodes which have <br> node(s) and append them into the `nodes` variable.
-    // Some articles' DOM structures might look like
-    // <div>
-    //   Sentences<br>
-    //   <br>
-    //   Sentences<br>
-    // </div>
-    var brNodes = this._getAllNodesWithTag(this._doc, ["div > br"]);
-    if (brNodes.length) {
-      var set = new Set();
-      [].forEach.call(brNodes, function(node) {
-        set.add(node.parentNode);
-      });
-      nodes = [].concat.apply(Array.from(set), nodes);
-    }
-
-    if (!helperIsVisible) {
-      helperIsVisible = this._isProbablyVisible;
-    }
-
-    var score = 0;
-    // This is a little cheeky, we use the accumulator 'score' to decide what to return from
-    // this callback:
-    return this._someNode(nodes, function(node) {
-      if (helperIsVisible && !helperIsVisible(node))
-        return false;
-      var matchString = node.className + " " + node.id;
-
-      if (this.REGEXPS.unlikelyCandidates.test(matchString) &&
-          !this.REGEXPS.okMaybeItsACandidate.test(matchString)) {
-        return false;
-      }
-
-      if (node.matches && node.matches("li p")) {
-        return false;
-      }
-
-      var textContentLength = node.textContent.trim().length;
-      if (textContentLength < 140) {
-        return false;
-      }
-
-      score += Math.sqrt(textContentLength - 140);
-
-      if (score > 20) {
-        return true;
-      }
-      return false;
-    });
+    return (!node.style || node.style.display != "none") && !node.hasAttribute("hidden");
  },

  /**
@ -1812,6 +1774,7 @@ Readability.prototype = {
      textContent: textContent,
      length: textContent.length,
      excerpt: metadata.excerpt,
+      siteName: metadata.siteName || this._articleSiteName
    };
  }
 };
--- a/toolkit/components/reader/ReaderMode.jsm
+++ b/toolkit/components/reader/ReaderMode.jsm
@ -12,6 +12,7 @@ const { classes: Cc, interfaces: Ci, utils: Cu } = Components;
 // names so that rules in aboutReader.css can match them.
 const CLASSES_TO_PRESERVE = [
  "caption",
+  "emoji",
  "hidden",
  "invisble",
  "sr-only",
@ -19,6 +20,7 @@ const CLASSES_TO_PRESERVE = [
  "visuallyhidden",
  "wp-caption",
  "wp-caption-text",
+  "wp-smiley",
 ];

 Cu.import("resource://gre/modules/Services.jsm");
@ -30,13 +32,7 @@ XPCOMUtils.defineLazyModuleGetter(this, "CommonUtils", "resource://services-comm
 XPCOMUtils.defineLazyModuleGetter(this, "EventDispatcher", "resource://gre/modules/Messaging.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "OS", "resource://gre/modules/osfile.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "ReaderWorker", "resource://gre/modules/reader/ReaderWorker.jsm");
-
-XPCOMUtils.defineLazyGetter(this, "Readability", function() {
-  let scope = {};
-  scope.dump = this.dump;
-  Services.scriptloader.loadSubScript("resource://gre/modules/reader/Readability.js", scope);
-  return scope.Readability;
-});
+XPCOMUtils.defineLazyModuleGetter(this, "Readerable", "resource://gre/modules/Readerable.jsm");

 this.ReaderMode = {
  // Version of the cache schema.
@ -44,42 +40,6 @@ this.ReaderMode = {

  DEBUG: 0,

-  // Don't try to parse the page if it has too many elements (for memory and
-  // performance reasons)
-  get maxElemsToParse() {
-    delete this.parseNodeLimit;
-
-    Services.prefs.addObserver("reader.parse-node-limit", this, false);
-    return this.parseNodeLimit = Services.prefs.getIntPref("reader.parse-node-limit");
-  },
-
-  get isEnabledForParseOnLoad() {
-    delete this.isEnabledForParseOnLoad;
-
-    // Listen for future pref changes.
-    Services.prefs.addObserver("reader.parse-on-load.", this, false);
-
-    return this.isEnabledForParseOnLoad = this._getStateForParseOnLoad();
-  },
-
-  _getStateForParseOnLoad() {
-    let isEnabled = Services.prefs.getBoolPref("reader.parse-on-load.enabled");
-    let isForceEnabled = Services.prefs.getBoolPref("reader.parse-on-load.force-enabled");
-    return isForceEnabled || isEnabled;
-  },
-
-  observe(aMessage, aTopic, aData) {
-    switch (aTopic) {
-      case "nsPref:changed":
-        if (aData.startsWith("reader.parse-on-load.")) {
-          this.isEnabledForParseOnLoad = this._getStateForParseOnLoad();
-        } else if (aData === "reader.parse-node-limit") {
-          this.parseNodeLimit = Services.prefs.getIntPref(aData);
-        }
-        break;
-    }
-  },
-
  /**
   * Enter the reader mode by going forward one step in history if applicable,
   * if not, append the about:reader page in the history instead.
@ -174,39 +134,6 @@ this.ReaderMode = {
    return null;
  },

-  /**
-   * Decides whether or not a document is reader-able without parsing the whole thing.
-   *
-   * @param doc A document to parse.
-   * @return boolean Whether or not we should show the reader mode button.
-   */
-  isProbablyReaderable(doc) {
-    // Only care about 'real' HTML documents:
-    if (doc.mozSyntheticDocument || !(doc instanceof doc.defaultView.HTMLDocument)) {
-      return false;
-    }
-
-    let uri = Services.io.newURI(doc.location.href);
-    if (!this._shouldCheckUri(uri)) {
-      return false;
-    }
-
-    let utils = this.getUtilsForWin(doc.defaultView);
-    // We pass in a helper function to determine if a node is visible, because
-    // it uses gecko APIs that the engine-agnostic readability code can't rely
-    // upon.
-    return new Readability(doc).isProbablyReaderable(this.isNodeVisible.bind(this, utils));
-  },
-
-  isNodeVisible(utils, node) {
-    let bounds = utils.getBoundsWithoutFlushing(node);
-    return bounds.height > 0 && bounds.width > 0;
-  },
-
-  getUtilsForWin(win) {
-    return win.QueryInterface(Ci.nsIInterfaceRequestor).getInterface(Ci.nsIDOMWindowUtils);
-  },
-
  /**
   * Gets an article from a loaded browser's document. This method will not attempt
   * to parse certain URIs (e.g. about: URIs).
@ -216,7 +143,8 @@ this.ReaderMode = {
   * @resolves JS object representing the article, or null if no article is found.
   */
  parseDocument(doc) {
-    if (!this._shouldCheckUri(doc.documentURIObject) || !this._shouldCheckUri(doc.baseURIObject, true)) {
+    if (!Readerable.shouldCheckUri(doc.documentURIObject) ||
+        !Readerable.shouldCheckUri(doc.baseURIObject, true)) {
      this.log("Reader mode disabled for URI");
      return null;
    }
@ -236,7 +164,8 @@ this.ReaderMode = {
    if (!doc) {
      return null;
    }
-    if (!this._shouldCheckUri(doc.documentURIObject) || !this._shouldCheckUri(doc.baseURIObject, true)) {
+    if (!Readerable.shouldCheckUri(doc.documentURIObject) ||
+        !Readerable.shouldCheckUri(doc.baseURIObject, true)) {
      this.log("Reader mode disabled for URI");
      return null;
    }
@ -246,7 +175,7 @@ this.ReaderMode = {

  _downloadDocument(url) {
    try {
-      if (!this._shouldCheckUri(Services.io.newURI(url))) {
+      if (!Readerable.shouldCheckUri(Services.io.newURI(url))) {
        return null;
      }
    } catch (ex) {
@ -388,44 +317,6 @@ this.ReaderMode = {
      dump("Reader: " + msg);
  },

-  _blockedHosts: [
-    "amazon.com",
-    "basilisk-browser.org",
-    "github.com",
-    "mail.google.com",
-    "palemoon.org",
-    "pinterest.com",
-    "reddit.com",
-    "twitter.com",
-    "youtube.com",
-  ],
-
-  _shouldCheckUri(uri, isBaseUri = false) {
-    if (!(uri.schemeIs("http") || uri.schemeIs("https"))) {
-      this.log("Not parsing URI scheme: " + uri.scheme);
-      return false;
-    }
-
-    try {
-      uri.QueryInterface(Ci.nsIURL);
-    } catch (ex) {
-      // If this doesn't work, presumably the URL is not well-formed or something
-      return false;
-    }
-    // Sadly, some high-profile pages have false positives, so bail early for those:
-    let asciiHost = uri.asciiHost;
-    if (!isBaseUri && this._blockedHosts.some(blockedHost => asciiHost.endsWith(blockedHost))) {
-      return false;
-    }
-
-    if (!isBaseUri && (!uri.filePath || uri.filePath == "/")) {
-      this.log("Not parsing home page: " + uri.spec);
-      return false;
-    }
-
-    return true;
-  },
-
  /**
   * Attempts to parse a document into an article. Heavy lifting happens
   * in readerWorker.js.
@ -650,3 +541,8 @@ this.ReaderMode = {
    return readingSpeed.get(lang) || readingSpeed.get("en");
  },
 };
+
+// Don't try to parse the page if it has too many elements (for memory and
+// performance reasons). The limit is exposed as ReaderMode.parseNodeLimit and
+// is read lazily from the "reader.parse-node-limit" pref, with 0 passed as
+// the default value.
+// NOTE(review): presumably 0 means "no limit" -- confirm against the code
+// that consumes ReaderMode.parseNodeLimit.
+XPCOMUtils.defineLazyPreferenceGetter(
+  ReaderMode, "parseNodeLimit", "reader.parse-node-limit", 0);
--- a/toolkit/components/reader/Readerable.js
+++ b/toolkit/components/reader/Readerable.js
@ -0,0 +1,96 @@
+// -*- indent-tabs-mode: nil; js-indent-level: 2 -*-
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+"use strict";
+
+// This file and Readability-readerable.js are merged together into
+// Readerable.jsm.
+
+/* exported Readerable */
+/* import-globals-from Readability-readerable.js */
+
+const { classes: Cc, interfaces: Ci, utils: Cu } = Components;
+
+Cu.import("resource://gre/modules/Services.jsm");
+Cu.import("resource://gre/modules/XPCOMUtils.jsm");
+
+/**
+ * Visibility helper passed to isProbablyReaderable(): a node counts as
+ * visible only when both its clientHeight and its clientWidth are above 0.
+ *
+ * @param node The node to test; must expose clientHeight and clientWidth.
+ * @return boolean Whether the node has a non-empty client box.
+ */
+function isNodeVisible(node) {
+  return node.clientHeight > 0 && node.clientWidth > 0;
+}
+
+/**
+ * Gecko-side wrapper around the isProbablyReaderable() function from
+ * Readability-readerable.js (both files are merged into Readerable.jsm).
+ * The isEnabled / isForceEnabled properties read below are attached to this
+ * object by the defineLazyPreferenceGetter() calls at the end of this file.
+ */
+var Readerable = {
+  // Set to a truthy value to make log() write its messages through dump().
+  DEBUG: 0,
+
+  /**
+   * @return Truthy when either of the reader.parse-on-load.* prefs is set.
+   */
+  get isEnabledForParseOnLoad() {
+    return this.isEnabled || this.isForceEnabled;
+  },
+
+  /**
+   * Writes a debug message through dump() when DEBUG is truthy.
+   *
+   * @param msg The message to log.
+   */
+  log(msg) {
+    if (this.DEBUG) {
+      dump("Reader: " + msg);
+    }
+  },
+
+  /**
+   * Decides whether or not a document is reader-able without parsing the whole thing.
+   *
+   * @param doc A document to parse.
+   * @return boolean Whether or not we should show the reader mode button.
+   */
+  isProbablyReaderable(doc) {
+    // Only care about 'real' HTML documents:
+    if (doc.mozSyntheticDocument || !(doc instanceof doc.defaultView.HTMLDocument)) {
+      return false;
+    }
+
+    // Skip URIs that are never offered reader mode (see shouldCheckUri).
+    let uri = Services.io.newURI(doc.location.href);
+    if (!this.shouldCheckUri(uri)) {
+      return false;
+    }
+
+    // Delegate to the shared heuristic, using our own visibility helper.
+    return isProbablyReaderable(doc, isNodeVisible);
+  },
+
+  // Hosts that are never checked; subdomains of these hosts are skipped too.
+  _blockedHosts: [
+    "amazon.com",
+    "basilisk-browser.org",
+    "github.com",
+    "mail.google.com",
+    "palemoon.org",
+    "pinterest.com",
+    "reddit.com",
+    "twitter.com",
+    "youtube.com",
+  ],
+
+  /**
+   * Decides whether a URI is eligible for the reader mode checks.
+   *
+   * @param uri The nsIURI to test.
+   * @param isBaseUri When true, the blocked-host and home-page checks are
+   *                  skipped; only the scheme and nsIURL checks apply.
+   * @return boolean False for non-http(s) URIs, URIs that cannot be queried
+   *                 as nsIURL, blocked hosts and site home pages.
+   */
+  shouldCheckUri(uri, isBaseUri = false) {
+    if (!(uri.schemeIs("http") || uri.schemeIs("https"))) {
+      this.log("Not parsing URI scheme: " + uri.scheme);
+      return false;
+    }
+
+    try {
+      uri.QueryInterface(Ci.nsIURL);
+    } catch (ex) {
+      // If this doesn't work, presumably the URL is not well-formed or something
+      return false;
+    }
+    // Sadly, some high-profile pages have false positives, so bail early for those.
+    // Match on a domain-label boundary (the blocked host itself or one of its
+    // subdomains) so that an unrelated host such as "notgithub.com" is not
+    // caught by a plain string-suffix comparison.
+    let asciiHost = uri.asciiHost;
+    if (!isBaseUri && this._blockedHosts.some(blockedHost =>
+          asciiHost === blockedHost || asciiHost.endsWith("." + blockedHost))) {
+      return false;
+    }
+
+    if (!isBaseUri && (!uri.filePath || uri.filePath == "/")) {
+      this.log("Not parsing home page: " + uri.spec);
+      return false;
+    }
+
+    return true;
+  },
+};
+
+// Pref-backed properties read by the Readerable.isEnabledForParseOnLoad
+// getter. Each one is defined lazily on the Readerable object; the last
+// argument is the default value handed to defineLazyPreferenceGetter().
+XPCOMUtils.defineLazyPreferenceGetter(
+  Readerable, "isEnabled", "reader.parse-on-load.enabled", true);
+XPCOMUtils.defineLazyPreferenceGetter(
+  Readerable, "isForceEnabled", "reader.parse-on-load.force-enabled", false);
--- a/toolkit/components/reader/Readerable.jsm
+++ b/toolkit/components/reader/Readerable.jsm
@ -0,0 +1,10 @@
+// -*- indent-tabs-mode: nil; js-indent-level: 2 -*-
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+"use strict";
+
+var EXPORTED_SYMBOLS = ["Readerable"];
+
+#include Readability-readerable.js
+#include Readerable.js
--- a/toolkit/components/reader/moz.build
+++ b/toolkit/components/reader/moz.build
@ -11,6 +11,10 @@ EXTRA_JS_MODULES += [
  'ReaderMode.jsm'
 ]

+EXTRA_PP_JS_MODULES += [
+  'Readerable.jsm'
+]
+
 EXTRA_JS_MODULES.reader = [
  'JSDOMParser.js',
  'Readability.js',
--- a/toolkit/locales/en-US/chrome/global/aboutReader.properties
+++ b/toolkit/locales/en-US/chrome/global/aboutReader.properties
@ -46,3 +46,14 @@ readerView.enter=Enter Reader View
 readerView.enter.accesskey=R
 readerView.close=Close Reader View
 readerView.close.accesskey=R
+
+# These are used as tooltips in Type Control
+aboutReader.toolbar.minus = Decrease Font Size
+aboutReader.toolbar.plus = Increase Font Size
+aboutReader.toolbar.contentwidthminus = Decrease Content Width
+aboutReader.toolbar.contentwidthplus = Increase Content Width
+aboutReader.toolbar.lineheightminus = Decrease Line Height
+aboutReader.toolbar.lineheightplus = Increase Line Height
+aboutReader.toolbar.colorschemelight = Color Scheme Light
+aboutReader.toolbar.colorschemedark = Color Scheme Dark
+aboutReader.toolbar.colorschemesepia = Color Scheme Sepia
--- a/toolkit/themes/shared/aboutReader.css
+++ b/toolkit/themes/shared/aboutReader.css
@ -47,44 +47,10 @@ body.serif .remove-button  {
 }

 .container {
+  /* Default text size. It needs a unit: a bare number would make the
+     font-size declaration below invalid at computed-value time.
+     NOTE(review): presumably overridden at runtime by AboutReader.jsm -- confirm. */
+  --font-size: 12px;
  max-width: 30em;
  margin: 0 auto;
-}
-
-.container.font-size1 {
-  font-size: 12px;
-}
-
-.container.font-size2 {
-  font-size: 14px;
-}
-
-.container.font-size3 {
-  font-size: 16px;
-}
-
-.container.font-size4  {
-  font-size: 18px;
-}
-
-.container.font-size5 {
-  font-size: 20px;
-}
-
-.container.font-size6 {
-  font-size: 22px;
-}
-
-.container.font-size7 {
-  font-size: 24px;
-}
-
-.container.font-size8 {
-  font-size: 26px;
-}
-
-.container.font-size9 {
-  font-size: 28px;
+  font-size: var(--font-size);
 }

 .container.content-width1 {
@ -738,3 +704,14 @@ body:not(.loaded) .toolbar:-moz-locale-dir(rtl) {
 .moz-reader-content .sr-only {
  display: none;
 }
+
+/* Ensure WordPress (and similar) emoji/smiley images aren't sized to be full-width */
+.moz-reader-content img.wp-smiley,
+.moz-reader-content img.emoji {
+  display: inline-block;
+  border-width: 0;
+  /* height: auto is implied from `.moz-reader-content *` rule. */
+  width: 1em;
+  margin: 0 .07em;
+  padding: 0;
+}