===================== Price Tracker Ruleset ===================== .. code-block:: js :linenos: /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ import {dom, out, rule, ruleset, score, type} from 'fathom-web'; import {ancestors} from 'fathom-web/utilsForFrontend'; import {euclidean} from 'fathom-web/clusters'; const TOP_BUFFER = 150; // From: https://github.com/mozilla/fathom-trainees/blob/master/src/trainees.js /** * Creates Fathom ruleset instances, and holds individual rule methods for * easier testing. */ export default class RulesetFactory { /** Scores fnode in direct proportion to its size */ isBig(fnode) { const domRect = fnode.element.getBoundingClientRect(); const area = domRect.width * domRect.height; // Assume no product images as small as 80px^2. No further bonus over // 1000^2. For one thing, that's getting into background image territory // (though we should have distinct penalties for that sort of thing if we // care). More importantly, clamp the upper bound of the score so we don't // overcome other bonuses and penalties. return linearScale(area, 80 ** 2, 1000 ** 2); } /** Return whether the computed font size of an element is big. */ fontIsBig(fnode) { const size = parseInt(getComputedStyle(fnode.element).fontSize, 10); return linearScale(size, 14, 50); } /** Scores fnode with a '$' in its innerText */ hasDollarSign(fnode) { return (fnode.element.innerText.includes('$') ? 1 : 0); } /** * Return whether some substring is within a given string, case * insensitively. */ caselessIncludes(haystack, needle) { return haystack.toLowerCase().includes(needle); } /** * Return a weighted confidence of whether a substring is within a given * string, case insensitively. */ stringIncludes(haystack, needle) { return (this.caselessIncludes(haystack, needle) ? 1 : 0); } /** * Punish elements with "background" in their ID. Do nothing to those without. */ hasBackgroundInID(fnode) { return this.caselessIncludes(fnode.element.id, 'background') ? 0 : 1; } /** Scores fnode with 'price' in its id */ hasPriceInID(fnode) { return this.stringIncludes(fnode.element.id, 'price'); } hasPriceInParentID(fnode) { return this.stringIncludes(fnode.element.parentElement.id, 'price'); } /** Scores fnode with 'price' in its class name */ hasPriceInClassName(fnode) { return this.stringIncludes(fnode.element.className, 'price'); } /** Scores fnode with 'price' in its parent's class name */ hasPriceInParentClassName(fnode) { return this.stringIncludes(fnode.element.parentElement.className, 'price'); } /** Scores fnode by its vertical location relative to the fold */ isAboveTheFold(fnode) { const viewportHeight = 950; const imageTop = fnode.element.getBoundingClientRect().top; // Stop giving additional bonus for anything closer than 200px to the top // of the viewport. Those are probably usually headers. return linearScale(imageTop, viewportHeight * 2, 200); } /** * Return whether the centerpoint of the element is near that of the highest- * scoring image. */ isNearImage(fnode) { const imageFnode = this.getHighestScoringImage(fnode); return linearScale(euclidean(fnode, imageFnode), 1000, 0); } /** * Return whether the potential title is near the top or bottom of the * highest-scoring image. * * This is a makeshift ORing of 2 signals: a "near the top" and a "near the * bottom" one. */ isNearImageTopOrBottom(fnode) { const imageElement = this.getHighestScoringImage(fnode).element; const imageRect = imageElement.getBoundingClientRect(); const nodeRect = fnode.element.getBoundingClientRect(); // Should cover title above image and title in a column next to image. // Could also consider using the y-axis midpoint of title. const topDistance = Math.abs(imageRect.top - nodeRect.top); // Test nodeRect.top. They're probably not side by side with the title at // the bottom. Rather, title will be below image. const bottomDistance = Math.abs(imageRect.bottom - nodeRect.top); const shortestDistance = Math.min(topDistance, bottomDistance); return linearScale(shortestDistance, 200, 0); } /** * Return whether the fnode's innertext contains a dollars-and-cents number. */ hasPriceishPattern(fnode) { const text = fnode.element.innerText; /** * With an optional '$' that doesn't necessarily have to be at the beginning * of the string (ex: 'US $5.00' on Ebay), matches any number of digits before * a decimal point and exactly two after. */ const regExp = /\$?\d+\.\d{2}(?![0-9])/; return (regExp.test(text) ? 1 : 0); } /** Checks to see if a 'price' fnode is eligible for scoring */ isEligiblePrice(fnode) { return ( this.isVisible(fnode) && this.hasDifferentInnerTextThanChildren(fnode) && this.isNearbyImageYAxisPrice(fnode) ); } /** Checks to see if a 'title' fnode is eligible for scoring */ isEligibleTitle(fnode) { return ( this.isVisible(fnode) // Don't use hasDifferentInnerTextThanChildren, because