Example Ruleset

This is the simple example ruleset that ships with FathomFox; it is made available for experimentation when you run ``fathom fox`` without passing in your own ruleset. In its comments, it documents the structure of the ``trainees`` object, which is what the trainer needs to do its job.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import {ruleset, rule, dom, type, score, out, utils} from 'fathom-web';
const {ancestors, isVisible, linearScale, rgbaFromString, saturation} = utils;


/**
 * Rulesets to vectorize or debug (and metadata about them)
 *
 * More mechanically, a Map whose keys are trainee IDs and whose values are
 * objects we call "trainees". As the example entry in this file shows, a
 * trainee carries `coeffs`, `viewportSize`, and `rulesetMaker`, and may also
 * carry the optional `vectorType` and `isTarget` properties.
 *
 * The rulesets you specify here are available to the trainer and also show up
 * in the FathomFox UI, from which you can debug a ruleset. Most often, all the
 * entries here point to the same ruleset but have different values of
 * `vectorType` for separately training each type of thing the ruleset
 * recognizes.
 *
 * This Map is the module's default export.
 */
const trainees = new Map();

/**
 * An example ruleset. Replace it with your own.
 *
 * This one finds the full-screen, content-blocking overlays that often go
 * behind modal popups. It's not the most well-honed thing, but it's simple and
 * short.
 */
trainees.set(
    // The ID for this trainee, which must be the same as the Fathom type you
    // are evaluating, if you are using the FathomFox Evaluator:
    'overlay',

    // Here we paste in coefficients from ``fathom train``. This lets us use
    // the Evaluator to see what Fathom is getting wrong. Otherwise, these
    // numbers do nothing until you deploy your application, so there's no need
    // to maintain them until then.
    {coeffs: new Map([  // [rule name, coefficient]
        ['big', 50.4946],
        ['nearlyOpaque', 48.6396],
        ['monochrome', 42.8406],
        ['classOrId', 0.5005],
        ['visible', 55.8750]]),
     // Bias is -139.3106 for this example, though that isn't needed until
     // production.

     // The content-area size to use while training. Defaults to 1024x768.
     viewportSize: {width: 1024, height: 768},

     // The type of node to extract features from when using the Vectorizer.
     // Defaults to the trainee ID.
     //
     // vectorType: 'overlay',

     rulesetMaker:
        function () {
            /**
             * Score how closely the fnode's element matches the viewport in size.
             *
             * Only the viewport is measured, not the whole document: spot-checking
             * so far shows overlays are sized to the viewport.
             *
             * @param fnode The fnode whose element is measured
             * @return The result of linearScale() applied to the combined height
             *     and width mismatch in pixels, with 250 as the zero point and 0
             *     (an exact match) as the one point
             */
            function big(fnode) {
                const {height, width} = fnode.element.getBoundingClientRect();
                const heightGap = Math.abs(height - window.innerHeight);
                const widthGap = Math.abs(width - window.innerWidth);
                // A 250px total mismatch is getting into "too tall to just be nav
                // or something" territory, so that is where the score hits zero.
                const totalGap = heightGap + widthGap;
                return linearScale(totalGap, 250, 0);
            }

            /**
             * Score whether the fnode is almost, but not entirely, opaque.
             *
             * The effective opacity is the element's computed `opacity` times the
             * alpha channel of its computed `background-color`. A background
             * color with no alpha component counts as alpha 1.
             *
             * @param fnode The fnode whose element's computed style is inspected
             * @return 0 when the effective opacity is exactly 1; otherwise the
             *     result of linearScale() over the effective opacity with .4 as
             *     the zero point and .6 as the one point
             */
            function nearlyOpaque(fnode) {
                const computed = getComputedStyle(fnode.element);
                const elementOpacity = parseFloat(computed.getPropertyValue('opacity'));
                const rgba = rgbaFromString(computed.getPropertyValue('background-color'));
                const bgColorAlpha = rgba[3] === undefined ? 1 : rgba[3];
                const totalOpacity = elementOpacity * bgColorAlpha;

                // Exact comparison seems to work even though this is a float.
                if (totalOpacity === 1) {
                    return 0;
                }
                return linearScale(totalOpacity, .4, .6);
            }

            /**
             * Score how unsaturated the fnode's background color is, as a
             * signal that it is nearly black or white.
             *
             * @param fnode The fnode whose element's computed `background-color`
             *     is inspected
             * @return The result of linearScale() over (1 - saturation) with .96
             *     as the zero point and 1 as the one point
             */
            function monochrome(fnode) {
                const bgColor = getComputedStyle(fnode.element).getPropertyValue('background-color');
                const colorfulness = saturation(...rgbaFromString(bgColor));
                return linearScale(1 - colorfulness, .96, 1);
            }

            /**
             * Score how strongly the fnode's ``class`` and ``id`` attributes hint
             * that it is an overlay.
             *
             * Counts, across both attributes, how many of the substrings "popup",
             * "modal", "overlay", "underlay", and "backdrop" appear in the
             * attribute value. Each substring counts at most once per attribute,
             * and matching is case-sensitive.
             *
             * @param fnode The fnode whose element's attributes are inspected
             * @return A number in (0, 1) that rises with the number of matches:
             *     about .18 for none, .76 for one, .93 for two, approaching 1
             */
            function suspiciousClassOrId(fnode) {
                const element = fnode.element;
                const attributeNames = ['class', 'id'];
                const suspiciousSubstrings = ['popup', 'modal', 'overlay', 'underlay', 'backdrop'];
                let numOccurrences = 0;

                for (const name of attributeNames) {
                    // getAttribute() yields a single string, or null when the
                    // attribute is absent, so no array handling is needed. Empty
                    // strings cannot match anything and are skipped as well.
                    const value = element.getAttribute(name);
                    if (value) {
                        numOccurrences += suspiciousSubstrings.filter(substring => value.includes(substring)).length;
                    }
                }

                // Exponential saturation curve: each additional match closes 70%
                // of the remaining distance to 1.
                // TODO: Figure out how to derive the magic number .1685 from
                // 0 and 1.
                return 1 - .3 ** (numOccurrences + .1685);
            }

            /* The actual ruleset */

            const rules = ruleset([
                // Consider all <div> tags as candidate overlays:
                rule(dom('div'), type('overlay')),

                // Contribute the "bigness" of the node to its overlay score:
                rule(type('overlay'), score(big), {name: 'big'}),
        
                // Contibute the opacity of the node to its overlay score:
                rule(type('overlay'), score(nearlyOpaque), {name: 'nearlyOpaque'}),
        
                // Contribute some other signals as well:
                rule(type('overlay'), score(monochrome), {name: 'monochrome'}),
                rule(type('overlay'), score(suspiciousClassOrId), {name: 'classOrId'}),
                rule(type('overlay'), score(isVisible), {name: 'visible'}),

                // Offer the max-scoring overlay-typed node under the output key
                // "overlay". The score on that node will represent the probability,
                // informed by a corpus of training samples, that the node is, indeed,
                // a pop-up overlay.
                rule(type('overlay').max(), out('overlay'))
            ]);
            return rules;
        }

     // isTarget is an optional function which returns whether the Vectorizer
     // should consider a fnode a target. The default is to consider it a
     // target iff its ``data-fathom`` attribute === the trainee ID.
     //
     // isTarget: fnode => fnode.element.dataset.fathom === 'foo'
    }
);

export default trainees;