mozilla_schema_generator.glean_ping

  1# -*- coding: utf-8 -*-
  2
  3# This Source Code Form is subject to the terms of the Mozilla Public
  4# License, v. 2.0. If a copy of the MPL was not distributed with this
  5# file, You can obtain one at http://mozilla.org/MPL/2.0/.
  6
  7import logging
  8from pathlib import Path
  9from typing import Dict, List, Set
 10
 11from requests import HTTPError
 12
 13from .config import Config
 14from .generic_ping import GenericPing
 15from .probes import GleanProbe
 16from .schema import Schema
 17
# Directory that contains this module; bundled files are resolved relative to it.
ROOT_DIR = Path(__file__).parent
# Text file read by the GleanPing class body to build
# ``bug_1737656_affected_tables`` (one entry per non-blank line).
BUG_1737656_TXT = ROOT_DIR / "configs" / "bug_1737656_affected.txt"

# Module-level logger.
# NOTE(review): the methods below call ``logging.info`` (the root logger)
# directly instead of using this logger.
logger = logging.getLogger(__name__)

# URL template of the full Glean ping schema. ``{branch}`` is substituted in
# GleanPing.set_schema_url.
DEFAULT_SCHEMA_URL = (
    "https://raw.githubusercontent.com"
    "/mozilla-services/mozilla-pipeline-schemas"
    "/{branch}/schemas/glean/glean/glean.1.schema.json"
)

# URL template of the "glean-min" schema, which GleanPing.set_schema_url
# selects when a ping's ``include_info_sections`` metadata is falsy.
MINIMUM_SCHEMA_URL = (
    "https://raw.githubusercontent.com"
    "/mozilla-services/mozilla-pipeline-schemas"
    "/{branch}/schemas/glean/glean/glean-min.1.schema.json"
)
 34
 35
class GleanPing(GenericPing):
    """
    Schema generation for the pings of one Glean repository.

    An instance is created from a repository entry (a mapping with at least
    ``name`` and ``app_id``) and produces one schema per ping through
    ``generate_schema``.
    """

    # Endpoint templates built on ``GenericPing.probe_info_base_url``; the
    # ``{}`` placeholder is filled with a repository name via ``str.format``.
    probes_url_template = GenericPing.probe_info_base_url + "/glean/{}/metrics"
    ping_url_template = GenericPing.probe_info_base_url + "/glean/{}/pings"
    # Endpoint listing all repositories (applications and libraries).
    repos_url = GenericPing.probe_info_base_url + "/glean/repositories"
    dependencies_url_template = (
        GenericPing.probe_info_base_url + "/glean/{}/dependencies"
    )

    # Returned by ``get_dependencies`` when the dependencies endpoint fails
    # or none of the declared libraries maps to a known repository.
    default_dependencies = ["glean-core"]

    # Runs once, when the class body is executed. Each non-blank line of the
    # file becomes one entry; ``generate_schema`` compares the entries with
    # "<bq_dataset_family>.<bq_table>" identifiers.
    # NOTE(review): ``f`` stays bound as a class attribute after this block.
    with open(BUG_1737656_TXT, "r") as f:
        bug_1737656_affected_tables = [
            line.strip() for line in f.readlines() if line.strip()
        ]
 50
 51    def __init__(self, repo, **kwargs):  # TODO: Make env-url optional
 52        self.repo = repo
 53        self.repo_name = repo["name"]
 54        self.app_id = repo["app_id"]
 55        super().__init__(
 56            DEFAULT_SCHEMA_URL,
 57            DEFAULT_SCHEMA_URL,
 58            self.probes_url_template.format(self.repo_name),
 59            **kwargs,
 60        )
 61
 62    def get_schema(self, generic_schema=False) -> Schema:
 63        """
 64        Fetch schema via URL.
 65
 66        Unless *generic_schema* is set to true, this function makes some modifications
 67        to allow some workarounds for proper injection of metrics.
 68        """
 69        schema = super().get_schema()
 70        if generic_schema:
 71            return schema
 72
 73        # We need to inject placeholders for the url2, text2, etc. types as part
 74        # of mitigation for https://bugzilla.mozilla.org/show_bug.cgi?id=1737656
 75        for metric_name in ["labeled_rate", "jwe", "url", "text"]:
 76            metric1 = schema.get(
 77                ("properties", "metrics", "properties", metric_name)
 78            ).copy()
 79            metric1 = schema.set_schema_elem(
 80                ("properties", "metrics", "properties", metric_name + "2"),
 81                metric1,
 82            )
 83
 84        return schema
 85
 86    def get_dependencies(self):
 87        # Get all of the library dependencies for the application that
 88        # are also known about in the repositories file.
 89
 90        # The dependencies are specified using library names, but we need to
 91        # map those back to the name of the repository in the repository file.
 92        try:
 93            dependencies = self._get_json(
 94                self.dependencies_url_template.format(self.repo_name)
 95            )
 96        except HTTPError:
 97            logging.info(f"For {self.repo_name}, using default Glean dependencies")
 98            return self.default_dependencies
 99
100        dependency_library_names = list(dependencies.keys())
101
102        repos = GleanPing._get_json(GleanPing.repos_url)
103        repos_by_dependency_name = {}
104        for repo in repos:
105            for library_name in repo.get("library_names", []):
106                repos_by_dependency_name[library_name] = repo["name"]
107
108        dependencies = []
109        for name in dependency_library_names:
110            if name in repos_by_dependency_name:
111                dependencies.append(repos_by_dependency_name[name])
112
113        if len(dependencies) == 0:
114            logging.info(f"For {self.repo_name}, using default Glean dependencies")
115            return self.default_dependencies
116
117        logging.info(f"For {self.repo_name}, found Glean dependencies: {dependencies}")
118        return dependencies
119
120    def get_probes(self) -> List[GleanProbe]:
121        data = self._get_json(self.probes_url)
122        probes = list(data.items())
123
124        for dependency in self.get_dependencies():
125            dependency_probes = self._get_json(
126                self.probes_url_template.format(dependency)
127            )
128            probes += list(dependency_probes.items())
129
130        pings = self.get_pings()
131
132        processed = []
133        for _id, defn in probes:
134            probe = GleanProbe(_id, defn, pings=pings)
135            processed.append(probe)
136
137            # Manual handling of incompatible schema changes
138            issue_118_affected = {
139                "fenix",
140                "fenix-nightly",
141                "firefox-android-nightly",
142                "firefox-android-beta",
143                "firefox-android-release",
144            }
145            if (
146                self.repo_name in issue_118_affected
147                and probe.get_name() == "installation.timestamp"
148            ):
149                logging.info(f"Writing column {probe.get_name()} for compatibility.")
150                # See: https://github.com/mozilla/mozilla-schema-generator/issues/118
151                # Search through history for the "string" type and add a copy of
152                # the probe at that time in history. The changepoint signifies
153                # this event.
154                changepoint_index = 0
155                for definition in probe.definition_history:
156                    if definition["type"] != probe.get_type():
157                        break
158                    changepoint_index += 1
159                # Modify the definition with the truncated history.
160                hist_defn = defn.copy()
161                hist_defn[probe.history_key] = probe.definition_history[
162                    changepoint_index:
163                ]
164                hist_defn["type"] = hist_defn[probe.history_key][0]["type"]
165                incompatible_probe_type = GleanProbe(_id, hist_defn, pings=pings)
166                processed.append(incompatible_probe_type)
167
168            # Handling probe type changes (Bug 1870317)
169            probe_types = {hist["type"] for hist in defn[probe.history_key]}
170            if len(probe_types) > 1:
171                # The probe type changed at some point in history.
172                # Create schema entry for each type.
173                hist_defn = defn.copy()
174
175                # No new entry needs to be created for the current probe type
176                probe_types.remove(defn["type"])
177
178                for hist in hist_defn[probe.history_key]:
179                    # Create a new entry for a historic type
180                    if hist["type"] in probe_types:
181                        hist_defn["type"] = hist["type"]
182                        probe = GleanProbe(_id, hist_defn, pings=pings)
183                        processed.append(probe)
184
185                        # Keep track of the types entries were already created for
186                        probe_types.remove(hist["type"])
187
188        return processed
189
190    def _get_ping_data(self) -> Dict[str, Dict]:
191        url = self.ping_url_template.format(self.repo_name)
192        ping_data = GleanPing._get_json(url)
193        for dependency in self.get_dependencies():
194            dependency_pings = self._get_json(self.ping_url_template.format(dependency))
195            ping_data.update(dependency_pings)
196        return ping_data
197
198    def _get_ping_data_without_dependencies(self) -> Dict[str, Dict]:
199        url = self.ping_url_template.format(self.repo_name)
200        ping_data = GleanPing._get_json(url)
201        return ping_data
202
203    def _get_dependency_pings(self, dependency):
204        return self._get_json(self.ping_url_template.format(dependency))
205
206    def get_pings(self) -> Set[str]:
207        return self._get_ping_data().keys()
208
209    @staticmethod
210    def apply_default_metadata(ping_metadata, default_metadata):
211        """apply_default_metadata recurses down into dicts nested
212        to an arbitrary depth, updating keys. The ``default_metadata`` is merged into
213        ``ping_metadata``.
214        :param ping_metadata: dict onto which the merge is executed
215        :param default_metadata: dct merged into ping_metadata
216        :return: None
217        """
218        for k, v in default_metadata.items():
219            if (
220                k in ping_metadata
221                and isinstance(ping_metadata[k], dict)
222                and isinstance(default_metadata[k], dict)
223            ):
224                GleanPing.apply_default_metadata(ping_metadata[k], default_metadata[k])
225            else:
226                ping_metadata[k] = default_metadata[k]
227
228    def _get_ping_data_and_dependencies_with_default_metadata(self) -> Dict[str, Dict]:
229        # Get the ping data with the pipeline metadata
230        ping_data = self._get_ping_data_without_dependencies()
231
232        # The ping endpoint for the dependency pings does not include any repo defined
233        # moz_pipeline_metadata_defaults so they need to be applied here.
234
235        # 1.  Get repo and pipeline default metadata.
236        repos = self.get_repos()
237        current_repo = next((x for x in repos if x.get("app_id") == self.app_id), {})
238        default_metadata = current_repo.get("moz_pipeline_metadata_defaults", {})
239
240        # 2.  Apply the default metadata to each dependency defined ping.
241
242        # Apply app-level metadata to pings defined in dependencies
243        app_metadata = current_repo.get("moz_pipeline_metadata", {})
244
245        for dependency in self.get_dependencies():
246            dependency_pings = self._get_dependency_pings(dependency)
247            for dependency_ping in dependency_pings.values():
248                # Although it is counter intuitive to apply the default metadata on top of the
249                # existing dependency ping metadata it does set the repo specific value for
250                # bq_dataset_family instead of using the dependency id for the bq_dataset_family
251                # value.
252                GleanPing.apply_default_metadata(
253                    dependency_ping.get("moz_pipeline_metadata"), default_metadata
254                )
255                # app-level ping properties take priority over the app defaults
256                metadata_override = app_metadata.get(dependency_ping["name"])
257                if metadata_override is not None:
258                    GleanPing.apply_default_metadata(
259                        dependency_ping.get("moz_pipeline_metadata"), metadata_override
260                    )
261            ping_data.update(dependency_pings)
262
263        return ping_data
264
265    @staticmethod
266    def reorder_metadata(metadata):
267        desired_order_list = [
268            "bq_dataset_family",
269            "bq_table",
270            "bq_metadata_format",
271            "include_info_sections",
272            "submission_timestamp_granularity",
273            "expiration_policy",
274            "override_attributes",
275            "jwe_mappings",
276        ]
277        reordered_metadata = {
278            k: metadata[k] for k in desired_order_list if k in metadata
279        }
280
281        # re-order jwe-mappings
282        desired_order_list = ["source_field_path", "decrypted_field_path"]
283        jwe_mapping_metadata = reordered_metadata.get("jwe_mappings")
284        if jwe_mapping_metadata:
285            reordered_jwe_mapping_metadata = []
286            for mapping in jwe_mapping_metadata:
287                reordered_jwe_mapping_metadata.append(
288                    {k: mapping[k] for k in desired_order_list if k in mapping}
289                )
290            reordered_metadata["jwe_mappings"] = reordered_jwe_mapping_metadata
291
292        # future proofing, in case there are other fields added at the ping top level
293        # add them to the end.
294        leftovers = {k: metadata[k] for k in set(metadata) - set(reordered_metadata)}
295        reordered_metadata = {**reordered_metadata, **leftovers}
296        return reordered_metadata
297
298    def get_pings_and_pipeline_metadata(self) -> Dict[str, Dict]:
299        pings = self._get_ping_data_and_dependencies_with_default_metadata()
300        for ping_name, ping_data in pings.items():
301            metadata = ping_data.get("moz_pipeline_metadata")
302            if not metadata:
303                continue
304            metadata["include_info_sections"] = self._is_field_included(
305                ping_data, "include_info_sections", consider_all_history=False
306            )
307            metadata["include_client_id"] = self._is_field_included(
308                ping_data, "include_client_id"
309            )
310
311            # While technically unnecessary, the dictionary elements are re-ordered to match the
312            # currently deployed order and used to verify no difference in output.
313            pings[ping_name] = GleanPing.reorder_metadata(metadata)
314        return pings
315
316    def get_ping_descriptions(self) -> Dict[str, str]:
317        return {
318            k: v["history"][-1]["description"] for k, v in self._get_ping_data().items()
319        }
320
321    @staticmethod
322    def _is_field_included(ping_data, field_name, consider_all_history=True) -> bool:
323        """Return false if the field exists and is false.
324
325        If `consider_all_history` is False, then only check the latest value in the ping history.
326
327        Otherwise, if the field is not found or true in one or more history entries,
328        true is returned.
329        """
330
331        # Default to true if not specified.
332        if "history" not in ping_data or len(ping_data["history"]) == 0:
333            return True
334
335        # Check if at some point in the past the field has already been deployed.
336        # And if the caller of this method wants to consider this history of the field.
337        # Keep them in the schema, even if the field has changed as
338        # removing fields is currently not supported.
339        # See https://bugzilla.mozilla.org/show_bug.cgi?id=1898105
340        # and https://bugzilla.mozilla.org/show_bug.cgi?id=1898105#c10
341        ping_history: list
342        if consider_all_history:
343            ping_history = ping_data["history"]
344        else:
345            ping_history = [ping_data["history"][-1]]
346        for history in ping_history:
347            if field_name not in history or history[field_name]:
348                return True
349
350        # The ping was created with include_info_sections = False. The fields can be excluded.
351        return False
352
353    def set_schema_url(self, metadata):
354        """
355        Switch between the glean-min and glean schemas if the ping does not require
356        info sections as specified in the parsed ping info in probe scraper.
357        """
358        if not metadata["include_info_sections"]:
359            self.schema_url = MINIMUM_SCHEMA_URL.format(branch=self.branch_name)
360        else:
361            self.schema_url = DEFAULT_SCHEMA_URL.format(branch=self.branch_name)
362
363    def generate_schema(self, config, generic_schema=False) -> Dict[str, Schema]:
364        pings = self.get_pings_and_pipeline_metadata()
365        schemas = {}
366
367        for ping, pipeline_meta in pings.items():
368            matchers = {
369                loc: m.clone(new_table_group=ping) for loc, m in config.matchers.items()
370            }
371
372            # Four newly introduced metric types were incorrectly deployed
373            # as repeated key/value structs in all Glean ping tables existing prior
374            # to November 2021. We maintain the incorrect fields for existing tables
375            # by disabling the associated matchers.
376            # Note that each of these types now has a "2" matcher ("text2", "url2", etc.)
377            # defined that will allow metrics of these types to be injected into proper
378            # structs. The gcp-ingestion repository includes logic to rewrite these
379            # metrics under the "2" names.
380            # See https://bugzilla.mozilla.org/show_bug.cgi?id=1737656
381            bq_identifier = "{bq_dataset_family}.{bq_table}".format(**pipeline_meta)
382            if bq_identifier in self.bug_1737656_affected_tables:
383                matchers = {
384                    loc: m
385                    for loc, m in matchers.items()
386                    if not m.matcher.get("bug_1737656_affected")
387                }
388
389            for matcher in matchers.values():
390                matcher.matcher["send_in_pings"]["contains"] = ping
391            new_config = Config(ping, matchers=matchers)
392
393            defaults = {"mozPipelineMetadata": pipeline_meta}
394
395            # Adjust the schema path if the ping does not require info sections
396            self.set_schema_url(pipeline_meta)
397            if generic_schema:  # Use the generic glean ping schema
398                schema = self.get_schema(generic_schema=True)
399                schema.schema.update(defaults)
400                schemas[new_config.name] = schema
401            else:
402                generated = super().generate_schema(new_config)
403                for schema in generated.values():
404                    # We want to override each individual key with assembled defaults,
405                    # but keep values _inside_ them if they have been set in the schemas.
406                    for key, value in defaults.items():
407                        if key not in schema.schema:
408                            schema.schema[key] = {}
409                        schema.schema[key].update(value)
410                schemas.update(generated)
411
412        return schemas
413
414    @staticmethod
415    def get_repos():
416        """
417        Retrieve metadata for all non-library Glean repositories
418        """
419        repos = GleanPing._get_json(GleanPing.repos_url)
420        return [repo for repo in repos if "library_names" not in repo]
ROOT_DIR = PosixPath('/home/circleci/project/mozilla_schema_generator')
BUG_1737656_TXT = PosixPath('/home/circleci/project/mozilla_schema_generator/configs/bug_1737656_affected.txt')
logger = <Logger mozilla_schema_generator.glean_ping (WARNING)>
DEFAULT_SCHEMA_URL = 'https://raw.githubusercontent.com/mozilla-services/mozilla-pipeline-schemas/{branch}/schemas/glean/glean/glean.1.schema.json'
MINIMUM_SCHEMA_URL = 'https://raw.githubusercontent.com/mozilla-services/mozilla-pipeline-schemas/{branch}/schemas/glean/glean/glean-min.1.schema.json'
 37class GleanPing(GenericPing):
 38    probes_url_template = GenericPing.probe_info_base_url + "/glean/{}/metrics"
 39    ping_url_template = GenericPing.probe_info_base_url + "/glean/{}/pings"
 40    repos_url = GenericPing.probe_info_base_url + "/glean/repositories"
 41    dependencies_url_template = (
 42        GenericPing.probe_info_base_url + "/glean/{}/dependencies"
 43    )
 44
 45    default_dependencies = ["glean-core"]
 46
 47    with open(BUG_1737656_TXT, "r") as f:
 48        bug_1737656_affected_tables = [
 49            line.strip() for line in f.readlines() if line.strip()
 50        ]
 51
 52    def __init__(self, repo, **kwargs):  # TODO: Make env-url optional
 53        self.repo = repo
 54        self.repo_name = repo["name"]
 55        self.app_id = repo["app_id"]
 56        super().__init__(
 57            DEFAULT_SCHEMA_URL,
 58            DEFAULT_SCHEMA_URL,
 59            self.probes_url_template.format(self.repo_name),
 60            **kwargs,
 61        )
 62
 63    def get_schema(self, generic_schema=False) -> Schema:
 64        """
 65        Fetch schema via URL.
 66
 67        Unless *generic_schema* is set to true, this function makes some modifications
 68        to allow some workarounds for proper injection of metrics.
 69        """
 70        schema = super().get_schema()
 71        if generic_schema:
 72            return schema
 73
 74        # We need to inject placeholders for the url2, text2, etc. types as part
 75        # of mitigation for https://bugzilla.mozilla.org/show_bug.cgi?id=1737656
 76        for metric_name in ["labeled_rate", "jwe", "url", "text"]:
 77            metric1 = schema.get(
 78                ("properties", "metrics", "properties", metric_name)
 79            ).copy()
 80            metric1 = schema.set_schema_elem(
 81                ("properties", "metrics", "properties", metric_name + "2"),
 82                metric1,
 83            )
 84
 85        return schema
 86
 87    def get_dependencies(self):
 88        # Get all of the library dependencies for the application that
 89        # are also known about in the repositories file.
 90
 91        # The dependencies are specified using library names, but we need to
 92        # map those back to the name of the repository in the repository file.
 93        try:
 94            dependencies = self._get_json(
 95                self.dependencies_url_template.format(self.repo_name)
 96            )
 97        except HTTPError:
 98            logging.info(f"For {self.repo_name}, using default Glean dependencies")
 99            return self.default_dependencies
100
101        dependency_library_names = list(dependencies.keys())
102
103        repos = GleanPing._get_json(GleanPing.repos_url)
104        repos_by_dependency_name = {}
105        for repo in repos:
106            for library_name in repo.get("library_names", []):
107                repos_by_dependency_name[library_name] = repo["name"]
108
109        dependencies = []
110        for name in dependency_library_names:
111            if name in repos_by_dependency_name:
112                dependencies.append(repos_by_dependency_name[name])
113
114        if len(dependencies) == 0:
115            logging.info(f"For {self.repo_name}, using default Glean dependencies")
116            return self.default_dependencies
117
118        logging.info(f"For {self.repo_name}, found Glean dependencies: {dependencies}")
119        return dependencies
120
121    def get_probes(self) -> List[GleanProbe]:
122        data = self._get_json(self.probes_url)
123        probes = list(data.items())
124
125        for dependency in self.get_dependencies():
126            dependency_probes = self._get_json(
127                self.probes_url_template.format(dependency)
128            )
129            probes += list(dependency_probes.items())
130
131        pings = self.get_pings()
132
133        processed = []
134        for _id, defn in probes:
135            probe = GleanProbe(_id, defn, pings=pings)
136            processed.append(probe)
137
138            # Manual handling of incompatible schema changes
139            issue_118_affected = {
140                "fenix",
141                "fenix-nightly",
142                "firefox-android-nightly",
143                "firefox-android-beta",
144                "firefox-android-release",
145            }
146            if (
147                self.repo_name in issue_118_affected
148                and probe.get_name() == "installation.timestamp"
149            ):
150                logging.info(f"Writing column {probe.get_name()} for compatibility.")
151                # See: https://github.com/mozilla/mozilla-schema-generator/issues/118
152                # Search through history for the "string" type and add a copy of
153                # the probe at that time in history. The changepoint signifies
154                # this event.
155                changepoint_index = 0
156                for definition in probe.definition_history:
157                    if definition["type"] != probe.get_type():
158                        break
159                    changepoint_index += 1
160                # Modify the definition with the truncated history.
161                hist_defn = defn.copy()
162                hist_defn[probe.history_key] = probe.definition_history[
163                    changepoint_index:
164                ]
165                hist_defn["type"] = hist_defn[probe.history_key][0]["type"]
166                incompatible_probe_type = GleanProbe(_id, hist_defn, pings=pings)
167                processed.append(incompatible_probe_type)
168
169            # Handling probe type changes (Bug 1870317)
170            probe_types = {hist["type"] for hist in defn[probe.history_key]}
171            if len(probe_types) > 1:
172                # The probe type changed at some point in history.
173                # Create schema entry for each type.
174                hist_defn = defn.copy()
175
176                # No new entry needs to be created for the current probe type
177                probe_types.remove(defn["type"])
178
179                for hist in hist_defn[probe.history_key]:
180                    # Create a new entry for a historic type
181                    if hist["type"] in probe_types:
182                        hist_defn["type"] = hist["type"]
183                        probe = GleanProbe(_id, hist_defn, pings=pings)
184                        processed.append(probe)
185
186                        # Keep track of the types entries were already created for
187                        probe_types.remove(hist["type"])
188
189        return processed
190
191    def _get_ping_data(self) -> Dict[str, Dict]:
192        url = self.ping_url_template.format(self.repo_name)
193        ping_data = GleanPing._get_json(url)
194        for dependency in self.get_dependencies():
195            dependency_pings = self._get_json(self.ping_url_template.format(dependency))
196            ping_data.update(dependency_pings)
197        return ping_data
198
199    def _get_ping_data_without_dependencies(self) -> Dict[str, Dict]:
200        url = self.ping_url_template.format(self.repo_name)
201        ping_data = GleanPing._get_json(url)
202        return ping_data
203
204    def _get_dependency_pings(self, dependency):
205        return self._get_json(self.ping_url_template.format(dependency))
206
207    def get_pings(self) -> Set[str]:
208        return self._get_ping_data().keys()
209
210    @staticmethod
211    def apply_default_metadata(ping_metadata, default_metadata):
212        """apply_default_metadata recurses down into dicts nested
213        to an arbitrary depth, updating keys. The ``default_metadata`` is merged into
214        ``ping_metadata``.
215        :param ping_metadata: dict onto which the merge is executed
216        :param default_metadata: dct merged into ping_metadata
217        :return: None
218        """
219        for k, v in default_metadata.items():
220            if (
221                k in ping_metadata
222                and isinstance(ping_metadata[k], dict)
223                and isinstance(default_metadata[k], dict)
224            ):
225                GleanPing.apply_default_metadata(ping_metadata[k], default_metadata[k])
226            else:
227                ping_metadata[k] = default_metadata[k]
228
229    def _get_ping_data_and_dependencies_with_default_metadata(self) -> Dict[str, Dict]:
230        # Get the ping data with the pipeline metadata
231        ping_data = self._get_ping_data_without_dependencies()
232
233        # The ping endpoint for the dependency pings does not include any repo defined
234        # moz_pipeline_metadata_defaults so they need to be applied here.
235
236        # 1.  Get repo and pipeline default metadata.
237        repos = self.get_repos()
238        current_repo = next((x for x in repos if x.get("app_id") == self.app_id), {})
239        default_metadata = current_repo.get("moz_pipeline_metadata_defaults", {})
240
241        # 2.  Apply the default metadata to each dependency defined ping.
242
243        # Apply app-level metadata to pings defined in dependencies
244        app_metadata = current_repo.get("moz_pipeline_metadata", {})
245
246        for dependency in self.get_dependencies():
247            dependency_pings = self._get_dependency_pings(dependency)
248            for dependency_ping in dependency_pings.values():
249                # Although it is counter intuitive to apply the default metadata on top of the
250                # existing dependency ping metadata it does set the repo specific value for
251                # bq_dataset_family instead of using the dependency id for the bq_dataset_family
252                # value.
253                GleanPing.apply_default_metadata(
254                    dependency_ping.get("moz_pipeline_metadata"), default_metadata
255                )
256                # app-level ping properties take priority over the app defaults
257                metadata_override = app_metadata.get(dependency_ping["name"])
258                if metadata_override is not None:
259                    GleanPing.apply_default_metadata(
260                        dependency_ping.get("moz_pipeline_metadata"), metadata_override
261                    )
262            ping_data.update(dependency_pings)
263
264        return ping_data
265
266    @staticmethod
267    def reorder_metadata(metadata):
268        desired_order_list = [
269            "bq_dataset_family",
270            "bq_table",
271            "bq_metadata_format",
272            "include_info_sections",
273            "submission_timestamp_granularity",
274            "expiration_policy",
275            "override_attributes",
276            "jwe_mappings",
277        ]
278        reordered_metadata = {
279            k: metadata[k] for k in desired_order_list if k in metadata
280        }
281
282        # re-order jwe-mappings
283        desired_order_list = ["source_field_path", "decrypted_field_path"]
284        jwe_mapping_metadata = reordered_metadata.get("jwe_mappings")
285        if jwe_mapping_metadata:
286            reordered_jwe_mapping_metadata = []
287            for mapping in jwe_mapping_metadata:
288                reordered_jwe_mapping_metadata.append(
289                    {k: mapping[k] for k in desired_order_list if k in mapping}
290                )
291            reordered_metadata["jwe_mappings"] = reordered_jwe_mapping_metadata
292
293        # future proofing, in case there are other fields added at the ping top level
294        # add them to the end.
295        leftovers = {k: metadata[k] for k in set(metadata) - set(reordered_metadata)}
296        reordered_metadata = {**reordered_metadata, **leftovers}
297        return reordered_metadata
298
299    def get_pings_and_pipeline_metadata(self) -> Dict[str, Dict]:
300        pings = self._get_ping_data_and_dependencies_with_default_metadata()
301        for ping_name, ping_data in pings.items():
302            metadata = ping_data.get("moz_pipeline_metadata")
303            if not metadata:
304                continue
305            metadata["include_info_sections"] = self._is_field_included(
306                ping_data, "include_info_sections", consider_all_history=False
307            )
308            metadata["include_client_id"] = self._is_field_included(
309                ping_data, "include_client_id"
310            )
311
312            # While technically unnecessary, the dictionary elements are re-ordered to match the
313            # currently deployed order and used to verify no difference in output.
314            pings[ping_name] = GleanPing.reorder_metadata(metadata)
315        return pings
316
317    def get_ping_descriptions(self) -> Dict[str, str]:
318        return {
319            k: v["history"][-1]["description"] for k, v in self._get_ping_data().items()
320        }
321
322    @staticmethod
323    def _is_field_included(ping_data, field_name, consider_all_history=True) -> bool:
324        """Return false if the field exists and is false.
325
326        If `consider_all_history` is False, then only check the latest value in the ping history.
327
328        Otherwise, if the field is not found or true in one or more history entries,
329        true is returned.
330        """
331
332        # Default to true if not specified.
333        if "history" not in ping_data or len(ping_data["history"]) == 0:
334            return True
335
336        # Check if at some point in the past the field has already been deployed.
337        # And if the caller of this method wants to consider this history of the field.
338        # Keep them in the schema, even if the field has changed as
339        # removing fields is currently not supported.
340        # See https://bugzilla.mozilla.org/show_bug.cgi?id=1898105
341        # and https://bugzilla.mozilla.org/show_bug.cgi?id=1898105#c10
342        ping_history: list
343        if consider_all_history:
344            ping_history = ping_data["history"]
345        else:
346            ping_history = [ping_data["history"][-1]]
347        for history in ping_history:
348            if field_name not in history or history[field_name]:
349                return True
350
351        # The ping was created with include_info_sections = False. The fields can be excluded.
352        return False
353
354    def set_schema_url(self, metadata):
355        """
356        Switch between the glean-min and glean schemas if the ping does not require
357        info sections as specified in the parsed ping info in probe scraper.
358        """
359        if not metadata["include_info_sections"]:
360            self.schema_url = MINIMUM_SCHEMA_URL.format(branch=self.branch_name)
361        else:
362            self.schema_url = DEFAULT_SCHEMA_URL.format(branch=self.branch_name)
363
364    def generate_schema(self, config, generic_schema=False) -> Dict[str, Schema]:
365        pings = self.get_pings_and_pipeline_metadata()
366        schemas = {}
367
368        for ping, pipeline_meta in pings.items():
369            matchers = {
370                loc: m.clone(new_table_group=ping) for loc, m in config.matchers.items()
371            }
372
373            # Four newly introduced metric types were incorrectly deployed
374            # as repeated key/value structs in all Glean ping tables existing prior
375            # to November 2021. We maintain the incorrect fields for existing tables
376            # by disabling the associated matchers.
377            # Note that each of these types now has a "2" matcher ("text2", "url2", etc.)
378            # defined that will allow metrics of these types to be injected into proper
379            # structs. The gcp-ingestion repository includes logic to rewrite these
380            # metrics under the "2" names.
381            # See https://bugzilla.mozilla.org/show_bug.cgi?id=1737656
382            bq_identifier = "{bq_dataset_family}.{bq_table}".format(**pipeline_meta)
383            if bq_identifier in self.bug_1737656_affected_tables:
384                matchers = {
385                    loc: m
386                    for loc, m in matchers.items()
387                    if not m.matcher.get("bug_1737656_affected")
388                }
389
390            for matcher in matchers.values():
391                matcher.matcher["send_in_pings"]["contains"] = ping
392            new_config = Config(ping, matchers=matchers)
393
394            defaults = {"mozPipelineMetadata": pipeline_meta}
395
396            # Adjust the schema path if the ping does not require info sections
397            self.set_schema_url(pipeline_meta)
398            if generic_schema:  # Use the generic glean ping schema
399                schema = self.get_schema(generic_schema=True)
400                schema.schema.update(defaults)
401                schemas[new_config.name] = schema
402            else:
403                generated = super().generate_schema(new_config)
404                for schema in generated.values():
405                    # We want to override each individual key with assembled defaults,
406                    # but keep values _inside_ them if they have been set in the schemas.
407                    for key, value in defaults.items():
408                        if key not in schema.schema:
409                            schema.schema[key] = {}
410                        schema.schema[key].update(value)
411                schemas.update(generated)
412
413        return schemas
414
415    @staticmethod
416    def get_repos():
417        """
418        Retrieve metadata for all non-library Glean repositories
419        """
420        repos = GleanPing._get_json(GleanPing.repos_url)
421        return [repo for repo in repos if "library_names" not in repo]
GleanPing(repo, **kwargs)
52    def __init__(self, repo, **kwargs):  # TODO: Make env-url optional
53        self.repo = repo
54        self.repo_name = repo["name"]
55        self.app_id = repo["app_id"]
56        super().__init__(
57            DEFAULT_SCHEMA_URL,
58            DEFAULT_SCHEMA_URL,
59            self.probes_url_template.format(self.repo_name),
60            **kwargs,
61        )
probes_url_template = 'https://probeinfo.telemetry.mozilla.org/glean/{}/metrics'
ping_url_template = 'https://probeinfo.telemetry.mozilla.org/glean/{}/pings'
repos_url = 'https://probeinfo.telemetry.mozilla.org/glean/repositories'
dependencies_url_template = 'https://probeinfo.telemetry.mozilla.org/glean/{}/dependencies'
default_dependencies = ['glean-core']
repo
repo_name
app_id
def get_schema(self, generic_schema=False) -> mozilla_schema_generator.schema.Schema:
63    def get_schema(self, generic_schema=False) -> Schema:
64        """
65        Fetch schema via URL.
66
67        Unless *generic_schema* is set to true, this function makes some modifications
68        to allow some workarounds for proper injection of metrics.
69        """
70        schema = super().get_schema()
71        if generic_schema:
72            return schema
73
74        # We need to inject placeholders for the url2, text2, etc. types as part
75        # of mitigation for https://bugzilla.mozilla.org/show_bug.cgi?id=1737656
76        for metric_name in ["labeled_rate", "jwe", "url", "text"]:
77            metric1 = schema.get(
78                ("properties", "metrics", "properties", metric_name)
79            ).copy()
80            metric1 = schema.set_schema_elem(
81                ("properties", "metrics", "properties", metric_name + "2"),
82                metric1,
83            )
84
85        return schema

Fetch schema via URL.

Unless generic_schema is set to true, this function makes some modifications to allow some workarounds for proper injection of metrics.

def get_dependencies(self):
 87    def get_dependencies(self):
 88        # Get all of the library dependencies for the application that
 89        # are also known about in the repositories file.
 90
 91        # The dependencies are specified using library names, but we need to
 92        # map those back to the name of the repository in the repository file.
 93        try:
 94            dependencies = self._get_json(
 95                self.dependencies_url_template.format(self.repo_name)
 96            )
 97        except HTTPError:
 98            logging.info(f"For {self.repo_name}, using default Glean dependencies")
 99            return self.default_dependencies
100
101        dependency_library_names = list(dependencies.keys())
102
103        repos = GleanPing._get_json(GleanPing.repos_url)
104        repos_by_dependency_name = {}
105        for repo in repos:
106            for library_name in repo.get("library_names", []):
107                repos_by_dependency_name[library_name] = repo["name"]
108
109        dependencies = []
110        for name in dependency_library_names:
111            if name in repos_by_dependency_name:
112                dependencies.append(repos_by_dependency_name[name])
113
114        if len(dependencies) == 0:
115            logging.info(f"For {self.repo_name}, using default Glean dependencies")
116            return self.default_dependencies
117
118        logging.info(f"For {self.repo_name}, found Glean dependencies: {dependencies}")
119        return dependencies
def get_probes(self) -> List[mozilla_schema_generator.probes.GleanProbe]:
121    def get_probes(self) -> List[GleanProbe]:
122        data = self._get_json(self.probes_url)
123        probes = list(data.items())
124
125        for dependency in self.get_dependencies():
126            dependency_probes = self._get_json(
127                self.probes_url_template.format(dependency)
128            )
129            probes += list(dependency_probes.items())
130
131        pings = self.get_pings()
132
133        processed = []
134        for _id, defn in probes:
135            probe = GleanProbe(_id, defn, pings=pings)
136            processed.append(probe)
137
138            # Manual handling of incompatible schema changes
139            issue_118_affected = {
140                "fenix",
141                "fenix-nightly",
142                "firefox-android-nightly",
143                "firefox-android-beta",
144                "firefox-android-release",
145            }
146            if (
147                self.repo_name in issue_118_affected
148                and probe.get_name() == "installation.timestamp"
149            ):
150                logging.info(f"Writing column {probe.get_name()} for compatibility.")
151                # See: https://github.com/mozilla/mozilla-schema-generator/issues/118
152                # Search through history for the "string" type and add a copy of
153                # the probe at that time in history. The changepoint signifies
154                # this event.
155                changepoint_index = 0
156                for definition in probe.definition_history:
157                    if definition["type"] != probe.get_type():
158                        break
159                    changepoint_index += 1
160                # Modify the definition with the truncated history.
161                hist_defn = defn.copy()
162                hist_defn[probe.history_key] = probe.definition_history[
163                    changepoint_index:
164                ]
165                hist_defn["type"] = hist_defn[probe.history_key][0]["type"]
166                incompatible_probe_type = GleanProbe(_id, hist_defn, pings=pings)
167                processed.append(incompatible_probe_type)
168
169            # Handling probe type changes (Bug 1870317)
170            probe_types = {hist["type"] for hist in defn[probe.history_key]}
171            if len(probe_types) > 1:
172                # The probe type changed at some point in history.
173                # Create schema entry for each type.
174                hist_defn = defn.copy()
175
176                # No new entry needs to be created for the current probe type
177                probe_types.remove(defn["type"])
178
179                for hist in hist_defn[probe.history_key]:
180                    # Create a new entry for a historic type
181                    if hist["type"] in probe_types:
182                        hist_defn["type"] = hist["type"]
183                        probe = GleanProbe(_id, hist_defn, pings=pings)
184                        processed.append(probe)
185
186                        # Keep track of the types entries were already created for
187                        probe_types.remove(hist["type"])
188
189        return processed
def get_pings(self) -> Set[str]:
207    def get_pings(self) -> Set[str]:
208        return self._get_ping_data().keys()
@staticmethod
def apply_default_metadata(ping_metadata, default_metadata):
210    @staticmethod
211    def apply_default_metadata(ping_metadata, default_metadata):
212        """apply_default_metadata recurses down into dicts nested
213        to an arbitrary depth, updating keys. The ``default_metadata`` is merged into
214        ``ping_metadata``.
215        :param ping_metadata: dict onto which the merge is executed
216        :param default_metadata: dct merged into ping_metadata
217        :return: None
218        """
219        for k, v in default_metadata.items():
220            if (
221                k in ping_metadata
222                and isinstance(ping_metadata[k], dict)
223                and isinstance(default_metadata[k], dict)
224            ):
225                GleanPing.apply_default_metadata(ping_metadata[k], default_metadata[k])
226            else:
227                ping_metadata[k] = default_metadata[k]

apply_default_metadata recurses down into dicts nested to an arbitrary depth, updating keys. The default_metadata is merged into ping_metadata.

Parameters
  • ping_metadata: dict onto which the merge is executed
  • default_metadata: dict merged into ping_metadata
Returns

None

@staticmethod
def reorder_metadata(metadata):
266    @staticmethod
267    def reorder_metadata(metadata):
268        desired_order_list = [
269            "bq_dataset_family",
270            "bq_table",
271            "bq_metadata_format",
272            "include_info_sections",
273            "submission_timestamp_granularity",
274            "expiration_policy",
275            "override_attributes",
276            "jwe_mappings",
277        ]
278        reordered_metadata = {
279            k: metadata[k] for k in desired_order_list if k in metadata
280        }
281
282        # re-order jwe-mappings
283        desired_order_list = ["source_field_path", "decrypted_field_path"]
284        jwe_mapping_metadata = reordered_metadata.get("jwe_mappings")
285        if jwe_mapping_metadata:
286            reordered_jwe_mapping_metadata = []
287            for mapping in jwe_mapping_metadata:
288                reordered_jwe_mapping_metadata.append(
289                    {k: mapping[k] for k in desired_order_list if k in mapping}
290                )
291            reordered_metadata["jwe_mappings"] = reordered_jwe_mapping_metadata
292
293        # future proofing, in case there are other fields added at the ping top level
294        # add them to the end.
295        leftovers = {k: metadata[k] for k in set(metadata) - set(reordered_metadata)}
296        reordered_metadata = {**reordered_metadata, **leftovers}
297        return reordered_metadata
def get_pings_and_pipeline_metadata(self) -> Dict[str, Dict]:
299    def get_pings_and_pipeline_metadata(self) -> Dict[str, Dict]:
300        pings = self._get_ping_data_and_dependencies_with_default_metadata()
301        for ping_name, ping_data in pings.items():
302            metadata = ping_data.get("moz_pipeline_metadata")
303            if not metadata:
304                continue
305            metadata["include_info_sections"] = self._is_field_included(
306                ping_data, "include_info_sections", consider_all_history=False
307            )
308            metadata["include_client_id"] = self._is_field_included(
309                ping_data, "include_client_id"
310            )
311
312            # While technically unnecessary, the dictionary elements are re-ordered to match the
313            # currently deployed order and used to verify no difference in output.
314            pings[ping_name] = GleanPing.reorder_metadata(metadata)
315        return pings
def get_ping_descriptions(self) -> Dict[str, str]:
317    def get_ping_descriptions(self) -> Dict[str, str]:
318        return {
319            k: v["history"][-1]["description"] for k, v in self._get_ping_data().items()
320        }
def set_schema_url(self, metadata):
354    def set_schema_url(self, metadata):
355        """
356        Switch between the glean-min and glean schemas if the ping does not require
357        info sections as specified in the parsed ping info in probe scraper.
358        """
359        if not metadata["include_info_sections"]:
360            self.schema_url = MINIMUM_SCHEMA_URL.format(branch=self.branch_name)
361        else:
362            self.schema_url = DEFAULT_SCHEMA_URL.format(branch=self.branch_name)

Switch between the glean-min and glean schemas if the ping does not require info sections as specified in the parsed ping info in probe scraper.

def generate_schema( self, config, generic_schema=False) -> Dict[str, mozilla_schema_generator.schema.Schema]:
364    def generate_schema(self, config, generic_schema=False) -> Dict[str, Schema]:
365        pings = self.get_pings_and_pipeline_metadata()
366        schemas = {}
367
368        for ping, pipeline_meta in pings.items():
369            matchers = {
370                loc: m.clone(new_table_group=ping) for loc, m in config.matchers.items()
371            }
372
373            # Four newly introduced metric types were incorrectly deployed
374            # as repeated key/value structs in all Glean ping tables existing prior
375            # to November 2021. We maintain the incorrect fields for existing tables
376            # by disabling the associated matchers.
377            # Note that each of these types now has a "2" matcher ("text2", "url2", etc.)
378            # defined that will allow metrics of these types to be injected into proper
379            # structs. The gcp-ingestion repository includes logic to rewrite these
380            # metrics under the "2" names.
381            # See https://bugzilla.mozilla.org/show_bug.cgi?id=1737656
382            bq_identifier = "{bq_dataset_family}.{bq_table}".format(**pipeline_meta)
383            if bq_identifier in self.bug_1737656_affected_tables:
384                matchers = {
385                    loc: m
386                    for loc, m in matchers.items()
387                    if not m.matcher.get("bug_1737656_affected")
388                }
389
390            for matcher in matchers.values():
391                matcher.matcher["send_in_pings"]["contains"] = ping
392            new_config = Config(ping, matchers=matchers)
393
394            defaults = {"mozPipelineMetadata": pipeline_meta}
395
396            # Adjust the schema path if the ping does not require info sections
397            self.set_schema_url(pipeline_meta)
398            if generic_schema:  # Use the generic glean ping schema
399                schema = self.get_schema(generic_schema=True)
400                schema.schema.update(defaults)
401                schemas[new_config.name] = schema
402            else:
403                generated = super().generate_schema(new_config)
404                for schema in generated.values():
405                    # We want to override each individual key with assembled defaults,
406                    # but keep values _inside_ them if they have been set in the schemas.
407                    for key, value in defaults.items():
408                        if key not in schema.schema:
409                            schema.schema[key] = {}
410                        schema.schema[key].update(value)
411                schemas.update(generated)
412
413        return schemas
@staticmethod
def get_repos():
415    @staticmethod
416    def get_repos():
417        """
418        Retrieve metadata for all non-library Glean repositories
419        """
420        repos = GleanPing._get_json(GleanPing.repos_url)
421        return [repo for repo in repos if "library_names" not in repo]

Retrieve metadata for all non-library Glean repositories

f = <_io.TextIOWrapper name='/home/circleci/project/mozilla_schema_generator/configs/bug_1737656_affected.txt' mode='r' encoding='UTF-8'>
bug_1737656_affected_tables = ['burnham.baseline_v1', 'burnham.deletion_request_v1', 'burnham.discovery_v1', 'burnham.events_v1', 'burnham.metrics_v1', 'burnham.space_ship_ready_v1', 'burnham.starbase46_v1', 'firefox_desktop_background_update.background_update_v1', 'firefox_desktop_background_update.baseline_v1', 'firefox_desktop_background_update.deletion_request_v1', 'firefox_desktop_background_update.events_v1', 'firefox_desktop_background_update.metrics_v1', 'firefox_desktop.baseline_v1', 'firefox_desktop.deletion_request_v1', 'firefox_desktop.events_v1', 'firefox_desktop.fog_validation_v1', 'firefox_desktop.metrics_v1', 'firefox_installer.install_v1', 'firefox_launcher_process.launcher_process_failure_v1', 'messaging_system.cfr_v1', 'messaging_system.infobar_v1', 'messaging_system.moments_v1', 'messaging_system.onboarding_v1', 'messaging_system.personalization_experiment_v1', 'messaging_system.snippets_v1', 'messaging_system.spotlight_v1', 'messaging_system.undesired_events_v1', 'messaging_system.whats_new_panel_v1', 'mlhackweek_search.action_v1', 'mlhackweek_search.baseline_v1', 'mlhackweek_search.custom_v1', 'mlhackweek_search.deletion_request_v1', 'mlhackweek_search.events_v1', 'mlhackweek_search.metrics_v1', 'mozilla_lockbox.addresses_sync_v1', 'mozilla_lockbox.baseline_v1', 'mozilla_lockbox.bookmarks_sync_v1', 'mozilla_lockbox.creditcards_sync_v1', 'mozilla_lockbox.deletion_request_v1', 'mozilla_lockbox.events_v1', 'mozilla_lockbox.history_sync_v1', 'mozilla_lockbox.logins_sync_v1', 'mozilla_lockbox.metrics_v1', 'mozilla_lockbox.sync_v1', 'mozilla_lockbox.tabs_sync_v1', 'mozilla_mach.baseline_v1', 'mozilla_mach.deletion_request_v1', 'mozilla_mach.events_v1', 'mozilla_mach.metrics_v1', 'mozilla_mach.usage_v1', 'mozillavpn.deletion_request_v1', 'mozillavpn.main_v1', 'mozphab.baseline_v1', 'mozphab.deletion_request_v1', 'mozphab.events_v1', 'mozphab.metrics_v1', 'mozphab.usage_v1', 'org_mozilla_bergamot.custom_v1', 'org_mozilla_bergamot.deletion_request_v1', 
'org_mozilla_connect_firefox.baseline_v1', 'org_mozilla_connect_firefox.deletion_request_v1', 'org_mozilla_connect_firefox.events_v1', 'org_mozilla_connect_firefox.metrics_v1', 'org_mozilla_fenix.activation_v1', 'org_mozilla_fenix.addresses_sync_v1', 'org_mozilla_fenix.baseline_v1', 'org_mozilla_fenix.bookmarks_sync_v1', 'org_mozilla_fenix.creditcards_sync_v1', 'org_mozilla_fenix.deletion_request_v1', 'org_mozilla_fenix.events_v1', 'org_mozilla_fenix.first_session_v1', 'org_mozilla_fenix.fog_validation_v1', 'org_mozilla_fenix.history_sync_v1', 'org_mozilla_fenix.installation_v1', 'org_mozilla_fenix.logins_sync_v1', 'org_mozilla_fenix.metrics_v1', 'org_mozilla_fenix.migration_v1', 'org_mozilla_fenix.startup_timeline_v1', 'org_mozilla_fenix.sync_v1', 'org_mozilla_fenix.tabs_sync_v1', 'org_mozilla_fenix_nightly.activation_v1', 'org_mozilla_fenix_nightly.addresses_sync_v1', 'org_mozilla_fenix_nightly.baseline_v1', 'org_mozilla_fenix_nightly.bookmarks_sync_v1', 'org_mozilla_fenix_nightly.creditcards_sync_v1', 'org_mozilla_fenix_nightly.deletion_request_v1', 'org_mozilla_fenix_nightly.events_v1', 'org_mozilla_fenix_nightly.first_session_v1', 'org_mozilla_fenix_nightly.fog_validation_v1', 'org_mozilla_fenix_nightly.history_sync_v1', 'org_mozilla_fenix_nightly.installation_v1', 'org_mozilla_fenix_nightly.logins_sync_v1', 'org_mozilla_fenix_nightly.metrics_v1', 'org_mozilla_fenix_nightly.migration_v1', 'org_mozilla_fenix_nightly.startup_timeline_v1', 'org_mozilla_fenix_nightly.sync_v1', 'org_mozilla_fenix_nightly.tabs_sync_v1', 'org_mozilla_fennec_aurora.activation_v1', 'org_mozilla_fennec_aurora.addresses_sync_v1', 'org_mozilla_fennec_aurora.baseline_v1', 'org_mozilla_fennec_aurora.bookmarks_sync_v1', 'org_mozilla_fennec_aurora.creditcards_sync_v1', 'org_mozilla_fennec_aurora.deletion_request_v1', 'org_mozilla_fennec_aurora.events_v1', 'org_mozilla_fennec_aurora.first_session_v1', 'org_mozilla_fennec_aurora.fog_validation_v1', 'org_mozilla_fennec_aurora.history_sync_v1', 
'org_mozilla_fennec_aurora.installation_v1', 'org_mozilla_fennec_aurora.logins_sync_v1', 'org_mozilla_fennec_aurora.metrics_v1', 'org_mozilla_fennec_aurora.migration_v1', 'org_mozilla_fennec_aurora.startup_timeline_v1', 'org_mozilla_fennec_aurora.sync_v1', 'org_mozilla_fennec_aurora.tabs_sync_v1', 'org_mozilla_firefox_beta.activation_v1', 'org_mozilla_firefox_beta.addresses_sync_v1', 'org_mozilla_firefox_beta.baseline_v1', 'org_mozilla_firefox_beta.bookmarks_sync_v1', 'org_mozilla_firefox_beta.creditcards_sync_v1', 'org_mozilla_firefox_beta.deletion_request_v1', 'org_mozilla_firefox_beta.events_v1', 'org_mozilla_firefox_beta.first_session_v1', 'org_mozilla_firefox_beta.fog_validation_v1', 'org_mozilla_firefox_beta.history_sync_v1', 'org_mozilla_firefox_beta.installation_v1', 'org_mozilla_firefox_beta.logins_sync_v1', 'org_mozilla_firefox_beta.metrics_v1', 'org_mozilla_firefox_beta.migration_v1', 'org_mozilla_firefox_beta.startup_timeline_v1', 'org_mozilla_firefox_beta.sync_v1', 'org_mozilla_firefox_beta.tabs_sync_v1', 'org_mozilla_firefox.activation_v1', 'org_mozilla_firefox.addresses_sync_v1', 'org_mozilla_firefox.baseline_v1', 'org_mozilla_firefox.bookmarks_sync_v1', 'org_mozilla_firefox.creditcards_sync_v1', 'org_mozilla_firefox.deletion_request_v1', 'org_mozilla_firefox.events_v1', 'org_mozilla_firefox.first_session_v1', 'org_mozilla_firefox.fog_validation_v1', 'org_mozilla_firefox.history_sync_v1', 'org_mozilla_firefox.installation_v1', 'org_mozilla_firefox.logins_sync_v1', 'org_mozilla_firefox.metrics_v1', 'org_mozilla_firefox.migration_v1', 'org_mozilla_firefox.startup_timeline_v1', 'org_mozilla_firefox.sync_v1', 'org_mozilla_firefox.tabs_sync_v1', 'org_mozilla_firefoxreality.baseline_v1', 'org_mozilla_firefoxreality.deletion_request_v1', 'org_mozilla_firefoxreality.events_v1', 'org_mozilla_firefoxreality.launch_v1', 'org_mozilla_firefoxreality.metrics_v1', 'org_mozilla_focus_beta.activation_v1', 'org_mozilla_focus_beta.baseline_v1', 
'org_mozilla_focus_beta.deletion_request_v1', 'org_mozilla_focus_beta.events_v1', 'org_mozilla_focus_beta.metrics_v1', 'org_mozilla_focus.activation_v1', 'org_mozilla_focus.baseline_v1', 'org_mozilla_focus.deletion_request_v1', 'org_mozilla_focus.events_v1', 'org_mozilla_focus.metrics_v1', 'org_mozilla_focus_nightly.activation_v1', 'org_mozilla_focus_nightly.baseline_v1', 'org_mozilla_focus_nightly.deletion_request_v1', 'org_mozilla_focus_nightly.events_v1', 'org_mozilla_focus_nightly.metrics_v1', 'org_mozilla_ios_fennec.baseline_v1', 'org_mozilla_ios_fennec.deletion_request_v1', 'org_mozilla_ios_fennec.events_v1', 'org_mozilla_ios_fennec.metrics_v1', 'org_mozilla_ios_firefox.baseline_v1', 'org_mozilla_ios_firefox.deletion_request_v1', 'org_mozilla_ios_firefox.events_v1', 'org_mozilla_ios_firefox.metrics_v1', 'org_mozilla_ios_firefoxbeta.baseline_v1', 'org_mozilla_ios_firefoxbeta.deletion_request_v1', 'org_mozilla_ios_firefoxbeta.events_v1', 'org_mozilla_ios_firefoxbeta.metrics_v1', 'org_mozilla_ios_focus.baseline_v1', 'org_mozilla_ios_focus.deletion_request_v1', 'org_mozilla_ios_focus.events_v1', 'org_mozilla_ios_focus.metrics_v1', 'org_mozilla_ios_klar.baseline_v1', 'org_mozilla_ios_klar.deletion_request_v1', 'org_mozilla_ios_klar.events_v1', 'org_mozilla_ios_klar.metrics_v1', 'org_mozilla_ios_lockbox.baseline_v1', 'org_mozilla_ios_lockbox.deletion_request_v1', 'org_mozilla_ios_lockbox.events_v1', 'org_mozilla_ios_lockbox.metrics_v1', 'org_mozilla_klar.activation_v1', 'org_mozilla_klar.baseline_v1', 'org_mozilla_klar.deletion_request_v1', 'org_mozilla_klar.events_v1', 'org_mozilla_klar.metrics_v1', 'org_mozilla_mozregression.baseline_v1', 'org_mozilla_mozregression.deletion_request_v1', 'org_mozilla_mozregression.events_v1', 'org_mozilla_mozregression.metrics_v1', 'org_mozilla_mozregression.usage_v1', 'org_mozilla_reference_browser.baseline_v1', 'org_mozilla_reference_browser.deletion_request_v1', 'org_mozilla_reference_browser.events_v1', 
'org_mozilla_reference_browser.metrics_v1', 'org_mozilla_tv_firefox.baseline_v1', 'org_mozilla_tv_firefox.deletion_request_v1', 'org_mozilla_tv_firefox.events_v1', 'org_mozilla_tv_firefox.metrics_v1', 'org_mozilla_vrbrowser.addresses_sync_v1', 'org_mozilla_vrbrowser.baseline_v1', 'org_mozilla_vrbrowser.bookmarks_sync_v1', 'org_mozilla_vrbrowser.creditcards_sync_v1', 'org_mozilla_vrbrowser.deletion_request_v1', 'org_mozilla_vrbrowser.events_v1', 'org_mozilla_vrbrowser.history_sync_v1', 'org_mozilla_vrbrowser.logins_sync_v1', 'org_mozilla_vrbrowser.metrics_v1', 'org_mozilla_vrbrowser.session_end_v1', 'org_mozilla_vrbrowser.sync_v1', 'org_mozilla_vrbrowser.tabs_sync_v1', 'rally_core.deletion_request_v1', 'rally_core.demographics_v1', 'rally_core.enrollment_v1', 'rally_core.study_enrollment_v1', 'rally_core.study_unenrollment_v1', 'rally_core.uninstall_deletion_v1', 'rally_debug.deletion_request_v1', 'rally_debug.demographics_v1', 'rally_debug.enrollment_v1', 'rally_debug.study_enrollment_v1', 'rally_debug.study_unenrollment_v1', 'rally_debug.uninstall_deletion_v1', 'rally_study_zero_one.deletion_request_v1', 'rally_study_zero_one.rs01_event_v1', 'rally_study_zero_one.study_enrollment_v1', 'rally_zero_one.deletion_request_v1', 'rally_zero_one.measurements_v1', 'rally_zero_one.pioneer_enrollment_v1']