mozilla_schema_generator.glean_ping

  1# -*- coding: utf-8 -*-
  2
  3# This Source Code Form is subject to the terms of the Mozilla Public
  4# License, v. 2.0. If a copy of the MPL was not distributed with this
  5# file, You can obtain one at http://mozilla.org/MPL/2.0/.
  6
  7import copy
  8import logging
  9from pathlib import Path
 10from typing import Dict, List, Set
 11
 12from requests import HTTPError
 13
 14from .config import Config
 15from .generic_ping import GenericPing
 16from .probes import GleanProbe
 17from .schema import Schema
 18
 19ROOT_DIR = Path(__file__).parent
 20BUG_1737656_TXT = ROOT_DIR / "configs" / "bug_1737656_affected.txt"
 21
 22logger = logging.getLogger(__name__)
 23
 24DEFAULT_SCHEMA_URL = (
 25    "https://raw.githubusercontent.com"
 26    "/mozilla-services/mozilla-pipeline-schemas"
 27    "/{branch}/schemas/glean/glean/glean.1.schema.json"
 28)
 29
 30MINIMUM_SCHEMA_URL = (
 31    "https://raw.githubusercontent.com"
 32    "/mozilla-services/mozilla-pipeline-schemas"
 33    "/{branch}/schemas/glean/glean/glean-min.1.schema.json"
 34)
 35
 36
 37class GleanPing(GenericPing):
 38    probes_url_template = GenericPing.probe_info_base_url + "/glean/{}/metrics"
 39    ping_url_template = GenericPing.probe_info_base_url + "/glean/{}/pings"
 40    repos_url = GenericPing.probe_info_base_url + "/glean/repositories"
 41    dependencies_url_template = (
 42        GenericPing.probe_info_base_url + "/glean/{}/dependencies"
 43    )
 44
 45    default_dependencies = ["glean-core"]
 46
 47    with open(BUG_1737656_TXT, "r") as f:
 48        bug_1737656_affected_tables = [
 49            line.strip() for line in f.readlines() if line.strip()
 50        ]
 51
 52    def __init__(self, repo, **kwargs):  # TODO: Make env-url optional
 53        self.repo = repo
 54        self.repo_name = repo["name"]
 55        self.app_id = repo["app_id"]
 56        super().__init__(
 57            DEFAULT_SCHEMA_URL,
 58            DEFAULT_SCHEMA_URL,
 59            self.probes_url_template.format(self.repo_name),
 60            **kwargs,
 61        )
 62
 63    def get_schema(self, generic_schema=False) -> Schema:
 64        """
 65        Fetch schema via URL.
 66
 67        Unless *generic_schema* is set to true, this function makes some modifications
 68        to allow some workarounds for proper injection of metrics.
 69        """
 70        schema = super().get_schema()
 71        if generic_schema:
 72            return schema
 73
 74        # We need to inject placeholders for the url2, text2, etc. types as part
 75        # of mitigation for https://bugzilla.mozilla.org/show_bug.cgi?id=1737656
 76        for metric_name in ["labeled_rate", "jwe", "url", "text"]:
 77            metric1 = schema.get(
 78                ("properties", "metrics", "properties", metric_name)
 79            ).copy()
 80            metric1 = schema.set_schema_elem(
 81                ("properties", "metrics", "properties", metric_name + "2"),
 82                metric1,
 83            )
 84
 85        return schema
 86
 87    def get_dependencies(self):
 88        # Get all of the library dependencies for the application that
 89        # are also known about in the repositories file.
 90
 91        # The dependencies are specified using library names, but we need to
 92        # map those back to the name of the repository in the repository file.
 93        try:
 94            dependencies = self._get_json(
 95                self.dependencies_url_template.format(self.repo_name)
 96            )
 97        except HTTPError:
 98            logging.info(f"For {self.repo_name}, using default Glean dependencies")
 99            return self.default_dependencies
100
101        dependency_library_names = list(dependencies.keys())
102
103        repos = GleanPing._get_json(GleanPing.repos_url)
104        repos_by_dependency_name = {}
105        for repo in repos:
106            for library_name in repo.get("library_names", []):
107                repos_by_dependency_name[library_name] = repo["name"]
108
109        dependencies = []
110        for name in dependency_library_names:
111            if name in repos_by_dependency_name:
112                dependencies.append(repos_by_dependency_name[name])
113
114        if len(dependencies) == 0:
115            logging.info(f"For {self.repo_name}, using default Glean dependencies")
116            return self.default_dependencies
117
118        logging.info(f"For {self.repo_name}, found Glean dependencies: {dependencies}")
119        return dependencies
120
121    def get_probes(self) -> List[GleanProbe]:
122        data = self._get_json(self.probes_url)
123        probes = list(data.items())
124
125        for dependency in self.get_dependencies():
126            dependency_probes = self._get_json(
127                self.probes_url_template.format(dependency)
128            )
129            probes += list(dependency_probes.items())
130
131        pings = self.get_pings()
132
133        processed = []
134        for _id, defn in probes:
135            probe = GleanProbe(_id, defn, pings=pings)
136            processed.append(probe)
137
138            # Manual handling of incompatible schema changes
139            issue_118_affected = {
140                "fenix",
141                "fenix-nightly",
142                "firefox-android-nightly",
143                "firefox-android-beta",
144                "firefox-android-release",
145            }
146            if (
147                self.repo_name in issue_118_affected
148                and probe.get_name() == "installation.timestamp"
149            ):
150                logging.info(f"Writing column {probe.get_name()} for compatibility.")
151                # See: https://github.com/mozilla/mozilla-schema-generator/issues/118
152                # Search through history for the "string" type and add a copy of
153                # the probe at that time in history. The changepoint signifies
154                # this event.
155                changepoint_index = 0
156                for definition in probe.definition_history:
157                    if definition["type"] != probe.get_type():
158                        break
159                    changepoint_index += 1
160                # Modify the definition with the truncated history.
161                hist_defn = defn.copy()
162                hist_defn[probe.history_key] = probe.definition_history[
163                    changepoint_index:
164                ]
165                hist_defn["type"] = hist_defn[probe.history_key][0]["type"]
166                incompatible_probe_type = GleanProbe(_id, hist_defn, pings=pings)
167                processed.append(incompatible_probe_type)
168
169            # Handling probe type changes (Bug 1870317)
170            probe_types = {hist["type"] for hist in defn[probe.history_key]}
171            if len(probe_types) > 1:
172                # The probe type changed at some point in history.
173                # Create schema entry for each type.
174                hist_defn = defn.copy()
175
176                # No new entry needs to be created for the current probe type
177                probe_types.remove(defn["type"])
178
179                for hist in hist_defn[probe.history_key]:
180                    # Create a new entry for a historic type
181                    if hist["type"] in probe_types:
182                        hist_defn["type"] = hist["type"]
183                        probe = GleanProbe(_id, hist_defn, pings=pings)
184                        processed.append(probe)
185
186                        # Keep track of the types entries were already created for
187                        probe_types.remove(hist["type"])
188
189        return processed
190
191    def _get_ping_data(self) -> Dict[str, Dict]:
192        url = self.ping_url_template.format(self.repo_name)
193        ping_data = GleanPing._get_json(url)
194        for dependency in self.get_dependencies():
195            dependency_pings = self._get_json(self.ping_url_template.format(dependency))
196            ping_data.update(dependency_pings)
197        return ping_data
198
199    def _get_ping_data_without_dependencies(self) -> Dict[str, Dict]:
200        url = self.ping_url_template.format(self.repo_name)
201        ping_data = GleanPing._get_json(url)
202        return ping_data
203
204    def _get_dependency_pings(self, dependency):
205        return self._get_json(self.ping_url_template.format(dependency))
206
207    def get_pings(self) -> Set[str]:
208        return self._get_ping_data().keys()
209
210    @staticmethod
211    def apply_default_metadata(ping_metadata, default_metadata):
212        """apply_default_metadata recurses down into dicts nested
213        to an arbitrary depth, updating keys. The ``default_metadata`` is merged into
214        ``ping_metadata``.
215        :param ping_metadata: dict onto which the merge is executed
216        :param default_metadata: dct merged into ping_metadata
217        :return: None
218        """
219        for k, v in default_metadata.items():
220            if (
221                k in ping_metadata
222                and isinstance(ping_metadata[k], dict)
223                and isinstance(default_metadata[k], dict)
224            ):
225                GleanPing.apply_default_metadata(ping_metadata[k], default_metadata[k])
226            else:
227                ping_metadata[k] = default_metadata[k]
228
229    def _get_ping_data_and_dependencies_with_default_metadata(self) -> Dict[str, Dict]:
230        # Get the ping data with the pipeline metadata
231        ping_data = self._get_ping_data_without_dependencies()
232
233        # The ping endpoint for the dependency pings does not include any repo defined
234        # moz_pipeline_metadata_defaults so they need to be applied here.
235
236        # 1.  Get repo and pipeline default metadata.
237        repos = self.get_repos()
238        current_repo = next((x for x in repos if x.get("app_id") == self.app_id), {})
239        default_metadata = current_repo.get("moz_pipeline_metadata_defaults", {})
240
241        # 2.  Apply the default metadata to each dependency defined ping.
242
243        # Apply app-level metadata to pings defined in dependencies
244        app_metadata = current_repo.get("moz_pipeline_metadata", {})
245
246        for dependency in self.get_dependencies():
247            dependency_pings = self._get_dependency_pings(dependency)
248            for dependency_ping in dependency_pings.values():
249                # Although it is counter intuitive to apply the default metadata on top of the
250                # existing dependency ping metadata it does set the repo specific value for
251                # bq_dataset_family instead of using the dependency id for the bq_dataset_family
252                # value.
253                GleanPing.apply_default_metadata(
254                    dependency_ping.get("moz_pipeline_metadata"),
255                    copy.deepcopy(default_metadata),
256                )
257                # app-level ping properties take priority over the app defaults
258                metadata_override = app_metadata.get(dependency_ping["name"])
259                if metadata_override is not None:
260                    GleanPing.apply_default_metadata(
261                        dependency_ping.get("moz_pipeline_metadata"), metadata_override
262                    )
263            ping_data.update(dependency_pings)
264
265        return ping_data
266
267    @staticmethod
268    def reorder_metadata(metadata):
269        desired_order_list = [
270            "bq_dataset_family",
271            "bq_table",
272            "bq_metadata_format",
273            "include_info_sections",
274            "submission_timestamp_granularity",
275            "expiration_policy",
276            "override_attributes",
277            "jwe_mappings",
278        ]
279        reordered_metadata = {
280            k: metadata[k] for k in desired_order_list if k in metadata
281        }
282
283        # re-order jwe-mappings
284        desired_order_list = ["source_field_path", "decrypted_field_path"]
285        jwe_mapping_metadata = reordered_metadata.get("jwe_mappings")
286        if jwe_mapping_metadata:
287            reordered_jwe_mapping_metadata = []
288            for mapping in jwe_mapping_metadata:
289                reordered_jwe_mapping_metadata.append(
290                    {k: mapping[k] for k in desired_order_list if k in mapping}
291                )
292            reordered_metadata["jwe_mappings"] = reordered_jwe_mapping_metadata
293
294        # future proofing, in case there are other fields added at the ping top level
295        # add them to the end.
296        leftovers = {k: metadata[k] for k in set(metadata) - set(reordered_metadata)}
297        reordered_metadata = {**reordered_metadata, **leftovers}
298        return reordered_metadata
299
300    def get_pings_and_pipeline_metadata(self) -> Dict[str, Dict]:
301        pings = self._get_ping_data_and_dependencies_with_default_metadata()
302        for ping_name, ping_data in pings.items():
303            metadata = ping_data.get("moz_pipeline_metadata")
304            if not metadata:
305                continue
306            metadata["include_info_sections"] = self._is_field_included(
307                ping_data, "include_info_sections", consider_all_history=False
308            )
309            metadata["include_client_id"] = self._is_field_included(
310                ping_data, "include_client_id"
311            )
312
313            # While technically unnecessary, the dictionary elements are re-ordered to match the
314            # currently deployed order and used to verify no difference in output.
315            pings[ping_name] = GleanPing.reorder_metadata(metadata)
316        return pings
317
318    def get_ping_descriptions(self) -> Dict[str, str]:
319        return {
320            k: v["history"][-1]["description"] for k, v in self._get_ping_data().items()
321        }
322
323    @staticmethod
324    def _is_field_included(ping_data, field_name, consider_all_history=True) -> bool:
325        """Return false if the field exists and is false.
326
327        If `consider_all_history` is False, then only check the latest value in the ping history.
328
329        Otherwise, if the field is not found or true in one or more history entries,
330        true is returned.
331        """
332
333        # Default to true if not specified.
334        if "history" not in ping_data or len(ping_data["history"]) == 0:
335            return True
336
337        # Check if at some point in the past the field has already been deployed.
338        # And if the caller of this method wants to consider this history of the field.
339        # Keep them in the schema, even if the field has changed as
340        # removing fields is currently not supported.
341        # See https://bugzilla.mozilla.org/show_bug.cgi?id=1898105
342        # and https://bugzilla.mozilla.org/show_bug.cgi?id=1898105#c10
343        ping_history: list
344        if consider_all_history:
345            ping_history = ping_data["history"]
346        else:
347            ping_history = [ping_data["history"][-1]]
348        for history in ping_history:
349            if field_name not in history or history[field_name]:
350                return True
351
352        # The ping was created with include_info_sections = False. The fields can be excluded.
353        return False
354
355    def set_schema_url(self, metadata):
356        """
357        Switch between the glean-min and glean schemas if the ping does not require
358        info sections as specified in the parsed ping info in probe scraper.
359        """
360        if not metadata["include_info_sections"]:
361            self.schema_url = MINIMUM_SCHEMA_URL.format(branch=self.branch_name)
362        else:
363            self.schema_url = DEFAULT_SCHEMA_URL.format(branch=self.branch_name)
364
365    def generate_schema(self, config, generic_schema=False) -> Dict[str, Schema]:
366        pings = self.get_pings_and_pipeline_metadata()
367        schemas = {}
368
369        for ping, pipeline_meta in pings.items():
370            matchers = {
371                loc: m.clone(new_table_group=ping) for loc, m in config.matchers.items()
372            }
373
374            # Four newly introduced metric types were incorrectly deployed
375            # as repeated key/value structs in all Glean ping tables existing prior
376            # to November 2021. We maintain the incorrect fields for existing tables
377            # by disabling the associated matchers.
378            # Note that each of these types now has a "2" matcher ("text2", "url2", etc.)
379            # defined that will allow metrics of these types to be injected into proper
380            # structs. The gcp-ingestion repository includes logic to rewrite these
381            # metrics under the "2" names.
382            # See https://bugzilla.mozilla.org/show_bug.cgi?id=1737656
383            bq_identifier = "{bq_dataset_family}.{bq_table}".format(**pipeline_meta)
384            if bq_identifier in self.bug_1737656_affected_tables:
385                matchers = {
386                    loc: m
387                    for loc, m in matchers.items()
388                    if not m.matcher.get("bug_1737656_affected")
389                }
390
391            for matcher in matchers.values():
392                matcher.matcher["send_in_pings"]["contains"] = ping
393            new_config = Config(ping, matchers=matchers)
394
395            defaults = {"mozPipelineMetadata": pipeline_meta}
396
397            # Adjust the schema path if the ping does not require info sections
398            self.set_schema_url(pipeline_meta)
399            if generic_schema:  # Use the generic glean ping schema
400                schema = self.get_schema(generic_schema=True)
401                schema.schema.update(defaults)
402                schemas[new_config.name] = schema
403            else:
404                generated = super().generate_schema(new_config)
405                for schema in generated.values():
406                    # We want to override each individual key with assembled defaults,
407                    # but keep values _inside_ them if they have been set in the schemas.
408                    for key, value in defaults.items():
409                        if key not in schema.schema:
410                            schema.schema[key] = {}
411                        schema.schema[key].update(value)
412                schemas.update(generated)
413
414        return schemas
415
416    @staticmethod
417    def get_repos():
418        """
419        Retrieve metadata for all non-library Glean repositories
420        """
421        repos = GleanPing._get_json(GleanPing.repos_url)
422        return [repo for repo in repos if "library_names" not in repo]
ROOT_DIR = PosixPath('/home/circleci/project/mozilla_schema_generator')
BUG_1737656_TXT = PosixPath('/home/circleci/project/mozilla_schema_generator/configs/bug_1737656_affected.txt')
logger = <Logger mozilla_schema_generator.glean_ping (WARNING)>
DEFAULT_SCHEMA_URL = 'https://raw.githubusercontent.com/mozilla-services/mozilla-pipeline-schemas/{branch}/schemas/glean/glean/glean.1.schema.json'
MINIMUM_SCHEMA_URL = 'https://raw.githubusercontent.com/mozilla-services/mozilla-pipeline-schemas/{branch}/schemas/glean/glean/glean-min.1.schema.json'
 38class GleanPing(GenericPing):
 39    probes_url_template = GenericPing.probe_info_base_url + "/glean/{}/metrics"
 40    ping_url_template = GenericPing.probe_info_base_url + "/glean/{}/pings"
 41    repos_url = GenericPing.probe_info_base_url + "/glean/repositories"
 42    dependencies_url_template = (
 43        GenericPing.probe_info_base_url + "/glean/{}/dependencies"
 44    )
 45
 46    default_dependencies = ["glean-core"]
 47
 48    with open(BUG_1737656_TXT, "r") as f:
 49        bug_1737656_affected_tables = [
 50            line.strip() for line in f.readlines() if line.strip()
 51        ]
 52
 53    def __init__(self, repo, **kwargs):  # TODO: Make env-url optional
 54        self.repo = repo
 55        self.repo_name = repo["name"]
 56        self.app_id = repo["app_id"]
 57        super().__init__(
 58            DEFAULT_SCHEMA_URL,
 59            DEFAULT_SCHEMA_URL,
 60            self.probes_url_template.format(self.repo_name),
 61            **kwargs,
 62        )
 63
 64    def get_schema(self, generic_schema=False) -> Schema:
 65        """
 66        Fetch schema via URL.
 67
 68        Unless *generic_schema* is set to true, this function makes some modifications
 69        to allow some workarounds for proper injection of metrics.
 70        """
 71        schema = super().get_schema()
 72        if generic_schema:
 73            return schema
 74
 75        # We need to inject placeholders for the url2, text2, etc. types as part
 76        # of mitigation for https://bugzilla.mozilla.org/show_bug.cgi?id=1737656
 77        for metric_name in ["labeled_rate", "jwe", "url", "text"]:
 78            metric1 = schema.get(
 79                ("properties", "metrics", "properties", metric_name)
 80            ).copy()
 81            metric1 = schema.set_schema_elem(
 82                ("properties", "metrics", "properties", metric_name + "2"),
 83                metric1,
 84            )
 85
 86        return schema
 87
 88    def get_dependencies(self):
 89        # Get all of the library dependencies for the application that
 90        # are also known about in the repositories file.
 91
 92        # The dependencies are specified using library names, but we need to
 93        # map those back to the name of the repository in the repository file.
 94        try:
 95            dependencies = self._get_json(
 96                self.dependencies_url_template.format(self.repo_name)
 97            )
 98        except HTTPError:
 99            logging.info(f"For {self.repo_name}, using default Glean dependencies")
100            return self.default_dependencies
101
102        dependency_library_names = list(dependencies.keys())
103
104        repos = GleanPing._get_json(GleanPing.repos_url)
105        repos_by_dependency_name = {}
106        for repo in repos:
107            for library_name in repo.get("library_names", []):
108                repos_by_dependency_name[library_name] = repo["name"]
109
110        dependencies = []
111        for name in dependency_library_names:
112            if name in repos_by_dependency_name:
113                dependencies.append(repos_by_dependency_name[name])
114
115        if len(dependencies) == 0:
116            logging.info(f"For {self.repo_name}, using default Glean dependencies")
117            return self.default_dependencies
118
119        logging.info(f"For {self.repo_name}, found Glean dependencies: {dependencies}")
120        return dependencies
121
122    def get_probes(self) -> List[GleanProbe]:
123        data = self._get_json(self.probes_url)
124        probes = list(data.items())
125
126        for dependency in self.get_dependencies():
127            dependency_probes = self._get_json(
128                self.probes_url_template.format(dependency)
129            )
130            probes += list(dependency_probes.items())
131
132        pings = self.get_pings()
133
134        processed = []
135        for _id, defn in probes:
136            probe = GleanProbe(_id, defn, pings=pings)
137            processed.append(probe)
138
139            # Manual handling of incompatible schema changes
140            issue_118_affected = {
141                "fenix",
142                "fenix-nightly",
143                "firefox-android-nightly",
144                "firefox-android-beta",
145                "firefox-android-release",
146            }
147            if (
148                self.repo_name in issue_118_affected
149                and probe.get_name() == "installation.timestamp"
150            ):
151                logging.info(f"Writing column {probe.get_name()} for compatibility.")
152                # See: https://github.com/mozilla/mozilla-schema-generator/issues/118
153                # Search through history for the "string" type and add a copy of
154                # the probe at that time in history. The changepoint signifies
155                # this event.
156                changepoint_index = 0
157                for definition in probe.definition_history:
158                    if definition["type"] != probe.get_type():
159                        break
160                    changepoint_index += 1
161                # Modify the definition with the truncated history.
162                hist_defn = defn.copy()
163                hist_defn[probe.history_key] = probe.definition_history[
164                    changepoint_index:
165                ]
166                hist_defn["type"] = hist_defn[probe.history_key][0]["type"]
167                incompatible_probe_type = GleanProbe(_id, hist_defn, pings=pings)
168                processed.append(incompatible_probe_type)
169
170            # Handling probe type changes (Bug 1870317)
171            probe_types = {hist["type"] for hist in defn[probe.history_key]}
172            if len(probe_types) > 1:
173                # The probe type changed at some point in history.
174                # Create schema entry for each type.
175                hist_defn = defn.copy()
176
177                # No new entry needs to be created for the current probe type
178                probe_types.remove(defn["type"])
179
180                for hist in hist_defn[probe.history_key]:
181                    # Create a new entry for a historic type
182                    if hist["type"] in probe_types:
183                        hist_defn["type"] = hist["type"]
184                        probe = GleanProbe(_id, hist_defn, pings=pings)
185                        processed.append(probe)
186
187                        # Keep track of the types entries were already created for
188                        probe_types.remove(hist["type"])
189
190        return processed
191
192    def _get_ping_data(self) -> Dict[str, Dict]:
193        url = self.ping_url_template.format(self.repo_name)
194        ping_data = GleanPing._get_json(url)
195        for dependency in self.get_dependencies():
196            dependency_pings = self._get_json(self.ping_url_template.format(dependency))
197            ping_data.update(dependency_pings)
198        return ping_data
199
200    def _get_ping_data_without_dependencies(self) -> Dict[str, Dict]:
201        url = self.ping_url_template.format(self.repo_name)
202        ping_data = GleanPing._get_json(url)
203        return ping_data
204
205    def _get_dependency_pings(self, dependency):
206        return self._get_json(self.ping_url_template.format(dependency))
207
208    def get_pings(self) -> Set[str]:
209        return self._get_ping_data().keys()
210
211    @staticmethod
212    def apply_default_metadata(ping_metadata, default_metadata):
213        """apply_default_metadata recurses down into dicts nested
214        to an arbitrary depth, updating keys. The ``default_metadata`` is merged into
215        ``ping_metadata``.
216        :param ping_metadata: dict onto which the merge is executed
217        :param default_metadata: dct merged into ping_metadata
218        :return: None
219        """
220        for k, v in default_metadata.items():
221            if (
222                k in ping_metadata
223                and isinstance(ping_metadata[k], dict)
224                and isinstance(default_metadata[k], dict)
225            ):
226                GleanPing.apply_default_metadata(ping_metadata[k], default_metadata[k])
227            else:
228                ping_metadata[k] = default_metadata[k]
229
230    def _get_ping_data_and_dependencies_with_default_metadata(self) -> Dict[str, Dict]:
231        # Get the ping data with the pipeline metadata
232        ping_data = self._get_ping_data_without_dependencies()
233
234        # The ping endpoint for the dependency pings does not include any repo defined
235        # moz_pipeline_metadata_defaults so they need to be applied here.
236
237        # 1.  Get repo and pipeline default metadata.
238        repos = self.get_repos()
239        current_repo = next((x for x in repos if x.get("app_id") == self.app_id), {})
240        default_metadata = current_repo.get("moz_pipeline_metadata_defaults", {})
241
242        # 2.  Apply the default metadata to each dependency defined ping.
243
244        # Apply app-level metadata to pings defined in dependencies
245        app_metadata = current_repo.get("moz_pipeline_metadata", {})
246
247        for dependency in self.get_dependencies():
248            dependency_pings = self._get_dependency_pings(dependency)
249            for dependency_ping in dependency_pings.values():
250                # Although it is counter intuitive to apply the default metadata on top of the
251                # existing dependency ping metadata it does set the repo specific value for
252                # bq_dataset_family instead of using the dependency id for the bq_dataset_family
253                # value.
254                GleanPing.apply_default_metadata(
255                    dependency_ping.get("moz_pipeline_metadata"),
256                    copy.deepcopy(default_metadata),
257                )
258                # app-level ping properties take priority over the app defaults
259                metadata_override = app_metadata.get(dependency_ping["name"])
260                if metadata_override is not None:
261                    GleanPing.apply_default_metadata(
262                        dependency_ping.get("moz_pipeline_metadata"), metadata_override
263                    )
264            ping_data.update(dependency_pings)
265
266        return ping_data
267
268    @staticmethod
269    def reorder_metadata(metadata):
270        desired_order_list = [
271            "bq_dataset_family",
272            "bq_table",
273            "bq_metadata_format",
274            "include_info_sections",
275            "submission_timestamp_granularity",
276            "expiration_policy",
277            "override_attributes",
278            "jwe_mappings",
279        ]
280        reordered_metadata = {
281            k: metadata[k] for k in desired_order_list if k in metadata
282        }
283
284        # re-order jwe-mappings
285        desired_order_list = ["source_field_path", "decrypted_field_path"]
286        jwe_mapping_metadata = reordered_metadata.get("jwe_mappings")
287        if jwe_mapping_metadata:
288            reordered_jwe_mapping_metadata = []
289            for mapping in jwe_mapping_metadata:
290                reordered_jwe_mapping_metadata.append(
291                    {k: mapping[k] for k in desired_order_list if k in mapping}
292                )
293            reordered_metadata["jwe_mappings"] = reordered_jwe_mapping_metadata
294
295        # future proofing, in case there are other fields added at the ping top level
296        # add them to the end.
297        leftovers = {k: metadata[k] for k in set(metadata) - set(reordered_metadata)}
298        reordered_metadata = {**reordered_metadata, **leftovers}
299        return reordered_metadata
300
301    def get_pings_and_pipeline_metadata(self) -> Dict[str, Dict]:
302        pings = self._get_ping_data_and_dependencies_with_default_metadata()
303        for ping_name, ping_data in pings.items():
304            metadata = ping_data.get("moz_pipeline_metadata")
305            if not metadata:
306                continue
307            metadata["include_info_sections"] = self._is_field_included(
308                ping_data, "include_info_sections", consider_all_history=False
309            )
310            metadata["include_client_id"] = self._is_field_included(
311                ping_data, "include_client_id"
312            )
313
314            # While technically unnecessary, the dictionary elements are re-ordered to match the
315            # currently deployed order and used to verify no difference in output.
316            pings[ping_name] = GleanPing.reorder_metadata(metadata)
317        return pings
318
319    def get_ping_descriptions(self) -> Dict[str, str]:
320        return {
321            k: v["history"][-1]["description"] for k, v in self._get_ping_data().items()
322        }
323
324    @staticmethod
325    def _is_field_included(ping_data, field_name, consider_all_history=True) -> bool:
326        """Return false if the field exists and is false.
327
328        If `consider_all_history` is False, then only check the latest value in the ping history.
329
330        Otherwise, if the field is not found or true in one or more history entries,
331        true is returned.
332        """
333
334        # Default to true if not specified.
335        if "history" not in ping_data or len(ping_data["history"]) == 0:
336            return True
337
338        # Check if at some point in the past the field has already been deployed.
339        # And if the caller of this method wants to consider this history of the field.
340        # Keep them in the schema, even if the field has changed as
341        # removing fields is currently not supported.
342        # See https://bugzilla.mozilla.org/show_bug.cgi?id=1898105
343        # and https://bugzilla.mozilla.org/show_bug.cgi?id=1898105#c10
344        ping_history: list
345        if consider_all_history:
346            ping_history = ping_data["history"]
347        else:
348            ping_history = [ping_data["history"][-1]]
349        for history in ping_history:
350            if field_name not in history or history[field_name]:
351                return True
352
353        # The ping was created with include_info_sections = False. The fields can be excluded.
354        return False
355
356    def set_schema_url(self, metadata):
357        """
358        Switch between the glean-min and glean schemas if the ping does not require
359        info sections as specified in the parsed ping info in probe scraper.
360        """
361        if not metadata["include_info_sections"]:
362            self.schema_url = MINIMUM_SCHEMA_URL.format(branch=self.branch_name)
363        else:
364            self.schema_url = DEFAULT_SCHEMA_URL.format(branch=self.branch_name)
365
366    def generate_schema(self, config, generic_schema=False) -> Dict[str, Schema]:
367        pings = self.get_pings_and_pipeline_metadata()
368        schemas = {}
369
370        for ping, pipeline_meta in pings.items():
371            matchers = {
372                loc: m.clone(new_table_group=ping) for loc, m in config.matchers.items()
373            }
374
375            # Four newly introduced metric types were incorrectly deployed
376            # as repeated key/value structs in all Glean ping tables existing prior
377            # to November 2021. We maintain the incorrect fields for existing tables
378            # by disabling the associated matchers.
379            # Note that each of these types now has a "2" matcher ("text2", "url2", etc.)
380            # defined that will allow metrics of these types to be injected into proper
381            # structs. The gcp-ingestion repository includes logic to rewrite these
382            # metrics under the "2" names.
383            # See https://bugzilla.mozilla.org/show_bug.cgi?id=1737656
384            bq_identifier = "{bq_dataset_family}.{bq_table}".format(**pipeline_meta)
385            if bq_identifier in self.bug_1737656_affected_tables:
386                matchers = {
387                    loc: m
388                    for loc, m in matchers.items()
389                    if not m.matcher.get("bug_1737656_affected")
390                }
391
392            for matcher in matchers.values():
393                matcher.matcher["send_in_pings"]["contains"] = ping
394            new_config = Config(ping, matchers=matchers)
395
396            defaults = {"mozPipelineMetadata": pipeline_meta}
397
398            # Adjust the schema path if the ping does not require info sections
399            self.set_schema_url(pipeline_meta)
400            if generic_schema:  # Use the generic glean ping schema
401                schema = self.get_schema(generic_schema=True)
402                schema.schema.update(defaults)
403                schemas[new_config.name] = schema
404            else:
405                generated = super().generate_schema(new_config)
406                for schema in generated.values():
407                    # We want to override each individual key with assembled defaults,
408                    # but keep values _inside_ them if they have been set in the schemas.
409                    for key, value in defaults.items():
410                        if key not in schema.schema:
411                            schema.schema[key] = {}
412                        schema.schema[key].update(value)
413                schemas.update(generated)
414
415        return schemas
416
417    @staticmethod
418    def get_repos():
419        """
420        Retrieve metadata for all non-library Glean repositories
421        """
422        repos = GleanPing._get_json(GleanPing.repos_url)
423        return [repo for repo in repos if "library_names" not in repo]
GleanPing(repo, **kwargs)
53    def __init__(self, repo, **kwargs):  # TODO: Make env-url optional
54        self.repo = repo
55        self.repo_name = repo["name"]
56        self.app_id = repo["app_id"]
57        super().__init__(
58            DEFAULT_SCHEMA_URL,
59            DEFAULT_SCHEMA_URL,
60            self.probes_url_template.format(self.repo_name),
61            **kwargs,
62        )
probes_url_template = 'https://probeinfo.telemetry.mozilla.org/glean/{}/metrics'
ping_url_template = 'https://probeinfo.telemetry.mozilla.org/glean/{}/pings'
repos_url = 'https://probeinfo.telemetry.mozilla.org/glean/repositories'
dependencies_url_template = 'https://probeinfo.telemetry.mozilla.org/glean/{}/dependencies'
default_dependencies = ['glean-core']
repo
repo_name
app_id
def get_schema(self, generic_schema=False) -> mozilla_schema_generator.schema.Schema:
64    def get_schema(self, generic_schema=False) -> Schema:
65        """
66        Fetch schema via URL.
67
68        Unless *generic_schema* is set to true, this function makes some modifications
69        to allow some workarounds for proper injection of metrics.
70        """
71        schema = super().get_schema()
72        if generic_schema:
73            return schema
74
75        # We need to inject placeholders for the url2, text2, etc. types as part
76        # of mitigation for https://bugzilla.mozilla.org/show_bug.cgi?id=1737656
77        for metric_name in ["labeled_rate", "jwe", "url", "text"]:
78            metric1 = schema.get(
79                ("properties", "metrics", "properties", metric_name)
80            ).copy()
81            metric1 = schema.set_schema_elem(
82                ("properties", "metrics", "properties", metric_name + "2"),
83                metric1,
84            )
85
86        return schema

Fetch schema via URL.

Unless generic_schema is set to true, this function makes some modifications to allow some workarounds for proper injection of metrics.

def get_dependencies(self):
 88    def get_dependencies(self):
 89        # Get all of the library dependencies for the application that
 90        # are also known about in the repositories file.
 91
 92        # The dependencies are specified using library names, but we need to
 93        # map those back to the name of the repository in the repository file.
 94        try:
 95            dependencies = self._get_json(
 96                self.dependencies_url_template.format(self.repo_name)
 97            )
 98        except HTTPError:
 99            logging.info(f"For {self.repo_name}, using default Glean dependencies")
100            return self.default_dependencies
101
102        dependency_library_names = list(dependencies.keys())
103
104        repos = GleanPing._get_json(GleanPing.repos_url)
105        repos_by_dependency_name = {}
106        for repo in repos:
107            for library_name in repo.get("library_names", []):
108                repos_by_dependency_name[library_name] = repo["name"]
109
110        dependencies = []
111        for name in dependency_library_names:
112            if name in repos_by_dependency_name:
113                dependencies.append(repos_by_dependency_name[name])
114
115        if len(dependencies) == 0:
116            logging.info(f"For {self.repo_name}, using default Glean dependencies")
117            return self.default_dependencies
118
119        logging.info(f"For {self.repo_name}, found Glean dependencies: {dependencies}")
120        return dependencies
def get_probes(self) -> List[mozilla_schema_generator.probes.GleanProbe]:
    def get_probes(self) -> List[GleanProbe]:
        """Build the probes of this repository and of all its dependencies.

        Every fetched probe definition yields one ``GleanProbe``. Additional
        probes are appended for definitions whose type changed over time, so
        that there is an entry for each historic type as well.

        :return: list of probes, in fetch order, with any extra historic-type
            probes placed directly after the probe they were derived from
        """
        data = self._get_json(self.probes_url)
        probes = list(data.items())

        # Append the probes of each dependency after the repository's own.
        for dependency in self.get_dependencies():
            dependency_probes = self._get_json(
                self.probes_url_template.format(dependency)
            )
            probes += list(dependency_probes.items())

        pings = self.get_pings()

        processed = []
        for _id, defn in probes:
            probe = GleanProbe(_id, defn, pings=pings)
            processed.append(probe)

            # Manual handling of incompatible schema changes
            issue_118_affected = {
                "fenix",
                "fenix-nightly",
                "firefox-android-nightly",
                "firefox-android-beta",
                "firefox-android-release",
            }
            if (
                self.repo_name in issue_118_affected
                and probe.get_name() == "installation.timestamp"
            ):
                logging.info(f"Writing column {probe.get_name()} for compatibility.")
                # See: https://github.com/mozilla/mozilla-schema-generator/issues/118
                # Search through history for the "string" type and add a copy of
                # the probe at that time in history. The changepoint signifies
                # this event.
                # changepoint_index ends up as the index of the first history
                # entry whose type differs from the probe's current type.
                changepoint_index = 0
                for definition in probe.definition_history:
                    if definition["type"] != probe.get_type():
                        break
                    changepoint_index += 1
                # Modify the definition with the truncated history.
                # defn.copy() is shallow: only the top-level keys assigned
                # below are independent of the original definition.
                hist_defn = defn.copy()
                hist_defn[probe.history_key] = probe.definition_history[
                    changepoint_index:
                ]
                # NOTE(review): this indexes entry 0 of the truncated history,
                # so it relies on a type change existing — confirm.
                hist_defn["type"] = hist_defn[probe.history_key][0]["type"]
                incompatible_probe_type = GleanProbe(_id, hist_defn, pings=pings)
                processed.append(incompatible_probe_type)

            # Handling probe type changes (Bug 1870317)
            probe_types = {hist["type"] for hist in defn[probe.history_key]}
            if len(probe_types) > 1:
                # The probe type changed at some point in history.
                # Create schema entry for each type.
                hist_defn = defn.copy()

                # No new entry needs to be created for the current probe type
                probe_types.remove(defn["type"])

                for hist in hist_defn[probe.history_key]:
                    # Create a new entry for a historic type
                    if hist["type"] in probe_types:
                        # NOTE(review): the same hist_defn dict is mutated and
                        # handed to every probe created in this loop; whether
                        # GleanProbe keeps a reference to it is not visible
                        # here — confirm.
                        hist_defn["type"] = hist["type"]
                        probe = GleanProbe(_id, hist_defn, pings=pings)
                        processed.append(probe)

                        # Keep track of the types entries were already created for
                        probe_types.remove(hist["type"])

        return processed
def get_pings(self) -> Set[str]:
208    def get_pings(self) -> Set[str]:
209        return self._get_ping_data().keys()
@staticmethod
def apply_default_metadata(ping_metadata, default_metadata):
211    @staticmethod
212    def apply_default_metadata(ping_metadata, default_metadata):
213        """apply_default_metadata recurses down into dicts nested
214        to an arbitrary depth, updating keys. The ``default_metadata`` is merged into
215        ``ping_metadata``.
216        :param ping_metadata: dict onto which the merge is executed
217        :param default_metadata: dct merged into ping_metadata
218        :return: None
219        """
220        for k, v in default_metadata.items():
221            if (
222                k in ping_metadata
223                and isinstance(ping_metadata[k], dict)
224                and isinstance(default_metadata[k], dict)
225            ):
226                GleanPing.apply_default_metadata(ping_metadata[k], default_metadata[k])
227            else:
228                ping_metadata[k] = default_metadata[k]

apply_default_metadata recurses down into dicts nested to an arbitrary depth, updating keys. The default_metadata is merged into ping_metadata.

Parameters
  • ping_metadata: dict onto which the merge is executed
  • default_metadata: dict merged into ping_metadata
Returns

None

@staticmethod
def reorder_metadata(metadata):
268    @staticmethod
269    def reorder_metadata(metadata):
270        desired_order_list = [
271            "bq_dataset_family",
272            "bq_table",
273            "bq_metadata_format",
274            "include_info_sections",
275            "submission_timestamp_granularity",
276            "expiration_policy",
277            "override_attributes",
278            "jwe_mappings",
279        ]
280        reordered_metadata = {
281            k: metadata[k] for k in desired_order_list if k in metadata
282        }
283
284        # re-order jwe-mappings
285        desired_order_list = ["source_field_path", "decrypted_field_path"]
286        jwe_mapping_metadata = reordered_metadata.get("jwe_mappings")
287        if jwe_mapping_metadata:
288            reordered_jwe_mapping_metadata = []
289            for mapping in jwe_mapping_metadata:
290                reordered_jwe_mapping_metadata.append(
291                    {k: mapping[k] for k in desired_order_list if k in mapping}
292                )
293            reordered_metadata["jwe_mappings"] = reordered_jwe_mapping_metadata
294
295        # future proofing, in case there are other fields added at the ping top level
296        # add them to the end.
297        leftovers = {k: metadata[k] for k in set(metadata) - set(reordered_metadata)}
298        reordered_metadata = {**reordered_metadata, **leftovers}
299        return reordered_metadata
def get_pings_and_pipeline_metadata(self) -> Dict[str, Dict]:
301    def get_pings_and_pipeline_metadata(self) -> Dict[str, Dict]:
302        pings = self._get_ping_data_and_dependencies_with_default_metadata()
303        for ping_name, ping_data in pings.items():
304            metadata = ping_data.get("moz_pipeline_metadata")
305            if not metadata:
306                continue
307            metadata["include_info_sections"] = self._is_field_included(
308                ping_data, "include_info_sections", consider_all_history=False
309            )
310            metadata["include_client_id"] = self._is_field_included(
311                ping_data, "include_client_id"
312            )
313
314            # While technically unnecessary, the dictionary elements are re-ordered to match the
315            # currently deployed order and used to verify no difference in output.
316            pings[ping_name] = GleanPing.reorder_metadata(metadata)
317        return pings
def get_ping_descriptions(self) -> Dict[str, str]:
319    def get_ping_descriptions(self) -> Dict[str, str]:
320        return {
321            k: v["history"][-1]["description"] for k, v in self._get_ping_data().items()
322        }
def set_schema_url(self, metadata):
356    def set_schema_url(self, metadata):
357        """
358        Switch between the glean-min and glean schemas if the ping does not require
359        info sections as specified in the parsed ping info in probe scraper.
360        """
361        if not metadata["include_info_sections"]:
362            self.schema_url = MINIMUM_SCHEMA_URL.format(branch=self.branch_name)
363        else:
364            self.schema_url = DEFAULT_SCHEMA_URL.format(branch=self.branch_name)

Switch between the glean-min and glean schemas if the ping does not require info sections as specified in the parsed ping info in probe scraper.

def generate_schema( self, config, generic_schema=False) -> Dict[str, mozilla_schema_generator.schema.Schema]:
366    def generate_schema(self, config, generic_schema=False) -> Dict[str, Schema]:
367        pings = self.get_pings_and_pipeline_metadata()
368        schemas = {}
369
370        for ping, pipeline_meta in pings.items():
371            matchers = {
372                loc: m.clone(new_table_group=ping) for loc, m in config.matchers.items()
373            }
374
375            # Four newly introduced metric types were incorrectly deployed
376            # as repeated key/value structs in all Glean ping tables existing prior
377            # to November 2021. We maintain the incorrect fields for existing tables
378            # by disabling the associated matchers.
379            # Note that each of these types now has a "2" matcher ("text2", "url2", etc.)
380            # defined that will allow metrics of these types to be injected into proper
381            # structs. The gcp-ingestion repository includes logic to rewrite these
382            # metrics under the "2" names.
383            # See https://bugzilla.mozilla.org/show_bug.cgi?id=1737656
384            bq_identifier = "{bq_dataset_family}.{bq_table}".format(**pipeline_meta)
385            if bq_identifier in self.bug_1737656_affected_tables:
386                matchers = {
387                    loc: m
388                    for loc, m in matchers.items()
389                    if not m.matcher.get("bug_1737656_affected")
390                }
391
392            for matcher in matchers.values():
393                matcher.matcher["send_in_pings"]["contains"] = ping
394            new_config = Config(ping, matchers=matchers)
395
396            defaults = {"mozPipelineMetadata": pipeline_meta}
397
398            # Adjust the schema path if the ping does not require info sections
399            self.set_schema_url(pipeline_meta)
400            if generic_schema:  # Use the generic glean ping schema
401                schema = self.get_schema(generic_schema=True)
402                schema.schema.update(defaults)
403                schemas[new_config.name] = schema
404            else:
405                generated = super().generate_schema(new_config)
406                for schema in generated.values():
407                    # We want to override each individual key with assembled defaults,
408                    # but keep values _inside_ them if they have been set in the schemas.
409                    for key, value in defaults.items():
410                        if key not in schema.schema:
411                            schema.schema[key] = {}
412                        schema.schema[key].update(value)
413                schemas.update(generated)
414
415        return schemas
@staticmethod
def get_repos():
417    @staticmethod
418    def get_repos():
419        """
420        Retrieve metadata for all non-library Glean repositories
421        """
422        repos = GleanPing._get_json(GleanPing.repos_url)
423        return [repo for repo in repos if "library_names" not in repo]

Retrieve metadata for all non-library Glean repositories

f = <_io.TextIOWrapper name='/home/circleci/project/mozilla_schema_generator/configs/bug_1737656_affected.txt' mode='r' encoding='UTF-8'>
bug_1737656_affected_tables = ['burnham.baseline_v1', 'burnham.deletion_request_v1', 'burnham.discovery_v1', 'burnham.events_v1', 'burnham.metrics_v1', 'burnham.space_ship_ready_v1', 'burnham.starbase46_v1', 'firefox_desktop_background_update.background_update_v1', 'firefox_desktop_background_update.baseline_v1', 'firefox_desktop_background_update.deletion_request_v1', 'firefox_desktop_background_update.events_v1', 'firefox_desktop_background_update.metrics_v1', 'firefox_desktop.baseline_v1', 'firefox_desktop.deletion_request_v1', 'firefox_desktop.events_v1', 'firefox_desktop.fog_validation_v1', 'firefox_desktop.metrics_v1', 'firefox_installer.install_v1', 'firefox_launcher_process.launcher_process_failure_v1', 'messaging_system.cfr_v1', 'messaging_system.infobar_v1', 'messaging_system.moments_v1', 'messaging_system.onboarding_v1', 'messaging_system.personalization_experiment_v1', 'messaging_system.snippets_v1', 'messaging_system.spotlight_v1', 'messaging_system.undesired_events_v1', 'messaging_system.whats_new_panel_v1', 'mlhackweek_search.action_v1', 'mlhackweek_search.baseline_v1', 'mlhackweek_search.custom_v1', 'mlhackweek_search.deletion_request_v1', 'mlhackweek_search.events_v1', 'mlhackweek_search.metrics_v1', 'mozilla_lockbox.addresses_sync_v1', 'mozilla_lockbox.baseline_v1', 'mozilla_lockbox.bookmarks_sync_v1', 'mozilla_lockbox.creditcards_sync_v1', 'mozilla_lockbox.deletion_request_v1', 'mozilla_lockbox.events_v1', 'mozilla_lockbox.history_sync_v1', 'mozilla_lockbox.logins_sync_v1', 'mozilla_lockbox.metrics_v1', 'mozilla_lockbox.sync_v1', 'mozilla_lockbox.tabs_sync_v1', 'mozilla_mach.baseline_v1', 'mozilla_mach.deletion_request_v1', 'mozilla_mach.events_v1', 'mozilla_mach.metrics_v1', 'mozilla_mach.usage_v1', 'mozillavpn.deletion_request_v1', 'mozillavpn.main_v1', 'mozphab.baseline_v1', 'mozphab.deletion_request_v1', 'mozphab.events_v1', 'mozphab.metrics_v1', 'mozphab.usage_v1', 'org_mozilla_bergamot.custom_v1', 'org_mozilla_bergamot.deletion_request_v1', 
'org_mozilla_connect_firefox.baseline_v1', 'org_mozilla_connect_firefox.deletion_request_v1', 'org_mozilla_connect_firefox.events_v1', 'org_mozilla_connect_firefox.metrics_v1', 'org_mozilla_fenix.activation_v1', 'org_mozilla_fenix.addresses_sync_v1', 'org_mozilla_fenix.baseline_v1', 'org_mozilla_fenix.bookmarks_sync_v1', 'org_mozilla_fenix.creditcards_sync_v1', 'org_mozilla_fenix.deletion_request_v1', 'org_mozilla_fenix.events_v1', 'org_mozilla_fenix.first_session_v1', 'org_mozilla_fenix.fog_validation_v1', 'org_mozilla_fenix.history_sync_v1', 'org_mozilla_fenix.installation_v1', 'org_mozilla_fenix.logins_sync_v1', 'org_mozilla_fenix.metrics_v1', 'org_mozilla_fenix.migration_v1', 'org_mozilla_fenix.startup_timeline_v1', 'org_mozilla_fenix.sync_v1', 'org_mozilla_fenix.tabs_sync_v1', 'org_mozilla_fenix_nightly.activation_v1', 'org_mozilla_fenix_nightly.addresses_sync_v1', 'org_mozilla_fenix_nightly.baseline_v1', 'org_mozilla_fenix_nightly.bookmarks_sync_v1', 'org_mozilla_fenix_nightly.creditcards_sync_v1', 'org_mozilla_fenix_nightly.deletion_request_v1', 'org_mozilla_fenix_nightly.events_v1', 'org_mozilla_fenix_nightly.first_session_v1', 'org_mozilla_fenix_nightly.fog_validation_v1', 'org_mozilla_fenix_nightly.history_sync_v1', 'org_mozilla_fenix_nightly.installation_v1', 'org_mozilla_fenix_nightly.logins_sync_v1', 'org_mozilla_fenix_nightly.metrics_v1', 'org_mozilla_fenix_nightly.migration_v1', 'org_mozilla_fenix_nightly.startup_timeline_v1', 'org_mozilla_fenix_nightly.sync_v1', 'org_mozilla_fenix_nightly.tabs_sync_v1', 'org_mozilla_fennec_aurora.activation_v1', 'org_mozilla_fennec_aurora.addresses_sync_v1', 'org_mozilla_fennec_aurora.baseline_v1', 'org_mozilla_fennec_aurora.bookmarks_sync_v1', 'org_mozilla_fennec_aurora.creditcards_sync_v1', 'org_mozilla_fennec_aurora.deletion_request_v1', 'org_mozilla_fennec_aurora.events_v1', 'org_mozilla_fennec_aurora.first_session_v1', 'org_mozilla_fennec_aurora.fog_validation_v1', 'org_mozilla_fennec_aurora.history_sync_v1', 
'org_mozilla_fennec_aurora.installation_v1', 'org_mozilla_fennec_aurora.logins_sync_v1', 'org_mozilla_fennec_aurora.metrics_v1', 'org_mozilla_fennec_aurora.migration_v1', 'org_mozilla_fennec_aurora.startup_timeline_v1', 'org_mozilla_fennec_aurora.sync_v1', 'org_mozilla_fennec_aurora.tabs_sync_v1', 'org_mozilla_firefox_beta.activation_v1', 'org_mozilla_firefox_beta.addresses_sync_v1', 'org_mozilla_firefox_beta.baseline_v1', 'org_mozilla_firefox_beta.bookmarks_sync_v1', 'org_mozilla_firefox_beta.creditcards_sync_v1', 'org_mozilla_firefox_beta.deletion_request_v1', 'org_mozilla_firefox_beta.events_v1', 'org_mozilla_firefox_beta.first_session_v1', 'org_mozilla_firefox_beta.fog_validation_v1', 'org_mozilla_firefox_beta.history_sync_v1', 'org_mozilla_firefox_beta.installation_v1', 'org_mozilla_firefox_beta.logins_sync_v1', 'org_mozilla_firefox_beta.metrics_v1', 'org_mozilla_firefox_beta.migration_v1', 'org_mozilla_firefox_beta.startup_timeline_v1', 'org_mozilla_firefox_beta.sync_v1', 'org_mozilla_firefox_beta.tabs_sync_v1', 'org_mozilla_firefox.activation_v1', 'org_mozilla_firefox.addresses_sync_v1', 'org_mozilla_firefox.baseline_v1', 'org_mozilla_firefox.bookmarks_sync_v1', 'org_mozilla_firefox.creditcards_sync_v1', 'org_mozilla_firefox.deletion_request_v1', 'org_mozilla_firefox.events_v1', 'org_mozilla_firefox.first_session_v1', 'org_mozilla_firefox.fog_validation_v1', 'org_mozilla_firefox.history_sync_v1', 'org_mozilla_firefox.installation_v1', 'org_mozilla_firefox.logins_sync_v1', 'org_mozilla_firefox.metrics_v1', 'org_mozilla_firefox.migration_v1', 'org_mozilla_firefox.startup_timeline_v1', 'org_mozilla_firefox.sync_v1', 'org_mozilla_firefox.tabs_sync_v1', 'org_mozilla_firefoxreality.baseline_v1', 'org_mozilla_firefoxreality.deletion_request_v1', 'org_mozilla_firefoxreality.events_v1', 'org_mozilla_firefoxreality.launch_v1', 'org_mozilla_firefoxreality.metrics_v1', 'org_mozilla_focus_beta.activation_v1', 'org_mozilla_focus_beta.baseline_v1', 
'org_mozilla_focus_beta.deletion_request_v1', 'org_mozilla_focus_beta.events_v1', 'org_mozilla_focus_beta.metrics_v1', 'org_mozilla_focus.activation_v1', 'org_mozilla_focus.baseline_v1', 'org_mozilla_focus.deletion_request_v1', 'org_mozilla_focus.events_v1', 'org_mozilla_focus.metrics_v1', 'org_mozilla_focus_nightly.activation_v1', 'org_mozilla_focus_nightly.baseline_v1', 'org_mozilla_focus_nightly.deletion_request_v1', 'org_mozilla_focus_nightly.events_v1', 'org_mozilla_focus_nightly.metrics_v1', 'org_mozilla_ios_fennec.baseline_v1', 'org_mozilla_ios_fennec.deletion_request_v1', 'org_mozilla_ios_fennec.events_v1', 'org_mozilla_ios_fennec.metrics_v1', 'org_mozilla_ios_firefox.baseline_v1', 'org_mozilla_ios_firefox.deletion_request_v1', 'org_mozilla_ios_firefox.events_v1', 'org_mozilla_ios_firefox.metrics_v1', 'org_mozilla_ios_firefoxbeta.baseline_v1', 'org_mozilla_ios_firefoxbeta.deletion_request_v1', 'org_mozilla_ios_firefoxbeta.events_v1', 'org_mozilla_ios_firefoxbeta.metrics_v1', 'org_mozilla_ios_focus.baseline_v1', 'org_mozilla_ios_focus.deletion_request_v1', 'org_mozilla_ios_focus.events_v1', 'org_mozilla_ios_focus.metrics_v1', 'org_mozilla_ios_klar.baseline_v1', 'org_mozilla_ios_klar.deletion_request_v1', 'org_mozilla_ios_klar.events_v1', 'org_mozilla_ios_klar.metrics_v1', 'org_mozilla_ios_lockbox.baseline_v1', 'org_mozilla_ios_lockbox.deletion_request_v1', 'org_mozilla_ios_lockbox.events_v1', 'org_mozilla_ios_lockbox.metrics_v1', 'org_mozilla_klar.activation_v1', 'org_mozilla_klar.baseline_v1', 'org_mozilla_klar.deletion_request_v1', 'org_mozilla_klar.events_v1', 'org_mozilla_klar.metrics_v1', 'org_mozilla_mozregression.baseline_v1', 'org_mozilla_mozregression.deletion_request_v1', 'org_mozilla_mozregression.events_v1', 'org_mozilla_mozregression.metrics_v1', 'org_mozilla_mozregression.usage_v1', 'org_mozilla_reference_browser.baseline_v1', 'org_mozilla_reference_browser.deletion_request_v1', 'org_mozilla_reference_browser.events_v1', 
'org_mozilla_reference_browser.metrics_v1', 'org_mozilla_tv_firefox.baseline_v1', 'org_mozilla_tv_firefox.deletion_request_v1', 'org_mozilla_tv_firefox.events_v1', 'org_mozilla_tv_firefox.metrics_v1', 'org_mozilla_vrbrowser.addresses_sync_v1', 'org_mozilla_vrbrowser.baseline_v1', 'org_mozilla_vrbrowser.bookmarks_sync_v1', 'org_mozilla_vrbrowser.creditcards_sync_v1', 'org_mozilla_vrbrowser.deletion_request_v1', 'org_mozilla_vrbrowser.events_v1', 'org_mozilla_vrbrowser.history_sync_v1', 'org_mozilla_vrbrowser.logins_sync_v1', 'org_mozilla_vrbrowser.metrics_v1', 'org_mozilla_vrbrowser.session_end_v1', 'org_mozilla_vrbrowser.sync_v1', 'org_mozilla_vrbrowser.tabs_sync_v1', 'rally_core.deletion_request_v1', 'rally_core.demographics_v1', 'rally_core.enrollment_v1', 'rally_core.study_enrollment_v1', 'rally_core.study_unenrollment_v1', 'rally_core.uninstall_deletion_v1', 'rally_debug.deletion_request_v1', 'rally_debug.demographics_v1', 'rally_debug.enrollment_v1', 'rally_debug.study_enrollment_v1', 'rally_debug.study_unenrollment_v1', 'rally_debug.uninstall_deletion_v1', 'rally_study_zero_one.deletion_request_v1', 'rally_study_zero_one.rs01_event_v1', 'rally_study_zero_one.study_enrollment_v1', 'rally_zero_one.deletion_request_v1', 'rally_zero_one.measurements_v1', 'rally_zero_one.pioneer_enrollment_v1']