mozilla_schema_generator.glean_ping

  1# -*- coding: utf-8 -*-
  2
  3# This Source Code Form is subject to the terms of the Mozilla Public
  4# License, v. 2.0. If a copy of the MPL was not distributed with this
  5# file, You can obtain one at http://mozilla.org/MPL/2.0/.
  6
  7import copy
  8import logging
  9from pathlib import Path
 10from typing import Dict, List, Set
 11
 12from requests import HTTPError
 13
 14from .config import Config
 15from .generic_ping import GenericPing
 16from .probes import GleanProbe
 17from .schema import Schema
 18
 19ROOT_DIR = Path(__file__).parent
 20BUG_1737656_TXT = ROOT_DIR / "configs" / "bug_1737656_affected.txt"
 21
 22logger = logging.getLogger(__name__)
 23
 24DEFAULT_SCHEMA_URL = (
 25    "https://raw.githubusercontent.com"
 26    "/mozilla-services/mozilla-pipeline-schemas"
 27    "/{branch}/schemas/glean/glean/glean.1.schema.json"
 28)
 29
 30MINIMUM_SCHEMA_URL = (
 31    "https://raw.githubusercontent.com"
 32    "/mozilla-services/mozilla-pipeline-schemas"
 33    "/{branch}/schemas/glean/glean/glean-min.1.schema.json"
 34)
 35
 36
 37class GleanPing(GenericPing):
 38    probes_url_template = GenericPing.probe_info_base_url + "/glean/{}/metrics"
 39    ping_url_template = GenericPing.probe_info_base_url + "/glean/{}/pings"
 40    repos_url = GenericPing.probe_info_base_url + "/glean/repositories"
 41    dependencies_url_template = (
 42        GenericPing.probe_info_base_url + "/glean/{}/dependencies"
 43    )
 44
 45    default_dependencies = ["glean-core"]
 46
 47    with open(BUG_1737656_TXT, "r") as f:
 48        bug_1737656_affected_tables = [
 49            line.strip() for line in f.readlines() if line.strip()
 50        ]
 51
 52    def __init__(self, repo, **kwargs):  # TODO: Make env-url optional
 53        self.repo = repo
 54        self.repo_name = repo["name"]
 55        self.app_id = repo["app_id"]
 56        super().__init__(
 57            DEFAULT_SCHEMA_URL,
 58            DEFAULT_SCHEMA_URL,
 59            self.probes_url_template.format(self.repo_name),
 60            **kwargs,
 61        )
 62
 63    def get_schema(self, generic_schema=False) -> Schema:
 64        """
 65        Fetch schema via URL.
 66
 67        Unless *generic_schema* is set to true, this function applies some
 68        workarounds needed for proper injection of metrics.
 69        """
 70        schema = super().get_schema()
 71        if generic_schema:
 72            return schema
 73
 74        # We need to inject placeholders for the url2, text2, etc. types as part
 75        # of mitigation for https://bugzilla.mozilla.org/show_bug.cgi?id=1737656
 76        for metric_name in ["labeled_rate", "jwe", "url", "text"]:
 77            metric1 = schema.get(
 78                ("properties", "metrics", "properties", metric_name)
 79            ).copy()
 80            metric1 = schema.set_schema_elem(
 81                ("properties", "metrics", "properties", metric_name + "2"),
 82                metric1,
 83            )
 84
 85        return schema
 86
 87    def get_dependencies(self):
 88        # Get all of the library dependencies for the application that
 89        # are also listed in the repositories file.
 90
 91        # The dependencies are specified using library names, but we need to
 92        # map those back to repository names in the repositories file.
 93        try:
 94            dependencies = self._get_json(
 95                self.dependencies_url_template.format(self.repo_name)
 96            )
 97        except HTTPError:
 98            logger.info(f"For {self.repo_name}, using default Glean dependencies")
 99            return self.default_dependencies
100
101        dependency_library_names = list(dependencies.keys())
102
103        repos = GleanPing._get_json(GleanPing.repos_url)
104        repos_by_dependency_name = {}
105        for repo in repos:
106            for library_name in repo.get("library_names", []):
107                repos_by_dependency_name[library_name] = repo["name"]
108
109        dependencies = []
110        for name in dependency_library_names:
111            if name in repos_by_dependency_name:
112                dependencies.append(repos_by_dependency_name[name])
113
114        if len(dependencies) == 0:
115            logger.info(f"For {self.repo_name}, using default Glean dependencies")
116            return self.default_dependencies
117
118        logger.info(f"For {self.repo_name}, found Glean dependencies: {dependencies}")
119        return dependencies
120
121    def get_probes(self) -> List[GleanProbe]:
122        data = self._get_json(self.probes_url)
123        probes = list(data.items())
124
125        for dependency in self.get_dependencies():
126            dependency_probes = self._get_json(
127                self.probes_url_template.format(dependency)
128            )
129            probes += list(dependency_probes.items())
130
131        pings = self.get_pings()
132
133        processed = []
134        for _id, defn in probes:
135            probe = GleanProbe(_id, defn, pings=pings)
136            processed.append(probe)
137
138            # Handling probe type changes (Bug 1870317)
139            probe_types = {hist["type"] for hist in defn[probe.history_key]}
140            if len(probe_types) > 1:
141                # The probe type changed at some point in history.
142                # Create schema entry for each type.
143                hist_defn = defn.copy()
144
145                # No new entry needs to be created for the current probe type
146                probe_types.remove(defn["type"])
147
148                for hist in hist_defn[probe.history_key]:
149                    # Create a new entry for a historic type
150                    if hist["type"] in probe_types:
151                        hist_defn["type"] = hist["type"]
152                        probe = GleanProbe(_id, hist_defn, pings=pings)
153                        processed.append(probe)
154
155                        # Keep track of the types for which entries were already created
156                        probe_types.remove(hist["type"])
157
158        return processed
159
160    def _get_ping_data(self) -> Dict[str, Dict]:
161        url = self.ping_url_template.format(self.repo_name)
162        ping_data = GleanPing._get_json(url)
163        for dependency in self.get_dependencies():
164            dependency_pings = self._get_json(self.ping_url_template.format(dependency))
165            ping_data.update(dependency_pings)
166        return ping_data
167
168    def _get_ping_data_without_dependencies(self) -> Dict[str, Dict]:
169        url = self.ping_url_template.format(self.repo_name)
170        ping_data = GleanPing._get_json(url)
171        return ping_data
172
173    def _get_dependency_pings(self, dependency):
174        return self._get_json(self.ping_url_template.format(dependency))
175
176    def get_pings(self) -> Set[str]:
177        return self._get_ping_data().keys()
178
179    @staticmethod
180    def apply_default_metadata(ping_metadata, default_metadata):
181        """apply_default_metadata recurses down into dicts nested
182        to an arbitrary depth, updating keys. The ``default_metadata`` is merged into
183        ``ping_metadata``.
184        :param ping_metadata: dict onto which the merge is executed
185        :param default_metadata: dict merged into ping_metadata
186        :return: None
187        """
188        for k, v in default_metadata.items():
189            if (
190                k in ping_metadata
191                and isinstance(ping_metadata[k], dict)
192                and isinstance(default_metadata[k], dict)
193            ):
194                GleanPing.apply_default_metadata(ping_metadata[k], default_metadata[k])
195            else:
196                ping_metadata[k] = default_metadata[k]
197
198    def _get_ping_data_and_dependencies_with_default_metadata(self) -> Dict[str, Dict]:
199        # Get the ping data with the pipeline metadata
200        ping_data = self._get_ping_data_without_dependencies()
201
202        # The ping endpoint for the dependency pings does not include any repo-defined
203        # moz_pipeline_metadata_defaults, so they need to be applied here.
204
205        # 1.  Get repo and pipeline default metadata.
206        repos = self.get_repos()
207        current_repo = next((x for x in repos if x.get("app_id") == self.app_id), {})
208        default_metadata = current_repo.get("moz_pipeline_metadata_defaults", {})
209
210        # 2.  Apply the default metadata to each dependency defined ping.
211
212        # Apply app-level metadata to pings defined in dependencies
213        app_metadata = current_repo.get("moz_pipeline_metadata", {})
214
215        for dependency in self.get_dependencies():
216            dependency_pings = self._get_dependency_pings(dependency)
217            for dependency_ping in dependency_pings.values():
218                # Although it is counterintuitive to apply the default metadata on top of the
219                # existing dependency ping metadata, this sets the repo-specific value for
220                # bq_dataset_family instead of using the dependency id as the
221                # bq_dataset_family value.
222                GleanPing.apply_default_metadata(
223                    dependency_ping.get("moz_pipeline_metadata"),
224                    copy.deepcopy(default_metadata),
225                )
226                # app-level ping properties take priority over the app defaults
227                metadata_override = app_metadata.get(dependency_ping["name"])
228                if metadata_override is not None:
229                    GleanPing.apply_default_metadata(
230                        dependency_ping.get("moz_pipeline_metadata"), metadata_override
231                    )
232            ping_data.update(dependency_pings)
233
234        return ping_data
235
236    @staticmethod
237    def reorder_metadata(metadata):
238        desired_order_list = [
239            "bq_dataset_family",
240            "bq_table",
241            "bq_metadata_format",
242            "include_info_sections",
243            "submission_timestamp_granularity",
244            "expiration_policy",
245            "override_attributes",
246            "jwe_mappings",
247        ]
248        reordered_metadata = {
249            k: metadata[k] for k in desired_order_list if k in metadata
250        }
251
252        # re-order jwe-mappings
253        desired_order_list = ["source_field_path", "decrypted_field_path"]
254        jwe_mapping_metadata = reordered_metadata.get("jwe_mappings")
255        if jwe_mapping_metadata:
256            reordered_jwe_mapping_metadata = []
257            for mapping in jwe_mapping_metadata:
258                reordered_jwe_mapping_metadata.append(
259                    {k: mapping[k] for k in desired_order_list if k in mapping}
260                )
261            reordered_metadata["jwe_mappings"] = reordered_jwe_mapping_metadata
262
263        # Future-proofing: if other fields are added at the ping top level,
264        # append them to the end.
265        leftovers = {k: metadata[k] for k in set(metadata) - set(reordered_metadata)}
266        reordered_metadata = {**reordered_metadata, **leftovers}
267        return reordered_metadata
268
269    def get_pings_and_pipeline_metadata(self) -> Dict[str, Dict]:
270        pings = self._get_ping_data_and_dependencies_with_default_metadata()
271        for ping_name, ping_data in pings.items():
272            metadata = ping_data.get("moz_pipeline_metadata")
273            if not metadata:
274                continue
275            metadata["include_info_sections"] = self._is_field_included(
276                ping_data, "include_info_sections", consider_all_history=False
277            )
278            metadata["include_client_id"] = self._is_field_included(
279                ping_data, "include_client_id"
280            )
281
282            # While technically unnecessary, the dictionary elements are re-ordered to match the
283            # currently deployed order, which is used to verify that the output does not change.
284            pings[ping_name] = GleanPing.reorder_metadata(metadata)
285        return pings
286
287    def get_ping_descriptions(self) -> Dict[str, str]:
288        return {
289            k: v["history"][-1]["description"] for k, v in self._get_ping_data().items()
290        }
291
292    @staticmethod
293    def _is_field_included(ping_data, field_name, consider_all_history=True) -> bool:
294        """Return False if the field is present and False in every considered history entry.
295
296        If `consider_all_history` is False, only the latest entry in the ping history is checked.
297
298        If the field is missing or True in any of the considered history entries,
299        True is returned.
300        """
301
302        # Default to true if not specified.
303        if "history" not in ping_data or len(ping_data["history"]) == 0:
304            return True
305
306        # Check whether the field has already been deployed at some point in the past,
307        # if the caller of this method wants to consider the field's full history.
308        # Keep such fields in the schema even if the field has since changed, as
309        # removing fields is currently not supported.
310        # See https://bugzilla.mozilla.org/show_bug.cgi?id=1898105
311        # and https://bugzilla.mozilla.org/show_bug.cgi?id=1898105#c10
312        ping_history: list
313        if consider_all_history:
314            ping_history = ping_data["history"]
315        else:
316            ping_history = [ping_data["history"][-1]]
317        for history in ping_history:
318            if field_name not in history or history[field_name]:
319                return True
320
321        # The ping was created with include_info_sections = False. The fields can be excluded.
322        return False
323
324    def set_schema_url(self, metadata):
325        """
326        Switch between the glean-min and glean schemas depending on whether the ping
327        requires info sections, as specified in the parsed ping info from probe-scraper.
328        """
329        if not metadata["include_info_sections"]:
330            self.schema_url = MINIMUM_SCHEMA_URL.format(branch=self.branch_name)
331        else:
332            self.schema_url = DEFAULT_SCHEMA_URL.format(branch=self.branch_name)
333
334    def generate_schema(self, config, generic_schema=False) -> Dict[str, Schema]:
335        pings = self.get_pings_and_pipeline_metadata()
336        schemas = {}
337
338        for ping, pipeline_meta in pings.items():
339            matchers = {
340                loc: m.clone(new_table_group=ping) for loc, m in config.matchers.items()
341            }
342
343            # Four newly introduced metric types were incorrectly deployed
344            # as repeated key/value structs in all Glean ping tables existing prior
345            # to November 2021. We maintain the incorrect fields for existing tables
346            # by disabling the associated matchers.
347            # Note that each of these types now has a "2" matcher ("text2", "url2", etc.)
348            # defined that will allow metrics of these types to be injected into proper
349            # structs. The gcp-ingestion repository includes logic to rewrite these
350            # metrics under the "2" names.
351            # See https://bugzilla.mozilla.org/show_bug.cgi?id=1737656
352            bq_identifier = "{bq_dataset_family}.{bq_table}".format(**pipeline_meta)
353            if bq_identifier in self.bug_1737656_affected_tables:
354                matchers = {
355                    loc: m
356                    for loc, m in matchers.items()
357                    if not m.matcher.get("bug_1737656_affected")
358                }
359
360            for matcher in matchers.values():
361                matcher.matcher["send_in_pings"]["contains"] = ping
362            new_config = Config(ping, matchers=matchers)
363
364            defaults = {"mozPipelineMetadata": pipeline_meta}
365
366            # Adjust the schema path if the ping does not require info sections
367            self.set_schema_url(pipeline_meta)
368            if generic_schema:  # Use the generic glean ping schema
369                schema = self.get_schema(generic_schema=True)
370                schema.schema.update(defaults)
371                schemas[new_config.name] = schema
372            else:
373                generated = super().generate_schema(new_config)
374                for schema in generated.values():
375                    # We want to override each individual key with assembled defaults,
376                    # but keep values _inside_ them if they have been set in the schemas.
377                    for key, value in defaults.items():
378                        if key not in schema.schema:
379                            schema.schema[key] = {}
380                        schema.schema[key].update(value)
381                schemas.update(generated)
382
383        return schemas
384
385    @staticmethod
386    def get_repos():
387        """
388        Retrieve metadata for all non-library Glean repositories
389        """
390        repos = GleanPing._get_json(GleanPing.repos_url)
391        return [repo for repo in repos if "library_names" not in repo]
class GleanPing(GenericPing):
GleanPing(repo, **kwargs)
probes_url_template = 'https://probeinfo.telemetry.mozilla.org/glean/{}/metrics'
ping_url_template = 'https://probeinfo.telemetry.mozilla.org/glean/{}/pings'
repos_url = 'https://probeinfo.telemetry.mozilla.org/glean/repositories'
dependencies_url_template = 'https://probeinfo.telemetry.mozilla.org/glean/{}/dependencies'
default_dependencies = ['glean-core']
repo
repo_name
app_id
def get_schema(self, generic_schema=False) -> mozilla_schema_generator.schema.Schema:

Fetch schema via URL.

Unless generic_schema is set to true, this function applies some workarounds needed for proper injection of metrics.
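
A minimal usage sketch, assuming a hypothetical repository entry (real entries come from GleanPing.get_repos()) and network access to the probe-info service and GitHub:

    from mozilla_schema_generator.glean_ping import GleanPing

    # Hypothetical repository entry used only for illustration.
    repo = {"name": "firefox-desktop", "app_id": "firefox-desktop"}
    glean_ping = GleanPing(repo)

    # Default schema: placeholders for labeled_rate2, jwe2, url2 and text2
    # are injected as part of the bug 1737656 mitigation.
    schema = glean_ping.get_schema()

    # Generic schema: returned as published, without the placeholder injection.
    generic = glean_ping.get_schema(generic_schema=True)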

def get_dependencies(self):
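
A short usage sketch (same hypothetical repository entry as above); the returned names are repository names resolved from the app's library dependencies, falling back to default_dependencies when the endpoint is missing or nothing matches:

    from mozilla_schema_generator.glean_ping import GleanPing

    glean_ping = GleanPing({"name": "firefox-desktop", "app_id": "firefox-desktop"})
    # e.g. ["glean-core"] for most applications.
    print(glean_ping.get_dependencies())
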
def get_probes(self) -> List[mozilla_schema_generator.probes.GleanProbe]:
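
Usage sketch (hypothetical repository entry as above). The returned list combines the app's own metrics with those of its dependencies; a probe whose type changed over its history appears once per type so each historical shape gets a schema entry:

    from mozilla_schema_generator.glean_ping import GleanPing

    glean_ping = GleanPing({"name": "firefox-desktop", "app_id": "firefox-desktop"})
    probes = glean_ping.get_probes()
    print(len(probes))  # app metrics plus dependency metrics
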
def get_pings(self) -> Set[str]:
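
Usage sketch (hypothetical repository entry as above); note the return value is the keys view of the combined ping data:

    from mozilla_schema_generator.glean_ping import GleanPing

    glean_ping = GleanPing({"name": "firefox-desktop", "app_id": "firefox-desktop"})
    # Typically includes "baseline", "metrics", "events" and "deletion-request".
    print(sorted(glean_ping.get_pings()))
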
@staticmethod
def apply_default_metadata(ping_metadata, default_metadata):

apply_default_metadata recurses down into dicts nested to an arbitrary depth, updating keys. The default_metadata is merged into ping_metadata.

Parameters
  • ping_metadata: dict onto which the merge is executed
  • default_metadata: dict merged into ping_metadata
Returns

None
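
A self-contained sketch of the merge semantics with illustrative values: nested dicts are merged recursively, while keys present in default_metadata otherwise overwrite the existing values in place.

    from mozilla_schema_generator.glean_ping import GleanPing

    ping_metadata = {
        "bq_dataset_family": "glean_core",
        "expiration_policy": {"delete_after_days": 30},
    }
    defaults = {
        "bq_dataset_family": "my_app",
        "expiration_policy": {"collect_through_date": "2030-01-01"},
    }
    GleanPing.apply_default_metadata(ping_metadata, defaults)
    # ping_metadata is mutated in place:
    # {"bq_dataset_family": "my_app",
    #  "expiration_policy": {"delete_after_days": 30,
    #                        "collect_through_date": "2030-01-01"}}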

@staticmethod
def reorder_metadata(metadata):
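
A self-contained sketch with illustrative keys: known keys are emitted in the deployed order, and any remaining keys are appended at the end unchanged.

    from mozilla_schema_generator.glean_ping import GleanPing

    meta = {
        "expiration_policy": {"delete_after_days": 30},
        "bq_table": "metrics_v1",
        "bq_dataset_family": "my_app",
        "custom_field": 1,  # not in the desired order list, appended last
    }
    print(GleanPing.reorder_metadata(meta))
    # {'bq_dataset_family': 'my_app', 'bq_table': 'metrics_v1',
    #  'expiration_policy': {'delete_after_days': 30}, 'custom_field': 1}
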
def get_pings_and_pipeline_metadata(self) -> Dict[str, Dict]:
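
Usage sketch (hypothetical repository entry as above): the result maps ping names to their reordered moz_pipeline_metadata, with include_info_sections and include_client_id resolved from the ping history:

    from mozilla_schema_generator.glean_ping import GleanPing

    glean_ping = GleanPing({"name": "firefox-desktop", "app_id": "firefox-desktop"})
    for ping_name, meta in glean_ping.get_pings_and_pipeline_metadata().items():
        print(ping_name, meta.get("bq_table"), meta.get("include_info_sections"))
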
def get_ping_descriptions(self) -> Dict[str, str]:
def set_schema_url(self, metadata):

Switch between the glean-min and glean schemas depending on whether the ping requires info sections, as specified in the parsed ping info from probe-scraper.
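
A small sketch (hypothetical instance as above; branch_name is assumed to be set by GenericPing):

    from mozilla_schema_generator.glean_ping import GleanPing

    glean_ping = GleanPing({"name": "firefox-desktop", "app_id": "firefox-desktop"})

    glean_ping.set_schema_url({"include_info_sections": False})
    # glean_ping.schema_url now points at glean-min.1.schema.json for the branch.

    glean_ping.set_schema_url({"include_info_sections": True})
    # glean_ping.schema_url now points at glean.1.schema.json.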

def generate_schema(self, config, generic_schema=False) -> Dict[str, mozilla_schema_generator.schema.Schema]:
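
A hedged sketch of generating per-ping schemas. The Config below uses an empty matcher set purely to keep the example small; the real generator builds its Config from the packaged Glean configuration. With generic_schema=True each ping gets the published Glean schema plus its mozPipelineMetadata defaults:

    from mozilla_schema_generator.config import Config
    from mozilla_schema_generator.glean_ping import GleanPing

    glean_ping = GleanPing({"name": "firefox-desktop", "app_id": "firefox-desktop"})

    config = Config("glean", matchers={})  # minimal config for illustration
    schemas = glean_ping.generate_schema(config, generic_schema=True)
    for name, schema in schemas.items():
        print(name, schema.schema["mozPipelineMetadata"].get("bq_table"))
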
@staticmethod
def get_repos():

Retrieve metadata for all non-library Glean repositories
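
Usage sketch: iterate the application repositories and build a GleanPing for each (requires network access to probeinfo.telemetry.mozilla.org):

    from mozilla_schema_generator.glean_ping import GleanPing

    for repo in GleanPing.get_repos():
        glean_ping = GleanPing(repo)
        print(glean_ping.repo_name, glean_ping.app_id)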

bug_1737656_affected_tables = ['burnham.baseline_v1', 'burnham.deletion_request_v1', 'burnham.discovery_v1', 'burnham.events_v1', 'burnham.metrics_v1', 'burnham.space_ship_ready_v1', 'burnham.starbase46_v1', 'firefox_desktop_background_update.background_update_v1', 'firefox_desktop_background_update.baseline_v1', 'firefox_desktop_background_update.deletion_request_v1', 'firefox_desktop_background_update.events_v1', 'firefox_desktop_background_update.metrics_v1', 'firefox_desktop.baseline_v1', 'firefox_desktop.deletion_request_v1', 'firefox_desktop.events_v1', 'firefox_desktop.fog_validation_v1', 'firefox_desktop.metrics_v1', 'firefox_installer.install_v1', 'firefox_launcher_process.launcher_process_failure_v1', 'messaging_system.cfr_v1', 'messaging_system.infobar_v1', 'messaging_system.moments_v1', 'messaging_system.onboarding_v1', 'messaging_system.personalization_experiment_v1', 'messaging_system.snippets_v1', 'messaging_system.spotlight_v1', 'messaging_system.undesired_events_v1', 'messaging_system.whats_new_panel_v1', 'mlhackweek_search.action_v1', 'mlhackweek_search.baseline_v1', 'mlhackweek_search.custom_v1', 'mlhackweek_search.deletion_request_v1', 'mlhackweek_search.events_v1', 'mlhackweek_search.metrics_v1', 'mozilla_lockbox.addresses_sync_v1', 'mozilla_lockbox.baseline_v1', 'mozilla_lockbox.bookmarks_sync_v1', 'mozilla_lockbox.creditcards_sync_v1', 'mozilla_lockbox.deletion_request_v1', 'mozilla_lockbox.events_v1', 'mozilla_lockbox.history_sync_v1', 'mozilla_lockbox.logins_sync_v1', 'mozilla_lockbox.metrics_v1', 'mozilla_lockbox.sync_v1', 'mozilla_lockbox.tabs_sync_v1', 'mozilla_mach.baseline_v1', 'mozilla_mach.deletion_request_v1', 'mozilla_mach.events_v1', 'mozilla_mach.metrics_v1', 'mozilla_mach.usage_v1', 'mozillavpn.deletion_request_v1', 'mozillavpn.main_v1', 'mozphab.baseline_v1', 'mozphab.deletion_request_v1', 'mozphab.events_v1', 'mozphab.metrics_v1', 'mozphab.usage_v1', 'org_mozilla_bergamot.custom_v1', 'org_mozilla_bergamot.deletion_request_v1', 'org_mozilla_connect_firefox.baseline_v1', 'org_mozilla_connect_firefox.deletion_request_v1', 'org_mozilla_connect_firefox.events_v1', 'org_mozilla_connect_firefox.metrics_v1', 'org_mozilla_fenix.activation_v1', 'org_mozilla_fenix.addresses_sync_v1', 'org_mozilla_fenix.baseline_v1', 'org_mozilla_fenix.bookmarks_sync_v1', 'org_mozilla_fenix.creditcards_sync_v1', 'org_mozilla_fenix.deletion_request_v1', 'org_mozilla_fenix.events_v1', 'org_mozilla_fenix.first_session_v1', 'org_mozilla_fenix.fog_validation_v1', 'org_mozilla_fenix.history_sync_v1', 'org_mozilla_fenix.installation_v1', 'org_mozilla_fenix.logins_sync_v1', 'org_mozilla_fenix.metrics_v1', 'org_mozilla_fenix.migration_v1', 'org_mozilla_fenix.startup_timeline_v1', 'org_mozilla_fenix.sync_v1', 'org_mozilla_fenix.tabs_sync_v1', 'org_mozilla_fenix_nightly.activation_v1', 'org_mozilla_fenix_nightly.addresses_sync_v1', 'org_mozilla_fenix_nightly.baseline_v1', 'org_mozilla_fenix_nightly.bookmarks_sync_v1', 'org_mozilla_fenix_nightly.creditcards_sync_v1', 'org_mozilla_fenix_nightly.deletion_request_v1', 'org_mozilla_fenix_nightly.events_v1', 'org_mozilla_fenix_nightly.first_session_v1', 'org_mozilla_fenix_nightly.fog_validation_v1', 'org_mozilla_fenix_nightly.history_sync_v1', 'org_mozilla_fenix_nightly.installation_v1', 'org_mozilla_fenix_nightly.logins_sync_v1', 'org_mozilla_fenix_nightly.metrics_v1', 'org_mozilla_fenix_nightly.migration_v1', 'org_mozilla_fenix_nightly.startup_timeline_v1', 'org_mozilla_fenix_nightly.sync_v1', 'org_mozilla_fenix_nightly.tabs_sync_v1', 
'org_mozilla_fennec_aurora.activation_v1', 'org_mozilla_fennec_aurora.addresses_sync_v1', 'org_mozilla_fennec_aurora.baseline_v1', 'org_mozilla_fennec_aurora.bookmarks_sync_v1', 'org_mozilla_fennec_aurora.creditcards_sync_v1', 'org_mozilla_fennec_aurora.deletion_request_v1', 'org_mozilla_fennec_aurora.events_v1', 'org_mozilla_fennec_aurora.first_session_v1', 'org_mozilla_fennec_aurora.fog_validation_v1', 'org_mozilla_fennec_aurora.history_sync_v1', 'org_mozilla_fennec_aurora.installation_v1', 'org_mozilla_fennec_aurora.logins_sync_v1', 'org_mozilla_fennec_aurora.metrics_v1', 'org_mozilla_fennec_aurora.migration_v1', 'org_mozilla_fennec_aurora.startup_timeline_v1', 'org_mozilla_fennec_aurora.sync_v1', 'org_mozilla_fennec_aurora.tabs_sync_v1', 'org_mozilla_firefox_beta.activation_v1', 'org_mozilla_firefox_beta.addresses_sync_v1', 'org_mozilla_firefox_beta.baseline_v1', 'org_mozilla_firefox_beta.bookmarks_sync_v1', 'org_mozilla_firefox_beta.creditcards_sync_v1', 'org_mozilla_firefox_beta.deletion_request_v1', 'org_mozilla_firefox_beta.events_v1', 'org_mozilla_firefox_beta.first_session_v1', 'org_mozilla_firefox_beta.fog_validation_v1', 'org_mozilla_firefox_beta.history_sync_v1', 'org_mozilla_firefox_beta.installation_v1', 'org_mozilla_firefox_beta.logins_sync_v1', 'org_mozilla_firefox_beta.metrics_v1', 'org_mozilla_firefox_beta.migration_v1', 'org_mozilla_firefox_beta.startup_timeline_v1', 'org_mozilla_firefox_beta.sync_v1', 'org_mozilla_firefox_beta.tabs_sync_v1', 'org_mozilla_firefox.activation_v1', 'org_mozilla_firefox.addresses_sync_v1', 'org_mozilla_firefox.baseline_v1', 'org_mozilla_firefox.bookmarks_sync_v1', 'org_mozilla_firefox.creditcards_sync_v1', 'org_mozilla_firefox.deletion_request_v1', 'org_mozilla_firefox.events_v1', 'org_mozilla_firefox.first_session_v1', 'org_mozilla_firefox.fog_validation_v1', 'org_mozilla_firefox.history_sync_v1', 'org_mozilla_firefox.installation_v1', 'org_mozilla_firefox.logins_sync_v1', 'org_mozilla_firefox.metrics_v1', 'org_mozilla_firefox.migration_v1', 'org_mozilla_firefox.startup_timeline_v1', 'org_mozilla_firefox.sync_v1', 'org_mozilla_firefox.tabs_sync_v1', 'org_mozilla_firefoxreality.baseline_v1', 'org_mozilla_firefoxreality.deletion_request_v1', 'org_mozilla_firefoxreality.events_v1', 'org_mozilla_firefoxreality.launch_v1', 'org_mozilla_firefoxreality.metrics_v1', 'org_mozilla_focus_beta.activation_v1', 'org_mozilla_focus_beta.baseline_v1', 'org_mozilla_focus_beta.deletion_request_v1', 'org_mozilla_focus_beta.events_v1', 'org_mozilla_focus_beta.metrics_v1', 'org_mozilla_focus.activation_v1', 'org_mozilla_focus.baseline_v1', 'org_mozilla_focus.deletion_request_v1', 'org_mozilla_focus.events_v1', 'org_mozilla_focus.metrics_v1', 'org_mozilla_focus_nightly.activation_v1', 'org_mozilla_focus_nightly.baseline_v1', 'org_mozilla_focus_nightly.deletion_request_v1', 'org_mozilla_focus_nightly.events_v1', 'org_mozilla_focus_nightly.metrics_v1', 'org_mozilla_ios_fennec.baseline_v1', 'org_mozilla_ios_fennec.deletion_request_v1', 'org_mozilla_ios_fennec.events_v1', 'org_mozilla_ios_fennec.metrics_v1', 'org_mozilla_ios_firefox.baseline_v1', 'org_mozilla_ios_firefox.deletion_request_v1', 'org_mozilla_ios_firefox.events_v1', 'org_mozilla_ios_firefox.metrics_v1', 'org_mozilla_ios_firefoxbeta.baseline_v1', 'org_mozilla_ios_firefoxbeta.deletion_request_v1', 'org_mozilla_ios_firefoxbeta.events_v1', 'org_mozilla_ios_firefoxbeta.metrics_v1', 'org_mozilla_ios_focus.baseline_v1', 'org_mozilla_ios_focus.deletion_request_v1', 'org_mozilla_ios_focus.events_v1', 
'org_mozilla_ios_focus.metrics_v1', 'org_mozilla_ios_klar.baseline_v1', 'org_mozilla_ios_klar.deletion_request_v1', 'org_mozilla_ios_klar.events_v1', 'org_mozilla_ios_klar.metrics_v1', 'org_mozilla_ios_lockbox.baseline_v1', 'org_mozilla_ios_lockbox.deletion_request_v1', 'org_mozilla_ios_lockbox.events_v1', 'org_mozilla_ios_lockbox.metrics_v1', 'org_mozilla_klar.activation_v1', 'org_mozilla_klar.baseline_v1', 'org_mozilla_klar.deletion_request_v1', 'org_mozilla_klar.events_v1', 'org_mozilla_klar.metrics_v1', 'org_mozilla_mozregression.baseline_v1', 'org_mozilla_mozregression.deletion_request_v1', 'org_mozilla_mozregression.events_v1', 'org_mozilla_mozregression.metrics_v1', 'org_mozilla_mozregression.usage_v1', 'org_mozilla_reference_browser.baseline_v1', 'org_mozilla_reference_browser.deletion_request_v1', 'org_mozilla_reference_browser.events_v1', 'org_mozilla_reference_browser.metrics_v1', 'org_mozilla_tv_firefox.baseline_v1', 'org_mozilla_tv_firefox.deletion_request_v1', 'org_mozilla_tv_firefox.events_v1', 'org_mozilla_tv_firefox.metrics_v1', 'org_mozilla_vrbrowser.addresses_sync_v1', 'org_mozilla_vrbrowser.baseline_v1', 'org_mozilla_vrbrowser.bookmarks_sync_v1', 'org_mozilla_vrbrowser.creditcards_sync_v1', 'org_mozilla_vrbrowser.deletion_request_v1', 'org_mozilla_vrbrowser.events_v1', 'org_mozilla_vrbrowser.history_sync_v1', 'org_mozilla_vrbrowser.logins_sync_v1', 'org_mozilla_vrbrowser.metrics_v1', 'org_mozilla_vrbrowser.session_end_v1', 'org_mozilla_vrbrowser.sync_v1', 'org_mozilla_vrbrowser.tabs_sync_v1', 'rally_core.deletion_request_v1', 'rally_core.demographics_v1', 'rally_core.enrollment_v1', 'rally_core.study_enrollment_v1', 'rally_core.study_unenrollment_v1', 'rally_core.uninstall_deletion_v1', 'rally_debug.deletion_request_v1', 'rally_debug.demographics_v1', 'rally_debug.enrollment_v1', 'rally_debug.study_enrollment_v1', 'rally_debug.study_unenrollment_v1', 'rally_debug.uninstall_deletion_v1', 'rally_study_zero_one.deletion_request_v1', 'rally_study_zero_one.rs01_event_v1', 'rally_study_zero_one.study_enrollment_v1', 'rally_zero_one.deletion_request_v1', 'rally_zero_one.measurements_v1', 'rally_zero_one.pioneer_enrollment_v1']