generator.views.glean_ping_view

Class to describe a Glean Ping View.

  1"""Class to describe a Glean Ping View."""
  2
  3import logging
  4import re
  5from collections import Counter
  6from textwrap import dedent
  7from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
  8
  9import click
 10from mozilla_schema_generator.glean_ping import GleanPing
 11from mozilla_schema_generator.probes import GleanProbe
 12
 13from . import lookml_utils
 14from .lookml_utils import slug_to_title
 15from .ping_view import PingView
 16
 17DISTRIBUTION_TYPES = {
 18    "timing_distribution",
 19    "memory_distribution",
 20    "custom_distribution",
 21}
 22
 23
 24ALLOWED_TYPES = DISTRIBUTION_TYPES | {
 25    "boolean",
 26    "labeled_boolean",
 27    "counter",
 28    "labeled_counter",
 29    "datetime",
 30    "jwe",
 31    "quantity",
 32    "string",
 33    "labeled_string",
 34    "rate",
 35    "timespan",
 36    "uuid",
 37    "url",
 38    "text",
 39    "labeled_quantity",
 40}
 41
 42# Bug 1737656 - some metric types are exposed under different names
 43# We need to map to the new name when building dimensions.
 44RENAMED_METRIC_TYPES = {
 45    "jwe": "jwe2",
 46    "text": "text2",
 47    "url": "url2",
 48}
 49
 50
 51DISALLOWED_PINGS = {"events"}
 52
 53# List of labeled counter names for which a suggest explore should be generated.
 54# Generating suggest explores for all labeled counters slows down Looker.
 55SUGGESTS_FOR_LABELED_COUNTERS = {"metrics__labeled_counter__glean_error_invalid_label"}
 56
 57
 58class GleanPingView(PingView):
 59    """A view on a ping table for an application using the Glean SDK."""
 60
 61    type: str = "glean_ping_view"
 62    allow_glean: bool = True
 63
 64    @classmethod
 65    def from_db_views(klass, *args, **kwargs):
 66        """Generate GleanPingViews from db views."""
 67        for view in super().from_db_views(*args, **kwargs):
 68            if view.name not in DISALLOWED_PINGS:
 69                yield view
 70
 71    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 72        """Generate LookML for this view.
 73
 74        The Glean views include a labeled metrics, which need to be joined
 75        against the view in the explore.
 76        """
 77        lookml = super().to_lookml(v1_name, dryrun=dryrun)
 78        # ignore nested join views
 79        lookml["views"] = [lookml["views"][0]]
 80
 81        # iterate over all of the glean metrics and generate views for unnested
 82        # fields as necessary. Append them to the list of existing view
 83        # definitions.
 84        table = next(
 85            (table for table in self.tables if table.get("channel") == "release"),
 86            self.tables[0],
 87        )["table"]
 88        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
 89        dimension_names = {dimension["name"] for dimension in dimensions}
 90
 91        client_id_field = self.get_client_id(dimensions, table)
 92
 93        view_definitions = []
 94        metrics = self._get_glean_metrics(v1_name)
 95        for metric in metrics:
 96            looker_name = self._to_looker_name(metric)
 97            if looker_name not in dimension_names:
 98                continue  # skip metrics with no matching dimension
 99            if metric.type == "labeled_counter":
100                view_name = f"{self.name}__{looker_name}"
101                suggest_name = f"suggest__{view_name}"
102
103                category, name = [
104                    slug_to_title(v) for v in self._get_category_and_name(metric)
105                ]
106                view_label = f"{category} - {name}"
107                metric_hidden = "no" if metric.is_in_source() else "yes"
108
109                measures = [
110                    {
111                        "name": "count",
112                        "type": "sum",
113                        "sql": "${value}",
114                        "hidden": metric_hidden,
115                    }
116                ]
117
118                if client_id_field is not None:
119                    # client_id field is missing for pings with minimal Glean schema
120                    measures.append(
121                        {
122                            "name": "client_count",
123                            "type": "count_distinct",
124                            "sql": f"case when ${{value}} > 0 then ${{{self.name}.{client_id_field}}} end",
125                            "hidden": metric_hidden,
126                        }
127                    )
128
129                join_view: Dict[str, Any] = {
130                    "name": view_name,
131                    "label": view_label,
132                    "dimensions": [
133                        {
134                            "name": "document_id",
135                            "type": "string",
136                            "sql": f"${{{self.name}.document_id}}",
137                            "hidden": "yes",
138                        },
139                        # labeled counters need a primary key that incorporates
140                        # their labels, otherwise we get jumbled results:
141                        # https://github.com/mozilla/lookml-generator/issues/171
142                        {
143                            "name": "document_label_id",
144                            "type": "string",
145                            "sql": f"${{{self.name}.document_id}}-${{label}}",
146                            "primary_key": "yes",
147                            "hidden": "yes",
148                        },
149                        {
150                            "name": "value",
151                            "type": "number",
152                            "sql": "${TABLE}.value",
153                            "hidden": "yes",
154                        },
155                    ],
156                    "measures": measures,
157                }
158
159                if looker_name in SUGGESTS_FOR_LABELED_COUNTERS:
160                    join_view["dimensions"].append(
161                        {
162                            "name": "label",
163                            "type": "string",
164                            "sql": "${TABLE}.key",
165                            "suggest_explore": suggest_name,
166                            "suggest_dimension": f"{suggest_name}.key",
167                            "hidden": metric_hidden,
168                        },
169                    )
170
171                    suggest_view = {
172                        "name": suggest_name,
173                        "derived_table": {
174                            "sql": dedent(
175                                f"""
176                                select
177                                    m.key,
178                                    count(*) as n
179                                from {table} as t,
180                                unnest(metrics.{metric.type}.{metric.id.replace(".", "_")}) as m
181                                where date(submission_timestamp) > date_sub(current_date, interval 30 day)
182                                    and sample_id = 0
183                                group by key
184                                order by n desc
185                                """
186                            )
187                        },
188                        "dimensions": [
189                            {"name": "key", "type": "string", "sql": "${TABLE}.key"}
190                        ],
191                    }
192                    view_definitions += [join_view, suggest_view]
193                else:
194                    join_view["dimensions"].append(
195                        {
196                            "name": "label",
197                            "type": "string",
198                            "sql": "${TABLE}.key",
199                            "hidden": metric_hidden,
200                        },
201                    )
202                    view_definitions += [join_view]
203
204        # deduplicate view definitions, because somehow a few entries make it in
205        # twice e.g. metrics__metrics__labeled_counter__media_audio_init_failure
206        view_definitions = sorted(
207            {v["name"]: v for v in view_definitions}.values(), key=lambda x: x["name"]  # type: ignore
208        )
209
210        [project, dataset, table] = table.split(".")
211        table_schema = dryrun.create(
212            project=project,
213            dataset=dataset,
214            table=table,
215        ).get_table_schema()
216        nested_views = lookml_utils._generate_nested_dimension_views(
217            table_schema, self.name
218        )
219
220        lookml["views"] += view_definitions + nested_views
221
222        return lookml
223
224    def _get_links(self, dimension: dict) -> List[Dict[str, str]]:
225        """Get a link annotation given a metric name."""
226        name = self._get_name(dimension)
227        title = slug_to_title(name)
228        return [
229            {
230                "label": (f"Glean Dictionary reference for {title}"),
231                "url": (
232                    f"https://dictionary.telemetry.mozilla.org"
233                    f"/apps/{self.namespace}/metrics/{name}"
234                ),
235                "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
236            }
237        ]
238
239    def _get_name(self, dimension: dict) -> str:
240        return dimension["name"].split("__")[-1]
241
242    def _get_metric_type(self, dimension: dict) -> str:
243        return dimension["name"].split("__")[1]
244
245    def _is_metric(self, dimension) -> bool:
246        return dimension["name"].startswith("metrics__")
247
248    def _get_glean_metrics(self, v1_name: Optional[str]) -> List[GleanProbe]:
249        if v1_name is None:
250            logging.error(
251                f"Error: Missing v1 name for ping {self.name} in namespace {self.namespace}"
252            )
253            return []
254
255        repo = next((r for r in GleanPing.get_repos() if r["name"] == v1_name))
256        glean_app = GleanPing(repo)
257
258        ping_probes = []
259        probe_ids = set()
260        for probe in glean_app.get_probes():
261            send_in_pings_snakecase = [
262                ping.replace("-", "_") for ping in probe.definition["send_in_pings"]
263            ]
264            if self.name not in send_in_pings_snakecase:
265                continue
266            if probe.id in probe_ids:
267                # Some ids are duplicated, ignore them
268                continue
269
270            ping_probes.append(probe)
271            probe_ids.add(probe.id)
272
273        return ping_probes
274
275    def _get_category_and_name(self, metric: GleanProbe) -> Tuple[str, str]:
276        *category, name = metric.id.split(".")
277        category = "_".join(category)
278
279        return category, name
280
281    def _to_looker_name(self, metric: GleanProbe, suffix: str = "") -> str:
282        """Convert a glean probe into a looker name."""
283        category, name = self._get_category_and_name(metric)
284
285        sep = "" if not category else "_"
286        label = name
287        looker_name = f"metrics__{metric.type}__{category}{sep}{label}"
288        if suffix:
289            looker_name = f"{looker_name}__{suffix}"
290        return looker_name
291
292    def _make_dimension(
293        self, metric: GleanProbe, suffix: str, sql_map: Dict[str, Dict[str, str]]
294    ) -> Optional[Dict[str, Union[str, List[Dict[str, str]]]]]:
295        *category, name = metric.id.split(".")
296        category = "_".join(category)
297
298        sep = "" if not category else "_"
299        label = name
300        type = RENAMED_METRIC_TYPES.get(metric.type, metric.type)
301        looker_name = f"metrics__{type}__{category}{sep}{name}"
302        if suffix:
303            label = f"{name}_{suffix}"
304            looker_name = f"{looker_name}__{suffix}"
305
306        if looker_name not in sql_map:
307            return None
308
309        group_label = slug_to_title(category)
310        group_item_label = slug_to_title(label)
311
312        if not group_label:
313            group_label = "Glean"
314
315        friendly_name = f"{group_label} {group_item_label}"
316
317        lookml = {
318            "name": looker_name,
319            "label": friendly_name,
320            # metrics that are no longer in the source are hidden by default
321            "hidden": "no" if metric.is_in_source() else "yes",
322            "sql": sql_map[looker_name]["sql"],
323            "type": sql_map[looker_name]["type"],
324            "group_label": group_label,
325            "group_item_label": group_item_label,
326            "links": [
327                {
328                    "label": (f"Glean Dictionary reference for {friendly_name}"),
329                    "url": (
330                        f"https://dictionary.telemetry.mozilla.org"
331                        f"/apps/{self.namespace}/metrics/{category}{sep}{name}"
332                    ),
333                    "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
334                },
335            ],
336        }
337
338        if lookml["type"] == "time":
339            # Remove any _{type} suffix from the dimension group name because each timeframe
340            # will add a _{type} suffix to its individual dimension name.
341            lookml["name"] = re.sub("_(date|time(stamp)?)$", "", looker_name)
342            lookml["timeframes"] = [
343                "raw",
344                "time",
345                "date",
346                "week",
347                "month",
348                "quarter",
349                "year",
350            ]
351            # Dimension groups should not be nested (see issue #82).
352            del lookml["group_label"]
353            del lookml["group_item_label"]
354            # Links are not supported for dimension groups.
355            del lookml["links"]
356
357        # remove some elements from the definition if we're handling a labeled
358        # counter, as an initial join dimension
359        if metric.type == "labeled_counter":
360            # this field is not used since labeled counters are maps
361            del lookml["type"]
362            lookml["hidden"] = "yes"
363
364        if metric.description:
365            lookml["description"] = metric.description
366
367        return lookml
368
369    def _get_metric_dimensions(
370        self, metric: GleanProbe, sql_map: Dict[str, Dict[str, str]]
371    ) -> Iterable[Optional[Dict[str, Union[str, List[Dict[str, str]]]]]]:
372        if metric.type == "rate":
373            for suffix in ("numerator", "denominator"):
374                yield self._make_dimension(metric, suffix, sql_map)
375        elif metric.type in DISTRIBUTION_TYPES:
376            yield self._make_dimension(metric, "sum", sql_map)
377        elif metric.type == "timespan":
378            yield self._make_dimension(metric, "value", sql_map)
379        elif metric.type in ALLOWED_TYPES:
380            yield self._make_dimension(metric, "", sql_map)
381
382    def _get_glean_metric_dimensions(
383        self, all_fields: List[dict], v1_name: Optional[str]
384    ):
385        sql_map = {
386            f["name"]: {"sql": f["sql"], "type": f.get("type", "string")}
387            for f in all_fields
388        }
389        metrics = self._get_glean_metrics(v1_name)
390        return [
391            dimension
392            for metric in metrics
393            for dimension in self._get_metric_dimensions(metric, sql_map)
394            if dimension is not None
395        ]
396
397    def _add_link(self, dimension):
398        annotations = {}
399        if self._is_metric(dimension) and not self._get_metric_type(
400            dimension
401        ).startswith("labeled"):
402            annotations["links"] = self._get_links(dimension)
403
404        return dict(dimension, **annotations)
405
406    def get_dimensions(
407        self, table, v1_name: Optional[str], dryrun
408    ) -> List[Dict[str, Any]]:
409        """Get the set of dimensions for this view."""
410        all_fields = super().get_dimensions(table, v1_name, dryrun=dryrun)
411        fields = self._get_glean_metric_dimensions(all_fields, v1_name) + [
412            self._add_link(d)
413            for d in all_fields
414            if not d["name"].startswith("metrics__")
415        ]
416        # later entries will override earlier entries, if there are duplicates
417        field_dict = {f["name"]: f for f in fields}
418        return list(field_dict.values())
419
420    def get_measures(
421        self, dimensions: List[dict], table: str, v1_name: Optional[str]
422    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
423        """Generate measures from a list of dimensions.
424
425        When no dimension-specific measures are found, return a single "count" measure.
426
427        Raise ClickException if dimensions result in duplicate measures.
428        """
429        measures = super().get_measures(dimensions, table, v1_name)
430        client_id_field = self.get_client_id(dimensions, table)
431
432        for dimension in dimensions:
433            if (
434                self._is_metric(dimension)
435                and self._get_metric_type(dimension) == "counter"
436            ):
437                # handle the counters in the metric ping
438                name = self._get_name(dimension)
439                dimension_name = dimension["name"]
440                measures += [
441                    {
442                        "name": name,
443                        "type": "sum",
444                        "sql": f"${{{dimension_name}}}",
445                        "links": self._get_links(dimension),
446                    },
447                ]
448
449                if client_id_field is not None:
450                    measures += [
451                        {
452                            "name": f"{name}_client_count",
453                            "type": "count_distinct",
454                            "filters": [{dimension_name: ">0"}],
455                            "sql": f"${{{client_id_field}}}",
456                            "links": self._get_links(dimension),
457                        },
458                    ]
459
460        # check if there are any duplicate values
461        names = [measure["name"] for measure in measures]
462        duplicates = [k for k, v in Counter(names).items() if v > 1]
463        if duplicates:
464            raise click.ClickException(
465                f"duplicate measures {duplicates!r} for table {table!r}"
466            )
467
468        return measures
DISTRIBUTION_TYPES = {'custom_distribution', 'memory_distribution', 'timing_distribution'}
ALLOWED_TYPES = {'string', 'text', 'labeled_boolean', 'custom_distribution', 'url', 'labeled_quantity', 'timing_distribution', 'quantity', 'datetime', 'uuid', 'rate', 'boolean', 'counter', 'labeled_counter', 'jwe', 'labeled_string', 'memory_distribution', 'timespan'}
RENAMED_METRIC_TYPES = {'jwe': 'jwe2', 'text': 'text2', 'url': 'url2'}
DISALLOWED_PINGS = {'events'}
SUGGESTS_FOR_LABELED_COUNTERS = {'metrics__labeled_counter__glean_error_invalid_label'}
class GleanPingView(generator.views.ping_view.PingView):
 59class GleanPingView(PingView):
 60    """A view on a ping table for an application using the Glean SDK."""
 61
 62    type: str = "glean_ping_view"
 63    allow_glean: bool = True
 64
 65    @classmethod
 66    def from_db_views(klass, *args, **kwargs):
 67        """Generate GleanPingViews from db views."""
 68        for view in super().from_db_views(*args, **kwargs):
 69            if view.name not in DISALLOWED_PINGS:
 70                yield view
 71
 72    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 73        """Generate LookML for this view.
 74
 75        The Glean views include a labeled metrics, which need to be joined
 76        against the view in the explore.
 77        """
 78        lookml = super().to_lookml(v1_name, dryrun=dryrun)
 79        # ignore nested join views
 80        lookml["views"] = [lookml["views"][0]]
 81
 82        # iterate over all of the glean metrics and generate views for unnested
 83        # fields as necessary. Append them to the list of existing view
 84        # definitions.
 85        table = next(
 86            (table for table in self.tables if table.get("channel") == "release"),
 87            self.tables[0],
 88        )["table"]
 89        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
 90        dimension_names = {dimension["name"] for dimension in dimensions}
 91
 92        client_id_field = self.get_client_id(dimensions, table)
 93
 94        view_definitions = []
 95        metrics = self._get_glean_metrics(v1_name)
 96        for metric in metrics:
 97            looker_name = self._to_looker_name(metric)
 98            if looker_name not in dimension_names:
 99                continue  # skip metrics with no matching dimension
100            if metric.type == "labeled_counter":
101                view_name = f"{self.name}__{looker_name}"
102                suggest_name = f"suggest__{view_name}"
103
104                category, name = [
105                    slug_to_title(v) for v in self._get_category_and_name(metric)
106                ]
107                view_label = f"{category} - {name}"
108                metric_hidden = "no" if metric.is_in_source() else "yes"
109
110                measures = [
111                    {
112                        "name": "count",
113                        "type": "sum",
114                        "sql": "${value}",
115                        "hidden": metric_hidden,
116                    }
117                ]
118
119                if client_id_field is not None:
120                    # client_id field is missing for pings with minimal Glean schema
121                    measures.append(
122                        {
123                            "name": "client_count",
124                            "type": "count_distinct",
125                            "sql": f"case when ${{value}} > 0 then ${{{self.name}.{client_id_field}}} end",
126                            "hidden": metric_hidden,
127                        }
128                    )
129
130                join_view: Dict[str, Any] = {
131                    "name": view_name,
132                    "label": view_label,
133                    "dimensions": [
134                        {
135                            "name": "document_id",
136                            "type": "string",
137                            "sql": f"${{{self.name}.document_id}}",
138                            "hidden": "yes",
139                        },
140                        # labeled counters need a primary key that incorporates
141                        # their labels, otherwise we get jumbled results:
142                        # https://github.com/mozilla/lookml-generator/issues/171
143                        {
144                            "name": "document_label_id",
145                            "type": "string",
146                            "sql": f"${{{self.name}.document_id}}-${{label}}",
147                            "primary_key": "yes",
148                            "hidden": "yes",
149                        },
150                        {
151                            "name": "value",
152                            "type": "number",
153                            "sql": "${TABLE}.value",
154                            "hidden": "yes",
155                        },
156                    ],
157                    "measures": measures,
158                }
159
160                if looker_name in SUGGESTS_FOR_LABELED_COUNTERS:
161                    join_view["dimensions"].append(
162                        {
163                            "name": "label",
164                            "type": "string",
165                            "sql": "${TABLE}.key",
166                            "suggest_explore": suggest_name,
167                            "suggest_dimension": f"{suggest_name}.key",
168                            "hidden": metric_hidden,
169                        },
170                    )
171
172                    suggest_view = {
173                        "name": suggest_name,
174                        "derived_table": {
175                            "sql": dedent(
176                                f"""
177                                select
178                                    m.key,
179                                    count(*) as n
180                                from {table} as t,
181                                unnest(metrics.{metric.type}.{metric.id.replace(".", "_")}) as m
182                                where date(submission_timestamp) > date_sub(current_date, interval 30 day)
183                                    and sample_id = 0
184                                group by key
185                                order by n desc
186                                """
187                            )
188                        },
189                        "dimensions": [
190                            {"name": "key", "type": "string", "sql": "${TABLE}.key"}
191                        ],
192                    }
193                    view_definitions += [join_view, suggest_view]
194                else:
195                    join_view["dimensions"].append(
196                        {
197                            "name": "label",
198                            "type": "string",
199                            "sql": "${TABLE}.key",
200                            "hidden": metric_hidden,
201                        },
202                    )
203                    view_definitions += [join_view]
204
205        # deduplicate view definitions, because somehow a few entries make it in
206        # twice e.g. metrics__metrics__labeled_counter__media_audio_init_failure
207        view_definitions = sorted(
208            {v["name"]: v for v in view_definitions}.values(), key=lambda x: x["name"]  # type: ignore
209        )
210
211        [project, dataset, table] = table.split(".")
212        table_schema = dryrun.create(
213            project=project,
214            dataset=dataset,
215            table=table,
216        ).get_table_schema()
217        nested_views = lookml_utils._generate_nested_dimension_views(
218            table_schema, self.name
219        )
220
221        lookml["views"] += view_definitions + nested_views
222
223        return lookml
224
225    def _get_links(self, dimension: dict) -> List[Dict[str, str]]:
226        """Get a link annotation given a metric name."""
227        name = self._get_name(dimension)
228        title = slug_to_title(name)
229        return [
230            {
231                "label": (f"Glean Dictionary reference for {title}"),
232                "url": (
233                    f"https://dictionary.telemetry.mozilla.org"
234                    f"/apps/{self.namespace}/metrics/{name}"
235                ),
236                "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
237            }
238        ]
239
240    def _get_name(self, dimension: dict) -> str:
241        return dimension["name"].split("__")[-1]
242
243    def _get_metric_type(self, dimension: dict) -> str:
244        return dimension["name"].split("__")[1]
245
246    def _is_metric(self, dimension) -> bool:
247        return dimension["name"].startswith("metrics__")
248
249    def _get_glean_metrics(self, v1_name: Optional[str]) -> List[GleanProbe]:
250        if v1_name is None:
251            logging.error(
252                f"Error: Missing v1 name for ping {self.name} in namespace {self.namespace}"
253            )
254            return []
255
256        repo = next((r for r in GleanPing.get_repos() if r["name"] == v1_name))
257        glean_app = GleanPing(repo)
258
259        ping_probes = []
260        probe_ids = set()
261        for probe in glean_app.get_probes():
262            send_in_pings_snakecase = [
263                ping.replace("-", "_") for ping in probe.definition["send_in_pings"]
264            ]
265            if self.name not in send_in_pings_snakecase:
266                continue
267            if probe.id in probe_ids:
268                # Some ids are duplicated, ignore them
269                continue
270
271            ping_probes.append(probe)
272            probe_ids.add(probe.id)
273
274        return ping_probes
275
276    def _get_category_and_name(self, metric: GleanProbe) -> Tuple[str, str]:
277        *category, name = metric.id.split(".")
278        category = "_".join(category)
279
280        return category, name
281
282    def _to_looker_name(self, metric: GleanProbe, suffix: str = "") -> str:
283        """Convert a glean probe into a looker name."""
284        category, name = self._get_category_and_name(metric)
285
286        sep = "" if not category else "_"
287        label = name
288        looker_name = f"metrics__{metric.type}__{category}{sep}{label}"
289        if suffix:
290            looker_name = f"{looker_name}__{suffix}"
291        return looker_name
292
293    def _make_dimension(
294        self, metric: GleanProbe, suffix: str, sql_map: Dict[str, Dict[str, str]]
295    ) -> Optional[Dict[str, Union[str, List[Dict[str, str]]]]]:
296        *category, name = metric.id.split(".")
297        category = "_".join(category)
298
299        sep = "" if not category else "_"
300        label = name
301        type = RENAMED_METRIC_TYPES.get(metric.type, metric.type)
302        looker_name = f"metrics__{type}__{category}{sep}{name}"
303        if suffix:
304            label = f"{name}_{suffix}"
305            looker_name = f"{looker_name}__{suffix}"
306
307        if looker_name not in sql_map:
308            return None
309
310        group_label = slug_to_title(category)
311        group_item_label = slug_to_title(label)
312
313        if not group_label:
314            group_label = "Glean"
315
316        friendly_name = f"{group_label} {group_item_label}"
317
318        lookml = {
319            "name": looker_name,
320            "label": friendly_name,
321            # metrics that are no longer in the source are hidden by default
322            "hidden": "no" if metric.is_in_source() else "yes",
323            "sql": sql_map[looker_name]["sql"],
324            "type": sql_map[looker_name]["type"],
325            "group_label": group_label,
326            "group_item_label": group_item_label,
327            "links": [
328                {
329                    "label": (f"Glean Dictionary reference for {friendly_name}"),
330                    "url": (
331                        f"https://dictionary.telemetry.mozilla.org"
332                        f"/apps/{self.namespace}/metrics/{category}{sep}{name}"
333                    ),
334                    "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
335                },
336            ],
337        }
338
339        if lookml["type"] == "time":
340            # Remove any _{type} suffix from the dimension group name because each timeframe
341            # will add a _{type} suffix to its individual dimension name.
342            lookml["name"] = re.sub("_(date|time(stamp)?)$", "", looker_name)
343            lookml["timeframes"] = [
344                "raw",
345                "time",
346                "date",
347                "week",
348                "month",
349                "quarter",
350                "year",
351            ]
352            # Dimension groups should not be nested (see issue #82).
353            del lookml["group_label"]
354            del lookml["group_item_label"]
355            # Links are not supported for dimension groups.
356            del lookml["links"]
357
358        # remove some elements from the definition if we're handling a labeled
359        # counter, as an initial join dimension
360        if metric.type == "labeled_counter":
361            # this field is not used since labeled counters are maps
362            del lookml["type"]
363            lookml["hidden"] = "yes"
364
365        if metric.description:
366            lookml["description"] = metric.description
367
368        return lookml
369
370    def _get_metric_dimensions(
371        self, metric: GleanProbe, sql_map: Dict[str, Dict[str, str]]
372    ) -> Iterable[Optional[Dict[str, Union[str, List[Dict[str, str]]]]]]:
373        if metric.type == "rate":
374            for suffix in ("numerator", "denominator"):
375                yield self._make_dimension(metric, suffix, sql_map)
376        elif metric.type in DISTRIBUTION_TYPES:
377            yield self._make_dimension(metric, "sum", sql_map)
378        elif metric.type == "timespan":
379            yield self._make_dimension(metric, "value", sql_map)
380        elif metric.type in ALLOWED_TYPES:
381            yield self._make_dimension(metric, "", sql_map)
382
383    def _get_glean_metric_dimensions(
384        self, all_fields: List[dict], v1_name: Optional[str]
385    ):
386        sql_map = {
387            f["name"]: {"sql": f["sql"], "type": f.get("type", "string")}
388            for f in all_fields
389        }
390        metrics = self._get_glean_metrics(v1_name)
391        return [
392            dimension
393            for metric in metrics
394            for dimension in self._get_metric_dimensions(metric, sql_map)
395            if dimension is not None
396        ]
397
398    def _add_link(self, dimension):
399        annotations = {}
400        if self._is_metric(dimension) and not self._get_metric_type(
401            dimension
402        ).startswith("labeled"):
403            annotations["links"] = self._get_links(dimension)
404
405        return dict(dimension, **annotations)
406
407    def get_dimensions(
408        self, table, v1_name: Optional[str], dryrun
409    ) -> List[Dict[str, Any]]:
410        """Get the set of dimensions for this view."""
411        all_fields = super().get_dimensions(table, v1_name, dryrun=dryrun)
412        fields = self._get_glean_metric_dimensions(all_fields, v1_name) + [
413            self._add_link(d)
414            for d in all_fields
415            if not d["name"].startswith("metrics__")
416        ]
417        # later entries will override earlier entries, if there are duplicates
418        field_dict = {f["name"]: f for f in fields}
419        return list(field_dict.values())
420
421    def get_measures(
422        self, dimensions: List[dict], table: str, v1_name: Optional[str]
423    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
424        """Generate measures from a list of dimensions.
425
426        When no dimension-specific measures are found, return a single "count" measure.
427
428        Raise ClickException if dimensions result in duplicate measures.
429        """
430        measures = super().get_measures(dimensions, table, v1_name)
431        client_id_field = self.get_client_id(dimensions, table)
432
433        for dimension in dimensions:
434            if (
435                self._is_metric(dimension)
436                and self._get_metric_type(dimension) == "counter"
437            ):
438                # handle the counters in the metric ping
439                name = self._get_name(dimension)
440                dimension_name = dimension["name"]
441                measures += [
442                    {
443                        "name": name,
444                        "type": "sum",
445                        "sql": f"${{{dimension_name}}}",
446                        "links": self._get_links(dimension),
447                    },
448                ]
449
450                if client_id_field is not None:
451                    measures += [
452                        {
453                            "name": f"{name}_client_count",
454                            "type": "count_distinct",
455                            "filters": [{dimension_name: ">0"}],
456                            "sql": f"${{{client_id_field}}}",
457                            "links": self._get_links(dimension),
458                        },
459                    ]
460
461        # check if there are any duplicate values
462        names = [measure["name"] for measure in measures]
463        duplicates = [k for k, v in Counter(names).items() if v > 1]
464        if duplicates:
465            raise click.ClickException(
466                f"duplicate measures {duplicates!r} for table {table!r}"
467            )
468
469        return measures

A view on a ping table for an application using the Glean SDK.

type: str = 'glean_ping_view'
allow_glean: bool = True
@classmethod
def from_db_views(klass, *args, **kwargs):
65    @classmethod
66    def from_db_views(klass, *args, **kwargs):
67        """Generate GleanPingViews from db views."""
68        for view in super().from_db_views(*args, **kwargs):
69            if view.name not in DISALLOWED_PINGS:
70                yield view

Generate GleanPingViews from db views.

def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 72    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 73        """Generate LookML for this view.
 74
 75        The Glean views include a labeled metrics, which need to be joined
 76        against the view in the explore.
 77        """
 78        lookml = super().to_lookml(v1_name, dryrun=dryrun)
 79        # ignore nested join views
 80        lookml["views"] = [lookml["views"][0]]
 81
 82        # iterate over all of the glean metrics and generate views for unnested
 83        # fields as necessary. Append them to the list of existing view
 84        # definitions.
 85        table = next(
 86            (table for table in self.tables if table.get("channel") == "release"),
 87            self.tables[0],
 88        )["table"]
 89        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
 90        dimension_names = {dimension["name"] for dimension in dimensions}
 91
 92        client_id_field = self.get_client_id(dimensions, table)
 93
 94        view_definitions = []
 95        metrics = self._get_glean_metrics(v1_name)
 96        for metric in metrics:
 97            looker_name = self._to_looker_name(metric)
 98            if looker_name not in dimension_names:
 99                continue  # skip metrics with no matching dimension
100            if metric.type == "labeled_counter":
101                view_name = f"{self.name}__{looker_name}"
102                suggest_name = f"suggest__{view_name}"
103
104                category, name = [
105                    slug_to_title(v) for v in self._get_category_and_name(metric)
106                ]
107                view_label = f"{category} - {name}"
108                metric_hidden = "no" if metric.is_in_source() else "yes"
109
110                measures = [
111                    {
112                        "name": "count",
113                        "type": "sum",
114                        "sql": "${value}",
115                        "hidden": metric_hidden,
116                    }
117                ]
118
119                if client_id_field is not None:
120                    # client_id field is missing for pings with minimal Glean schema
121                    measures.append(
122                        {
123                            "name": "client_count",
124                            "type": "count_distinct",
125                            "sql": f"case when ${{value}} > 0 then ${{{self.name}.{client_id_field}}} end",
126                            "hidden": metric_hidden,
127                        }
128                    )
129
130                join_view: Dict[str, Any] = {
131                    "name": view_name,
132                    "label": view_label,
133                    "dimensions": [
134                        {
135                            "name": "document_id",
136                            "type": "string",
137                            "sql": f"${{{self.name}.document_id}}",
138                            "hidden": "yes",
139                        },
140                        # labeled counters need a primary key that incorporates
141                        # their labels, otherwise we get jumbled results:
142                        # https://github.com/mozilla/lookml-generator/issues/171
143                        {
144                            "name": "document_label_id",
145                            "type": "string",
146                            "sql": f"${{{self.name}.document_id}}-${{label}}",
147                            "primary_key": "yes",
148                            "hidden": "yes",
149                        },
150                        {
151                            "name": "value",
152                            "type": "number",
153                            "sql": "${TABLE}.value",
154                            "hidden": "yes",
155                        },
156                    ],
157                    "measures": measures,
158                }
159
160                if looker_name in SUGGESTS_FOR_LABELED_COUNTERS:
161                    join_view["dimensions"].append(
162                        {
163                            "name": "label",
164                            "type": "string",
165                            "sql": "${TABLE}.key",
166                            "suggest_explore": suggest_name,
167                            "suggest_dimension": f"{suggest_name}.key",
168                            "hidden": metric_hidden,
169                        },
170                    )
171
172                    suggest_view = {
173                        "name": suggest_name,
174                        "derived_table": {
175                            "sql": dedent(
176                                f"""
177                                select
178                                    m.key,
179                                    count(*) as n
180                                from {table} as t,
181                                unnest(metrics.{metric.type}.{metric.id.replace(".", "_")}) as m
182                                where date(submission_timestamp) > date_sub(current_date, interval 30 day)
183                                    and sample_id = 0
184                                group by key
185                                order by n desc
186                                """
187                            )
188                        },
189                        "dimensions": [
190                            {"name": "key", "type": "string", "sql": "${TABLE}.key"}
191                        ],
192                    }
193                    view_definitions += [join_view, suggest_view]
194                else:
195                    join_view["dimensions"].append(
196                        {
197                            "name": "label",
198                            "type": "string",
199                            "sql": "${TABLE}.key",
200                            "hidden": metric_hidden,
201                        },
202                    )
203                    view_definitions += [join_view]
204
205        # deduplicate view definitions, because somehow a few entries make it in
206        # twice e.g. metrics__metrics__labeled_counter__media_audio_init_failure
207        view_definitions = sorted(
208            {v["name"]: v for v in view_definitions}.values(), key=lambda x: x["name"]  # type: ignore
209        )
210
211        [project, dataset, table] = table.split(".")
212        table_schema = dryrun.create(
213            project=project,
214            dataset=dataset,
215            table=table,
216        ).get_table_schema()
217        nested_views = lookml_utils._generate_nested_dimension_views(
218            table_schema, self.name
219        )
220
221        lookml["views"] += view_definitions + nested_views
222
223        return lookml

Generate LookML for this view.

The Glean views include labeled metrics, which need to be joined against the view in the explore.

def get_dimensions(self, table, v1_name: Optional[str], dryrun) -> List[Dict[str, Any]]:
407    def get_dimensions(
408        self, table, v1_name: Optional[str], dryrun
409    ) -> List[Dict[str, Any]]:
410        """Get the set of dimensions for this view."""
411        all_fields = super().get_dimensions(table, v1_name, dryrun=dryrun)
412        fields = self._get_glean_metric_dimensions(all_fields, v1_name) + [
413            self._add_link(d)
414            for d in all_fields
415            if not d["name"].startswith("metrics__")
416        ]
417        # later entries will override earlier entries, if there are duplicates
418        field_dict = {f["name"]: f for f in fields}
419        return list(field_dict.values())

Get the set of dimensions for this view.

def get_measures(self, dimensions: List[dict], table: str, v1_name: Optional[str]) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
421    def get_measures(
422        self, dimensions: List[dict], table: str, v1_name: Optional[str]
423    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
424        """Generate measures from a list of dimensions.
425
426        When no dimension-specific measures are found, return a single "count" measure.
427
428        Raise ClickException if dimensions result in duplicate measures.
429        """
430        measures = super().get_measures(dimensions, table, v1_name)
431        client_id_field = self.get_client_id(dimensions, table)
432
433        for dimension in dimensions:
434            if (
435                self._is_metric(dimension)
436                and self._get_metric_type(dimension) == "counter"
437            ):
438                # handle the counters in the metric ping
439                name = self._get_name(dimension)
440                dimension_name = dimension["name"]
441                measures += [
442                    {
443                        "name": name,
444                        "type": "sum",
445                        "sql": f"${{{dimension_name}}}",
446                        "links": self._get_links(dimension),
447                    },
448                ]
449
450                if client_id_field is not None:
451                    measures += [
452                        {
453                            "name": f"{name}_client_count",
454                            "type": "count_distinct",
455                            "filters": [{dimension_name: ">0"}],
456                            "sql": f"${{{client_id_field}}}",
457                            "links": self._get_links(dimension),
458                        },
459                    ]
460
461        # check if there are any duplicate values
462        names = [measure["name"] for measure in measures]
463        duplicates = [k for k, v in Counter(names).items() if v > 1]
464        if duplicates:
465            raise click.ClickException(
466                f"duplicate measures {duplicates!r} for table {table!r}"
467            )
468
469        return measures

Generate measures from a list of dimensions.

When no dimension-specific measures are found, return a single "count" measure.

Raise ClickException if dimensions result in duplicate measures.