generator.views.glean_ping_view

Class to describe a Glean Ping View.

  1"""Class to describe a Glean Ping View."""
  2
  3import logging
  4import re
  5from collections import Counter
  6from textwrap import dedent
  7from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
  8
  9import click
 10from mozilla_schema_generator.glean_ping import GleanPing
 11from mozilla_schema_generator.probes import GleanProbe
 12
 13from . import lookml_utils
 14from .lookml_utils import slug_to_title
 15from .ping_view import PingView
 16
 17DISTRIBUTION_TYPES = {
 18    "timing_distribution",
 19    "memory_distribution",
 20    "custom_distribution",
 21}
 22
 23
 24ALLOWED_TYPES = DISTRIBUTION_TYPES | {
 25    "boolean",
 26    "labeled_boolean",
 27    "counter",
 28    "labeled_counter",
 29    "datetime",
 30    "jwe",
 31    "quantity",
 32    "string",
 33    "labeled_string",
 34    "rate",
 35    "timespan",
 36    "uuid",
 37    "url",
 38    "text",
 39}
 40
 41# Bug 1737656 - some metric types are exposed under different names
 42# We need to map to the new name when building dimensions.
 43RENAMED_METRIC_TYPES = {
 44    "jwe": "jwe2",
 45    "text": "text2",
 46    "url": "url2",
 47}
 48
 49
 50DISALLOWED_PINGS = {"events"}
 51
 52# List of labeled counter names for which a suggest explore should be generated.
 53# Generating suggest explores for all labeled counters slows down Looker.
 54SUGGESTS_FOR_LABELED_COUNTERS = {"metrics__labeled_counter__glean_error_invalid_label"}
 55
 56
 57class GleanPingView(PingView):
 58    """A view on a ping table for an application using the Glean SDK."""
 59
 60    type: str = "glean_ping_view"
 61    allow_glean: bool = True
 62
 63    @classmethod
 64    def from_db_views(klass, *args, **kwargs):
 65        """Generate GleanPingViews from db views."""
 66        for view in super().from_db_views(*args, **kwargs):
 67            if view.name not in DISALLOWED_PINGS:
 68                yield view
 69
 70    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 71        """Generate LookML for this view.
 72
 73        The Glean views include a labeled metrics, which need to be joined
 74        against the view in the explore.
 75        """
 76        lookml = super().to_lookml(v1_name, dryrun=dryrun)
 77        # ignore nested join views
 78        lookml["views"] = [lookml["views"][0]]
 79
 80        # iterate over all of the glean metrics and generate views for unnested
 81        # fields as necessary. Append them to the list of existing view
 82        # definitions.
 83        table = next(
 84            (table for table in self.tables if table.get("channel") == "release"),
 85            self.tables[0],
 86        )["table"]
 87        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
 88        dimension_names = {dimension["name"] for dimension in dimensions}
 89
 90        client_id_field = self.get_client_id(dimensions, table)
 91
 92        view_definitions = []
 93        metrics = self._get_glean_metrics(v1_name)
 94        for metric in metrics:
 95            looker_name = self._to_looker_name(metric)
 96            if looker_name not in dimension_names:
 97                continue  # skip metrics with no matching dimension
 98            if metric.type == "labeled_counter":
 99                view_name = f"{self.name}__{looker_name}"
100                suggest_name = f"suggest__{view_name}"
101
102                category, name = [
103                    slug_to_title(v) for v in self._get_category_and_name(metric)
104                ]
105                view_label = f"{category} - {name}"
106                metric_hidden = "no" if metric.is_in_source() else "yes"
107
108                measures = [
109                    {
110                        "name": "count",
111                        "type": "sum",
112                        "sql": "${value}",
113                        "hidden": metric_hidden,
114                    }
115                ]
116
117                if client_id_field is not None:
118                    # client_id field is missing for pings with minimal Glean schema
119                    measures.append(
120                        {
121                            "name": "client_count",
122                            "type": "count_distinct",
123                            "sql": f"case when ${{value}} > 0 then ${{{self.name}.{client_id_field}}} end",
124                            "hidden": metric_hidden,
125                        }
126                    )
127
128                join_view: Dict[str, Any] = {
129                    "name": view_name,
130                    "label": view_label,
131                    "dimensions": [
132                        {
133                            "name": "document_id",
134                            "type": "string",
135                            "sql": f"${{{self.name}.document_id}}",
136                            "hidden": "yes",
137                        },
138                        # labeled counters need a primary key that incorporates
139                        # their labels, otherwise we get jumbled results:
140                        # https://github.com/mozilla/lookml-generator/issues/171
141                        {
142                            "name": "document_label_id",
143                            "type": "string",
144                            "sql": f"${{{self.name}.document_id}}-${{label}}",
145                            "primary_key": "yes",
146                            "hidden": "yes",
147                        },
148                        {
149                            "name": "value",
150                            "type": "number",
151                            "sql": "${TABLE}.value",
152                            "hidden": "yes",
153                        },
154                    ],
155                    "measures": measures,
156                }
157
158                if looker_name in SUGGESTS_FOR_LABELED_COUNTERS:
159                    join_view["dimensions"].append(
160                        {
161                            "name": "label",
162                            "type": "string",
163                            "sql": "${TABLE}.key",
164                            "suggest_explore": suggest_name,
165                            "suggest_dimension": f"{suggest_name}.key",
166                            "hidden": metric_hidden,
167                        },
168                    )
169
170                    suggest_view = {
171                        "name": suggest_name,
172                        "derived_table": {
173                            "sql": dedent(
174                                f"""
175                                select
176                                    m.key,
177                                    count(*) as n
178                                from {table} as t,
179                                unnest(metrics.{metric.type}.{metric.id.replace(".", "_")}) as m
180                                where date(submission_timestamp) > date_sub(current_date, interval 30 day)
181                                    and sample_id = 0
182                                group by key
183                                order by n desc
184                                """
185                            )
186                        },
187                        "dimensions": [
188                            {"name": "key", "type": "string", "sql": "${TABLE}.key"}
189                        ],
190                    }
191                    view_definitions += [join_view, suggest_view]
192                else:
193                    join_view["dimensions"].append(
194                        {
195                            "name": "label",
196                            "type": "string",
197                            "sql": "${TABLE}.key",
198                            "hidden": metric_hidden,
199                        },
200                    )
201                    view_definitions += [join_view]
202
203        # deduplicate view definitions, because somehow a few entries make it in
204        # twice e.g. metrics__metrics__labeled_counter__media_audio_init_failure
205        view_definitions = sorted(
206            {v["name"]: v for v in view_definitions}.values(), key=lambda x: x["name"]  # type: ignore
207        )
208
209        [project, dataset, table] = table.split(".")
210        table_schema = dryrun.create(
211            project=project,
212            dataset=dataset,
213            table=table,
214        ).get_table_schema()
215        nested_views = lookml_utils._generate_nested_dimension_views(
216            table_schema, self.name
217        )
218
219        lookml["views"] += view_definitions + nested_views
220
221        return lookml
222
223    def _get_links(self, dimension: dict) -> List[Dict[str, str]]:
224        """Get a link annotation given a metric name."""
225        name = self._get_name(dimension)
226        title = slug_to_title(name)
227        return [
228            {
229                "label": (f"Glean Dictionary reference for {title}"),
230                "url": (
231                    f"https://dictionary.telemetry.mozilla.org"
232                    f"/apps/{self.namespace}/metrics/{name}"
233                ),
234                "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
235            }
236        ]
237
238    def _get_name(self, dimension: dict) -> str:
239        return dimension["name"].split("__")[-1]
240
241    def _get_metric_type(self, dimension: dict) -> str:
242        return dimension["name"].split("__")[1]
243
244    def _is_metric(self, dimension) -> bool:
245        return dimension["name"].startswith("metrics__")
246
247    def _get_glean_metrics(self, v1_name: Optional[str]) -> List[GleanProbe]:
248        if v1_name is None:
249            logging.error(
250                f"Error: Missing v1 name for ping {self.name} in namespace {self.namespace}"
251            )
252            return []
253
254        repo = next((r for r in GleanPing.get_repos() if r["name"] == v1_name))
255        glean_app = GleanPing(repo)
256
257        ping_probes = []
258        probe_ids = set()
259        for probe in glean_app.get_probes():
260            send_in_pings_snakecase = [
261                ping.replace("-", "_") for ping in probe.definition["send_in_pings"]
262            ]
263            if self.name not in send_in_pings_snakecase:
264                continue
265            if probe.id in probe_ids:
266                # Some ids are duplicated, ignore them
267                continue
268
269            ping_probes.append(probe)
270            probe_ids.add(probe.id)
271
272        return ping_probes
273
274    def _get_category_and_name(self, metric: GleanProbe) -> Tuple[str, str]:
275        *category, name = metric.id.split(".")
276        category = "_".join(category)
277
278        return category, name
279
280    def _to_looker_name(self, metric: GleanProbe, suffix: str = "") -> str:
281        """Convert a glean probe into a looker name."""
282        category, name = self._get_category_and_name(metric)
283
284        sep = "" if not category else "_"
285        label = name
286        looker_name = f"metrics__{metric.type}__{category}{sep}{label}"
287        if suffix:
288            looker_name = f"{looker_name}__{suffix}"
289        return looker_name
290
291    def _make_dimension(
292        self, metric: GleanProbe, suffix: str, sql_map: Dict[str, Dict[str, str]]
293    ) -> Optional[Dict[str, Union[str, List[Dict[str, str]]]]]:
294        *category, name = metric.id.split(".")
295        category = "_".join(category)
296
297        sep = "" if not category else "_"
298        label = name
299        type = RENAMED_METRIC_TYPES.get(metric.type, metric.type)
300        looker_name = f"metrics__{type}__{category}{sep}{name}"
301        if suffix:
302            label = f"{name}_{suffix}"
303            looker_name = f"{looker_name}__{suffix}"
304
305        if looker_name not in sql_map:
306            return None
307
308        group_label = slug_to_title(category)
309        group_item_label = slug_to_title(label)
310
311        if not group_label:
312            group_label = "Glean"
313
314        friendly_name = f"{group_label} {group_item_label}"
315
316        lookml = {
317            "name": looker_name,
318            "label": friendly_name,
319            # metrics that are no longer in the source are hidden by default
320            "hidden": "no" if metric.is_in_source() else "yes",
321            "sql": sql_map[looker_name]["sql"],
322            "type": sql_map[looker_name]["type"],
323            "group_label": group_label,
324            "group_item_label": group_item_label,
325            "links": [
326                {
327                    "label": (f"Glean Dictionary reference for {friendly_name}"),
328                    "url": (
329                        f"https://dictionary.telemetry.mozilla.org"
330                        f"/apps/{self.namespace}/metrics/{category}{sep}{name}"
331                    ),
332                    "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
333                },
334            ],
335        }
336
337        if lookml["type"] == "time":
338            # Remove any _{type} suffix from the dimension group name because each timeframe
339            # will add a _{type} suffix to its individual dimension name.
340            lookml["name"] = re.sub("_(date|time(stamp)?)$", "", looker_name)
341            lookml["timeframes"] = [
342                "raw",
343                "time",
344                "date",
345                "week",
346                "month",
347                "quarter",
348                "year",
349            ]
350            # Dimension groups should not be nested (see issue #82).
351            del lookml["group_label"]
352            del lookml["group_item_label"]
353            # Links are not supported for dimension groups.
354            del lookml["links"]
355
356        # remove some elements from the definition if we're handling a labeled
357        # counter, as an initial join dimension
358        if metric.type == "labeled_counter":
359            # this field is not used since labeled counters are maps
360            del lookml["type"]
361            lookml["hidden"] = "yes"
362
363        if metric.description:
364            lookml["description"] = metric.description
365
366        return lookml
367
368    def _get_metric_dimensions(
369        self, metric: GleanProbe, sql_map: Dict[str, Dict[str, str]]
370    ) -> Iterable[Optional[Dict[str, Union[str, List[Dict[str, str]]]]]]:
371        if metric.type == "rate":
372            for suffix in ("numerator", "denominator"):
373                yield self._make_dimension(metric, suffix, sql_map)
374        elif metric.type in DISTRIBUTION_TYPES:
375            yield self._make_dimension(metric, "sum", sql_map)
376        elif metric.type == "timespan":
377            yield self._make_dimension(metric, "value", sql_map)
378        elif metric.type in ALLOWED_TYPES:
379            yield self._make_dimension(metric, "", sql_map)
380
381    def _get_glean_metric_dimensions(
382        self, all_fields: List[dict], v1_name: Optional[str]
383    ):
384        sql_map = {
385            f["name"]: {"sql": f["sql"], "type": f.get("type", "string")}
386            for f in all_fields
387        }
388        metrics = self._get_glean_metrics(v1_name)
389        return [
390            dimension
391            for metric in metrics
392            for dimension in self._get_metric_dimensions(metric, sql_map)
393            if dimension is not None
394        ]
395
396    def _add_link(self, dimension):
397        annotations = {}
398        if self._is_metric(dimension) and not self._get_metric_type(
399            dimension
400        ).startswith("labeled"):
401            annotations["links"] = self._get_links(dimension)
402
403        return dict(dimension, **annotations)
404
405    def get_dimensions(
406        self, table, v1_name: Optional[str], dryrun
407    ) -> List[Dict[str, Any]]:
408        """Get the set of dimensions for this view."""
409        all_fields = super().get_dimensions(table, v1_name, dryrun=dryrun)
410        fields = self._get_glean_metric_dimensions(all_fields, v1_name) + [
411            self._add_link(d)
412            for d in all_fields
413            if not d["name"].startswith("metrics__")
414        ]
415        # later entries will override earlier entries, if there are duplicates
416        field_dict = {f["name"]: f for f in fields}
417        return list(field_dict.values())
418
419    def get_measures(
420        self, dimensions: List[dict], table: str, v1_name: Optional[str]
421    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
422        """Generate measures from a list of dimensions.
423
424        When no dimension-specific measures are found, return a single "count" measure.
425
426        Raise ClickException if dimensions result in duplicate measures.
427        """
428        measures = super().get_measures(dimensions, table, v1_name)
429        client_id_field = self.get_client_id(dimensions, table)
430
431        for dimension in dimensions:
432            if (
433                self._is_metric(dimension)
434                and self._get_metric_type(dimension) == "counter"
435            ):
436                # handle the counters in the metric ping
437                name = self._get_name(dimension)
438                dimension_name = dimension["name"]
439                measures += [
440                    {
441                        "name": name,
442                        "type": "sum",
443                        "sql": f"${{{dimension_name}}}",
444                        "links": self._get_links(dimension),
445                    },
446                ]
447
448                if client_id_field is not None:
449                    measures += [
450                        {
451                            "name": f"{name}_client_count",
452                            "type": "count_distinct",
453                            "filters": [{dimension_name: ">0"}],
454                            "sql": f"${{{client_id_field}}}",
455                            "links": self._get_links(dimension),
456                        },
457                    ]
458
459        # check if there are any duplicate values
460        names = [measure["name"] for measure in measures]
461        duplicates = [k for k, v in Counter(names).items() if v > 1]
462        if duplicates:
463            raise click.ClickException(
464                f"duplicate measures {duplicates!r} for table {table!r}"
465            )
466
467        return measures
DISTRIBUTION_TYPES = {'timing_distribution', 'memory_distribution', 'custom_distribution'}
ALLOWED_TYPES = {'rate', 'datetime', 'labeled_counter', 'jwe', 'labeled_boolean', 'labeled_string', 'timing_distribution', 'quantity', 'custom_distribution', 'boolean', 'counter', 'timespan', 'text', 'string', 'url', 'memory_distribution', 'uuid'}
RENAMED_METRIC_TYPES = {'jwe': 'jwe2', 'text': 'text2', 'url': 'url2'}
DISALLOWED_PINGS = {'events'}
SUGGESTS_FOR_LABELED_COUNTERS = {'metrics__labeled_counter__glean_error_invalid_label'}
class GleanPingView(generator.views.ping_view.PingView):
 58class GleanPingView(PingView):
 59    """A view on a ping table for an application using the Glean SDK."""
 60
 61    type: str = "glean_ping_view"
 62    allow_glean: bool = True
 63
 64    @classmethod
 65    def from_db_views(klass, *args, **kwargs):
 66        """Generate GleanPingViews from db views."""
 67        for view in super().from_db_views(*args, **kwargs):
 68            if view.name not in DISALLOWED_PINGS:
 69                yield view
 70
 71    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 72        """Generate LookML for this view.
 73
 74        The Glean views include a labeled metrics, which need to be joined
 75        against the view in the explore.
 76        """
 77        lookml = super().to_lookml(v1_name, dryrun=dryrun)
 78        # ignore nested join views
 79        lookml["views"] = [lookml["views"][0]]
 80
 81        # iterate over all of the glean metrics and generate views for unnested
 82        # fields as necessary. Append them to the list of existing view
 83        # definitions.
 84        table = next(
 85            (table for table in self.tables if table.get("channel") == "release"),
 86            self.tables[0],
 87        )["table"]
 88        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
 89        dimension_names = {dimension["name"] for dimension in dimensions}
 90
 91        client_id_field = self.get_client_id(dimensions, table)
 92
 93        view_definitions = []
 94        metrics = self._get_glean_metrics(v1_name)
 95        for metric in metrics:
 96            looker_name = self._to_looker_name(metric)
 97            if looker_name not in dimension_names:
 98                continue  # skip metrics with no matching dimension
 99            if metric.type == "labeled_counter":
100                view_name = f"{self.name}__{looker_name}"
101                suggest_name = f"suggest__{view_name}"
102
103                category, name = [
104                    slug_to_title(v) for v in self._get_category_and_name(metric)
105                ]
106                view_label = f"{category} - {name}"
107                metric_hidden = "no" if metric.is_in_source() else "yes"
108
109                measures = [
110                    {
111                        "name": "count",
112                        "type": "sum",
113                        "sql": "${value}",
114                        "hidden": metric_hidden,
115                    }
116                ]
117
118                if client_id_field is not None:
119                    # client_id field is missing for pings with minimal Glean schema
120                    measures.append(
121                        {
122                            "name": "client_count",
123                            "type": "count_distinct",
124                            "sql": f"case when ${{value}} > 0 then ${{{self.name}.{client_id_field}}} end",
125                            "hidden": metric_hidden,
126                        }
127                    )
128
129                join_view: Dict[str, Any] = {
130                    "name": view_name,
131                    "label": view_label,
132                    "dimensions": [
133                        {
134                            "name": "document_id",
135                            "type": "string",
136                            "sql": f"${{{self.name}.document_id}}",
137                            "hidden": "yes",
138                        },
139                        # labeled counters need a primary key that incorporates
140                        # their labels, otherwise we get jumbled results:
141                        # https://github.com/mozilla/lookml-generator/issues/171
142                        {
143                            "name": "document_label_id",
144                            "type": "string",
145                            "sql": f"${{{self.name}.document_id}}-${{label}}",
146                            "primary_key": "yes",
147                            "hidden": "yes",
148                        },
149                        {
150                            "name": "value",
151                            "type": "number",
152                            "sql": "${TABLE}.value",
153                            "hidden": "yes",
154                        },
155                    ],
156                    "measures": measures,
157                }
158
159                if looker_name in SUGGESTS_FOR_LABELED_COUNTERS:
160                    join_view["dimensions"].append(
161                        {
162                            "name": "label",
163                            "type": "string",
164                            "sql": "${TABLE}.key",
165                            "suggest_explore": suggest_name,
166                            "suggest_dimension": f"{suggest_name}.key",
167                            "hidden": metric_hidden,
168                        },
169                    )
170
171                    suggest_view = {
172                        "name": suggest_name,
173                        "derived_table": {
174                            "sql": dedent(
175                                f"""
176                                select
177                                    m.key,
178                                    count(*) as n
179                                from {table} as t,
180                                unnest(metrics.{metric.type}.{metric.id.replace(".", "_")}) as m
181                                where date(submission_timestamp) > date_sub(current_date, interval 30 day)
182                                    and sample_id = 0
183                                group by key
184                                order by n desc
185                                """
186                            )
187                        },
188                        "dimensions": [
189                            {"name": "key", "type": "string", "sql": "${TABLE}.key"}
190                        ],
191                    }
192                    view_definitions += [join_view, suggest_view]
193                else:
194                    join_view["dimensions"].append(
195                        {
196                            "name": "label",
197                            "type": "string",
198                            "sql": "${TABLE}.key",
199                            "hidden": metric_hidden,
200                        },
201                    )
202                    view_definitions += [join_view]
203
204        # deduplicate view definitions, because somehow a few entries make it in
205        # twice e.g. metrics__metrics__labeled_counter__media_audio_init_failure
206        view_definitions = sorted(
207            {v["name"]: v for v in view_definitions}.values(), key=lambda x: x["name"]  # type: ignore
208        )
209
210        [project, dataset, table] = table.split(".")
211        table_schema = dryrun.create(
212            project=project,
213            dataset=dataset,
214            table=table,
215        ).get_table_schema()
216        nested_views = lookml_utils._generate_nested_dimension_views(
217            table_schema, self.name
218        )
219
220        lookml["views"] += view_definitions + nested_views
221
222        return lookml
223
224    def _get_links(self, dimension: dict) -> List[Dict[str, str]]:
225        """Get a link annotation given a metric name."""
226        name = self._get_name(dimension)
227        title = slug_to_title(name)
228        return [
229            {
230                "label": (f"Glean Dictionary reference for {title}"),
231                "url": (
232                    f"https://dictionary.telemetry.mozilla.org"
233                    f"/apps/{self.namespace}/metrics/{name}"
234                ),
235                "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
236            }
237        ]
238
239    def _get_name(self, dimension: dict) -> str:
240        return dimension["name"].split("__")[-1]
241
242    def _get_metric_type(self, dimension: dict) -> str:
243        return dimension["name"].split("__")[1]
244
245    def _is_metric(self, dimension) -> bool:
246        return dimension["name"].startswith("metrics__")
247
248    def _get_glean_metrics(self, v1_name: Optional[str]) -> List[GleanProbe]:
249        if v1_name is None:
250            logging.error(
251                f"Error: Missing v1 name for ping {self.name} in namespace {self.namespace}"
252            )
253            return []
254
255        repo = next((r for r in GleanPing.get_repos() if r["name"] == v1_name))
256        glean_app = GleanPing(repo)
257
258        ping_probes = []
259        probe_ids = set()
260        for probe in glean_app.get_probes():
261            send_in_pings_snakecase = [
262                ping.replace("-", "_") for ping in probe.definition["send_in_pings"]
263            ]
264            if self.name not in send_in_pings_snakecase:
265                continue
266            if probe.id in probe_ids:
267                # Some ids are duplicated, ignore them
268                continue
269
270            ping_probes.append(probe)
271            probe_ids.add(probe.id)
272
273        return ping_probes
274
275    def _get_category_and_name(self, metric: GleanProbe) -> Tuple[str, str]:
276        *category, name = metric.id.split(".")
277        category = "_".join(category)
278
279        return category, name
280
281    def _to_looker_name(self, metric: GleanProbe, suffix: str = "") -> str:
282        """Convert a glean probe into a looker name."""
283        category, name = self._get_category_and_name(metric)
284
285        sep = "" if not category else "_"
286        label = name
287        looker_name = f"metrics__{metric.type}__{category}{sep}{label}"
288        if suffix:
289            looker_name = f"{looker_name}__{suffix}"
290        return looker_name
291
292    def _make_dimension(
293        self, metric: GleanProbe, suffix: str, sql_map: Dict[str, Dict[str, str]]
294    ) -> Optional[Dict[str, Union[str, List[Dict[str, str]]]]]:
295        *category, name = metric.id.split(".")
296        category = "_".join(category)
297
298        sep = "" if not category else "_"
299        label = name
300        type = RENAMED_METRIC_TYPES.get(metric.type, metric.type)
301        looker_name = f"metrics__{type}__{category}{sep}{name}"
302        if suffix:
303            label = f"{name}_{suffix}"
304            looker_name = f"{looker_name}__{suffix}"
305
306        if looker_name not in sql_map:
307            return None
308
309        group_label = slug_to_title(category)
310        group_item_label = slug_to_title(label)
311
312        if not group_label:
313            group_label = "Glean"
314
315        friendly_name = f"{group_label} {group_item_label}"
316
317        lookml = {
318            "name": looker_name,
319            "label": friendly_name,
320            # metrics that are no longer in the source are hidden by default
321            "hidden": "no" if metric.is_in_source() else "yes",
322            "sql": sql_map[looker_name]["sql"],
323            "type": sql_map[looker_name]["type"],
324            "group_label": group_label,
325            "group_item_label": group_item_label,
326            "links": [
327                {
328                    "label": (f"Glean Dictionary reference for {friendly_name}"),
329                    "url": (
330                        f"https://dictionary.telemetry.mozilla.org"
331                        f"/apps/{self.namespace}/metrics/{category}{sep}{name}"
332                    ),
333                    "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
334                },
335            ],
336        }
337
338        if lookml["type"] == "time":
339            # Remove any _{type} suffix from the dimension group name because each timeframe
340            # will add a _{type} suffix to its individual dimension name.
341            lookml["name"] = re.sub("_(date|time(stamp)?)$", "", looker_name)
342            lookml["timeframes"] = [
343                "raw",
344                "time",
345                "date",
346                "week",
347                "month",
348                "quarter",
349                "year",
350            ]
351            # Dimension groups should not be nested (see issue #82).
352            del lookml["group_label"]
353            del lookml["group_item_label"]
354            # Links are not supported for dimension groups.
355            del lookml["links"]
356
357        # remove some elements from the definition if we're handling a labeled
358        # counter, as an initial join dimension
359        if metric.type == "labeled_counter":
360            # this field is not used since labeled counters are maps
361            del lookml["type"]
362            lookml["hidden"] = "yes"
363
364        if metric.description:
365            lookml["description"] = metric.description
366
367        return lookml
368
369    def _get_metric_dimensions(
370        self, metric: GleanProbe, sql_map: Dict[str, Dict[str, str]]
371    ) -> Iterable[Optional[Dict[str, Union[str, List[Dict[str, str]]]]]]:
372        if metric.type == "rate":
373            for suffix in ("numerator", "denominator"):
374                yield self._make_dimension(metric, suffix, sql_map)
375        elif metric.type in DISTRIBUTION_TYPES:
376            yield self._make_dimension(metric, "sum", sql_map)
377        elif metric.type == "timespan":
378            yield self._make_dimension(metric, "value", sql_map)
379        elif metric.type in ALLOWED_TYPES:
380            yield self._make_dimension(metric, "", sql_map)
381
382    def _get_glean_metric_dimensions(
383        self, all_fields: List[dict], v1_name: Optional[str]
384    ):
385        sql_map = {
386            f["name"]: {"sql": f["sql"], "type": f.get("type", "string")}
387            for f in all_fields
388        }
389        metrics = self._get_glean_metrics(v1_name)
390        return [
391            dimension
392            for metric in metrics
393            for dimension in self._get_metric_dimensions(metric, sql_map)
394            if dimension is not None
395        ]
396
397    def _add_link(self, dimension):
398        annotations = {}
399        if self._is_metric(dimension) and not self._get_metric_type(
400            dimension
401        ).startswith("labeled"):
402            annotations["links"] = self._get_links(dimension)
403
404        return dict(dimension, **annotations)
405
406    def get_dimensions(
407        self, table, v1_name: Optional[str], dryrun
408    ) -> List[Dict[str, Any]]:
409        """Get the set of dimensions for this view."""
410        all_fields = super().get_dimensions(table, v1_name, dryrun=dryrun)
411        fields = self._get_glean_metric_dimensions(all_fields, v1_name) + [
412            self._add_link(d)
413            for d in all_fields
414            if not d["name"].startswith("metrics__")
415        ]
416        # later entries will override earlier entries, if there are duplicates
417        field_dict = {f["name"]: f for f in fields}
418        return list(field_dict.values())
419
420    def get_measures(
421        self, dimensions: List[dict], table: str, v1_name: Optional[str]
422    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
423        """Generate measures from a list of dimensions.
424
425        When no dimension-specific measures are found, return a single "count" measure.
426
427        Raise ClickException if dimensions result in duplicate measures.
428        """
429        measures = super().get_measures(dimensions, table, v1_name)
430        client_id_field = self.get_client_id(dimensions, table)
431
432        for dimension in dimensions:
433            if (
434                self._is_metric(dimension)
435                and self._get_metric_type(dimension) == "counter"
436            ):
437                # handle the counters in the metric ping
438                name = self._get_name(dimension)
439                dimension_name = dimension["name"]
440                measures += [
441                    {
442                        "name": name,
443                        "type": "sum",
444                        "sql": f"${{{dimension_name}}}",
445                        "links": self._get_links(dimension),
446                    },
447                ]
448
449                if client_id_field is not None:
450                    measures += [
451                        {
452                            "name": f"{name}_client_count",
453                            "type": "count_distinct",
454                            "filters": [{dimension_name: ">0"}],
455                            "sql": f"${{{client_id_field}}}",
456                            "links": self._get_links(dimension),
457                        },
458                    ]
459
460        # check if there are any duplicate values
461        names = [measure["name"] for measure in measures]
462        duplicates = [k for k, v in Counter(names).items() if v > 1]
463        if duplicates:
464            raise click.ClickException(
465                f"duplicate measures {duplicates!r} for table {table!r}"
466            )
467
468        return measures

A view on a ping table for an application using the Glean SDK.

type: str = 'glean_ping_view'
allow_glean: bool = True
@classmethod
def from_db_views(klass, *args, **kwargs):
64    @classmethod
65    def from_db_views(klass, *args, **kwargs):
66        """Generate GleanPingViews from db views."""
67        for view in super().from_db_views(*args, **kwargs):
68            if view.name not in DISALLOWED_PINGS:
69                yield view

Generate GleanPingViews from db views.

def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 71    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 72        """Generate LookML for this view.
 73
 74        The Glean views include a labeled metrics, which need to be joined
 75        against the view in the explore.
 76        """
 77        lookml = super().to_lookml(v1_name, dryrun=dryrun)
 78        # ignore nested join views
 79        lookml["views"] = [lookml["views"][0]]
 80
 81        # iterate over all of the glean metrics and generate views for unnested
 82        # fields as necessary. Append them to the list of existing view
 83        # definitions.
 84        table = next(
 85            (table for table in self.tables if table.get("channel") == "release"),
 86            self.tables[0],
 87        )["table"]
 88        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
 89        dimension_names = {dimension["name"] for dimension in dimensions}
 90
 91        client_id_field = self.get_client_id(dimensions, table)
 92
 93        view_definitions = []
 94        metrics = self._get_glean_metrics(v1_name)
 95        for metric in metrics:
 96            looker_name = self._to_looker_name(metric)
 97            if looker_name not in dimension_names:
 98                continue  # skip metrics with no matching dimension
 99            if metric.type == "labeled_counter":
100                view_name = f"{self.name}__{looker_name}"
101                suggest_name = f"suggest__{view_name}"
102
103                category, name = [
104                    slug_to_title(v) for v in self._get_category_and_name(metric)
105                ]
106                view_label = f"{category} - {name}"
107                metric_hidden = "no" if metric.is_in_source() else "yes"
108
109                measures = [
110                    {
111                        "name": "count",
112                        "type": "sum",
113                        "sql": "${value}",
114                        "hidden": metric_hidden,
115                    }
116                ]
117
118                if client_id_field is not None:
119                    # client_id field is missing for pings with minimal Glean schema
120                    measures.append(
121                        {
122                            "name": "client_count",
123                            "type": "count_distinct",
124                            "sql": f"case when ${{value}} > 0 then ${{{self.name}.{client_id_field}}} end",
125                            "hidden": metric_hidden,
126                        }
127                    )
128
129                join_view: Dict[str, Any] = {
130                    "name": view_name,
131                    "label": view_label,
132                    "dimensions": [
133                        {
134                            "name": "document_id",
135                            "type": "string",
136                            "sql": f"${{{self.name}.document_id}}",
137                            "hidden": "yes",
138                        },
139                        # labeled counters need a primary key that incorporates
140                        # their labels, otherwise we get jumbled results:
141                        # https://github.com/mozilla/lookml-generator/issues/171
142                        {
143                            "name": "document_label_id",
144                            "type": "string",
145                            "sql": f"${{{self.name}.document_id}}-${{label}}",
146                            "primary_key": "yes",
147                            "hidden": "yes",
148                        },
149                        {
150                            "name": "value",
151                            "type": "number",
152                            "sql": "${TABLE}.value",
153                            "hidden": "yes",
154                        },
155                    ],
156                    "measures": measures,
157                }
158
159                if looker_name in SUGGESTS_FOR_LABELED_COUNTERS:
160                    join_view["dimensions"].append(
161                        {
162                            "name": "label",
163                            "type": "string",
164                            "sql": "${TABLE}.key",
165                            "suggest_explore": suggest_name,
166                            "suggest_dimension": f"{suggest_name}.key",
167                            "hidden": metric_hidden,
168                        },
169                    )
170
171                    suggest_view = {
172                        "name": suggest_name,
173                        "derived_table": {
174                            "sql": dedent(
175                                f"""
176                                select
177                                    m.key,
178                                    count(*) as n
179                                from {table} as t,
180                                unnest(metrics.{metric.type}.{metric.id.replace(".", "_")}) as m
181                                where date(submission_timestamp) > date_sub(current_date, interval 30 day)
182                                    and sample_id = 0
183                                group by key
184                                order by n desc
185                                """
186                            )
187                        },
188                        "dimensions": [
189                            {"name": "key", "type": "string", "sql": "${TABLE}.key"}
190                        ],
191                    }
192                    view_definitions += [join_view, suggest_view]
193                else:
194                    join_view["dimensions"].append(
195                        {
196                            "name": "label",
197                            "type": "string",
198                            "sql": "${TABLE}.key",
199                            "hidden": metric_hidden,
200                        },
201                    )
202                    view_definitions += [join_view]
203
204        # deduplicate view definitions, because somehow a few entries make it in
205        # twice e.g. metrics__metrics__labeled_counter__media_audio_init_failure
206        view_definitions = sorted(
207            {v["name"]: v for v in view_definitions}.values(), key=lambda x: x["name"]  # type: ignore
208        )
209
210        [project, dataset, table] = table.split(".")
211        table_schema = dryrun.create(
212            project=project,
213            dataset=dataset,
214            table=table,
215        ).get_table_schema()
216        nested_views = lookml_utils._generate_nested_dimension_views(
217            table_schema, self.name
218        )
219
220        lookml["views"] += view_definitions + nested_views
221
222        return lookml

Generate LookML for this view.

The Glean views include labeled metrics, which need to be joined against the view in the explore.

def get_dimensions(self, table, v1_name: Optional[str], dryrun) -> List[Dict[str, Any]]:
406    def get_dimensions(
407        self, table, v1_name: Optional[str], dryrun
408    ) -> List[Dict[str, Any]]:
409        """Get the set of dimensions for this view."""
410        all_fields = super().get_dimensions(table, v1_name, dryrun=dryrun)
411        fields = self._get_glean_metric_dimensions(all_fields, v1_name) + [
412            self._add_link(d)
413            for d in all_fields
414            if not d["name"].startswith("metrics__")
415        ]
416        # later entries will override earlier entries, if there are duplicates
417        field_dict = {f["name"]: f for f in fields}
418        return list(field_dict.values())

Get the set of dimensions for this view.

def get_measures( self, dimensions: List[dict], table: str, v1_name: Optional[str]) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
420    def get_measures(
421        self, dimensions: List[dict], table: str, v1_name: Optional[str]
422    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
423        """Generate measures from a list of dimensions.
424
425        When no dimension-specific measures are found, return a single "count" measure.
426
427        Raise ClickException if dimensions result in duplicate measures.
428        """
429        measures = super().get_measures(dimensions, table, v1_name)
430        client_id_field = self.get_client_id(dimensions, table)
431
432        for dimension in dimensions:
433            if (
434                self._is_metric(dimension)
435                and self._get_metric_type(dimension) == "counter"
436            ):
437                # handle the counters in the metric ping
438                name = self._get_name(dimension)
439                dimension_name = dimension["name"]
440                measures += [
441                    {
442                        "name": name,
443                        "type": "sum",
444                        "sql": f"${{{dimension_name}}}",
445                        "links": self._get_links(dimension),
446                    },
447                ]
448
449                if client_id_field is not None:
450                    measures += [
451                        {
452                            "name": f"{name}_client_count",
453                            "type": "count_distinct",
454                            "filters": [{dimension_name: ">0"}],
455                            "sql": f"${{{client_id_field}}}",
456                            "links": self._get_links(dimension),
457                        },
458                    ]
459
460        # check if there are any duplicate values
461        names = [measure["name"] for measure in measures]
462        duplicates = [k for k, v in Counter(names).items() if v > 1]
463        if duplicates:
464            raise click.ClickException(
465                f"duplicate measures {duplicates!r} for table {table!r}"
466            )
467
468        return measures

Generate measures from a list of dimensions.

When no dimension-specific measures are found, return a single "count" measure.

Raise ClickException if dimensions result in duplicate measures.