generator.views.glean_ping_view

Class to describe a Glean Ping View.

  1"""Class to describe a Glean Ping View."""
  2
  3import logging
  4import re
  5from collections import Counter
  6from textwrap import dedent
  7from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union
  8
  9import click
 10from mozilla_schema_generator.glean_ping import GleanPing
 11from mozilla_schema_generator.probes import GleanProbe
 12
 13from . import lookml_utils
 14from .lookml_utils import slug_to_title
 15from .ping_view import PingView
 16
 17DISTRIBUTION_TYPES = {
 18    "timing_distribution",
 19    "memory_distribution",
 20    "custom_distribution",
 21}
 22
 23
 24ALLOWED_TYPES = DISTRIBUTION_TYPES | {
 25    "boolean",
 26    "labeled_boolean",
 27    "counter",
 28    "labeled_counter",
 29    "datetime",
 30    "jwe",
 31    "quantity",
 32    "string",
 33    "labeled_string",
 34    "rate",
 35    "timespan",
 36    "uuid",
 37    "url",
 38    "text",
 39    "labeled_quantity",
 40}
 41
 42# Bug 1737656 - some metric types are exposed under different names
 43# We need to map to the new name when building dimensions.
 44RENAMED_METRIC_TYPES = {
 45    "jwe": "jwe2",
 46    "text": "text2",
 47    "url": "url2",
 48}
 49
 50
 51DISALLOWED_PINGS = {"events", "events_stream"}
 52
 53# List of labeled counter names for which a suggest explore should be generated.
 54# Generating suggest explores for all labeled counters slows down Looker.
 55SUGGESTS_FOR_LABELED_COUNTERS: Set[str] = set()
 56
 57
 58class GleanPingView(PingView):
 59    """A view on a ping table for an application using the Glean SDK."""
 60
 61    type: str = "glean_ping_view"
 62    allow_glean: bool = True
 63
 64    @classmethod
 65    def from_db_views(klass, *args, **kwargs):
 66        """Generate GleanPingViews from db views."""
 67        for view in super().from_db_views(*args, **kwargs):
 68            if view.name not in DISALLOWED_PINGS:
 69                yield view
 70
 71    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 72        """Generate LookML for this view.
 73
 74        The Glean views include a labeled metrics, which need to be joined
 75        against the view in the explore.
 76        """
 77        lookml = super().to_lookml(v1_name, dryrun=dryrun)
 78        # ignore nested join views
 79        lookml["views"] = [lookml["views"][0]]
 80
 81        # iterate over all of the glean metrics and generate views for unnested
 82        # fields as necessary. Append them to the list of existing view
 83        # definitions.
 84        table = next(
 85            (table for table in self.tables if table.get("channel") == "release"),
 86            self.tables[0],
 87        )["table"]
 88        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
 89        dimension_names = {dimension["name"] for dimension in dimensions}
 90
 91        client_id_field = self.get_client_id(dimensions, table)
 92
 93        view_definitions = []
 94        metrics = self._get_glean_metrics(v1_name)
 95        for metric in metrics:
 96            looker_name = self._to_looker_name(metric)
 97            if looker_name not in dimension_names:
 98                continue  # skip metrics with no matching dimension
 99            if metric.type == "labeled_counter":
100                view_name = f"{self.name}__{looker_name}"
101                suggest_name = f"suggest__{view_name}"
102
103                category, name = [
104                    slug_to_title(v) for v in self._get_category_and_name(metric)
105                ]
106                view_label = f"{category}: {name}"
107                metric_hidden = "no" if metric.is_in_source() else "yes"
108
109                measures = [
110                    {
111                        "name": "count",
112                        "type": "sum",
113                        "sql": "${value}",
114                        "hidden": metric_hidden,
115                    }
116                ]
117
118                if client_id_field is not None:
119                    # client_id field is missing for pings with minimal Glean schema
120                    measures.append(
121                        {
122                            "name": "client_count",
123                            "type": "count_distinct",
124                            "sql": f"case when ${{value}} > 0 then ${{{self.name}.{client_id_field}}} end",
125                            "hidden": metric_hidden,
126                        }
127                    )
128
129                join_view: Dict[str, Any] = {
130                    "name": view_name,
131                    "label": view_label,
132                    "dimensions": [
133                        {
134                            "name": "document_id",
135                            "type": "string",
136                            "sql": f"${{{self.name}.document_id}}",
137                            "hidden": "yes",
138                        },
139                        # labeled counters need a primary key that incorporates
140                        # their labels, otherwise we get jumbled results:
141                        # https://github.com/mozilla/lookml-generator/issues/171
142                        {
143                            "name": "document_label_id",
144                            "type": "string",
145                            "sql": f"${{{self.name}.document_id}}-${{label}}",
146                            "primary_key": "yes",
147                            "hidden": "yes",
148                        },
149                        {
150                            "name": "value",
151                            "type": "number",
152                            "sql": "${TABLE}.value",
153                            "hidden": "yes",
154                        },
155                    ],
156                    "measures": measures,
157                }
158
159                if looker_name in SUGGESTS_FOR_LABELED_COUNTERS:
160                    join_view["dimensions"].append(
161                        {
162                            "name": "label",
163                            "type": "string",
164                            "sql": "${TABLE}.key",
165                            "suggest_explore": suggest_name,
166                            "suggest_dimension": f"{suggest_name}.key",
167                            "hidden": metric_hidden,
168                        },
169                    )
170
171                    suggest_view = {
172                        "name": suggest_name,
173                        "derived_table": {
174                            "sql": dedent(
175                                f"""
176                                select
177                                    m.key,
178                                    count(*) as n
179                                from {table} as t,
180                                unnest(metrics.{metric.type}.{metric.id.replace(".", "_")}) as m
181                                where date(submission_timestamp) > date_sub(current_date, interval 30 day)
182                                    and sample_id = 0
183                                group by key
184                                order by n desc
185                                """
186                            )
187                        },
188                        "dimensions": [
189                            {"name": "key", "type": "string", "sql": "${TABLE}.key"}
190                        ],
191                    }
192                    view_definitions += [join_view, suggest_view]
193                else:
194                    join_view["dimensions"].append(
195                        {
196                            "name": "label",
197                            "type": "string",
198                            "sql": "${TABLE}.key",
199                            "hidden": metric_hidden,
200                        },
201                    )
202                    view_definitions += [join_view]
203
204        # deduplicate view definitions, because somehow a few entries make it in
205        # twice e.g. metrics__metrics__labeled_counter__media_audio_init_failure
206        view_definitions = sorted(
207            {v["name"]: v for v in view_definitions}.values(), key=lambda x: x["name"]  # type: ignore
208        )
209
210        [project, dataset, table] = table.split(".")
211        table_schema = dryrun.create(
212            project=project,
213            dataset=dataset,
214            table=table,
215        ).get_table_schema()
216        nested_views = lookml_utils._generate_nested_dimension_views(
217            table_schema, self.name
218        )
219
220        lookml["views"] += view_definitions + nested_views
221
222        return lookml
223
224    def _get_links(self, dimension: dict) -> List[Dict[str, str]]:
225        """Get a link annotation given a metric name."""
226        name = self._get_name(dimension)
227        title = slug_to_title(name)
228        return [
229            {
230                "label": (f"Glean Dictionary reference for {title}"),
231                "url": (
232                    f"https://dictionary.telemetry.mozilla.org"
233                    f"/apps/{self.namespace}/metrics/{name}"
234                ),
235                "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
236            }
237        ]
238
239    def _get_name(self, dimension: dict) -> str:
240        return dimension["name"].split("__")[-1]
241
242    def _get_metric_type(self, dimension: dict) -> str:
243        return dimension["name"].split("__")[1]
244
245    def _is_metric(self, dimension) -> bool:
246        return dimension["name"].startswith("metrics__")
247
248    def _get_glean_metrics(self, v1_name: Optional[str]) -> List[GleanProbe]:
249        if v1_name is None:
250            logging.error(
251                f"Error: Missing v1 name for ping {self.name} in namespace {self.namespace}"
252            )
253            return []
254
255        repo = next((r for r in GleanPing.get_repos() if r["name"] == v1_name))
256        glean_app = GleanPing(repo)
257
258        ping_probes = []
259        probe_ids = set()
260        for probe in glean_app.get_probes():
261            send_in_pings_snakecase = [
262                ping.replace("-", "_") for ping in probe.definition["send_in_pings"]
263            ]
264            if self.name not in send_in_pings_snakecase:
265                continue
266            if probe.id in probe_ids:
267                # Some ids are duplicated, ignore them
268                continue
269
270            ping_probes.append(probe)
271            probe_ids.add(probe.id)
272
273        return ping_probes
274
275    def _get_category_and_name(self, metric: GleanProbe) -> Tuple[str, str]:
276        *category, name = metric.id.split(".")
277        category = "_".join(category)
278
279        return category, name
280
281    def _to_looker_name(self, metric: GleanProbe, suffix: str = "") -> str:
282        """Convert a glean probe into a looker name."""
283        category, name = self._get_category_and_name(metric)
284
285        sep = "" if not category else "_"
286        label = name
287        looker_name = f"metrics__{metric.type}__{category}{sep}{label}"
288        if suffix:
289            looker_name = f"{looker_name}__{suffix}"
290        return looker_name
291
292    def _make_dimension(
293        self, metric: GleanProbe, suffix: str, sql_map: Dict[str, Dict[str, str]]
294    ) -> Optional[Dict[str, Union[str, List[Dict[str, str]]]]]:
295        *category, name = metric.id.split(".")
296        category = "_".join(category)
297
298        sep = "" if not category else "_"
299        label = name
300        type = RENAMED_METRIC_TYPES.get(metric.type, metric.type)
301        looker_name = f"metrics__{type}__{category}{sep}{name}"
302        if suffix:
303            label = f"{name}_{suffix}"
304            looker_name = f"{looker_name}__{suffix}"
305
306        if looker_name not in sql_map:
307            return None
308
309        group_label = slug_to_title(category)
310        group_item_label = slug_to_title(label)
311
312        if not group_label:
313            group_label = "Glean"
314
315        friendly_name = f"{group_label}: {group_item_label}"
316
317        lookml = {
318            "name": looker_name,
319            "label": friendly_name,
320            # metrics that are no longer in the source are hidden by default
321            "hidden": "no" if metric.is_in_source() else "yes",
322            "sql": sql_map[looker_name]["sql"],
323            "type": sql_map[looker_name]["type"],
324            "group_label": group_label,
325            "group_item_label": group_item_label,
326            "links": [
327                {
328                    "label": (f"Glean Dictionary reference for {friendly_name}"),
329                    "url": (
330                        f"https://dictionary.telemetry.mozilla.org"
331                        f"/apps/{self.namespace}/metrics/{category}{sep}{name}"
332                    ),
333                    "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
334                },
335            ],
336        }
337
338        if lookml["type"] == "time":
339            # Remove any _{type} suffix from the dimension group name because each timeframe
340            # will add a _{type} suffix to its individual dimension name.
341            lookml["name"] = re.sub("_(date|time(stamp)?)$", "", looker_name)
342            lookml["timeframes"] = [
343                timeframe
344                for timeframe in (
345                    "raw",
346                    "time",
347                    "date",
348                    "week",
349                    "month",
350                    "quarter",
351                    "year",
352                )
353                # Exclude timeframes where the resulting dimension would conflict with an existing dimension.
354                if f"{lookml['name']}_{timeframe}" not in sql_map
355            ]
356            # Dimension groups should not be nested (see issue #82).
357            del lookml["group_label"]
358            del lookml["group_item_label"]
359            # Links are not supported for dimension groups.
360            del lookml["links"]
361
362        # remove some elements from the definition if we're handling a labeled
363        # counter, as an initial join dimension
364        if metric.type == "labeled_counter":
365            # this field is not used since labeled counters are maps
366            del lookml["type"]
367            lookml["hidden"] = "yes"
368
369        if metric.description:
370            lookml["description"] = metric.description
371
372        return lookml
373
374    def _get_metric_dimensions(
375        self, metric: GleanProbe, sql_map: Dict[str, Dict[str, str]]
376    ) -> Iterable[Optional[Dict[str, Union[str, List[Dict[str, str]]]]]]:
377        if metric.type == "rate":
378            for suffix in ("numerator", "denominator"):
379                yield self._make_dimension(metric, suffix, sql_map)
380        elif metric.type in DISTRIBUTION_TYPES:
381            yield self._make_dimension(metric, "sum", sql_map)
382        elif metric.type == "timespan":
383            yield self._make_dimension(metric, "value", sql_map)
384        elif metric.type in ALLOWED_TYPES:
385            yield self._make_dimension(metric, "", sql_map)
386
387    def _get_glean_metric_dimensions(
388        self, all_fields: List[dict], v1_name: Optional[str]
389    ):
390        sql_map = {
391            f["name"]: {"sql": f["sql"], "type": f.get("type", "string")}
392            for f in all_fields
393        }
394        metrics = self._get_glean_metrics(v1_name)
395        return [
396            dimension
397            for metric in metrics
398            for dimension in self._get_metric_dimensions(metric, sql_map)
399            if dimension is not None
400        ]
401
402    def _add_link(self, dimension):
403        annotations = {}
404        if self._is_metric(dimension) and not self._get_metric_type(
405            dimension
406        ).startswith("labeled"):
407            annotations["links"] = self._get_links(dimension)
408
409        return dict(dimension, **annotations)
410
411    def get_dimensions(
412        self, table, v1_name: Optional[str], dryrun
413    ) -> List[Dict[str, Any]]:
414        """Get the set of dimensions for this view."""
415        all_fields = super().get_dimensions(table, v1_name, dryrun=dryrun)
416        fields = self._get_glean_metric_dimensions(all_fields, v1_name) + [
417            self._add_link(d)
418            for d in all_fields
419            if not d["name"].startswith("metrics__")
420        ]
421        # later entries will override earlier entries, if there are duplicates
422        field_dict = {f["name"]: f for f in fields}
423        return list(field_dict.values())
424
425    def get_measures(
426        self, dimensions: List[dict], table: str, v1_name: Optional[str]
427    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
428        """Generate measures from a list of dimensions.
429
430        When no dimension-specific measures are found, return a single "count" measure.
431
432        Raise ClickException if dimensions result in duplicate measures.
433        """
434        measures = super().get_measures(dimensions, table, v1_name)
435        client_id_field = self.get_client_id(dimensions, table)
436
437        for dimension in dimensions:
438            if (
439                self._is_metric(dimension)
440                and self._get_metric_type(dimension) == "counter"
441            ):
442                # handle the counters in the metric ping
443                name = self._get_name(dimension)
444                dimension_name = dimension["name"]
445                measures += [
446                    {
447                        "name": name,
448                        "type": "sum",
449                        "sql": f"${{{dimension_name}}}",
450                        "links": self._get_links(dimension),
451                    },
452                ]
453
454                if client_id_field is not None:
455                    measures += [
456                        {
457                            "name": f"{name}_client_count",
458                            "type": "count_distinct",
459                            "filters": [{dimension_name: ">0"}],
460                            "sql": f"${{{client_id_field}}}",
461                            "links": self._get_links(dimension),
462                        },
463                    ]
464
465        # check if there are any duplicate values
466        names = [measure["name"] for measure in measures]
467        duplicates = [k for k, v in Counter(names).items() if v > 1]
468        if duplicates:
469            raise click.ClickException(
470                f"duplicate measures {duplicates!r} for table {table!r}"
471            )
472
473        return measures
DISTRIBUTION_TYPES = {'timing_distribution', 'memory_distribution', 'custom_distribution'}
ALLOWED_TYPES = {'boolean', 'url', 'labeled_counter', 'labeled_quantity', 'text', 'timing_distribution', 'quantity', 'counter', 'labeled_boolean', 'timespan', 'datetime', 'labeled_string', 'rate', 'memory_distribution', 'string', 'jwe', 'custom_distribution', 'uuid'}
RENAMED_METRIC_TYPES = {'jwe': 'jwe2', 'text': 'text2', 'url': 'url2'}
DISALLOWED_PINGS = {'events', 'events_stream'}
SUGGESTS_FOR_LABELED_COUNTERS: Set[str] = set()
class GleanPingView(generator.views.ping_view.PingView):
 59class GleanPingView(PingView):
 60    """A view on a ping table for an application using the Glean SDK."""
 61
 62    type: str = "glean_ping_view"
 63    allow_glean: bool = True
 64
 65    @classmethod
 66    def from_db_views(klass, *args, **kwargs):
 67        """Generate GleanPingViews from db views."""
 68        for view in super().from_db_views(*args, **kwargs):
 69            if view.name not in DISALLOWED_PINGS:
 70                yield view
 71
 72    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 73        """Generate LookML for this view.
 74
 75        The Glean views include a labeled metrics, which need to be joined
 76        against the view in the explore.
 77        """
 78        lookml = super().to_lookml(v1_name, dryrun=dryrun)
 79        # ignore nested join views
 80        lookml["views"] = [lookml["views"][0]]
 81
 82        # iterate over all of the glean metrics and generate views for unnested
 83        # fields as necessary. Append them to the list of existing view
 84        # definitions.
 85        table = next(
 86            (table for table in self.tables if table.get("channel") == "release"),
 87            self.tables[0],
 88        )["table"]
 89        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
 90        dimension_names = {dimension["name"] for dimension in dimensions}
 91
 92        client_id_field = self.get_client_id(dimensions, table)
 93
 94        view_definitions = []
 95        metrics = self._get_glean_metrics(v1_name)
 96        for metric in metrics:
 97            looker_name = self._to_looker_name(metric)
 98            if looker_name not in dimension_names:
 99                continue  # skip metrics with no matching dimension
100            if metric.type == "labeled_counter":
101                view_name = f"{self.name}__{looker_name}"
102                suggest_name = f"suggest__{view_name}"
103
104                category, name = [
105                    slug_to_title(v) for v in self._get_category_and_name(metric)
106                ]
107                view_label = f"{category}: {name}"
108                metric_hidden = "no" if metric.is_in_source() else "yes"
109
110                measures = [
111                    {
112                        "name": "count",
113                        "type": "sum",
114                        "sql": "${value}",
115                        "hidden": metric_hidden,
116                    }
117                ]
118
119                if client_id_field is not None:
120                    # client_id field is missing for pings with minimal Glean schema
121                    measures.append(
122                        {
123                            "name": "client_count",
124                            "type": "count_distinct",
125                            "sql": f"case when ${{value}} > 0 then ${{{self.name}.{client_id_field}}} end",
126                            "hidden": metric_hidden,
127                        }
128                    )
129
130                join_view: Dict[str, Any] = {
131                    "name": view_name,
132                    "label": view_label,
133                    "dimensions": [
134                        {
135                            "name": "document_id",
136                            "type": "string",
137                            "sql": f"${{{self.name}.document_id}}",
138                            "hidden": "yes",
139                        },
140                        # labeled counters need a primary key that incorporates
141                        # their labels, otherwise we get jumbled results:
142                        # https://github.com/mozilla/lookml-generator/issues/171
143                        {
144                            "name": "document_label_id",
145                            "type": "string",
146                            "sql": f"${{{self.name}.document_id}}-${{label}}",
147                            "primary_key": "yes",
148                            "hidden": "yes",
149                        },
150                        {
151                            "name": "value",
152                            "type": "number",
153                            "sql": "${TABLE}.value",
154                            "hidden": "yes",
155                        },
156                    ],
157                    "measures": measures,
158                }
159
160                if looker_name in SUGGESTS_FOR_LABELED_COUNTERS:
161                    join_view["dimensions"].append(
162                        {
163                            "name": "label",
164                            "type": "string",
165                            "sql": "${TABLE}.key",
166                            "suggest_explore": suggest_name,
167                            "suggest_dimension": f"{suggest_name}.key",
168                            "hidden": metric_hidden,
169                        },
170                    )
171
172                    suggest_view = {
173                        "name": suggest_name,
174                        "derived_table": {
175                            "sql": dedent(
176                                f"""
177                                select
178                                    m.key,
179                                    count(*) as n
180                                from {table} as t,
181                                unnest(metrics.{metric.type}.{metric.id.replace(".", "_")}) as m
182                                where date(submission_timestamp) > date_sub(current_date, interval 30 day)
183                                    and sample_id = 0
184                                group by key
185                                order by n desc
186                                """
187                            )
188                        },
189                        "dimensions": [
190                            {"name": "key", "type": "string", "sql": "${TABLE}.key"}
191                        ],
192                    }
193                    view_definitions += [join_view, suggest_view]
194                else:
195                    join_view["dimensions"].append(
196                        {
197                            "name": "label",
198                            "type": "string",
199                            "sql": "${TABLE}.key",
200                            "hidden": metric_hidden,
201                        },
202                    )
203                    view_definitions += [join_view]
204
205        # deduplicate view definitions, because somehow a few entries make it in
206        # twice e.g. metrics__metrics__labeled_counter__media_audio_init_failure
207        view_definitions = sorted(
208            {v["name"]: v for v in view_definitions}.values(), key=lambda x: x["name"]  # type: ignore
209        )
210
211        [project, dataset, table] = table.split(".")
212        table_schema = dryrun.create(
213            project=project,
214            dataset=dataset,
215            table=table,
216        ).get_table_schema()
217        nested_views = lookml_utils._generate_nested_dimension_views(
218            table_schema, self.name
219        )
220
221        lookml["views"] += view_definitions + nested_views
222
223        return lookml
224
225    def _get_links(self, dimension: dict) -> List[Dict[str, str]]:
226        """Get a link annotation given a metric name."""
227        name = self._get_name(dimension)
228        title = slug_to_title(name)
229        return [
230            {
231                "label": (f"Glean Dictionary reference for {title}"),
232                "url": (
233                    f"https://dictionary.telemetry.mozilla.org"
234                    f"/apps/{self.namespace}/metrics/{name}"
235                ),
236                "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
237            }
238        ]
239
240    def _get_name(self, dimension: dict) -> str:
241        return dimension["name"].split("__")[-1]
242
243    def _get_metric_type(self, dimension: dict) -> str:
244        return dimension["name"].split("__")[1]
245
246    def _is_metric(self, dimension) -> bool:
247        return dimension["name"].startswith("metrics__")
248
249    def _get_glean_metrics(self, v1_name: Optional[str]) -> List[GleanProbe]:
250        if v1_name is None:
251            logging.error(
252                f"Error: Missing v1 name for ping {self.name} in namespace {self.namespace}"
253            )
254            return []
255
256        repo = next((r for r in GleanPing.get_repos() if r["name"] == v1_name))
257        glean_app = GleanPing(repo)
258
259        ping_probes = []
260        probe_ids = set()
261        for probe in glean_app.get_probes():
262            send_in_pings_snakecase = [
263                ping.replace("-", "_") for ping in probe.definition["send_in_pings"]
264            ]
265            if self.name not in send_in_pings_snakecase:
266                continue
267            if probe.id in probe_ids:
268                # Some ids are duplicated, ignore them
269                continue
270
271            ping_probes.append(probe)
272            probe_ids.add(probe.id)
273
274        return ping_probes
275
276    def _get_category_and_name(self, metric: GleanProbe) -> Tuple[str, str]:
277        *category, name = metric.id.split(".")
278        category = "_".join(category)
279
280        return category, name
281
282    def _to_looker_name(self, metric: GleanProbe, suffix: str = "") -> str:
283        """Convert a glean probe into a looker name."""
284        category, name = self._get_category_and_name(metric)
285
286        sep = "" if not category else "_"
287        label = name
288        looker_name = f"metrics__{metric.type}__{category}{sep}{label}"
289        if suffix:
290            looker_name = f"{looker_name}__{suffix}"
291        return looker_name
292
293    def _make_dimension(
294        self, metric: GleanProbe, suffix: str, sql_map: Dict[str, Dict[str, str]]
295    ) -> Optional[Dict[str, Union[str, List[Dict[str, str]]]]]:
296        *category, name = metric.id.split(".")
297        category = "_".join(category)
298
299        sep = "" if not category else "_"
300        label = name
301        type = RENAMED_METRIC_TYPES.get(metric.type, metric.type)
302        looker_name = f"metrics__{type}__{category}{sep}{name}"
303        if suffix:
304            label = f"{name}_{suffix}"
305            looker_name = f"{looker_name}__{suffix}"
306
307        if looker_name not in sql_map:
308            return None
309
310        group_label = slug_to_title(category)
311        group_item_label = slug_to_title(label)
312
313        if not group_label:
314            group_label = "Glean"
315
316        friendly_name = f"{group_label}: {group_item_label}"
317
318        lookml = {
319            "name": looker_name,
320            "label": friendly_name,
321            # metrics that are no longer in the source are hidden by default
322            "hidden": "no" if metric.is_in_source() else "yes",
323            "sql": sql_map[looker_name]["sql"],
324            "type": sql_map[looker_name]["type"],
325            "group_label": group_label,
326            "group_item_label": group_item_label,
327            "links": [
328                {
329                    "label": (f"Glean Dictionary reference for {friendly_name}"),
330                    "url": (
331                        f"https://dictionary.telemetry.mozilla.org"
332                        f"/apps/{self.namespace}/metrics/{category}{sep}{name}"
333                    ),
334                    "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
335                },
336            ],
337        }
338
339        if lookml["type"] == "time":
340            # Remove any _{type} suffix from the dimension group name because each timeframe
341            # will add a _{type} suffix to its individual dimension name.
342            lookml["name"] = re.sub("_(date|time(stamp)?)$", "", looker_name)
343            lookml["timeframes"] = [
344                timeframe
345                for timeframe in (
346                    "raw",
347                    "time",
348                    "date",
349                    "week",
350                    "month",
351                    "quarter",
352                    "year",
353                )
354                # Exclude timeframes where the resulting dimension would conflict with an existing dimension.
355                if f"{lookml['name']}_{timeframe}" not in sql_map
356            ]
357            # Dimension groups should not be nested (see issue #82).
358            del lookml["group_label"]
359            del lookml["group_item_label"]
360            # Links are not supported for dimension groups.
361            del lookml["links"]
362
363        # remove some elements from the definition if we're handling a labeled
364        # counter, as an initial join dimension
365        if metric.type == "labeled_counter":
366            # this field is not used since labeled counters are maps
367            del lookml["type"]
368            lookml["hidden"] = "yes"
369
370        if metric.description:
371            lookml["description"] = metric.description
372
373        return lookml
374
375    def _get_metric_dimensions(
376        self, metric: GleanProbe, sql_map: Dict[str, Dict[str, str]]
377    ) -> Iterable[Optional[Dict[str, Union[str, List[Dict[str, str]]]]]]:
378        if metric.type == "rate":
379            for suffix in ("numerator", "denominator"):
380                yield self._make_dimension(metric, suffix, sql_map)
381        elif metric.type in DISTRIBUTION_TYPES:
382            yield self._make_dimension(metric, "sum", sql_map)
383        elif metric.type == "timespan":
384            yield self._make_dimension(metric, "value", sql_map)
385        elif metric.type in ALLOWED_TYPES:
386            yield self._make_dimension(metric, "", sql_map)
387
388    def _get_glean_metric_dimensions(
389        self, all_fields: List[dict], v1_name: Optional[str]
390    ):
391        sql_map = {
392            f["name"]: {"sql": f["sql"], "type": f.get("type", "string")}
393            for f in all_fields
394        }
395        metrics = self._get_glean_metrics(v1_name)
396        return [
397            dimension
398            for metric in metrics
399            for dimension in self._get_metric_dimensions(metric, sql_map)
400            if dimension is not None
401        ]
402
403    def _add_link(self, dimension):
404        annotations = {}
405        if self._is_metric(dimension) and not self._get_metric_type(
406            dimension
407        ).startswith("labeled"):
408            annotations["links"] = self._get_links(dimension)
409
410        return dict(dimension, **annotations)
411
412    def get_dimensions(
413        self, table, v1_name: Optional[str], dryrun
414    ) -> List[Dict[str, Any]]:
415        """Get the set of dimensions for this view."""
416        all_fields = super().get_dimensions(table, v1_name, dryrun=dryrun)
417        fields = self._get_glean_metric_dimensions(all_fields, v1_name) + [
418            self._add_link(d)
419            for d in all_fields
420            if not d["name"].startswith("metrics__")
421        ]
422        # later entries will override earlier entries, if there are duplicates
423        field_dict = {f["name"]: f for f in fields}
424        return list(field_dict.values())
425
426    def get_measures(
427        self, dimensions: List[dict], table: str, v1_name: Optional[str]
428    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
429        """Generate measures from a list of dimensions.
430
431        When no dimension-specific measures are found, return a single "count" measure.
432
433        Raise ClickException if dimensions result in duplicate measures.
434        """
435        measures = super().get_measures(dimensions, table, v1_name)
436        client_id_field = self.get_client_id(dimensions, table)
437
438        for dimension in dimensions:
439            if (
440                self._is_metric(dimension)
441                and self._get_metric_type(dimension) == "counter"
442            ):
443                # handle the counters in the metric ping
444                name = self._get_name(dimension)
445                dimension_name = dimension["name"]
446                measures += [
447                    {
448                        "name": name,
449                        "type": "sum",
450                        "sql": f"${{{dimension_name}}}",
451                        "links": self._get_links(dimension),
452                    },
453                ]
454
455                if client_id_field is not None:
456                    measures += [
457                        {
458                            "name": f"{name}_client_count",
459                            "type": "count_distinct",
460                            "filters": [{dimension_name: ">0"}],
461                            "sql": f"${{{client_id_field}}}",
462                            "links": self._get_links(dimension),
463                        },
464                    ]
465
466        # check if there are any duplicate values
467        names = [measure["name"] for measure in measures]
468        duplicates = [k for k, v in Counter(names).items() if v > 1]
469        if duplicates:
470            raise click.ClickException(
471                f"duplicate measures {duplicates!r} for table {table!r}"
472            )
473
474        return measures

A view on a ping table for an application using the Glean SDK.

type: str = 'glean_ping_view'
allow_glean: bool = True
@classmethod
def from_db_views(klass, *args, **kwargs):
65    @classmethod
66    def from_db_views(klass, *args, **kwargs):
67        """Generate GleanPingViews from db views."""
68        for view in super().from_db_views(*args, **kwargs):
69            if view.name not in DISALLOWED_PINGS:
70                yield view

Generate GleanPingViews from db views.

def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 72    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 73        """Generate LookML for this view.
 74
 75        The Glean views include a labeled metrics, which need to be joined
 76        against the view in the explore.
 77        """
 78        lookml = super().to_lookml(v1_name, dryrun=dryrun)
 79        # ignore nested join views
 80        lookml["views"] = [lookml["views"][0]]
 81
 82        # iterate over all of the glean metrics and generate views for unnested
 83        # fields as necessary. Append them to the list of existing view
 84        # definitions.
 85        table = next(
 86            (table for table in self.tables if table.get("channel") == "release"),
 87            self.tables[0],
 88        )["table"]
 89        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
 90        dimension_names = {dimension["name"] for dimension in dimensions}
 91
 92        client_id_field = self.get_client_id(dimensions, table)
 93
 94        view_definitions = []
 95        metrics = self._get_glean_metrics(v1_name)
 96        for metric in metrics:
 97            looker_name = self._to_looker_name(metric)
 98            if looker_name not in dimension_names:
 99                continue  # skip metrics with no matching dimension
100            if metric.type == "labeled_counter":
101                view_name = f"{self.name}__{looker_name}"
102                suggest_name = f"suggest__{view_name}"
103
104                category, name = [
105                    slug_to_title(v) for v in self._get_category_and_name(metric)
106                ]
107                view_label = f"{category}: {name}"
108                metric_hidden = "no" if metric.is_in_source() else "yes"
109
110                measures = [
111                    {
112                        "name": "count",
113                        "type": "sum",
114                        "sql": "${value}",
115                        "hidden": metric_hidden,
116                    }
117                ]
118
119                if client_id_field is not None:
120                    # client_id field is missing for pings with minimal Glean schema
121                    measures.append(
122                        {
123                            "name": "client_count",
124                            "type": "count_distinct",
125                            "sql": f"case when ${{value}} > 0 then ${{{self.name}.{client_id_field}}} end",
126                            "hidden": metric_hidden,
127                        }
128                    )
129
130                join_view: Dict[str, Any] = {
131                    "name": view_name,
132                    "label": view_label,
133                    "dimensions": [
134                        {
135                            "name": "document_id",
136                            "type": "string",
137                            "sql": f"${{{self.name}.document_id}}",
138                            "hidden": "yes",
139                        },
140                        # labeled counters need a primary key that incorporates
141                        # their labels, otherwise we get jumbled results:
142                        # https://github.com/mozilla/lookml-generator/issues/171
143                        {
144                            "name": "document_label_id",
145                            "type": "string",
146                            "sql": f"${{{self.name}.document_id}}-${{label}}",
147                            "primary_key": "yes",
148                            "hidden": "yes",
149                        },
150                        {
151                            "name": "value",
152                            "type": "number",
153                            "sql": "${TABLE}.value",
154                            "hidden": "yes",
155                        },
156                    ],
157                    "measures": measures,
158                }
159
160                if looker_name in SUGGESTS_FOR_LABELED_COUNTERS:
161                    join_view["dimensions"].append(
162                        {
163                            "name": "label",
164                            "type": "string",
165                            "sql": "${TABLE}.key",
166                            "suggest_explore": suggest_name,
167                            "suggest_dimension": f"{suggest_name}.key",
168                            "hidden": metric_hidden,
169                        },
170                    )
171
172                    suggest_view = {
173                        "name": suggest_name,
174                        "derived_table": {
175                            "sql": dedent(
176                                f"""
177                                select
178                                    m.key,
179                                    count(*) as n
180                                from {table} as t,
181                                unnest(metrics.{metric.type}.{metric.id.replace(".", "_")}) as m
182                                where date(submission_timestamp) > date_sub(current_date, interval 30 day)
183                                    and sample_id = 0
184                                group by key
185                                order by n desc
186                                """
187                            )
188                        },
189                        "dimensions": [
190                            {"name": "key", "type": "string", "sql": "${TABLE}.key"}
191                        ],
192                    }
193                    view_definitions += [join_view, suggest_view]
194                else:
195                    join_view["dimensions"].append(
196                        {
197                            "name": "label",
198                            "type": "string",
199                            "sql": "${TABLE}.key",
200                            "hidden": metric_hidden,
201                        },
202                    )
203                    view_definitions += [join_view]
204
205        # deduplicate view definitions, because somehow a few entries make it in
206        # twice e.g. metrics__metrics__labeled_counter__media_audio_init_failure
207        view_definitions = sorted(
208            {v["name"]: v for v in view_definitions}.values(), key=lambda x: x["name"]  # type: ignore
209        )
210
211        [project, dataset, table] = table.split(".")
212        table_schema = dryrun.create(
213            project=project,
214            dataset=dataset,
215            table=table,
216        ).get_table_schema()
217        nested_views = lookml_utils._generate_nested_dimension_views(
218            table_schema, self.name
219        )
220
221        lookml["views"] += view_definitions + nested_views
222
223        return lookml

Generate LookML for this view.

The Glean views include labeled metrics, which need to be joined against the view in the explore.

def get_dimensions(self, table, v1_name: Optional[str], dryrun) -> List[Dict[str, Any]]:
412    def get_dimensions(
413        self, table, v1_name: Optional[str], dryrun
414    ) -> List[Dict[str, Any]]:
415        """Get the set of dimensions for this view."""
416        all_fields = super().get_dimensions(table, v1_name, dryrun=dryrun)
417        fields = self._get_glean_metric_dimensions(all_fields, v1_name) + [
418            self._add_link(d)
419            for d in all_fields
420            if not d["name"].startswith("metrics__")
421        ]
422        # later entries will override earlier entries, if there are duplicates
423        field_dict = {f["name"]: f for f in fields}
424        return list(field_dict.values())

Get the set of dimensions for this view.

def get_measures( self, dimensions: List[dict], table: str, v1_name: Optional[str]) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
426    def get_measures(
427        self, dimensions: List[dict], table: str, v1_name: Optional[str]
428    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
429        """Generate measures from a list of dimensions.
430
431        When no dimension-specific measures are found, return a single "count" measure.
432
433        Raise ClickException if dimensions result in duplicate measures.
434        """
435        measures = super().get_measures(dimensions, table, v1_name)
436        client_id_field = self.get_client_id(dimensions, table)
437
438        for dimension in dimensions:
439            if (
440                self._is_metric(dimension)
441                and self._get_metric_type(dimension) == "counter"
442            ):
443                # handle the counters in the metric ping
444                name = self._get_name(dimension)
445                dimension_name = dimension["name"]
446                measures += [
447                    {
448                        "name": name,
449                        "type": "sum",
450                        "sql": f"${{{dimension_name}}}",
451                        "links": self._get_links(dimension),
452                    },
453                ]
454
455                if client_id_field is not None:
456                    measures += [
457                        {
458                            "name": f"{name}_client_count",
459                            "type": "count_distinct",
460                            "filters": [{dimension_name: ">0"}],
461                            "sql": f"${{{client_id_field}}}",
462                            "links": self._get_links(dimension),
463                        },
464                    ]
465
466        # check if there are any duplicate values
467        names = [measure["name"] for measure in measures]
468        duplicates = [k for k, v in Counter(names).items() if v > 1]
469        if duplicates:
470            raise click.ClickException(
471                f"duplicate measures {duplicates!r} for table {table!r}"
472            )
473
474        return measures

Generate measures from a list of dimensions.

When no dimension-specific measures are found, return a single "count" measure.

Raise ClickException if dimensions result in duplicate measures.