generator.views.metric_definitions_view

Class to describe a view with metrics from metric-hub.

  1"""Class to describe a view with metrics from metric-hub."""
  2
  3from __future__ import annotations
  4
  5import re
  6from typing import Any, Dict, Iterator, List, Optional, Union
  7
  8from generator.metrics_utils import MetricsConfigLoader
  9
 10from . import lookml_utils
 11from .view import View, ViewDict
 12
 13
 14class MetricDefinitionsView(View):
 15    """A view for metric-hub metrics that come from the same data source."""
 16
 17    type: str = "metric_definitions_view"
 18
 19    def __init__(self, namespace: str, name: str, tables: List[Dict[str, str]]):
 20        """Get an instance of an MetricDefinitionsView."""
 21        super().__init__(namespace, name, MetricDefinitionsView.type, tables)
 22
 23    @classmethod
 24    def from_db_views(
 25        klass,
 26        namespace: str,
 27        is_glean: bool,
 28        channels: List[Dict[str, str]],
 29        db_views: dict,
 30    ) -> Iterator[MetricDefinitionsView]:
 31        """Get Metric Definition Views from db views and app variants."""
 32        return iter(())
 33
 34    @classmethod
 35    def from_dict(
 36        klass, namespace: str, name: str, definition: ViewDict
 37    ) -> MetricDefinitionsView:
 38        """Get a MetricDefinitionsView from a dict representation."""
 39        return klass(namespace, name, definition.get("tables", []))
 40
 41    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 42        """Get this view as LookML."""
 43        namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
 44            self.namespace
 45        )
 46        if namespace_definitions is None:
 47            return {}
 48
 49        # get all metric definitions that depend on the data source represented by this view
 50        data_source_name = re.sub("^metric_definitions_", "", self.name)
 51        data_source_definition = MetricsConfigLoader.configs.get_data_source_definition(
 52            data_source_name, self.namespace
 53        )
 54
 55        if data_source_definition is None:
 56            return {}
 57
 58        # todo: hide deprecated metrics?
 59        metric_definitions = [
 60            f"""{
 61                MetricsConfigLoader.configs.get_env().from_string(metric.select_expression).render()
 62            } AS {metric_slug},\n"""
 63            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
 64            if metric.select_expression
 65            and metric.data_source.name == data_source_name
 66            and metric.type != "histogram"
 67        ]
 68
 69        if metric_definitions == []:
 70            return {}
 71
 72        # Metric definitions are intended to aggregated by client per date.
 73        # A derived table is needed to do these aggregations, instead of defining them as measures
 74        # we want to have them available as dimensions (which don't allow aggregations in their definitions)
 75        # to allow for custom measures to be later defined in Looker that aggregate these per client metrics.
 76        view_defn: Dict[str, Any] = {"name": self.name}
 77
 78        ignore_base_fields = [
 79            "client_id",
 80            "submission_date",
 81            "submission",
 82            "first_run",
 83        ] + [
 84            metric_slug
 85            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
 86            if metric.select_expression
 87            and metric.data_source.name == data_source_name
 88            and metric.type != "histogram"
 89        ]
 90
 91        base_view_dimensions = {}
 92        joined_data_sources = []
 93
 94        # check if the metric data source has joins
 95        # joined data sources are generally used for creating the "Base Fields"
 96        if data_source_definition.joins:
 97            # determine the dimensions selected by the joined data sources
 98            for joined_data_source_slug, join in data_source_definition.joins.items():
 99                joined_data_source = (
100                    MetricsConfigLoader.configs.get_data_source_definition(
101                        joined_data_source_slug, self.namespace
102                    )
103                )
104
105                if joined_data_source.columns_as_dimensions:
106                    joined_data_sources.append(joined_data_source)
107
108                    date_filter = None
109                    if joined_data_source.submission_date_column != "NULL":
110                        date_filter = (
111                            None
112                            if joined_data_source.submission_date_column is None
113                            or joined_data_source.submission_date_column == "NULL"
114                            else f"{joined_data_source.submission_date_column} = '2023-01-01'"
115                        )
116
117                    # create Looker dimensions by doing a dryrun
118                    query = MetricsConfigLoader.configs.get_data_source_sql(
119                        joined_data_source_slug,
120                        self.namespace,
121                        where=date_filter,
122                    ).format(dataset=self.namespace)
123
124                    base_view_dimensions[joined_data_source_slug] = (
125                        lookml_utils._generate_dimensions_from_query(
126                            query, dryrun=dryrun
127                        )
128                    )
129
130        if (
131            data_source_definition.client_id_column == "NULL"
132            or data_source_definition.columns_as_dimensions
133        ):
134            # if the metrics data source doesn't have any joins then use the dimensions
135            # of the data source itself as base fields
136            date_filter = None
137            if data_source_definition.submission_date_column != "NULL":
138                date_filter = (
139                    "submission_date = '2023-01-01'"
140                    if data_source_definition.submission_date_column is None
141                    else f"{data_source_definition.submission_date_column} = '2023-01-01'"
142                )
143
144            query = MetricsConfigLoader.configs.get_data_source_sql(
145                data_source_definition.name,
146                self.namespace,
147                where=date_filter,
148                ignore_joins=True,
149            ).format(dataset=self.namespace)
150
151            base_view_dimensions[data_source_definition.name] = (
152                lookml_utils._generate_dimensions_from_query(query, dryrun)
153            )
154
155        # prepare base field data for query
156        base_view_fields = [
157            {
158                "name": f"{data_source}_{dimension['name']}",
159                "select_sql": f"{data_source}_{dimension['name']},\n",
160                "sql": f"{data_source}.{dimension['name'].replace('__', '.')} AS {data_source}_{dimension['name']},\n",
161            }
162            for data_source, dimensions in base_view_dimensions.items()
163            for dimension in dimensions
164            if dimension["name"] not in ignore_base_fields
165            and "hidden" not in dimension
166            and dimension["type"] != "time"
167        ]
168
169        client_id_field = (
170            "NULL"
171            if data_source_definition.client_id_column == "NULL"
172            else f'{data_source_definition.client_id_column or "client_id"}'
173        )
174
175        # filters for date ranges
176        where_sql = " AND ".join(
177            [
178                f"""
179                    {data_source.name}.{data_source.submission_date_column or "submission_date"}
180                    BETWEEN
181                    COALESCE(
182                        SAFE_CAST(
183                            {{% date_start submission_date %}} AS DATE
184                        ), CURRENT_DATE()) AND
185                    COALESCE(
186                        SAFE_CAST(
187                            {{% date_end submission_date %}} AS DATE
188                        ), CURRENT_DATE())
189                """
190                for data_source in [data_source_definition] + joined_data_sources
191                if data_source.submission_date_column != "NULL"
192            ]
193        )
194
195        # filte on sample_id if such a field exists
196        for field in base_view_fields:
197            if field["name"].endswith("_sample_id"):
198                where_sql += f"""
199                    AND
200                        {field['name'].split('_sample_id')[0]}.sample_id < {{% parameter sampling %}}
201                """
202                break
203
204        view_defn["derived_table"] = {
205            "sql": f"""
206            SELECT
207                {"".join(metric_definitions)}
208                {"".join([field['select_sql'] for field in base_view_fields])}
209                {client_id_field} AS client_id,
210                {{% if aggregate_metrics_by._parameter_value == 'day' %}}
211                {data_source_definition.submission_date_column or "submission_date"} AS analysis_basis
212                {{% elsif aggregate_metrics_by._parameter_value == 'week'  %}}
213                (FORMAT_DATE(
214                    '%F',
215                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
216                    WEEK(MONDAY)))
217                ) AS analysis_basis
218                {{% elsif aggregate_metrics_by._parameter_value == 'month'  %}}
219                (FORMAT_DATE(
220                    '%Y-%m',
221                    {data_source_definition.submission_date_column or "submission_date"})
222                ) AS analysis_basis
223                {{% elsif aggregate_metrics_by._parameter_value == 'quarter'  %}}
224                (FORMAT_DATE(
225                    '%Y-%m',
226                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
227                    QUARTER))
228                ) AS analysis_basis
229                {{% elsif aggregate_metrics_by._parameter_value == 'year'  %}}
230                (EXTRACT(
231                    YEAR FROM {data_source_definition.submission_date_column or "submission_date"})
232                ) AS analysis_basis
233                {{% else %}}
234                NULL as analysis_basis
235                {{% endif %}}
236            FROM
237                (
238                    SELECT
239                        {data_source_name}.*,
240                        {"".join([field['sql'] for field in base_view_fields])}
241                    FROM
242                    {
243                        MetricsConfigLoader.configs.get_data_source_sql(
244                            data_source_name,
245                            self.namespace,
246                            select_fields=False
247                        ).format(dataset=self.namespace)
248                    }
249                    WHERE {where_sql}
250                )
251            GROUP BY
252                {"".join([field['select_sql'] for field in base_view_fields])}
253                client_id,
254                analysis_basis
255            """
256        }
257
258        view_defn["dimensions"] = self.get_dimensions()
259        view_defn["dimension_groups"] = self.get_dimension_groups()
260
261        # add the Looker dimensions
262        for data_source, dimensions in base_view_dimensions.items():
263            for dimension in dimensions:
264                if (
265                    dimension["name"] not in ignore_base_fields
266                    and dimension.get("type", "") != "time"
267                ):
268                    dimension["sql"] = (
269                        "${TABLE}." + f"{data_source}_{dimension['name']}"
270                    )
271                    dimension["group_label"] = "Base Fields"
272                    if not lookml_utils._is_dimension_group(dimension):
273                        view_defn["dimensions"].append(dimension)
274                    else:
275                        view_defn["dimension_groups"].append(dimension)
276                    # avoid duplicate dimensions
277                    ignore_base_fields.append(dimension["name"])
278
279        view_defn["measures"] = self.get_measures(
280            view_defn["dimensions"],
281        )
282        view_defn["sets"] = self._get_sets()
283        view_defn["parameters"] = self._get_parameters(view_defn["dimensions"])
284
285        return {"views": [view_defn]}
286
287    def get_dimensions(
288        self,
289        _table=None,
290        _v1_name: Optional[str] = None,
291        _dryrun=None,
292    ) -> List[Dict[str, Any]]:
293        """Get the set of dimensions for this view based on the metric definitions in metric-hub."""
294        namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
295            self.namespace
296        )
297        metric_definitions = namespace_definitions.metrics.definitions
298        data_source_name = re.sub("^metric_definitions_", "", self.name)
299
300        return [
301            {
302                "name": "client_id",
303                "type": "string",
304                "sql": "SAFE_CAST(${TABLE}.client_id AS STRING)",
305                "label": "Client ID",
306                "primary_key": "yes",
307                "group_label": "Base Fields",
308                "description": "Unique client identifier",
309            },
310        ] + [  # add a dimension for each metric definition
311            {
312                "name": metric_slug,
313                "group_label": "Metrics",
314                "label": metric.friendly_name
315                or lookml_utils.slug_to_title(metric_slug),
316                "description": metric.description or "",
317                "type": "number",
318                "sql": "${TABLE}." + metric_slug,
319            }
320            for metric_slug, metric in metric_definitions.items()
321            if metric.select_expression
322            and metric.data_source.name == data_source_name
323            and metric.type != "histogram"
324        ]
325
326    def get_dimension_groups(self) -> List[Dict[str, Any]]:
327        """Get dimension groups for this view."""
328        return [
329            {
330                "name": "submission",
331                "type": "time",
332                "group_label": "Base Fields",
333                "sql": "CAST(${TABLE}.analysis_basis AS TIMESTAMP)",
334                "label": "Submission",
335                "timeframes": [
336                    "raw",
337                    "date",
338                    "week",
339                    "month",
340                    "quarter",
341                    "year",
342                ],
343            }
344        ]
345
346    def _get_sets(self) -> List[Dict[str, Any]]:
347        """Generate metric sets."""
348        # group all the metric dimensions into a set
349        dimensions = self.get_dimensions()
350        measures = self.get_measures(dimensions)
351
352        return [
353            {
354                "name": "metrics",
355                "fields": [
356                    dimension["name"]
357                    for dimension in dimensions
358                    if dimension["name"] != "client_id"
359                ]
360                + [measure["name"] for measure in measures],
361            }
362        ]
363
364    def _get_parameters(self, dimensions: List[dict]):
365        hide_sampling = "yes"
366
367        for dim in dimensions:
368            if dim["name"] == "sample_id":
369                hide_sampling = "no"
370                break
371
372        return [
373            {
374                "name": "aggregate_metrics_by",
375                "label": "Aggregate Client Metrics Per",
376                "type": "unquoted",
377                "default_value": "day",
378                "allowed_values": [
379                    {"label": "Per Day", "value": "day"},
380                    {"label": "Per Week", "value": "week"},
381                    {"label": "Per Month", "value": "month"},
382                    {"label": "Per Quarter", "value": "quarter"},
383                    {"label": "Per Year", "value": "year"},
384                    {"label": "Overall", "value": "overall"},
385                ],
386            },
387            {
388                "name": "sampling",
389                "label": "Sample of source data in %",
390                "type": "unquoted",
391                "default_value": "100",
392                "hidden": hide_sampling,
393            },
394        ]
395
396    def get_measures(
397        self, dimensions: List[dict]
398    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
399        """Get statistics as measures."""
400        measures = []
401        sampling = "1"
402
403        for dim in dimensions:
404            if dim["name"] == "sample_id":
405                sampling = "100 / {% parameter sampling %}"
406                break
407
408        for dimension in dimensions:
409            metric = MetricsConfigLoader.configs.get_metric_definition(
410                dimension["name"], self.namespace
411            )
412            if metric and metric.statistics:
413                for statistic_slug, statistic_conf in metric.statistics.items():
414                    dimension_label = dimension.get("label") or dimension.get("name")
415                    if statistic_slug in [
416                        "average",
417                        "max",
418                        "min",
419                        "median",
420                    ]:
421                        measures.append(
422                            {
423                                "name": f"{dimension['name']}_{statistic_slug}",
424                                "type": statistic_slug,
425                                "sql": "${TABLE}." + dimension["name"],
426                                "label": f"{dimension_label} {statistic_slug.title()}",
427                                "group_label": "Statistics",
428                                "description": f"{statistic_slug.title()} of {dimension_label}",
429                            }
430                        )
431                    elif statistic_slug == "sum":
432                        measures.append(
433                            {
434                                "name": f"{dimension['name']}_{statistic_slug}",
435                                "type": "sum",
436                                "sql": "${TABLE}." + dimension["name"] + "*" + sampling,
437                                "label": f"{dimension_label} Sum",
438                                "group_label": "Statistics",
439                                "description": f"Sum of {dimension_label}",
440                            }
441                        )
442                    elif statistic_slug == "client_count":
443                        measures.append(
444                            {
445                                "name": (
446                                    f"{dimension['name']}_{statistic_slug}_sampled"
447                                    if sampling
448                                    else f"{dimension['name']}_{statistic_slug}"
449                                ),
450                                "type": "count_distinct",
451                                "label": f"{dimension_label} Client Count",
452                                "group_label": "Statistics",
453                                "sql": "IF(${TABLE}."
454                                + f"{dimension['name']} > 0, "
455                                + "${TABLE}.client_id, SAFE_CAST(NULL AS STRING))",
456                                "description": f"Number of clients with {dimension_label}",
457                                "hidden": "yes" if sampling else "no",
458                            }
459                        )
460
461                        if sampling:
462                            measures.append(
463                                {
464                                    "name": f"{dimension['name']}_{statistic_slug}",
465                                    "type": "number",
466                                    "label": f"{dimension_label} Client Count",
467                                    "group_label": "Statistics",
468                                    "sql": "${"
469                                    + f"{dimension['name']}_{statistic_slug}_sampled"
470                                    + "} *"
471                                    + sampling,
472                                    "description": f"Number of clients with {dimension_label}",
473                                }
474                            )
475                    elif statistic_slug == "dau_proportion":
476                        if "numerator" in statistic_conf:
477                            [numerator, numerator_stat] = statistic_conf[
478                                "numerator"
479                            ].split(".")
480                            measures.append(
481                                {
482                                    "name": "DAU_sampled" if sampling else "DAU",
483                                    "type": "count_distinct",
484                                    "label": "DAU",
485                                    "group_label": "Statistics",
486                                    "sql": "${TABLE}.client_id",
487                                    "hidden": "yes",
488                                }
489                            )
490
491                            if sampling:
492                                measures.append(
493                                    {
494                                        "name": "DAU",
495                                        "type": "number",
496                                        "label": "DAU",
497                                        "group_label": "Statistics",
498                                        "sql": "${DAU_sampled} *" + sampling,
499                                        "hidden": "yes",
500                                    }
501                                )
502
503                            measures.append(
504                                {
505                                    "name": f"{dimension['name']}_{statistic_slug}",
506                                    "type": "number",
507                                    "label": f"{dimension_label} DAU Proportion",
508                                    "sql": "SAFE_DIVIDE(${"
509                                    + f"{numerator}_{numerator_stat}"
510                                    + "}, ${DAU})",
511                                    "group_label": "Statistics",
512                                    "description": f"Proportion of daily active users with {dimension['name']}",
513                                }
514                            )
515                    elif statistic_slug == "ratio":
516                        if (
517                            "numerator" in statistic_conf
518                            and "denominator" in statistic_conf
519                        ):
520                            [numerator, numerator_stat] = statistic_conf[
521                                "numerator"
522                            ].split(".")
523                            [denominator, denominator_stat] = statistic_conf[
524                                "denominator"
525                            ].split(".")
526
527                            measures.append(
528                                {
529                                    "name": f"{dimension['name']}_{statistic_slug}",
530                                    "type": "number",
531                                    "label": f"{dimension_label} Ratio",
532                                    "sql": "SAFE_DIVIDE(${"
533                                    + f"{numerator}_{numerator_stat}"
534                                    + "}, ${"
535                                    + f"{denominator}_{denominator_stat}"
536                                    + "})",
537                                    "group_label": "Statistics",
538                                    "description": f""""
539                                        Ratio between {statistic_conf['numerator']} and
540                                        {statistic_conf['denominator']}""",
541                                }
542                            )
543                    elif statistic_slug == "rolling_average":
544                        aggregation = statistic_conf.get("aggregation", "sum")
545                        if "window_sizes" in statistic_conf:
546                            for window_size in statistic_conf["window_sizes"]:
547                                measures.append(
548                                    {
549                                        "name": f"{dimension['name']}_{window_size}_day_{statistic_slug}",
550                                        "type": "number",
551                                        "label": f"{dimension_label} {window_size} Day Rolling Average",
552                                        "sql": f"""
553                                            AVG({aggregation}(${{TABLE}}.{dimension["name"]} * {sampling})) OVER (
554                                                ROWS {window_size} PRECEDING
555                                        )""",
556                                        "group_label": "Statistics",
557                                        "description": f"{window_size} day rolling average of {dimension_label}",
558                                    }
559                                )
560
561        return measures
class MetricDefinitionsView(generator.views.view.View):
 15class MetricDefinitionsView(View):
 16    """A view for metric-hub metrics that come from the same data source."""
 17
 18    type: str = "metric_definitions_view"
 19
 20    def __init__(self, namespace: str, name: str, tables: List[Dict[str, str]]):
 21        """Get an instance of an MetricDefinitionsView."""
 22        super().__init__(namespace, name, MetricDefinitionsView.type, tables)
 23
 24    @classmethod
 25    def from_db_views(
 26        klass,
 27        namespace: str,
 28        is_glean: bool,
 29        channels: List[Dict[str, str]],
 30        db_views: dict,
 31    ) -> Iterator[MetricDefinitionsView]:
 32        """Get Metric Definition Views from db views and app variants."""
 33        return iter(())
 34
 35    @classmethod
 36    def from_dict(
 37        klass, namespace: str, name: str, definition: ViewDict
 38    ) -> MetricDefinitionsView:
 39        """Get a MetricDefinitionsView from a dict representation."""
 40        return klass(namespace, name, definition.get("tables", []))
 41
 42    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 43        """Get this view as LookML."""
 44        namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
 45            self.namespace
 46        )
 47        if namespace_definitions is None:
 48            return {}
 49
 50        # get all metric definitions that depend on the data source represented by this view
 51        data_source_name = re.sub("^metric_definitions_", "", self.name)
 52        data_source_definition = MetricsConfigLoader.configs.get_data_source_definition(
 53            data_source_name, self.namespace
 54        )
 55
 56        if data_source_definition is None:
 57            return {}
 58
 59        # todo: hide deprecated metrics?
 60        metric_definitions = [
 61            f"""{
 62                MetricsConfigLoader.configs.get_env().from_string(metric.select_expression).render()
 63            } AS {metric_slug},\n"""
 64            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
 65            if metric.select_expression
 66            and metric.data_source.name == data_source_name
 67            and metric.type != "histogram"
 68        ]
 69
 70        if metric_definitions == []:
 71            return {}
 72
 73        # Metric definitions are intended to aggregated by client per date.
 74        # A derived table is needed to do these aggregations, instead of defining them as measures
 75        # we want to have them available as dimensions (which don't allow aggregations in their definitions)
 76        # to allow for custom measures to be later defined in Looker that aggregate these per client metrics.
 77        view_defn: Dict[str, Any] = {"name": self.name}
 78
 79        ignore_base_fields = [
 80            "client_id",
 81            "submission_date",
 82            "submission",
 83            "first_run",
 84        ] + [
 85            metric_slug
 86            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
 87            if metric.select_expression
 88            and metric.data_source.name == data_source_name
 89            and metric.type != "histogram"
 90        ]
 91
 92        base_view_dimensions = {}
 93        joined_data_sources = []
 94
 95        # check if the metric data source has joins
 96        # joined data sources are generally used for creating the "Base Fields"
 97        if data_source_definition.joins:
 98            # determine the dimensions selected by the joined data sources
 99            for joined_data_source_slug, join in data_source_definition.joins.items():
100                joined_data_source = (
101                    MetricsConfigLoader.configs.get_data_source_definition(
102                        joined_data_source_slug, self.namespace
103                    )
104                )
105
106                if joined_data_source.columns_as_dimensions:
107                    joined_data_sources.append(joined_data_source)
108
109                    date_filter = None
110                    if joined_data_source.submission_date_column != "NULL":
111                        date_filter = (
112                            None
113                            if joined_data_source.submission_date_column is None
114                            or joined_data_source.submission_date_column == "NULL"
115                            else f"{joined_data_source.submission_date_column} = '2023-01-01'"
116                        )
117
118                    # create Looker dimensions by doing a dryrun
119                    query = MetricsConfigLoader.configs.get_data_source_sql(
120                        joined_data_source_slug,
121                        self.namespace,
122                        where=date_filter,
123                    ).format(dataset=self.namespace)
124
125                    base_view_dimensions[joined_data_source_slug] = (
126                        lookml_utils._generate_dimensions_from_query(
127                            query, dryrun=dryrun
128                        )
129                    )
130
131        if (
132            data_source_definition.client_id_column == "NULL"
133            or data_source_definition.columns_as_dimensions
134        ):
135            # if the metrics data source doesn't have any joins then use the dimensions
136            # of the data source itself as base fields
137            date_filter = None
138            if data_source_definition.submission_date_column != "NULL":
139                date_filter = (
140                    "submission_date = '2023-01-01'"
141                    if data_source_definition.submission_date_column is None
142                    else f"{data_source_definition.submission_date_column} = '2023-01-01'"
143                )
144
145            query = MetricsConfigLoader.configs.get_data_source_sql(
146                data_source_definition.name,
147                self.namespace,
148                where=date_filter,
149                ignore_joins=True,
150            ).format(dataset=self.namespace)
151
152            base_view_dimensions[data_source_definition.name] = (
153                lookml_utils._generate_dimensions_from_query(query, dryrun)
154            )
155
156        # prepare base field data for query
157        base_view_fields = [
158            {
159                "name": f"{data_source}_{dimension['name']}",
160                "select_sql": f"{data_source}_{dimension['name']},\n",
161                "sql": f"{data_source}.{dimension['name'].replace('__', '.')} AS {data_source}_{dimension['name']},\n",
162            }
163            for data_source, dimensions in base_view_dimensions.items()
164            for dimension in dimensions
165            if dimension["name"] not in ignore_base_fields
166            and "hidden" not in dimension
167            and dimension["type"] != "time"
168        ]
169
170        client_id_field = (
171            "NULL"
172            if data_source_definition.client_id_column == "NULL"
173            else f'{data_source_definition.client_id_column or "client_id"}'
174        )
175
176        # filters for date ranges
177        where_sql = " AND ".join(
178            [
179                f"""
180                    {data_source.name}.{data_source.submission_date_column or "submission_date"}
181                    BETWEEN
182                    COALESCE(
183                        SAFE_CAST(
184                            {{% date_start submission_date %}} AS DATE
185                        ), CURRENT_DATE()) AND
186                    COALESCE(
187                        SAFE_CAST(
188                            {{% date_end submission_date %}} AS DATE
189                        ), CURRENT_DATE())
190                """
191                for data_source in [data_source_definition] + joined_data_sources
192                if data_source.submission_date_column != "NULL"
193            ]
194        )
195
196        # filte on sample_id if such a field exists
197        for field in base_view_fields:
198            if field["name"].endswith("_sample_id"):
199                where_sql += f"""
200                    AND
201                        {field['name'].split('_sample_id')[0]}.sample_id < {{% parameter sampling %}}
202                """
203                break
204
205        view_defn["derived_table"] = {
206            "sql": f"""
207            SELECT
208                {"".join(metric_definitions)}
209                {"".join([field['select_sql'] for field in base_view_fields])}
210                {client_id_field} AS client_id,
211                {{% if aggregate_metrics_by._parameter_value == 'day' %}}
212                {data_source_definition.submission_date_column or "submission_date"} AS analysis_basis
213                {{% elsif aggregate_metrics_by._parameter_value == 'week'  %}}
214                (FORMAT_DATE(
215                    '%F',
216                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
217                    WEEK(MONDAY)))
218                ) AS analysis_basis
219                {{% elsif aggregate_metrics_by._parameter_value == 'month'  %}}
220                (FORMAT_DATE(
221                    '%Y-%m',
222                    {data_source_definition.submission_date_column or "submission_date"})
223                ) AS analysis_basis
224                {{% elsif aggregate_metrics_by._parameter_value == 'quarter'  %}}
225                (FORMAT_DATE(
226                    '%Y-%m',
227                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
228                    QUARTER))
229                ) AS analysis_basis
230                {{% elsif aggregate_metrics_by._parameter_value == 'year'  %}}
231                (EXTRACT(
232                    YEAR FROM {data_source_definition.submission_date_column or "submission_date"})
233                ) AS analysis_basis
234                {{% else %}}
235                NULL as analysis_basis
236                {{% endif %}}
237            FROM
238                (
239                    SELECT
240                        {data_source_name}.*,
241                        {"".join([field['sql'] for field in base_view_fields])}
242                    FROM
243                    {
244                        MetricsConfigLoader.configs.get_data_source_sql(
245                            data_source_name,
246                            self.namespace,
247                            select_fields=False
248                        ).format(dataset=self.namespace)
249                    }
250                    WHERE {where_sql}
251                )
252            GROUP BY
253                {"".join([field['select_sql'] for field in base_view_fields])}
254                client_id,
255                analysis_basis
256            """
257        }
258
259        view_defn["dimensions"] = self.get_dimensions()
260        view_defn["dimension_groups"] = self.get_dimension_groups()
261
262        # add the Looker dimensions
263        for data_source, dimensions in base_view_dimensions.items():
264            for dimension in dimensions:
265                if (
266                    dimension["name"] not in ignore_base_fields
267                    and dimension.get("type", "") != "time"
268                ):
269                    dimension["sql"] = (
270                        "${TABLE}." + f"{data_source}_{dimension['name']}"
271                    )
272                    dimension["group_label"] = "Base Fields"
273                    if not lookml_utils._is_dimension_group(dimension):
274                        view_defn["dimensions"].append(dimension)
275                    else:
276                        view_defn["dimension_groups"].append(dimension)
277                    # avoid duplicate dimensions
278                    ignore_base_fields.append(dimension["name"])
279
280        view_defn["measures"] = self.get_measures(
281            view_defn["dimensions"],
282        )
283        view_defn["sets"] = self._get_sets()
284        view_defn["parameters"] = self._get_parameters(view_defn["dimensions"])
285
286        return {"views": [view_defn]}
287
288    def get_dimensions(
289        self,
290        _table=None,
291        _v1_name: Optional[str] = None,
292        _dryrun=None,
293    ) -> List[Dict[str, Any]]:
294        """Get the set of dimensions for this view based on the metric definitions in metric-hub."""
295        namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
296            self.namespace
297        )
298        metric_definitions = namespace_definitions.metrics.definitions
299        data_source_name = re.sub("^metric_definitions_", "", self.name)
300
301        return [
302            {
303                "name": "client_id",
304                "type": "string",
305                "sql": "SAFE_CAST(${TABLE}.client_id AS STRING)",
306                "label": "Client ID",
307                "primary_key": "yes",
308                "group_label": "Base Fields",
309                "description": "Unique client identifier",
310            },
311        ] + [  # add a dimension for each metric definition
312            {
313                "name": metric_slug,
314                "group_label": "Metrics",
315                "label": metric.friendly_name
316                or lookml_utils.slug_to_title(metric_slug),
317                "description": metric.description or "",
318                "type": "number",
319                "sql": "${TABLE}." + metric_slug,
320            }
321            for metric_slug, metric in metric_definitions.items()
322            if metric.select_expression
323            and metric.data_source.name == data_source_name
324            and metric.type != "histogram"
325        ]
326
327    def get_dimension_groups(self) -> List[Dict[str, Any]]:
328        """Get dimension groups for this view."""
329        return [
330            {
331                "name": "submission",
332                "type": "time",
333                "group_label": "Base Fields",
334                "sql": "CAST(${TABLE}.analysis_basis AS TIMESTAMP)",
335                "label": "Submission",
336                "timeframes": [
337                    "raw",
338                    "date",
339                    "week",
340                    "month",
341                    "quarter",
342                    "year",
343                ],
344            }
345        ]
346
347    def _get_sets(self) -> List[Dict[str, Any]]:
348        """Generate metric sets."""
349        # group all the metric dimensions into a set
350        dimensions = self.get_dimensions()
351        measures = self.get_measures(dimensions)
352
353        return [
354            {
355                "name": "metrics",
356                "fields": [
357                    dimension["name"]
358                    for dimension in dimensions
359                    if dimension["name"] != "client_id"
360                ]
361                + [measure["name"] for measure in measures],
362            }
363        ]
364
365    def _get_parameters(self, dimensions: List[dict]):
366        hide_sampling = "yes"
367
368        for dim in dimensions:
369            if dim["name"] == "sample_id":
370                hide_sampling = "no"
371                break
372
373        return [
374            {
375                "name": "aggregate_metrics_by",
376                "label": "Aggregate Client Metrics Per",
377                "type": "unquoted",
378                "default_value": "day",
379                "allowed_values": [
380                    {"label": "Per Day", "value": "day"},
381                    {"label": "Per Week", "value": "week"},
382                    {"label": "Per Month", "value": "month"},
383                    {"label": "Per Quarter", "value": "quarter"},
384                    {"label": "Per Year", "value": "year"},
385                    {"label": "Overall", "value": "overall"},
386                ],
387            },
388            {
389                "name": "sampling",
390                "label": "Sample of source data in %",
391                "type": "unquoted",
392                "default_value": "100",
393                "hidden": hide_sampling,
394            },
395        ]
396
397    def get_measures(
398        self, dimensions: List[dict]
399    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
400        """Get statistics as measures."""
401        measures = []
402        sampling = "1"
403
404        for dim in dimensions:
405            if dim["name"] == "sample_id":
406                sampling = "100 / {% parameter sampling %}"
407                break
408
409        for dimension in dimensions:
410            metric = MetricsConfigLoader.configs.get_metric_definition(
411                dimension["name"], self.namespace
412            )
413            if metric and metric.statistics:
414                for statistic_slug, statistic_conf in metric.statistics.items():
415                    dimension_label = dimension.get("label") or dimension.get("name")
416                    if statistic_slug in [
417                        "average",
418                        "max",
419                        "min",
420                        "median",
421                    ]:
422                        measures.append(
423                            {
424                                "name": f"{dimension['name']}_{statistic_slug}",
425                                "type": statistic_slug,
426                                "sql": "${TABLE}." + dimension["name"],
427                                "label": f"{dimension_label} {statistic_slug.title()}",
428                                "group_label": "Statistics",
429                                "description": f"{statistic_slug.title()} of {dimension_label}",
430                            }
431                        )
432                    elif statistic_slug == "sum":
433                        measures.append(
434                            {
435                                "name": f"{dimension['name']}_{statistic_slug}",
436                                "type": "sum",
437                                "sql": "${TABLE}." + dimension["name"] + "*" + sampling,
438                                "label": f"{dimension_label} Sum",
439                                "group_label": "Statistics",
440                                "description": f"Sum of {dimension_label}",
441                            }
442                        )
443                    elif statistic_slug == "client_count":
444                        measures.append(
445                            {
446                                "name": (
447                                    f"{dimension['name']}_{statistic_slug}_sampled"
448                                    if sampling
449                                    else f"{dimension['name']}_{statistic_slug}"
450                                ),
451                                "type": "count_distinct",
452                                "label": f"{dimension_label} Client Count",
453                                "group_label": "Statistics",
454                                "sql": "IF(${TABLE}."
455                                + f"{dimension['name']} > 0, "
456                                + "${TABLE}.client_id, SAFE_CAST(NULL AS STRING))",
457                                "description": f"Number of clients with {dimension_label}",
458                                "hidden": "yes" if sampling else "no",
459                            }
460                        )
461
462                        if sampling:
463                            measures.append(
464                                {
465                                    "name": f"{dimension['name']}_{statistic_slug}",
466                                    "type": "number",
467                                    "label": f"{dimension_label} Client Count",
468                                    "group_label": "Statistics",
469                                    "sql": "${"
470                                    + f"{dimension['name']}_{statistic_slug}_sampled"
471                                    + "} *"
472                                    + sampling,
473                                    "description": f"Number of clients with {dimension_label}",
474                                }
475                            )
476                    elif statistic_slug == "dau_proportion":
477                        if "numerator" in statistic_conf:
478                            [numerator, numerator_stat] = statistic_conf[
479                                "numerator"
480                            ].split(".")
481                            measures.append(
482                                {
483                                    "name": "DAU_sampled" if sampling else "DAU",
484                                    "type": "count_distinct",
485                                    "label": "DAU",
486                                    "group_label": "Statistics",
487                                    "sql": "${TABLE}.client_id",
488                                    "hidden": "yes",
489                                }
490                            )
491
492                            if sampling:
493                                measures.append(
494                                    {
495                                        "name": "DAU",
496                                        "type": "number",
497                                        "label": "DAU",
498                                        "group_label": "Statistics",
499                                        "sql": "${DAU_sampled} *" + sampling,
500                                        "hidden": "yes",
501                                    }
502                                )
503
504                            measures.append(
505                                {
506                                    "name": f"{dimension['name']}_{statistic_slug}",
507                                    "type": "number",
508                                    "label": f"{dimension_label} DAU Proportion",
509                                    "sql": "SAFE_DIVIDE(${"
510                                    + f"{numerator}_{numerator_stat}"
511                                    + "}, ${DAU})",
512                                    "group_label": "Statistics",
513                                    "description": f"Proportion of daily active users with {dimension['name']}",
514                                }
515                            )
516                    elif statistic_slug == "ratio":
517                        if (
518                            "numerator" in statistic_conf
519                            and "denominator" in statistic_conf
520                        ):
521                            [numerator, numerator_stat] = statistic_conf[
522                                "numerator"
523                            ].split(".")
524                            [denominator, denominator_stat] = statistic_conf[
525                                "denominator"
526                            ].split(".")
527
528                            measures.append(
529                                {
530                                    "name": f"{dimension['name']}_{statistic_slug}",
531                                    "type": "number",
532                                    "label": f"{dimension_label} Ratio",
533                                    "sql": "SAFE_DIVIDE(${"
534                                    + f"{numerator}_{numerator_stat}"
535                                    + "}, ${"
536                                    + f"{denominator}_{denominator_stat}"
537                                    + "})",
538                                    "group_label": "Statistics",
539                                    "description": f""""
540                                        Ratio between {statistic_conf['numerator']} and
541                                        {statistic_conf['denominator']}""",
542                                }
543                            )
544                    elif statistic_slug == "rolling_average":
545                        aggregation = statistic_conf.get("aggregation", "sum")
546                        if "window_sizes" in statistic_conf:
547                            for window_size in statistic_conf["window_sizes"]:
548                                measures.append(
549                                    {
550                                        "name": f"{dimension['name']}_{window_size}_day_{statistic_slug}",
551                                        "type": "number",
552                                        "label": f"{dimension_label} {window_size} Day Rolling Average",
553                                        "sql": f"""
554                                            AVG({aggregation}(${{TABLE}}.{dimension["name"]} * {sampling})) OVER (
555                                                ROWS {window_size} PRECEDING
556                                        )""",
557                                        "group_label": "Statistics",
558                                        "description": f"{window_size} day rolling average of {dimension_label}",
559                                    }
560                                )
561
562        return measures

A view for metric-hub metrics that come from the same data source.

MetricDefinitionsView(namespace: str, name: str, tables: List[Dict[str, str]])
20    def __init__(self, namespace: str, name: str, tables: List[Dict[str, str]]):
21        """Get an instance of an MetricDefinitionsView."""
22        super().__init__(namespace, name, MetricDefinitionsView.type, tables)

Get an instance of an MetricDefinitionsView.

type: str = 'metric_definitions_view'
@classmethod
def from_db_views( klass, namespace: str, is_glean: bool, channels: List[Dict[str, str]], db_views: dict) -> Iterator[MetricDefinitionsView]:
24    @classmethod
25    def from_db_views(
26        klass,
27        namespace: str,
28        is_glean: bool,
29        channels: List[Dict[str, str]],
30        db_views: dict,
31    ) -> Iterator[MetricDefinitionsView]:
32        """Get Metric Definition Views from db views and app variants."""
33        return iter(())

Get Metric Definition Views from db views and app variants.

@classmethod
def from_dict( klass, namespace: str, name: str, definition: generator.views.view.ViewDict) -> MetricDefinitionsView:
35    @classmethod
36    def from_dict(
37        klass, namespace: str, name: str, definition: ViewDict
38    ) -> MetricDefinitionsView:
39        """Get a MetricDefinitionsView from a dict representation."""
40        return klass(namespace, name, definition.get("tables", []))

Get a MetricDefinitionsView from a dict representation.

def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 42    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 43        """Get this view as LookML."""
 44        namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
 45            self.namespace
 46        )
 47        if namespace_definitions is None:
 48            return {}
 49
 50        # get all metric definitions that depend on the data source represented by this view
 51        data_source_name = re.sub("^metric_definitions_", "", self.name)
 52        data_source_definition = MetricsConfigLoader.configs.get_data_source_definition(
 53            data_source_name, self.namespace
 54        )
 55
 56        if data_source_definition is None:
 57            return {}
 58
 59        # todo: hide deprecated metrics?
 60        metric_definitions = [
 61            f"""{
 62                MetricsConfigLoader.configs.get_env().from_string(metric.select_expression).render()
 63            } AS {metric_slug},\n"""
 64            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
 65            if metric.select_expression
 66            and metric.data_source.name == data_source_name
 67            and metric.type != "histogram"
 68        ]
 69
 70        if metric_definitions == []:
 71            return {}
 72
 73        # Metric definitions are intended to aggregated by client per date.
 74        # A derived table is needed to do these aggregations, instead of defining them as measures
 75        # we want to have them available as dimensions (which don't allow aggregations in their definitions)
 76        # to allow for custom measures to be later defined in Looker that aggregate these per client metrics.
 77        view_defn: Dict[str, Any] = {"name": self.name}
 78
 79        ignore_base_fields = [
 80            "client_id",
 81            "submission_date",
 82            "submission",
 83            "first_run",
 84        ] + [
 85            metric_slug
 86            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
 87            if metric.select_expression
 88            and metric.data_source.name == data_source_name
 89            and metric.type != "histogram"
 90        ]
 91
 92        base_view_dimensions = {}
 93        joined_data_sources = []
 94
 95        # check if the metric data source has joins
 96        # joined data sources are generally used for creating the "Base Fields"
 97        if data_source_definition.joins:
 98            # determine the dimensions selected by the joined data sources
 99            for joined_data_source_slug, join in data_source_definition.joins.items():
100                joined_data_source = (
101                    MetricsConfigLoader.configs.get_data_source_definition(
102                        joined_data_source_slug, self.namespace
103                    )
104                )
105
106                if joined_data_source.columns_as_dimensions:
107                    joined_data_sources.append(joined_data_source)
108
109                    date_filter = None
110                    if joined_data_source.submission_date_column != "NULL":
111                        date_filter = (
112                            None
113                            if joined_data_source.submission_date_column is None
114                            or joined_data_source.submission_date_column == "NULL"
115                            else f"{joined_data_source.submission_date_column} = '2023-01-01'"
116                        )
117
118                    # create Looker dimensions by doing a dryrun
119                    query = MetricsConfigLoader.configs.get_data_source_sql(
120                        joined_data_source_slug,
121                        self.namespace,
122                        where=date_filter,
123                    ).format(dataset=self.namespace)
124
125                    base_view_dimensions[joined_data_source_slug] = (
126                        lookml_utils._generate_dimensions_from_query(
127                            query, dryrun=dryrun
128                        )
129                    )
130
131        if (
132            data_source_definition.client_id_column == "NULL"
133            or data_source_definition.columns_as_dimensions
134        ):
135            # if the metrics data source doesn't have any joins then use the dimensions
136            # of the data source itself as base fields
137            date_filter = None
138            if data_source_definition.submission_date_column != "NULL":
139                date_filter = (
140                    "submission_date = '2023-01-01'"
141                    if data_source_definition.submission_date_column is None
142                    else f"{data_source_definition.submission_date_column} = '2023-01-01'"
143                )
144
145            query = MetricsConfigLoader.configs.get_data_source_sql(
146                data_source_definition.name,
147                self.namespace,
148                where=date_filter,
149                ignore_joins=True,
150            ).format(dataset=self.namespace)
151
152            base_view_dimensions[data_source_definition.name] = (
153                lookml_utils._generate_dimensions_from_query(query, dryrun)
154            )
155
156        # prepare base field data for query
157        base_view_fields = [
158            {
159                "name": f"{data_source}_{dimension['name']}",
160                "select_sql": f"{data_source}_{dimension['name']},\n",
161                "sql": f"{data_source}.{dimension['name'].replace('__', '.')} AS {data_source}_{dimension['name']},\n",
162            }
163            for data_source, dimensions in base_view_dimensions.items()
164            for dimension in dimensions
165            if dimension["name"] not in ignore_base_fields
166            and "hidden" not in dimension
167            and dimension["type"] != "time"
168        ]
169
170        client_id_field = (
171            "NULL"
172            if data_source_definition.client_id_column == "NULL"
173            else f'{data_source_definition.client_id_column or "client_id"}'
174        )
175
176        # filters for date ranges
177        where_sql = " AND ".join(
178            [
179                f"""
180                    {data_source.name}.{data_source.submission_date_column or "submission_date"}
181                    BETWEEN
182                    COALESCE(
183                        SAFE_CAST(
184                            {{% date_start submission_date %}} AS DATE
185                        ), CURRENT_DATE()) AND
186                    COALESCE(
187                        SAFE_CAST(
188                            {{% date_end submission_date %}} AS DATE
189                        ), CURRENT_DATE())
190                """
191                for data_source in [data_source_definition] + joined_data_sources
192                if data_source.submission_date_column != "NULL"
193            ]
194        )
195
196        # filte on sample_id if such a field exists
197        for field in base_view_fields:
198            if field["name"].endswith("_sample_id"):
199                where_sql += f"""
200                    AND
201                        {field['name'].split('_sample_id')[0]}.sample_id < {{% parameter sampling %}}
202                """
203                break
204
205        view_defn["derived_table"] = {
206            "sql": f"""
207            SELECT
208                {"".join(metric_definitions)}
209                {"".join([field['select_sql'] for field in base_view_fields])}
210                {client_id_field} AS client_id,
211                {{% if aggregate_metrics_by._parameter_value == 'day' %}}
212                {data_source_definition.submission_date_column or "submission_date"} AS analysis_basis
213                {{% elsif aggregate_metrics_by._parameter_value == 'week'  %}}
214                (FORMAT_DATE(
215                    '%F',
216                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
217                    WEEK(MONDAY)))
218                ) AS analysis_basis
219                {{% elsif aggregate_metrics_by._parameter_value == 'month'  %}}
220                (FORMAT_DATE(
221                    '%Y-%m',
222                    {data_source_definition.submission_date_column or "submission_date"})
223                ) AS analysis_basis
224                {{% elsif aggregate_metrics_by._parameter_value == 'quarter'  %}}
225                (FORMAT_DATE(
226                    '%Y-%m',
227                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
228                    QUARTER))
229                ) AS analysis_basis
230                {{% elsif aggregate_metrics_by._parameter_value == 'year'  %}}
231                (EXTRACT(
232                    YEAR FROM {data_source_definition.submission_date_column or "submission_date"})
233                ) AS analysis_basis
234                {{% else %}}
235                NULL as analysis_basis
236                {{% endif %}}
237            FROM
238                (
239                    SELECT
240                        {data_source_name}.*,
241                        {"".join([field['sql'] for field in base_view_fields])}
242                    FROM
243                    {
244                        MetricsConfigLoader.configs.get_data_source_sql(
245                            data_source_name,
246                            self.namespace,
247                            select_fields=False
248                        ).format(dataset=self.namespace)
249                    }
250                    WHERE {where_sql}
251                )
252            GROUP BY
253                {"".join([field['select_sql'] for field in base_view_fields])}
254                client_id,
255                analysis_basis
256            """
257        }
258
259        view_defn["dimensions"] = self.get_dimensions()
260        view_defn["dimension_groups"] = self.get_dimension_groups()
261
262        # add the Looker dimensions
263        for data_source, dimensions in base_view_dimensions.items():
264            for dimension in dimensions:
265                if (
266                    dimension["name"] not in ignore_base_fields
267                    and dimension.get("type", "") != "time"
268                ):
269                    dimension["sql"] = (
270                        "${TABLE}." + f"{data_source}_{dimension['name']}"
271                    )
272                    dimension["group_label"] = "Base Fields"
273                    if not lookml_utils._is_dimension_group(dimension):
274                        view_defn["dimensions"].append(dimension)
275                    else:
276                        view_defn["dimension_groups"].append(dimension)
277                    # avoid duplicate dimensions
278                    ignore_base_fields.append(dimension["name"])
279
280        view_defn["measures"] = self.get_measures(
281            view_defn["dimensions"],
282        )
283        view_defn["sets"] = self._get_sets()
284        view_defn["parameters"] = self._get_parameters(view_defn["dimensions"])
285
286        return {"views": [view_defn]}

Get this view as LookML.

def get_dimensions( self, _table=None, _v1_name: Optional[str] = None, _dryrun=None) -> List[Dict[str, Any]]:
288    def get_dimensions(
289        self,
290        _table=None,
291        _v1_name: Optional[str] = None,
292        _dryrun=None,
293    ) -> List[Dict[str, Any]]:
294        """Get the set of dimensions for this view based on the metric definitions in metric-hub."""
295        namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
296            self.namespace
297        )
298        metric_definitions = namespace_definitions.metrics.definitions
299        data_source_name = re.sub("^metric_definitions_", "", self.name)
300
301        return [
302            {
303                "name": "client_id",
304                "type": "string",
305                "sql": "SAFE_CAST(${TABLE}.client_id AS STRING)",
306                "label": "Client ID",
307                "primary_key": "yes",
308                "group_label": "Base Fields",
309                "description": "Unique client identifier",
310            },
311        ] + [  # add a dimension for each metric definition
312            {
313                "name": metric_slug,
314                "group_label": "Metrics",
315                "label": metric.friendly_name
316                or lookml_utils.slug_to_title(metric_slug),
317                "description": metric.description or "",
318                "type": "number",
319                "sql": "${TABLE}." + metric_slug,
320            }
321            for metric_slug, metric in metric_definitions.items()
322            if metric.select_expression
323            and metric.data_source.name == data_source_name
324            and metric.type != "histogram"
325        ]

Get the set of dimensions for this view based on the metric definitions in metric-hub.

def get_dimension_groups(self) -> List[Dict[str, Any]]:
327    def get_dimension_groups(self) -> List[Dict[str, Any]]:
328        """Get dimension groups for this view."""
329        return [
330            {
331                "name": "submission",
332                "type": "time",
333                "group_label": "Base Fields",
334                "sql": "CAST(${TABLE}.analysis_basis AS TIMESTAMP)",
335                "label": "Submission",
336                "timeframes": [
337                    "raw",
338                    "date",
339                    "week",
340                    "month",
341                    "quarter",
342                    "year",
343                ],
344            }
345        ]

Get dimension groups for this view.

def get_measures( self, dimensions: List[dict]) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
397    def get_measures(
398        self, dimensions: List[dict]
399    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
400        """Get statistics as measures."""
401        measures = []
402        sampling = "1"
403
404        for dim in dimensions:
405            if dim["name"] == "sample_id":
406                sampling = "100 / {% parameter sampling %}"
407                break
408
409        for dimension in dimensions:
410            metric = MetricsConfigLoader.configs.get_metric_definition(
411                dimension["name"], self.namespace
412            )
413            if metric and metric.statistics:
414                for statistic_slug, statistic_conf in metric.statistics.items():
415                    dimension_label = dimension.get("label") or dimension.get("name")
416                    if statistic_slug in [
417                        "average",
418                        "max",
419                        "min",
420                        "median",
421                    ]:
422                        measures.append(
423                            {
424                                "name": f"{dimension['name']}_{statistic_slug}",
425                                "type": statistic_slug,
426                                "sql": "${TABLE}." + dimension["name"],
427                                "label": f"{dimension_label} {statistic_slug.title()}",
428                                "group_label": "Statistics",
429                                "description": f"{statistic_slug.title()} of {dimension_label}",
430                            }
431                        )
432                    elif statistic_slug == "sum":
433                        measures.append(
434                            {
435                                "name": f"{dimension['name']}_{statistic_slug}",
436                                "type": "sum",
437                                "sql": "${TABLE}." + dimension["name"] + "*" + sampling,
438                                "label": f"{dimension_label} Sum",
439                                "group_label": "Statistics",
440                                "description": f"Sum of {dimension_label}",
441                            }
442                        )
443                    elif statistic_slug == "client_count":
444                        measures.append(
445                            {
446                                "name": (
447                                    f"{dimension['name']}_{statistic_slug}_sampled"
448                                    if sampling
449                                    else f"{dimension['name']}_{statistic_slug}"
450                                ),
451                                "type": "count_distinct",
452                                "label": f"{dimension_label} Client Count",
453                                "group_label": "Statistics",
454                                "sql": "IF(${TABLE}."
455                                + f"{dimension['name']} > 0, "
456                                + "${TABLE}.client_id, SAFE_CAST(NULL AS STRING))",
457                                "description": f"Number of clients with {dimension_label}",
458                                "hidden": "yes" if sampling else "no",
459                            }
460                        )
461
462                        if sampling:
463                            measures.append(
464                                {
465                                    "name": f"{dimension['name']}_{statistic_slug}",
466                                    "type": "number",
467                                    "label": f"{dimension_label} Client Count",
468                                    "group_label": "Statistics",
469                                    "sql": "${"
470                                    + f"{dimension['name']}_{statistic_slug}_sampled"
471                                    + "} *"
472                                    + sampling,
473                                    "description": f"Number of clients with {dimension_label}",
474                                }
475                            )
476                    elif statistic_slug == "dau_proportion":
477                        if "numerator" in statistic_conf:
478                            [numerator, numerator_stat] = statistic_conf[
479                                "numerator"
480                            ].split(".")
481                            measures.append(
482                                {
483                                    "name": "DAU_sampled" if sampling else "DAU",
484                                    "type": "count_distinct",
485                                    "label": "DAU",
486                                    "group_label": "Statistics",
487                                    "sql": "${TABLE}.client_id",
488                                    "hidden": "yes",
489                                }
490                            )
491
492                            if sampling:
493                                measures.append(
494                                    {
495                                        "name": "DAU",
496                                        "type": "number",
497                                        "label": "DAU",
498                                        "group_label": "Statistics",
499                                        "sql": "${DAU_sampled} *" + sampling,
500                                        "hidden": "yes",
501                                    }
502                                )
503
504                            measures.append(
505                                {
506                                    "name": f"{dimension['name']}_{statistic_slug}",
507                                    "type": "number",
508                                    "label": f"{dimension_label} DAU Proportion",
509                                    "sql": "SAFE_DIVIDE(${"
510                                    + f"{numerator}_{numerator_stat}"
511                                    + "}, ${DAU})",
512                                    "group_label": "Statistics",
513                                    "description": f"Proportion of daily active users with {dimension['name']}",
514                                }
515                            )
516                    elif statistic_slug == "ratio":
517                        if (
518                            "numerator" in statistic_conf
519                            and "denominator" in statistic_conf
520                        ):
521                            [numerator, numerator_stat] = statistic_conf[
522                                "numerator"
523                            ].split(".")
524                            [denominator, denominator_stat] = statistic_conf[
525                                "denominator"
526                            ].split(".")
527
528                            measures.append(
529                                {
530                                    "name": f"{dimension['name']}_{statistic_slug}",
531                                    "type": "number",
532                                    "label": f"{dimension_label} Ratio",
533                                    "sql": "SAFE_DIVIDE(${"
534                                    + f"{numerator}_{numerator_stat}"
535                                    + "}, ${"
536                                    + f"{denominator}_{denominator_stat}"
537                                    + "})",
538                                    "group_label": "Statistics",
539                                    "description": f""""
540                                        Ratio between {statistic_conf['numerator']} and
541                                        {statistic_conf['denominator']}""",
542                                }
543                            )
544                    elif statistic_slug == "rolling_average":
545                        aggregation = statistic_conf.get("aggregation", "sum")
546                        if "window_sizes" in statistic_conf:
547                            for window_size in statistic_conf["window_sizes"]:
548                                measures.append(
549                                    {
550                                        "name": f"{dimension['name']}_{window_size}_day_{statistic_slug}",
551                                        "type": "number",
552                                        "label": f"{dimension_label} {window_size} Day Rolling Average",
553                                        "sql": f"""
554                                            AVG({aggregation}(${{TABLE}}.{dimension["name"]} * {sampling})) OVER (
555                                                ROWS {window_size} PRECEDING
556                                        )""",
557                                        "group_label": "Statistics",
558                                        "description": f"{window_size} day rolling average of {dimension_label}",
559                                    }
560                                )
561
562        return measures

Get statistics as measures.