generator.views.metric_definitions_view

Defines a class describing a Looker view whose fields are metric definitions sourced from metric-hub.

  1"""Class to describe a view with metrics from metric-hub."""
  2
  3from __future__ import annotations
  4
  5import re
  6from typing import Any, Dict, Iterator, List, Optional, Union
  7
  8from generator.metrics_utils import MetricsConfigLoader
  9
 10from . import lookml_utils
 11from .view import View, ViewDict
 12
 13
class MetricDefinitionsView(View):
    """A view for metric-hub metrics that come from the same data source."""

    # View-type discriminator used by the generator when dispatching on view kinds.
    type: str = "metric_definitions_view"

    def __init__(self, namespace: str, name: str, tables: List[Dict[str, str]]):
        """Get an instance of a MetricDefinitionsView."""
        super().__init__(namespace, name, MetricDefinitionsView.type, tables)
 22
 23    @classmethod
 24    def from_db_views(
 25        klass,
 26        namespace: str,
 27        is_glean: bool,
 28        channels: List[Dict[str, str]],
 29        db_views: dict,
 30    ) -> Iterator[MetricDefinitionsView]:
 31        """Get Metric Definition Views from db views and app variants."""
 32        return iter(())
 33
 34    @classmethod
 35    def from_dict(
 36        klass, namespace: str, name: str, definition: ViewDict
 37    ) -> MetricDefinitionsView:
 38        """Get a MetricDefinitionsView from a dict representation."""
 39        return klass(namespace, name, definition.get("tables", []))
 40
    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
        """Get this view as LookML.

        Builds a single derived-table view: the table SQL selects every
        metric-hub metric defined for this view's data source, plus "base
        fields" discovered by dry-running the data source (and any joined
        data sources) query, aggregated per client and analysis period.

        :param v1_name: unused here; part of the shared View.to_lookml
            interface.
        :param dryrun: dryrun helper handed to
            lookml_utils._generate_dimensions_from_query.
        :return: ``{"views": [view_defn]}``, or ``{}`` when the namespace,
            the data source, or the metric definitions cannot be found.
        """
        namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
            self.namespace
        )
        if namespace_definitions is None:
            return {}

        # get all metric definitions that depend on the data source represented by this view
        data_source_name = re.sub("^metric_definitions_", "", self.name)
        data_source_definition = MetricsConfigLoader.configs.get_data_source_definition(
            data_source_name, self.namespace
        )

        if data_source_definition is None:
            return {}

        # TODO: hide deprecated metrics?
        # Render each metric's select expression through the metric-hub Jinja
        # environment. Histogram metrics are skipped: they cannot be exposed
        # as plain numeric dimensions.
        metric_definitions = [
            f"""{
                MetricsConfigLoader.configs.get_env().from_string(metric.select_expression).render()
            } AS {metric_slug},\n"""
            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
            if metric.select_expression
            and metric.data_source.name == data_source_name
            and metric.type != "histogram"
        ]

        if metric_definitions == []:
            return {}

        # Metric definitions are intended to be aggregated by client per date.
        # A derived table is needed to do these aggregations, instead of defining them as measures
        # we want to have them available as dimensions (which don't allow aggregations in their definitions)
        # to allow for custom measures to be later defined in Looker that aggregate these per client metrics.
        view_defn: Dict[str, Any] = {"name": self.name}

        # Fields that must not be re-exposed as "Base Fields" dimensions:
        # identifier/date columns handled explicitly elsewhere, plus the
        # metric slugs themselves (those become "Metrics" dimensions).
        ignore_base_fields = [
            "client_id",
            "submission_date",
            "submission",
            "first_run",
        ] + [
            metric_slug
            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
            if metric.select_expression
            and metric.data_source.name == data_source_name
            and metric.type != "histogram"
        ]

        # data source slug -> list of dimensions derived via dryrun
        base_view_dimensions = {}
        joined_data_sources = []

        # check if the metric data source has joins
        # joined data sources are generally used for creating the "Base Fields"
        if data_source_definition.joins:
            # determine the dimensions selected by the joined data sources
            for joined_data_source_slug, join in data_source_definition.joins.items():
                joined_data_source = (
                    MetricsConfigLoader.configs.get_data_source_definition(
                        joined_data_source_slug, self.namespace
                    )
                )

                if joined_data_source.columns_as_dimensions:
                    joined_data_sources.append(joined_data_source)

                    # A fixed date keeps the dryrun query cheap; the real date
                    # range filtering happens in the derived-table SQL below.
                    date_filter = None
                    if joined_data_source.submission_date_column != "NULL":
                        date_filter = (
                            None
                            if joined_data_source.submission_date_column is None
                            or joined_data_source.submission_date_column == "NULL"
                            else f"{joined_data_source.submission_date_column} = '2023-01-01'"
                        )

                    # create Looker dimensions by doing a dryrun
                    query = MetricsConfigLoader.configs.get_data_source_sql(
                        joined_data_source_slug,
                        self.namespace,
                        where=date_filter,
                    ).format(dataset=self.namespace)

                    base_view_dimensions[joined_data_source_slug] = (
                        lookml_utils._generate_dimensions_from_query(
                            query, dryrun=dryrun
                        )
                    )

        if (
            data_source_definition.client_id_column == "NULL"
            and not base_view_dimensions
        ) or data_source_definition.columns_as_dimensions:
            # if the metrics data source doesn't have any joins then use the dimensions
            # of the data source itself as base fields
            date_filter = None
            if data_source_definition.submission_date_column != "NULL":
                # A None submission_date_column falls back to the default
                # "submission_date" column name.
                date_filter = (
                    "submission_date = '2023-01-01'"
                    if data_source_definition.submission_date_column is None
                    else f"{data_source_definition.submission_date_column} = '2023-01-01'"
                )

            query = MetricsConfigLoader.configs.get_data_source_sql(
                data_source_definition.name,
                self.namespace,
                where=date_filter,
                ignore_joins=True,
            ).format(dataset=self.namespace)

            base_view_dimensions[data_source_definition.name] = (
                lookml_utils._generate_dimensions_from_query(query, dryrun)
            )

        # to prevent duplicate dimensions, especially when working with time dimensions
        # where names are modified potentially causing naming collisions
        seen_dimensions = set()
        # prepare base field data for query
        base_view_fields = []
        for data_source, dimensions in base_view_dimensions.items():
            for dimension in dimensions:
                if (
                    dimension["name"] not in ignore_base_fields
                    and dimension["name"] not in seen_dimensions
                    and "hidden" not in dimension
                ):
                    # "__" in generated dimension names encodes a nested field
                    # reference, so translate it back to "." for the SQL.
                    sql = (
                        f"{data_source}.{dimension['name'].replace('__', '.')} AS"
                        + f" {data_source}_{dimension['name']},\n"
                    )
                    # date/time/timestamp suffixes are removed when generating lookml dimensions, however we
                    # need the original field name for the derived view SQL
                    if dimension["type"] == "time" and not dimension["sql"].endswith(
                        dimension["name"]
                    ):
                        suffix = dimension["sql"].split(
                            dimension["name"].replace("__", ".")
                        )[-1]
                        sql = (
                            f"{data_source}.{(dimension['name']+suffix).replace('__', '.')} AS"
                            + f" {data_source}_{dimension['name']},\n"
                        )

                    base_view_fields.append(
                        {
                            "name": f"{data_source}_{dimension['name']}",
                            "select_sql": f"{data_source}_{dimension['name']},\n",
                            "sql": sql,
                        }
                    )
                    seen_dimensions.add(dimension["name"])

        client_id_field = (
            "NULL"
            if data_source_definition.client_id_column == "NULL"
            else f'{data_source_definition.client_id_column or "client_id"}'
        )

        # filters for date ranges; {% date_start/date_end %} are Looker Liquid
        # filter variables resolved at query time
        where_sql = " AND ".join(
            [
                f"""
                    {data_source.name}.{data_source.submission_date_column or "submission_date"}
                    BETWEEN
                    COALESCE(
                        SAFE_CAST(
                            {{% date_start submission_date %}} AS DATE
                        ), CURRENT_DATE()) AND
                    COALESCE(
                        SAFE_CAST(
                            {{% date_end submission_date %}} AS DATE
                        ), CURRENT_DATE())
                """
                for data_source in [data_source_definition] + joined_data_sources
                if data_source.submission_date_column != "NULL"
            ]
        )

        # filter on sample_id if such a field exists
        for field in base_view_fields:
            if field["name"].endswith("_sample_id"):
                where_sql += f"""
                    AND
                        {field['name'].split('_sample_id')[0]}.sample_id < {{% parameter sampling %}}
                """
                break

        # The Liquid {% if aggregate_metrics_by ... %} chain picks the
        # analysis_basis granularity from the view's parameter at query time.
        view_defn["derived_table"] = {
            "sql": f"""
            SELECT
                {"".join(metric_definitions)}
                {"".join([field['select_sql'] for field in base_view_fields])}
                {client_id_field} AS client_id,
                {{% if aggregate_metrics_by._parameter_value == 'day' %}}
                {data_source_definition.submission_date_column or "submission_date"} AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'week'  %}}
                (FORMAT_DATE(
                    '%F',
                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
                    WEEK(MONDAY)))
                ) AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'month'  %}}
                (FORMAT_DATE(
                    '%Y-%m',
                    {data_source_definition.submission_date_column or "submission_date"})
                ) AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'quarter'  %}}
                (FORMAT_DATE(
                    '%Y-%m',
                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
                    QUARTER))
                ) AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'year'  %}}
                (EXTRACT(
                    YEAR FROM {data_source_definition.submission_date_column or "submission_date"})
                ) AS analysis_basis
                {{% else %}}
                NULL as analysis_basis
                {{% endif %}}
            FROM
                (
                    SELECT
                        {data_source_name}.*,
                        {"".join([field['sql'] for field in base_view_fields])}
                    FROM
                    {
                        MetricsConfigLoader.configs.get_data_source_sql(
                            data_source_name,
                            self.namespace,
                            select_fields=False
                        ).format(dataset=self.namespace)
                    }
                    WHERE {where_sql}
                )
            GROUP BY
                {"".join([field['select_sql'] for field in base_view_fields])}
                client_id,
                analysis_basis
            """
        }

        view_defn["dimensions"] = self.get_dimensions()
        view_defn["dimension_groups"] = self.get_dimension_groups()

        # add the Looker dimensions
        for data_source, dimensions in base_view_dimensions.items():
            for dimension in dimensions:
                if dimension["name"] not in ignore_base_fields:
                    dimension["sql"] = (
                        "${TABLE}." + f"{data_source}_{dimension['name']}"
                    )
                    dimension["group_label"] = "Base Fields"
                    if not lookml_utils._is_dimension_group(dimension):
                        view_defn["dimensions"].append(dimension)
                    else:
                        view_defn["dimension_groups"].append(dimension)
                    # avoid duplicate dimensions
                    ignore_base_fields.append(dimension["name"])

        view_defn["measures"] = self.get_measures(
            view_defn["dimensions"],
        )
        view_defn["sets"] = self._get_sets()
        view_defn["parameters"] = self._get_parameters(view_defn["dimensions"])

        return {"views": [view_defn]}
307
308    def get_dimensions(
309        self,
310        _table=None,
311        _v1_name: Optional[str] = None,
312        _dryrun=None,
313    ) -> List[Dict[str, Any]]:
314        """Get the set of dimensions for this view based on the metric definitions in metric-hub."""
315        namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
316            self.namespace
317        )
318        metric_definitions = namespace_definitions.metrics.definitions
319        data_source_name = re.sub("^metric_definitions_", "", self.name)
320
321        return [
322            {
323                "name": "client_id",
324                "type": "string",
325                "sql": "SAFE_CAST(${TABLE}.client_id AS STRING)",
326                "label": "Client ID",
327                "primary_key": "yes",
328                "group_label": "Base Fields",
329                "description": "Unique client identifier",
330            },
331        ] + [  # add a dimension for each metric definition
332            {
333                "name": metric_slug,
334                "group_label": "Metrics",
335                "label": metric.friendly_name
336                or lookml_utils.slug_to_title(metric_slug),
337                "description": metric.description or "",
338                "type": "number",
339                "sql": "${TABLE}." + metric_slug,
340            }
341            for metric_slug, metric in metric_definitions.items()
342            if metric.select_expression
343            and metric.data_source.name == data_source_name
344            and metric.type != "histogram"
345        ]
346
347    def get_dimension_groups(self) -> List[Dict[str, Any]]:
348        """Get dimension groups for this view."""
349        return [
350            {
351                "name": "submission",
352                "type": "time",
353                "group_label": "Base Fields",
354                "sql": "CAST(${TABLE}.analysis_basis AS TIMESTAMP)",
355                "label": "Submission",
356                "timeframes": [
357                    "raw",
358                    "date",
359                    "week",
360                    "month",
361                    "quarter",
362                    "year",
363                ],
364            }
365        ]
366
367    def _get_sets(self) -> List[Dict[str, Any]]:
368        """Generate metric sets."""
369        # group all the metric dimensions into a set
370        dimensions = self.get_dimensions()
371        measures = self.get_measures(dimensions)
372
373        return [
374            {
375                "name": "metrics",
376                "fields": [
377                    dimension["name"]
378                    for dimension in dimensions
379                    if dimension["name"] != "client_id"
380                ]
381                + [measure["name"] for measure in measures],
382            }
383        ]
384
385    def _get_parameters(self, dimensions: List[dict]):
386        hide_sampling = "yes"
387
388        for dim in dimensions:
389            if dim["name"] == "sample_id":
390                hide_sampling = "no"
391                break
392
393        return [
394            {
395                "name": "aggregate_metrics_by",
396                "label": "Aggregate Client Metrics Per",
397                "type": "unquoted",
398                "default_value": "day",
399                "allowed_values": [
400                    {"label": "Per Day", "value": "day"},
401                    {"label": "Per Week", "value": "week"},
402                    {"label": "Per Month", "value": "month"},
403                    {"label": "Per Quarter", "value": "quarter"},
404                    {"label": "Per Year", "value": "year"},
405                    {"label": "Overall", "value": "overall"},
406                ],
407            },
408            {
409                "name": "sampling",
410                "label": "Sample of source data in %",
411                "type": "unquoted",
412                "default_value": "100",
413                "hidden": hide_sampling,
414            },
415        ]
416
417    def get_measures(
418        self, dimensions: List[dict]
419    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
420        """Get statistics as measures."""
421        measures = []
422        sampling = "1"
423
424        for dim in dimensions:
425            if dim["name"] == "sample_id":
426                sampling = "100 / {% parameter sampling %}"
427                break
428
429        for dimension in dimensions:
430            metric = MetricsConfigLoader.configs.get_metric_definition(
431                dimension["name"], self.namespace
432            )
433            if metric and metric.statistics:
434                for statistic_slug, statistic_conf in metric.statistics.items():
435                    dimension_label = dimension.get("label") or dimension.get("name")
436                    if statistic_slug in [
437                        "average",
438                        "max",
439                        "min",
440                        "median",
441                    ]:
442                        measures.append(
443                            {
444                                "name": f"{dimension['name']}_{statistic_slug}",
445                                "type": statistic_slug,
446                                "sql": "${TABLE}." + dimension["name"],
447                                "label": f"{dimension_label} {statistic_slug.title()}",
448                                "group_label": "Statistics",
449                                "description": f"{statistic_slug.title()} of {dimension_label}",
450                            }
451                        )
452                    elif statistic_slug == "sum":
453                        measures.append(
454                            {
455                                "name": f"{dimension['name']}_{statistic_slug}",
456                                "type": "sum",
457                                "sql": "${TABLE}." + dimension["name"] + "*" + sampling,
458                                "label": f"{dimension_label} Sum",
459                                "group_label": "Statistics",
460                                "description": f"Sum of {dimension_label}",
461                            }
462                        )
463                    elif statistic_slug == "client_count":
464                        measures.append(
465                            {
466                                "name": (
467                                    f"{dimension['name']}_{statistic_slug}_sampled"
468                                    if sampling
469                                    else f"{dimension['name']}_{statistic_slug}"
470                                ),
471                                "type": "count_distinct",
472                                "label": f"{dimension_label} Client Count",
473                                "group_label": "Statistics",
474                                "sql": "IF(${TABLE}."
475                                + f"{dimension['name']} > 0, "
476                                + "${TABLE}.client_id, SAFE_CAST(NULL AS STRING))",
477                                "description": f"Number of clients with {dimension_label}",
478                                "hidden": "yes" if sampling else "no",
479                            }
480                        )
481
482                        if sampling:
483                            measures.append(
484                                {
485                                    "name": f"{dimension['name']}_{statistic_slug}",
486                                    "type": "number",
487                                    "label": f"{dimension_label} Client Count",
488                                    "group_label": "Statistics",
489                                    "sql": "${"
490                                    + f"{dimension['name']}_{statistic_slug}_sampled"
491                                    + "} *"
492                                    + sampling,
493                                    "description": f"Number of clients with {dimension_label}",
494                                }
495                            )
496                    elif statistic_slug == "dau_proportion":
497                        if "numerator" in statistic_conf:
498                            [numerator, numerator_stat] = statistic_conf[
499                                "numerator"
500                            ].split(".")
501                            measures.append(
502                                {
503                                    "name": "DAU_sampled" if sampling else "DAU",
504                                    "type": "count_distinct",
505                                    "label": "DAU",
506                                    "group_label": "Statistics",
507                                    "sql": "${TABLE}.client_id",
508                                    "hidden": "yes",
509                                }
510                            )
511
512                            if sampling:
513                                measures.append(
514                                    {
515                                        "name": "DAU",
516                                        "type": "number",
517                                        "label": "DAU",
518                                        "group_label": "Statistics",
519                                        "sql": "${DAU_sampled} *" + sampling,
520                                        "hidden": "yes",
521                                    }
522                                )
523
524                            measures.append(
525                                {
526                                    "name": f"{dimension['name']}_{statistic_slug}",
527                                    "type": "number",
528                                    "label": f"{dimension_label} DAU Proportion",
529                                    "sql": "SAFE_DIVIDE(${"
530                                    + f"{numerator}_{numerator_stat}"
531                                    + "}, ${DAU})",
532                                    "group_label": "Statistics",
533                                    "description": f"Proportion of daily active users with {dimension['name']}",
534                                }
535                            )
536                    elif statistic_slug == "ratio":
537                        if (
538                            "numerator" in statistic_conf
539                            and "denominator" in statistic_conf
540                        ):
541                            [numerator, numerator_stat] = statistic_conf[
542                                "numerator"
543                            ].split(".")
544                            [denominator, denominator_stat] = statistic_conf[
545                                "denominator"
546                            ].split(".")
547
548                            measures.append(
549                                {
550                                    "name": f"{dimension['name']}_{statistic_slug}",
551                                    "type": "number",
552                                    "label": f"{dimension_label} Ratio",
553                                    "sql": "SAFE_DIVIDE(${"
554                                    + f"{numerator}_{numerator_stat}"
555                                    + "}, ${"
556                                    + f"{denominator}_{denominator_stat}"
557                                    + "})",
558                                    "group_label": "Statistics",
559                                    "description": f""""
560                                        Ratio between {statistic_conf['numerator']} and
561                                        {statistic_conf['denominator']}""",
562                                }
563                            )
564                    elif statistic_slug == "rolling_average":
565                        aggregation = statistic_conf.get("aggregation", "sum")
566                        if "window_sizes" in statistic_conf:
567                            for window_size in statistic_conf["window_sizes"]:
568                                measures.append(
569                                    {
570                                        "name": f"{dimension['name']}_{window_size}_day_{statistic_slug}",
571                                        "type": "number",
572                                        "label": f"{dimension_label} {window_size} Day Rolling Average",
573                                        "sql": f"""
574                                            AVG({aggregation}(${{TABLE}}.{dimension["name"]} * {sampling})) OVER (
575                                                ROWS {window_size} PRECEDING
576                                        )""",
577                                        "group_label": "Statistics",
578                                        "description": f"{window_size} day rolling average of {dimension_label}",
579                                    }
580                                )
581
582        return measures
class MetricDefinitionsView(generator.views.view.View):
 15class MetricDefinitionsView(View):
 16    """A view for metric-hub metrics that come from the same data source."""
 17
 18    type: str = "metric_definitions_view"
 19
 20    def __init__(self, namespace: str, name: str, tables: List[Dict[str, str]]):
 21        """Get an instance of an MetricDefinitionsView."""
 22        super().__init__(namespace, name, MetricDefinitionsView.type, tables)
 23
 24    @classmethod
 25    def from_db_views(
 26        klass,
 27        namespace: str,
 28        is_glean: bool,
 29        channels: List[Dict[str, str]],
 30        db_views: dict,
 31    ) -> Iterator[MetricDefinitionsView]:
 32        """Get Metric Definition Views from db views and app variants."""
 33        return iter(())
 34
 35    @classmethod
 36    def from_dict(
 37        klass, namespace: str, name: str, definition: ViewDict
 38    ) -> MetricDefinitionsView:
 39        """Get a MetricDefinitionsView from a dict representation."""
 40        return klass(namespace, name, definition.get("tables", []))
 41
    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
        """Get this view as LookML.

        Builds a derived table that evaluates every metric-hub metric for this
        view's data source per client (grouped by client_id and the chosen
        analysis basis), then attaches dimensions, dimension groups, measures,
        sets, and parameters.

        Args:
            v1_name: Part of the common View.to_lookml interface; not used here.
            dryrun: Dryrun helper forwarded to
                lookml_utils._generate_dimensions_from_query to discover the
                columns of the generated SQL.

        Returns:
            A dict with a single "views" entry, or {} when the namespace,
            data source, or metric definitions are missing.
        """
        namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
            self.namespace
        )
        if namespace_definitions is None:
            return {}

        # get all metric definitions that depend on the data source represented by this view
        data_source_name = re.sub("^metric_definitions_", "", self.name)
        data_source_definition = MetricsConfigLoader.configs.get_data_source_definition(
            data_source_name, self.namespace
        )

        if data_source_definition is None:
            return {}

        # todo: hide deprecated metrics?
        # Render each metric's select expression through the metric-hub Jinja
        # environment; histogram metrics are excluded here.
        metric_definitions = [
            f"""{
                MetricsConfigLoader.configs.get_env().from_string(metric.select_expression).render()
            } AS {metric_slug},\n"""
            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
            if metric.select_expression
            and metric.data_source.name == data_source_name
            and metric.type != "histogram"
        ]

        if metric_definitions == []:
            return {}

        # Metric definitions are intended to be aggregated by client per date.
        # A derived table is needed to do these aggregations, instead of defining them as measures
        # we want to have them available as dimensions (which don't allow aggregations in their definitions)
        # to allow for custom measures to be later defined in Looker that aggregate these per client metrics.
        view_defn: Dict[str, Any] = {"name": self.name}

        # Fields that must not be duplicated as "Base Fields": identifiers,
        # date columns, and every metric slug already emitted above.
        ignore_base_fields = [
            "client_id",
            "submission_date",
            "submission",
            "first_run",
        ] + [
            metric_slug
            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
            if metric.select_expression
            and metric.data_source.name == data_source_name
            and metric.type != "histogram"
        ]

        base_view_dimensions = {}
        joined_data_sources = []

        # check if the metric data source has joins
        # joined data sources are generally used for creating the "Base Fields"
        if data_source_definition.joins:
            # determine the dimensions selected by the joined data sources
            for joined_data_source_slug, join in data_source_definition.joins.items():
                joined_data_source = (
                    MetricsConfigLoader.configs.get_data_source_definition(
                        joined_data_source_slug, self.namespace
                    )
                )

                if joined_data_source.columns_as_dimensions:
                    joined_data_sources.append(joined_data_source)

                    date_filter = None
                    if joined_data_source.submission_date_column != "NULL":
                        # Fixed date literal — presumably only meant to limit
                        # the dryrun query below, not to affect real queries.
                        # TODO(review): confirm.
                        date_filter = (
                            None
                            if joined_data_source.submission_date_column is None
                            or joined_data_source.submission_date_column == "NULL"
                            else f"{joined_data_source.submission_date_column} = '2023-01-01'"
                        )

                    # create Looker dimensions by doing a dryrun
                    query = MetricsConfigLoader.configs.get_data_source_sql(
                        joined_data_source_slug,
                        self.namespace,
                        where=date_filter,
                    ).format(dataset=self.namespace)

                    base_view_dimensions[joined_data_source_slug] = (
                        lookml_utils._generate_dimensions_from_query(
                            query, dryrun=dryrun
                        )
                    )

        if (
            data_source_definition.client_id_column == "NULL"
            and not base_view_dimensions
        ) or data_source_definition.columns_as_dimensions:
            # if the metrics data source doesn't have any joins then use the dimensions
            # of the data source itself as base fields
            date_filter = None
            if data_source_definition.submission_date_column != "NULL":
                date_filter = (
                    "submission_date = '2023-01-01'"
                    if data_source_definition.submission_date_column is None
                    else f"{data_source_definition.submission_date_column} = '2023-01-01'"
                )

            query = MetricsConfigLoader.configs.get_data_source_sql(
                data_source_definition.name,
                self.namespace,
                where=date_filter,
                ignore_joins=True,
            ).format(dataset=self.namespace)

            base_view_dimensions[data_source_definition.name] = (
                lookml_utils._generate_dimensions_from_query(query, dryrun)
            )

        # to prevent duplicate dimensions, especially when working with time dimensions
        # where names are modified potentially causing naming collisions
        seen_dimensions = set()
        # prepare base field data for query
        base_view_fields = []
        for data_source, dimensions in base_view_dimensions.items():
            for dimension in dimensions:
                if (
                    dimension["name"] not in ignore_base_fields
                    and dimension["name"] not in seen_dimensions
                    and "hidden" not in dimension
                ):
                    # "__" in dimension names stands in for "." in the
                    # underlying column path; restore it for the SQL select.
                    sql = (
                        f"{data_source}.{dimension['name'].replace('__', '.')} AS"
                        + f" {data_source}_{dimension['name']},\n"
                    )
                    # date/time/timestamp suffixes are removed when generating lookml dimensions, however we
                    # need the original field name for the derived view SQL
                    if dimension["type"] == "time" and not dimension["sql"].endswith(
                        dimension["name"]
                    ):
                        suffix = dimension["sql"].split(
                            dimension["name"].replace("__", ".")
                        )[-1]
                        sql = (
                            f"{data_source}.{(dimension['name']+suffix).replace('__', '.')} AS"
                            + f" {data_source}_{dimension['name']},\n"
                        )

                    base_view_fields.append(
                        {
                            "name": f"{data_source}_{dimension['name']}",
                            "select_sql": f"{data_source}_{dimension['name']},\n",
                            "sql": sql,
                        }
                    )
                    seen_dimensions.add(dimension["name"])

        client_id_field = (
            "NULL"
            if data_source_definition.client_id_column == "NULL"
            else f'{data_source_definition.client_id_column or "client_id"}'
        )

        # filters for date ranges
        where_sql = " AND ".join(
            [
                f"""
                    {data_source.name}.{data_source.submission_date_column or "submission_date"}
                    BETWEEN
                    COALESCE(
                        SAFE_CAST(
                            {{% date_start submission_date %}} AS DATE
                        ), CURRENT_DATE()) AND
                    COALESCE(
                        SAFE_CAST(
                            {{% date_end submission_date %}} AS DATE
                        ), CURRENT_DATE())
                """
                for data_source in [data_source_definition] + joined_data_sources
                if data_source.submission_date_column != "NULL"
            ]
        )

        # filter on sample_id if such a field exists
        for field in base_view_fields:
            if field["name"].endswith("_sample_id"):
                where_sql += f"""
                    AND
                        {field['name'].split('_sample_id')[0]}.sample_id < {{% parameter sampling %}}
                """
                break

        # Derived table: metric expressions plus base fields, grouped per
        # client and per the Liquid-selected analysis basis (day/week/...).
        view_defn["derived_table"] = {
            "sql": f"""
            SELECT
                {"".join(metric_definitions)}
                {"".join([field['select_sql'] for field in base_view_fields])}
                {client_id_field} AS client_id,
                {{% if aggregate_metrics_by._parameter_value == 'day' %}}
                {data_source_definition.submission_date_column or "submission_date"} AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'week'  %}}
                (FORMAT_DATE(
                    '%F',
                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
                    WEEK(MONDAY)))
                ) AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'month'  %}}
                (FORMAT_DATE(
                    '%Y-%m',
                    {data_source_definition.submission_date_column or "submission_date"})
                ) AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'quarter'  %}}
                (FORMAT_DATE(
                    '%Y-%m',
                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
                    QUARTER))
                ) AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'year'  %}}
                (EXTRACT(
                    YEAR FROM {data_source_definition.submission_date_column or "submission_date"})
                ) AS analysis_basis
                {{% else %}}
                NULL as analysis_basis
                {{% endif %}}
            FROM
                (
                    SELECT
                        {data_source_name}.*,
                        {"".join([field['sql'] for field in base_view_fields])}
                    FROM
                    {
                        MetricsConfigLoader.configs.get_data_source_sql(
                            data_source_name,
                            self.namespace,
                            select_fields=False
                        ).format(dataset=self.namespace)
                    }
                    WHERE {where_sql}
                )
            GROUP BY
                {"".join([field['select_sql'] for field in base_view_fields])}
                client_id,
                analysis_basis
            """
        }

        view_defn["dimensions"] = self.get_dimensions()
        view_defn["dimension_groups"] = self.get_dimension_groups()

        # add the Looker dimensions
        for data_source, dimensions in base_view_dimensions.items():
            for dimension in dimensions:
                if dimension["name"] not in ignore_base_fields:
                    dimension["sql"] = (
                        "${TABLE}." + f"{data_source}_{dimension['name']}"
                    )
                    dimension["group_label"] = "Base Fields"
                    if not lookml_utils._is_dimension_group(dimension):
                        view_defn["dimensions"].append(dimension)
                    else:
                        view_defn["dimension_groups"].append(dimension)
                    # avoid duplicate dimensions
                    ignore_base_fields.append(dimension["name"])

        view_defn["measures"] = self.get_measures(
            view_defn["dimensions"],
        )
        view_defn["sets"] = self._get_sets()
        view_defn["parameters"] = self._get_parameters(view_defn["dimensions"])

        return {"views": [view_defn]}
308
309    def get_dimensions(
310        self,
311        _table=None,
312        _v1_name: Optional[str] = None,
313        _dryrun=None,
314    ) -> List[Dict[str, Any]]:
315        """Get the set of dimensions for this view based on the metric definitions in metric-hub."""
316        namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
317            self.namespace
318        )
319        metric_definitions = namespace_definitions.metrics.definitions
320        data_source_name = re.sub("^metric_definitions_", "", self.name)
321
322        return [
323            {
324                "name": "client_id",
325                "type": "string",
326                "sql": "SAFE_CAST(${TABLE}.client_id AS STRING)",
327                "label": "Client ID",
328                "primary_key": "yes",
329                "group_label": "Base Fields",
330                "description": "Unique client identifier",
331            },
332        ] + [  # add a dimension for each metric definition
333            {
334                "name": metric_slug,
335                "group_label": "Metrics",
336                "label": metric.friendly_name
337                or lookml_utils.slug_to_title(metric_slug),
338                "description": metric.description or "",
339                "type": "number",
340                "sql": "${TABLE}." + metric_slug,
341            }
342            for metric_slug, metric in metric_definitions.items()
343            if metric.select_expression
344            and metric.data_source.name == data_source_name
345            and metric.type != "histogram"
346        ]
347
348    def get_dimension_groups(self) -> List[Dict[str, Any]]:
349        """Get dimension groups for this view."""
350        return [
351            {
352                "name": "submission",
353                "type": "time",
354                "group_label": "Base Fields",
355                "sql": "CAST(${TABLE}.analysis_basis AS TIMESTAMP)",
356                "label": "Submission",
357                "timeframes": [
358                    "raw",
359                    "date",
360                    "week",
361                    "month",
362                    "quarter",
363                    "year",
364                ],
365            }
366        ]
367
368    def _get_sets(self) -> List[Dict[str, Any]]:
369        """Generate metric sets."""
370        # group all the metric dimensions into a set
371        dimensions = self.get_dimensions()
372        measures = self.get_measures(dimensions)
373
374        return [
375            {
376                "name": "metrics",
377                "fields": [
378                    dimension["name"]
379                    for dimension in dimensions
380                    if dimension["name"] != "client_id"
381                ]
382                + [measure["name"] for measure in measures],
383            }
384        ]
385
386    def _get_parameters(self, dimensions: List[dict]):
387        hide_sampling = "yes"
388
389        for dim in dimensions:
390            if dim["name"] == "sample_id":
391                hide_sampling = "no"
392                break
393
394        return [
395            {
396                "name": "aggregate_metrics_by",
397                "label": "Aggregate Client Metrics Per",
398                "type": "unquoted",
399                "default_value": "day",
400                "allowed_values": [
401                    {"label": "Per Day", "value": "day"},
402                    {"label": "Per Week", "value": "week"},
403                    {"label": "Per Month", "value": "month"},
404                    {"label": "Per Quarter", "value": "quarter"},
405                    {"label": "Per Year", "value": "year"},
406                    {"label": "Overall", "value": "overall"},
407                ],
408            },
409            {
410                "name": "sampling",
411                "label": "Sample of source data in %",
412                "type": "unquoted",
413                "default_value": "100",
414                "hidden": hide_sampling,
415            },
416        ]
417
418    def get_measures(
419        self, dimensions: List[dict]
420    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
421        """Get statistics as measures."""
422        measures = []
423        sampling = "1"
424
425        for dim in dimensions:
426            if dim["name"] == "sample_id":
427                sampling = "100 / {% parameter sampling %}"
428                break
429
430        for dimension in dimensions:
431            metric = MetricsConfigLoader.configs.get_metric_definition(
432                dimension["name"], self.namespace
433            )
434            if metric and metric.statistics:
435                for statistic_slug, statistic_conf in metric.statistics.items():
436                    dimension_label = dimension.get("label") or dimension.get("name")
437                    if statistic_slug in [
438                        "average",
439                        "max",
440                        "min",
441                        "median",
442                    ]:
443                        measures.append(
444                            {
445                                "name": f"{dimension['name']}_{statistic_slug}",
446                                "type": statistic_slug,
447                                "sql": "${TABLE}." + dimension["name"],
448                                "label": f"{dimension_label} {statistic_slug.title()}",
449                                "group_label": "Statistics",
450                                "description": f"{statistic_slug.title()} of {dimension_label}",
451                            }
452                        )
453                    elif statistic_slug == "sum":
454                        measures.append(
455                            {
456                                "name": f"{dimension['name']}_{statistic_slug}",
457                                "type": "sum",
458                                "sql": "${TABLE}." + dimension["name"] + "*" + sampling,
459                                "label": f"{dimension_label} Sum",
460                                "group_label": "Statistics",
461                                "description": f"Sum of {dimension_label}",
462                            }
463                        )
464                    elif statistic_slug == "client_count":
465                        measures.append(
466                            {
467                                "name": (
468                                    f"{dimension['name']}_{statistic_slug}_sampled"
469                                    if sampling
470                                    else f"{dimension['name']}_{statistic_slug}"
471                                ),
472                                "type": "count_distinct",
473                                "label": f"{dimension_label} Client Count",
474                                "group_label": "Statistics",
475                                "sql": "IF(${TABLE}."
476                                + f"{dimension['name']} > 0, "
477                                + "${TABLE}.client_id, SAFE_CAST(NULL AS STRING))",
478                                "description": f"Number of clients with {dimension_label}",
479                                "hidden": "yes" if sampling else "no",
480                            }
481                        )
482
483                        if sampling:
484                            measures.append(
485                                {
486                                    "name": f"{dimension['name']}_{statistic_slug}",
487                                    "type": "number",
488                                    "label": f"{dimension_label} Client Count",
489                                    "group_label": "Statistics",
490                                    "sql": "${"
491                                    + f"{dimension['name']}_{statistic_slug}_sampled"
492                                    + "} *"
493                                    + sampling,
494                                    "description": f"Number of clients with {dimension_label}",
495                                }
496                            )
497                    elif statistic_slug == "dau_proportion":
498                        if "numerator" in statistic_conf:
499                            [numerator, numerator_stat] = statistic_conf[
500                                "numerator"
501                            ].split(".")
502                            measures.append(
503                                {
504                                    "name": "DAU_sampled" if sampling else "DAU",
505                                    "type": "count_distinct",
506                                    "label": "DAU",
507                                    "group_label": "Statistics",
508                                    "sql": "${TABLE}.client_id",
509                                    "hidden": "yes",
510                                }
511                            )
512
513                            if sampling:
514                                measures.append(
515                                    {
516                                        "name": "DAU",
517                                        "type": "number",
518                                        "label": "DAU",
519                                        "group_label": "Statistics",
520                                        "sql": "${DAU_sampled} *" + sampling,
521                                        "hidden": "yes",
522                                    }
523                                )
524
525                            measures.append(
526                                {
527                                    "name": f"{dimension['name']}_{statistic_slug}",
528                                    "type": "number",
529                                    "label": f"{dimension_label} DAU Proportion",
530                                    "sql": "SAFE_DIVIDE(${"
531                                    + f"{numerator}_{numerator_stat}"
532                                    + "}, ${DAU})",
533                                    "group_label": "Statistics",
534                                    "description": f"Proportion of daily active users with {dimension['name']}",
535                                }
536                            )
537                    elif statistic_slug == "ratio":
538                        if (
539                            "numerator" in statistic_conf
540                            and "denominator" in statistic_conf
541                        ):
542                            [numerator, numerator_stat] = statistic_conf[
543                                "numerator"
544                            ].split(".")
545                            [denominator, denominator_stat] = statistic_conf[
546                                "denominator"
547                            ].split(".")
548
549                            measures.append(
550                                {
551                                    "name": f"{dimension['name']}_{statistic_slug}",
552                                    "type": "number",
553                                    "label": f"{dimension_label} Ratio",
554                                    "sql": "SAFE_DIVIDE(${"
555                                    + f"{numerator}_{numerator_stat}"
556                                    + "}, ${"
557                                    + f"{denominator}_{denominator_stat}"
558                                    + "})",
559                                    "group_label": "Statistics",
560                                    "description": f""""
561                                        Ratio between {statistic_conf['numerator']} and
562                                        {statistic_conf['denominator']}""",
563                                }
564                            )
565                    elif statistic_slug == "rolling_average":
566                        aggregation = statistic_conf.get("aggregation", "sum")
567                        if "window_sizes" in statistic_conf:
568                            for window_size in statistic_conf["window_sizes"]:
569                                measures.append(
570                                    {
571                                        "name": f"{dimension['name']}_{window_size}_day_{statistic_slug}",
572                                        "type": "number",
573                                        "label": f"{dimension_label} {window_size} Day Rolling Average",
574                                        "sql": f"""
575                                            AVG({aggregation}(${{TABLE}}.{dimension["name"]} * {sampling})) OVER (
576                                                ROWS {window_size} PRECEDING
577                                        )""",
578                                        "group_label": "Statistics",
579                                        "description": f"{window_size} day rolling average of {dimension_label}",
580                                    }
581                                )
582
583        return measures

A view for metric-hub metrics that come from the same data source.

MetricDefinitionsView(namespace: str, name: str, tables: List[Dict[str, str]])
20    def __init__(self, namespace: str, name: str, tables: List[Dict[str, str]]):
21        """Get an instance of an MetricDefinitionsView."""
22        super().__init__(namespace, name, MetricDefinitionsView.type, tables)

Get an instance of a MetricDefinitionsView.

type: str = 'metric_definitions_view'
@classmethod
def from_db_views( klass, namespace: str, is_glean: bool, channels: List[Dict[str, str]], db_views: dict) -> Iterator[MetricDefinitionsView]:
24    @classmethod
25    def from_db_views(
26        klass,
27        namespace: str,
28        is_glean: bool,
29        channels: List[Dict[str, str]],
30        db_views: dict,
31    ) -> Iterator[MetricDefinitionsView]:
32        """Get Metric Definition Views from db views and app variants."""
33        return iter(())

Get Metric Definition Views from db views and app variants.

@classmethod
def from_dict( klass, namespace: str, name: str, definition: generator.views.view.ViewDict) -> MetricDefinitionsView:
35    @classmethod
36    def from_dict(
37        klass, namespace: str, name: str, definition: ViewDict
38    ) -> MetricDefinitionsView:
39        """Get a MetricDefinitionsView from a dict representation."""
40        return klass(namespace, name, definition.get("tables", []))

Get a MetricDefinitionsView from a dict representation.

def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 42    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 43        """Get this view as LookML."""
 44        namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
 45            self.namespace
 46        )
 47        if namespace_definitions is None:
 48            return {}
 49
 50        # get all metric definitions that depend on the data source represented by this view
 51        data_source_name = re.sub("^metric_definitions_", "", self.name)
 52        data_source_definition = MetricsConfigLoader.configs.get_data_source_definition(
 53            data_source_name, self.namespace
 54        )
 55
 56        if data_source_definition is None:
 57            return {}
 58
 59        # todo: hide deprecated metrics?
 60        metric_definitions = [
 61            f"""{
 62                MetricsConfigLoader.configs.get_env().from_string(metric.select_expression).render()
 63            } AS {metric_slug},\n"""
 64            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
 65            if metric.select_expression
 66            and metric.data_source.name == data_source_name
 67            and metric.type != "histogram"
 68        ]
 69
 70        if metric_definitions == []:
 71            return {}
 72
 73        # Metric definitions are intended to aggregated by client per date.
 74        # A derived table is needed to do these aggregations, instead of defining them as measures
 75        # we want to have them available as dimensions (which don't allow aggregations in their definitions)
 76        # to allow for custom measures to be later defined in Looker that aggregate these per client metrics.
 77        view_defn: Dict[str, Any] = {"name": self.name}
 78
 79        ignore_base_fields = [
 80            "client_id",
 81            "submission_date",
 82            "submission",
 83            "first_run",
 84        ] + [
 85            metric_slug
 86            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
 87            if metric.select_expression
 88            and metric.data_source.name == data_source_name
 89            and metric.type != "histogram"
 90        ]
 91
 92        base_view_dimensions = {}
 93        joined_data_sources = []
 94
 95        # check if the metric data source has joins
 96        # joined data sources are generally used for creating the "Base Fields"
 97        if data_source_definition.joins:
 98            # determine the dimensions selected by the joined data sources
 99            for joined_data_source_slug, join in data_source_definition.joins.items():
100                joined_data_source = (
101                    MetricsConfigLoader.configs.get_data_source_definition(
102                        joined_data_source_slug, self.namespace
103                    )
104                )
105
106                if joined_data_source.columns_as_dimensions:
107                    joined_data_sources.append(joined_data_source)
108
109                    date_filter = None
110                    if joined_data_source.submission_date_column != "NULL":
111                        date_filter = (
112                            None
113                            if joined_data_source.submission_date_column is None
114                            or joined_data_source.submission_date_column == "NULL"
115                            else f"{joined_data_source.submission_date_column} = '2023-01-01'"
116                        )
117
118                    # create Looker dimensions by doing a dryrun
119                    query = MetricsConfigLoader.configs.get_data_source_sql(
120                        joined_data_source_slug,
121                        self.namespace,
122                        where=date_filter,
123                    ).format(dataset=self.namespace)
124
125                    base_view_dimensions[joined_data_source_slug] = (
126                        lookml_utils._generate_dimensions_from_query(
127                            query, dryrun=dryrun
128                        )
129                    )
130
131        if (
132            data_source_definition.client_id_column == "NULL"
133            and not base_view_dimensions
134        ) or data_source_definition.columns_as_dimensions:
135            # if the metrics data source doesn't have any joins then use the dimensions
136            # of the data source itself as base fields
137            date_filter = None
138            if data_source_definition.submission_date_column != "NULL":
139                date_filter = (
140                    "submission_date = '2023-01-01'"
141                    if data_source_definition.submission_date_column is None
142                    else f"{data_source_definition.submission_date_column} = '2023-01-01'"
143                )
144
145            query = MetricsConfigLoader.configs.get_data_source_sql(
146                data_source_definition.name,
147                self.namespace,
148                where=date_filter,
149                ignore_joins=True,
150            ).format(dataset=self.namespace)
151
152            base_view_dimensions[data_source_definition.name] = (
153                lookml_utils._generate_dimensions_from_query(query, dryrun)
154            )
155
156        # to prevent duplicate dimensions, especially when working with time dimensions
157        # where names are modified potentially causing naming collisions
158        seen_dimensions = set()
159        # prepare base field data for query
160        base_view_fields = []
161        for data_source, dimensions in base_view_dimensions.items():
162            for dimension in dimensions:
163                if (
164                    dimension["name"] not in ignore_base_fields
165                    and dimension["name"] not in seen_dimensions
166                    and "hidden" not in dimension
167                ):
168                    sql = (
169                        f"{data_source}.{dimension['name'].replace('__', '.')} AS"
170                        + f" {data_source}_{dimension['name']},\n"
171                    )
172                    # date/time/timestamp suffixes are removed when generating lookml dimensions, however we
173                    # need the original field name for the derived view SQL
174                    if dimension["type"] == "time" and not dimension["sql"].endswith(
175                        dimension["name"]
176                    ):
177                        suffix = dimension["sql"].split(
178                            dimension["name"].replace("__", ".")
179                        )[-1]
180                        sql = (
181                            f"{data_source}.{(dimension['name']+suffix).replace('__', '.')} AS"
182                            + f" {data_source}_{dimension['name']},\n"
183                        )
184
185                    base_view_fields.append(
186                        {
187                            "name": f"{data_source}_{dimension['name']}",
188                            "select_sql": f"{data_source}_{dimension['name']},\n",
189                            "sql": sql,
190                        }
191                    )
192                    seen_dimensions.add(dimension["name"])
193
194        client_id_field = (
195            "NULL"
196            if data_source_definition.client_id_column == "NULL"
197            else f'{data_source_definition.client_id_column or "client_id"}'
198        )
199
200        # filters for date ranges
201        where_sql = " AND ".join(
202            [
203                f"""
204                    {data_source.name}.{data_source.submission_date_column or "submission_date"}
205                    BETWEEN
206                    COALESCE(
207                        SAFE_CAST(
208                            {{% date_start submission_date %}} AS DATE
209                        ), CURRENT_DATE()) AND
210                    COALESCE(
211                        SAFE_CAST(
212                            {{% date_end submission_date %}} AS DATE
213                        ), CURRENT_DATE())
214                """
215                for data_source in [data_source_definition] + joined_data_sources
216                if data_source.submission_date_column != "NULL"
217            ]
218        )
219
220        # filte on sample_id if such a field exists
221        for field in base_view_fields:
222            if field["name"].endswith("_sample_id"):
223                where_sql += f"""
224                    AND
225                        {field['name'].split('_sample_id')[0]}.sample_id < {{% parameter sampling %}}
226                """
227                break
228
229        view_defn["derived_table"] = {
230            "sql": f"""
231            SELECT
232                {"".join(metric_definitions)}
233                {"".join([field['select_sql'] for field in base_view_fields])}
234                {client_id_field} AS client_id,
235                {{% if aggregate_metrics_by._parameter_value == 'day' %}}
236                {data_source_definition.submission_date_column or "submission_date"} AS analysis_basis
237                {{% elsif aggregate_metrics_by._parameter_value == 'week'  %}}
238                (FORMAT_DATE(
239                    '%F',
240                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
241                    WEEK(MONDAY)))
242                ) AS analysis_basis
243                {{% elsif aggregate_metrics_by._parameter_value == 'month'  %}}
244                (FORMAT_DATE(
245                    '%Y-%m',
246                    {data_source_definition.submission_date_column or "submission_date"})
247                ) AS analysis_basis
248                {{% elsif aggregate_metrics_by._parameter_value == 'quarter'  %}}
249                (FORMAT_DATE(
250                    '%Y-%m',
251                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
252                    QUARTER))
253                ) AS analysis_basis
254                {{% elsif aggregate_metrics_by._parameter_value == 'year'  %}}
255                (EXTRACT(
256                    YEAR FROM {data_source_definition.submission_date_column or "submission_date"})
257                ) AS analysis_basis
258                {{% else %}}
259                NULL as analysis_basis
260                {{% endif %}}
261            FROM
262                (
263                    SELECT
264                        {data_source_name}.*,
265                        {"".join([field['sql'] for field in base_view_fields])}
266                    FROM
267                    {
268                        MetricsConfigLoader.configs.get_data_source_sql(
269                            data_source_name,
270                            self.namespace,
271                            select_fields=False
272                        ).format(dataset=self.namespace)
273                    }
274                    WHERE {where_sql}
275                )
276            GROUP BY
277                {"".join([field['select_sql'] for field in base_view_fields])}
278                client_id,
279                analysis_basis
280            """
281        }
282
283        view_defn["dimensions"] = self.get_dimensions()
284        view_defn["dimension_groups"] = self.get_dimension_groups()
285
286        # add the Looker dimensions
287        for data_source, dimensions in base_view_dimensions.items():
288            for dimension in dimensions:
289                if dimension["name"] not in ignore_base_fields:
290                    dimension["sql"] = (
291                        "${TABLE}." + f"{data_source}_{dimension['name']}"
292                    )
293                    dimension["group_label"] = "Base Fields"
294                    if not lookml_utils._is_dimension_group(dimension):
295                        view_defn["dimensions"].append(dimension)
296                    else:
297                        view_defn["dimension_groups"].append(dimension)
298                    # avoid duplicate dimensions
299                    ignore_base_fields.append(dimension["name"])
300
301        view_defn["measures"] = self.get_measures(
302            view_defn["dimensions"],
303        )
304        view_defn["sets"] = self._get_sets()
305        view_defn["parameters"] = self._get_parameters(view_defn["dimensions"])
306
307        return {"views": [view_defn]}

Get this view as LookML.

def get_dimensions( self, _table=None, _v1_name: Optional[str] = None, _dryrun=None) -> List[Dict[str, Any]]:
def get_dimensions(
    self,
    _table=None,
    _v1_name: Optional[str] = None,
    _dryrun=None,
) -> List[Dict[str, Any]]:
    """Get the set of dimensions for this view based on the metric definitions in metric-hub."""
    platform_definitions = MetricsConfigLoader.configs.get_platform_definitions(
        self.namespace
    )
    definitions = platform_definitions.metrics.definitions
    data_source_name = re.sub("^metric_definitions_", "", self.name)

    # client_id is always exposed and acts as the view's primary key
    dimensions: List[Dict[str, Any]] = [
        {
            "name": "client_id",
            "type": "string",
            "sql": "SAFE_CAST(${TABLE}.client_id AS STRING)",
            "label": "Client ID",
            "primary_key": "yes",
            "group_label": "Base Fields",
            "description": "Unique client identifier",
        },
    ]

    # one dimension per metric of this view's data source; metrics without
    # a select expression and histogram metrics are skipped
    for metric_slug, metric in definitions.items():
        if (
            not metric.select_expression
            or metric.data_source.name != data_source_name
            or metric.type == "histogram"
        ):
            continue
        dimensions.append(
            {
                "name": metric_slug,
                "group_label": "Metrics",
                "label": metric.friendly_name
                or lookml_utils.slug_to_title(metric_slug),
                "description": metric.description or "",
                "type": "number",
                "sql": "${TABLE}." + metric_slug,
            }
        )

    return dimensions

Get the set of dimensions for this view based on the metric definitions in metric-hub.

def get_dimension_groups(self) -> List[Dict[str, Any]]:
def get_dimension_groups(self) -> List[Dict[str, Any]]:
    """Get dimension groups for this view."""
    # the derived table's analysis_basis column drives the submission
    # time dimension group
    timeframes = [
        "raw",
        "date",
        "week",
        "month",
        "quarter",
        "year",
    ]
    submission_group = {
        "name": "submission",
        "type": "time",
        "group_label": "Base Fields",
        "sql": "CAST(${TABLE}.analysis_basis AS TIMESTAMP)",
        "label": "Submission",
        "timeframes": timeframes,
    }
    return [submission_group]

Get dimension groups for this view.

def get_measures( self, dimensions: List[dict]) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
def get_measures(
    self, dimensions: List[dict]
) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
    """Get statistics as measures.

    For each dimension that corresponds to a metric-hub metric with
    configured statistics, emit the matching Looker measures (averages,
    sums, client counts, DAU proportions, ratios, rolling averages).

    Args:
        dimensions: LookML dimensions of this view.

    Returns:
        List of LookML measure definitions.
    """
    measures = []

    # scale sampled values back up when the view exposes sample_id
    sampling = "1"
    for dim in dimensions:
        if dim["name"] == "sample_id":
            sampling = "100 / {% parameter sampling %}"
            break

    # NOTE(review): `sampling` is always truthy (it defaults to the string
    # "1"), so the `if sampling` branches below always take the sampled
    # path — confirm whether a boolean flag was intended.
    for dimension in dimensions:
        metric = MetricsConfigLoader.configs.get_metric_definition(
            dimension["name"], self.namespace
        )
        if metric and metric.statistics:
            for statistic_slug, statistic_conf in metric.statistics.items():
                dimension_label = dimension.get("label") or dimension.get("name")
                if statistic_slug in [
                    "average",
                    "max",
                    "min",
                    "median",
                ]:
                    measures.append(
                        {
                            "name": f"{dimension['name']}_{statistic_slug}",
                            "type": statistic_slug,
                            "sql": "${TABLE}." + dimension["name"],
                            "label": f"{dimension_label} {statistic_slug.title()}",
                            "group_label": "Statistics",
                            "description": f"{statistic_slug.title()} of {dimension_label}",
                        }
                    )
                elif statistic_slug == "sum":
                    measures.append(
                        {
                            "name": f"{dimension['name']}_{statistic_slug}",
                            "type": "sum",
                            "sql": "${TABLE}." + dimension["name"] + "*" + sampling,
                            "label": f"{dimension_label} Sum",
                            "group_label": "Statistics",
                            "description": f"Sum of {dimension_label}",
                        }
                    )
                elif statistic_slug == "client_count":
                    measures.append(
                        {
                            "name": (
                                f"{dimension['name']}_{statistic_slug}_sampled"
                                if sampling
                                else f"{dimension['name']}_{statistic_slug}"
                            ),
                            "type": "count_distinct",
                            "label": f"{dimension_label} Client Count",
                            "group_label": "Statistics",
                            "sql": "IF(${TABLE}."
                            + f"{dimension['name']} > 0, "
                            + "${TABLE}.client_id, SAFE_CAST(NULL AS STRING))",
                            "description": f"Number of clients with {dimension_label}",
                            "hidden": "yes" if sampling else "no",
                        }
                    )

                    if sampling:
                        # un-sampled companion measure scales the hidden
                        # sampled count back up
                        measures.append(
                            {
                                "name": f"{dimension['name']}_{statistic_slug}",
                                "type": "number",
                                "label": f"{dimension_label} Client Count",
                                "group_label": "Statistics",
                                "sql": "${"
                                + f"{dimension['name']}_{statistic_slug}_sampled"
                                + "} *"
                                + sampling,
                                "description": f"Number of clients with {dimension_label}",
                            }
                        )
                elif statistic_slug == "dau_proportion":
                    if "numerator" in statistic_conf:
                        [numerator, numerator_stat] = statistic_conf[
                            "numerator"
                        ].split(".")
                        measures.append(
                            {
                                "name": "DAU_sampled" if sampling else "DAU",
                                "type": "count_distinct",
                                "label": "DAU",
                                "group_label": "Statistics",
                                "sql": "${TABLE}.client_id",
                                "hidden": "yes",
                            }
                        )

                        if sampling:
                            measures.append(
                                {
                                    "name": "DAU",
                                    "type": "number",
                                    "label": "DAU",
                                    "group_label": "Statistics",
                                    "sql": "${DAU_sampled} *" + sampling,
                                    "hidden": "yes",
                                }
                            )

                        measures.append(
                            {
                                "name": f"{dimension['name']}_{statistic_slug}",
                                "type": "number",
                                "label": f"{dimension_label} DAU Proportion",
                                "sql": "SAFE_DIVIDE(${"
                                + f"{numerator}_{numerator_stat}"
                                + "}, ${DAU})",
                                "group_label": "Statistics",
                                "description": f"Proportion of daily active users with {dimension['name']}",
                            }
                        )
                elif statistic_slug == "ratio":
                    if (
                        "numerator" in statistic_conf
                        and "denominator" in statistic_conf
                    ):
                        [numerator, numerator_stat] = statistic_conf[
                            "numerator"
                        ].split(".")
                        [denominator, denominator_stat] = statistic_conf[
                            "denominator"
                        ].split(".")

                        measures.append(
                            {
                                "name": f"{dimension['name']}_{statistic_slug}",
                                "type": "number",
                                "label": f"{dimension_label} Ratio",
                                "sql": "SAFE_DIVIDE(${"
                                + f"{numerator}_{numerator_stat}"
                                + "}, ${"
                                + f"{denominator}_{denominator_stat}"
                                + "})",
                                "group_label": "Statistics",
                                # fixed: was f"""" (four quotes), which injected
                                # a stray leading double quote into the
                                # generated description text
                                "description": f"""
                                    Ratio between {statistic_conf['numerator']} and
                                    {statistic_conf['denominator']}""",
                            }
                        )
                elif statistic_slug == "rolling_average":
                    aggregation = statistic_conf.get("aggregation", "sum")
                    if "window_sizes" in statistic_conf:
                        for window_size in statistic_conf["window_sizes"]:
                            measures.append(
                                {
                                    "name": f"{dimension['name']}_{window_size}_day_{statistic_slug}",
                                    "type": "number",
                                    "label": f"{dimension_label} {window_size} Day Rolling Average",
                                    "sql": f"""
                                        AVG({aggregation}(${{TABLE}}.{dimension["name"]} * {sampling})) OVER (
                                            ROWS {window_size} PRECEDING
                                    )""",
                                    "group_label": "Statistics",
                                    "description": f"{window_size} day rolling average of {dimension_label}",
                                }
                            )

    return measures

Get statistics as measures.