generator.views.metric_definitions_view

Class to describe a view with metrics from metric-hub.

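For orientation, a minimal usage sketch. The namespace and view name below are hypothetical, and dryrun stands in for whatever dry-run client the generator normally threads through to lookml_utils._generate_dimensions_from_query:

    from generator.views.metric_definitions_view import MetricDefinitionsView

    # The view name encodes the metric-hub data source it represents:
    # "metric_definitions_<data_source_name>".
    view = MetricDefinitionsView.from_dict(
        namespace="firefox_desktop",              # hypothetical namespace
        name="metric_definitions_clients_daily",  # hypothetical data source
        definition={"tables": []},
    )

    # Returns {"views": [view_defn]} with a derived table, dimensions,
    # dimension groups, measures, sets, parameters, and filters -- or {}
    # if the namespace or data source has no matching metric definitions.
    lookml = view.to_lookml(v1_name=None, dryrun=dryrun)
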
  1"""Class to describe a view with metrics from metric-hub."""
  2
  3from __future__ import annotations
  4
  5import re
  6from typing import Any, Dict, Iterator, List, Optional, Union
  7
  8from generator.metrics_utils import MetricsConfigLoader
  9
 10from . import lookml_utils
 11from .view import View, ViewDict
 12
 13
 14class MetricDefinitionsView(View):
 15    """A view for metric-hub metrics that come from the same data source."""
 16
 17    type: str = "metric_definitions_view"
 18
 19    # Time unit divisors for converting days to different granularities
 20    TIME_UNITS = [
 21        ("date", 1),  # Daily: no conversion needed
 22        ("week", 7),  # Weekly: divide by 7
 23        ("month", 30),  # Monthly: approximate 30 days per month
 24        ("quarter", 90),  # Quarterly: approximate 90 days per quarter
 25        ("year", 365),  # Yearly: approximate 365 days per year
 26    ]
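       # Example: a 7-day offset spans 7 rows at daily granularity,
       # 7 // 7 = 1 row at weekly, and 7 // 30 = 0 rows at monthly, so short
       # offsets can floor to zero at coarser granularities (see the
       # standalone sketch after this listing).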
 27
 28    def __init__(self, namespace: str, name: str, tables: List[Dict[str, str]]):
 29        """Get an instance of a MetricDefinitionsView."""
 30        super().__init__(namespace, name, MetricDefinitionsView.type, tables)
 31
 32    @classmethod
 33    def from_db_views(
 34        klass,
 35        namespace: str,
 36        is_glean: bool,
 37        channels: List[Dict[str, str]],
 38        db_views: dict,
 39    ) -> Iterator[MetricDefinitionsView]:
 40        """Get Metric Definition Views from db views and app variants."""
 41        return iter(())
 42
 43    @classmethod
 44    def from_dict(
 45        klass, namespace: str, name: str, definition: ViewDict
 46    ) -> MetricDefinitionsView:
 47        """Get a MetricDefinitionsView from a dict representation."""
 48        return klass(namespace, name, definition.get("tables", []))
 49
 50    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 51        """Get this view as LookML."""
 52        namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
 53            self.namespace
 54        )
 55        if namespace_definitions is None:
 56            return {}
 57
 58        # get all metric definitions that depend on the data source represented by this view
 59        data_source_name = re.sub("^metric_definitions_", "", self.name)
 60        data_source_definition = MetricsConfigLoader.configs.get_data_source_definition(
 61            data_source_name, self.namespace
 62        )
 63
 64        if data_source_definition is None:
 65            return {}
 66
 67        # todo: hide deprecated metrics?
 68        metric_definitions = [
 69            f"""{
 70                MetricsConfigLoader.configs.get_env().from_string(metric.select_expression).render()
 71            } AS {metric_slug},\n"""
 72            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
 73            if metric.select_expression
 74            and metric.data_source.name == data_source_name
 75            and metric.type != "histogram"
 76        ]
 77
 78        if metric_definitions == []:
 79            return {}
 80
 81        # Metric definitions are intended to be aggregated per client per date.
 82        # A derived table is needed to do these aggregations. Instead of defining them as
 83        # measures, we expose them as dimensions (which don't allow aggregations in their
 84        # definitions) so that custom measures aggregating these per-client metrics can later be defined in Looker.
 85        view_defn: Dict[str, Any] = {"name": self.name}
 86
 87        ignore_base_fields = [
 88            "client_id",
 89            "submission_date",
 90            "submission",
 91            "first_run",
 92        ] + [
 93            metric_slug
 94            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
 95            if metric.select_expression
 96            and metric.data_source.name == data_source_name
 97            and metric.type != "histogram"
 98        ]
 99
100        base_view_dimensions = {}
101        joined_data_sources = []
102
103        # check if the metric data source has joins
104        # joined data sources are generally used for creating the "Base Fields"
105        if data_source_definition.joins:
106            # determine the dimensions selected by the joined data sources
107            for joined_data_source_slug, join in data_source_definition.joins.items():
108                joined_data_source = (
109                    MetricsConfigLoader.configs.get_data_source_definition(
110                        joined_data_source_slug, self.namespace
111                    )
112                )
113
114                if joined_data_source.columns_as_dimensions:
115                    joined_data_sources.append(joined_data_source)
116
117                    date_filter = None
118                    if joined_data_source.submission_date_column != "NULL":
119                        date_filter = (
120                            None
121                            if joined_data_source.submission_date_column is None
122                            or joined_data_source.submission_date_column == "NULL"
123                            else f"{joined_data_source.submission_date_column} = '2023-01-01'"
124                        )
125
126                    # create Looker dimensions by doing a dryrun
127                    query = MetricsConfigLoader.configs.get_data_source_sql(
128                        joined_data_source_slug,
129                        self.namespace,
130                        where=date_filter,
131                    ).format(dataset=self.namespace)
132
133                    base_view_dimensions[joined_data_source_slug] = (
134                        lookml_utils._generate_dimensions_from_query(
135                            query, dryrun=dryrun
136                        )
137                    )
138
139        if (
140            data_source_definition.client_id_column == "NULL"
141            and not base_view_dimensions
142        ) or data_source_definition.columns_as_dimensions:
143            # if the metrics data source doesn't have any joins then use the dimensions
144            # of the data source itself as base fields
145            date_filter = None
146            if data_source_definition.submission_date_column != "NULL":
147                date_filter = (
148                    "submission_date = '2023-01-01'"
149                    if data_source_definition.submission_date_column is None
150                    else f"{data_source_definition.submission_date_column} = '2023-01-01'"
151                )
152
153            query = MetricsConfigLoader.configs.get_data_source_sql(
154                data_source_definition.name,
155                self.namespace,
156                where=date_filter,
157                ignore_joins=True,
158            ).format(dataset=self.namespace)
159
160            base_view_dimensions[data_source_definition.name] = (
161                lookml_utils._generate_dimensions_from_query(query, dryrun)
162            )
163
164        # prevent duplicate dimensions, especially time dimensions whose names
165        # are modified and could otherwise cause naming collisions
166        seen_dimensions = set()
167        # prepare base field data for query
168        base_view_fields = []
169        for data_source, dimensions in base_view_dimensions.items():
170            for dimension in dimensions:
171                if (
172                    dimension["name"] not in ignore_base_fields
173                    and dimension["name"] not in seen_dimensions
174                    and "hidden" not in dimension
175                ):
176                    sql = (
177                        f"{data_source}.{dimension['name'].replace('__', '.')} AS"
178                        + f" {data_source}_{dimension['name']},\n"
179                    )
180                    # date/time/timestamp suffixes are removed when generating lookml dimensions;
181                    # however, we need the original field name for the derived view SQL
182                    if dimension["type"] == "time" and not dimension["sql"].endswith(
183                        dimension["name"]
184                    ):
185                        suffix = dimension["sql"].split(
186                            dimension["name"].replace("__", ".")
187                        )[-1]
188                        sql = (
189                            f"{data_source}.{(dimension['name']+suffix).replace('__', '.')} AS"
190                            + f" {data_source}_{dimension['name']},\n"
191                        )
192
193                    base_view_fields.append(
194                        {
195                            "name": f"{data_source}_{dimension['name']}",
196                            "select_sql": f"{data_source}_{dimension['name']},\n",
197                            "sql": sql,
198                        }
199                    )
200                    seen_dimensions.add(dimension["name"])
201
202        client_id_field = (
203            "NULL"
204            if data_source_definition.client_id_column == "NULL"
205            else f'{data_source_definition.client_id_column or "client_id"}'
206        )
207
208        # filters for date ranges
209        where_sql = " AND ".join(
210            [
211                f"""
212                    {data_source.name}.{data_source.submission_date_column or "submission_date"}
213                    {{% if analysis_period._is_filtered %}}
214                    BETWEEN
215                    DATE_SUB(
216                        COALESCE(
217                            SAFE_CAST(
218                                {{% date_start analysis_period %}} AS DATE
219                            ), CURRENT_DATE()),
220                        INTERVAL {{% parameter lookback_days %}} DAY
221                    ) AND
222                    COALESCE(
223                        SAFE_CAST(
224                            {{% date_end analysis_period %}} AS DATE
225                        ), CURRENT_DATE())
226                    {{% else %}}
227                    BETWEEN
228                    DATE_SUB(
229                        COALESCE(
230                            SAFE_CAST(
231                                {{% date_start submission_date %}} AS DATE
232                            ), CURRENT_DATE()),
233                        INTERVAL {{% parameter lookback_days %}} DAY
234                    ) AND
235                    COALESCE(
236                        SAFE_CAST(
237                            {{% date_end submission_date %}} AS DATE
238                        ), CURRENT_DATE())
239                    {{% endif %}}
240                """
241                for data_source in [data_source_definition] + joined_data_sources
242                if data_source.submission_date_column != "NULL"
243            ]
244        )
245
246        # filter on sample_id if such a field exists
247        for field in base_view_fields:
248            if field["name"].endswith("_sample_id"):
249                where_sql += f"""
250                    AND
251                        {field['name'].split('_sample_id')[0]}.sample_id < {{% parameter sampling %}}
252                """
253                break
254
255        view_defn["derived_table"] = {
256            "sql": f"""
257            SELECT
258                {"".join(metric_definitions)}
259                {"".join([field['select_sql'] for field in base_view_fields])}
260                {client_id_field} AS client_id,
261                {{% if aggregate_metrics_by._parameter_value == 'day' %}}
262                {data_source_definition.submission_date_column or "submission_date"} AS analysis_basis
263                {{% elsif aggregate_metrics_by._parameter_value == 'week'  %}}
264                (FORMAT_DATE(
265                    '%F',
266                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
267                    WEEK(MONDAY)))
268                ) AS analysis_basis
269                {{% elsif aggregate_metrics_by._parameter_value == 'month'  %}}
270                (FORMAT_DATE(
271                    '%Y-%m',
272                    {data_source_definition.submission_date_column or "submission_date"})
273                ) AS analysis_basis
274                {{% elsif aggregate_metrics_by._parameter_value == 'quarter'  %}}
275                (FORMAT_DATE(
276                    '%Y-%m',
277                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
278                    QUARTER))
279                ) AS analysis_basis
280                {{% elsif aggregate_metrics_by._parameter_value == 'year'  %}}
281                (EXTRACT(
282                    YEAR FROM {data_source_definition.submission_date_column or "submission_date"})
283                ) AS analysis_basis
284                {{% else %}}
285                NULL as analysis_basis
286                {{% endif %}}
287            FROM
288                (
289                    SELECT
290                        {data_source_name}.*,
291                        {"".join([field['sql'] for field in base_view_fields])}
292                    FROM
293                    {
294                        MetricsConfigLoader.configs.get_data_source_sql(
295                            data_source_name,
296                            self.namespace,
297                            select_fields=False
298                        ).format(dataset=self.namespace)
299                    }
300                    WHERE {where_sql}
301                )
302            GROUP BY
303                {"".join([field['select_sql'] for field in base_view_fields])}
304                client_id,
305                analysis_basis
306            """
307        }
308
309        view_defn["dimensions"] = self.get_dimensions()
310        view_defn["dimension_groups"] = self.get_dimension_groups()
311
312        # add the Looker dimensions
313        for data_source, dimensions in base_view_dimensions.items():
314            for dimension in dimensions:
315                if dimension["name"] not in ignore_base_fields:
316                    dimension["sql"] = (
317                        "${TABLE}." + f"{data_source}_{dimension['name']}"
318                    )
319                    dimension["group_label"] = "Base Fields"
320                    if not lookml_utils._is_dimension_group(dimension):
321                        view_defn["dimensions"].append(dimension)
322                    else:
323                        view_defn["dimension_groups"].append(dimension)
324                    # avoid duplicate dimensions
325                    ignore_base_fields.append(dimension["name"])
326
327        view_defn["measures"] = self.get_measures(
328            view_defn["dimensions"],
329        )
330        view_defn["sets"] = self._get_sets()
331        view_defn["parameters"] = self._get_parameters(view_defn["dimensions"])
332        view_defn["filters"] = self._get_filters()
333
334        return {"views": [view_defn]}
335
336    def get_dimensions(
337        self,
338        _table=None,
339        _v1_name: Optional[str] = None,
340        _dryrun=None,
341    ) -> List[Dict[str, Any]]:
342        """Get the set of dimensions for this view based on the metric definitions in metric-hub."""
343        namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
344            self.namespace
345        )
346        metric_definitions = namespace_definitions.metrics.definitions
347        data_source_name = re.sub("^metric_definitions_", "", self.name)
348
349        return [
350            {
351                "name": "client_id",
352                "type": "string",
353                "sql": "SAFE_CAST(${TABLE}.client_id AS STRING)",
354                "label": "Client ID",
355                "primary_key": "yes",
356                "group_label": "Base Fields",
357                "description": "Unique client identifier",
358            },
359        ] + [  # add a dimension for each metric definition
360            {
361                "name": metric_slug,
362                "group_label": "Metrics",
363                "label": metric.friendly_name
364                or lookml_utils.slug_to_title(metric_slug),
365                "description": metric.description or "",
366                "type": "number",
367                "sql": "${TABLE}." + metric_slug,
368            }
369            for metric_slug, metric in metric_definitions.items()
370            if metric.select_expression
371            and metric.data_source.name == data_source_name
372            and metric.type != "histogram"
373        ]
374
375    def get_dimension_groups(self) -> List[Dict[str, Any]]:
376        """Get dimension groups for this view."""
377        return [
378            {
379                "name": "submission",
380                "type": "time",
381                "datatype": "date",
382                "group_label": "Base Fields",
383                "sql": "${TABLE}.analysis_basis",
384                "label": "Submission",
385                "timeframes": [
386                    "raw",
387                    "date",
388                    "week",
389                    "month",
390                    "quarter",
391                    "year",
392                ],
393            }
394        ]
395
396    def _get_sets(self) -> List[Dict[str, Any]]:
397        """Generate metric sets."""
398        # group all the metric dimensions into a set
399        dimensions = self.get_dimensions()
400        measures = self.get_measures(dimensions)
401
402        return [
403            {
404                "name": "metrics",
405                "fields": [
406                    dimension["name"]
407                    for dimension in dimensions
408                    if dimension["name"] != "client_id"
409                ]
410                + [measure["name"] for measure in measures],
411            }
412        ]
413
414    def _get_parameters(self, dimensions: List[dict]):
415        hide_sampling = "yes"
416
417        for dim in dimensions:
418            if dim["name"] == "sample_id":
419                hide_sampling = "no"
420                break
421
422        return [
423            {
424                "name": "aggregate_metrics_by",
425                "label": "Aggregate Client Metrics Per",
426                "type": "unquoted",
427                "default_value": "day",
428                "allowed_values": [
429                    {"label": "Per Day", "value": "day"},
430                    {"label": "Per Week", "value": "week"},
431                    {"label": "Per Month", "value": "month"},
432                    {"label": "Per Quarter", "value": "quarter"},
433                    {"label": "Per Year", "value": "year"},
434                    {"label": "Overall", "value": "overall"},
435                ],
436            },
437            {
438                "name": "sampling",
439                "label": "Sample of source data in %",
440                "type": "unquoted",
441                "default_value": "100",
442                "hidden": hide_sampling,
443            },
444            {
445                "name": "lookback_days",
446                "label": "Lookback (Days)",
447                "type": "unquoted",
448                "description": "Number of days added before the filtered date range. "
449                + "Useful for period-over-period comparisons.",
450                "default_value": "0",
451            },
452            {
453                "name": "date_groupby_position",
454                "label": "Date Group By Position",
455                "type": "unquoted",
456                "description": "Position of the date field in the group by clause. "
457                + "Required when submission_week, submission_month, submission_quarter, or submission_year "
458                + "is selected, as BigQuery can't correctly resolve the GROUP BY otherwise.",
459                "default_value": "",
460            },
461        ]
462
463    def _get_filters(self):
464        return [
465            {
466                "name": "analysis_period",
467                "type": "date",
468                "label": "Analysis Period (with Lookback)",
469                "description": "Use this filter to define the main analysis period. "
470                + "The results will include the selected date range plus any additional "
471                + "days specified by the 'Lookback days' setting.",
472            }
473        ]
474
475    def get_measures(
476        self, dimensions: List[dict]
477    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
478        """Get statistics as measures."""
479        measures = []
480        sampling = "1"
481
482        for dim in dimensions:
483            if dim["name"] == "sample_id":
484                sampling = "100 / {% parameter sampling %}"
485                break
486
487        for dimension in dimensions:
488            metric = MetricsConfigLoader.configs.get_metric_definition(
489                dimension["name"], self.namespace
490            )
491            if metric and metric.statistics:
492                for statistic_slug, statistic_conf in metric.statistics.items():
493                    dimension_label = dimension.get("label") or dimension.get("name")
494                    if statistic_slug in [
495                        "average",
496                        "max",
497                        "min",
498                        "median",
499                    ]:
500                        measures.append(
501                            {
502                                "name": f"{dimension['name']}_{statistic_slug}",
503                                "type": statistic_slug,
504                                "sql": "${TABLE}." + dimension["name"],
505                                "label": f"{dimension_label} {statistic_slug.title()}",
506                                "group_label": "Statistics",
507                                "description": f"{statistic_slug.title()} of {dimension_label}",
508                            }
509                        )
510                    elif statistic_slug == "sum":
511                        measures.append(
512                            {
513                                "name": f"{dimension['name']}_{statistic_slug}",
514                                "type": "sum",
515                                "sql": "${TABLE}." + dimension["name"] + "*" + sampling,
516                                "label": f"{dimension_label} Sum",
517                                "group_label": "Statistics",
518                                "description": f"Sum of {dimension_label}",
519                            }
520                        )
521                    elif statistic_slug == "client_count":
522                        measures.append(
523                            {
524                                "name": (
525                                    f"{dimension['name']}_{statistic_slug}_sampled"
526                                    if sampling
527                                    else f"{dimension['name']}_{statistic_slug}"
528                                ),
529                                "type": "count_distinct",
530                                "label": f"{dimension_label} Client Count",
531                                "group_label": "Statistics",
532                                "sql": "IF(${TABLE}."
533                                + f"{dimension['name']} > 0, "
534                                + "${TABLE}.client_id, SAFE_CAST(NULL AS STRING))",
535                                "description": f"Number of clients with {dimension_label}",
536                                "hidden": "yes" if sampling else "no",
537                            }
538                        )
539
540                        if sampling:
541                            measures.append(
542                                {
543                                    "name": f"{dimension['name']}_{statistic_slug}",
544                                    "type": "number",
545                                    "label": f"{dimension_label} Client Count",
546                                    "group_label": "Statistics",
547                                    "sql": "${"
548                                    + f"{dimension['name']}_{statistic_slug}_sampled"
549                                    + "} *"
550                                    + sampling,
551                                    "description": f"Number of clients with {dimension_label}",
552                                }
553                            )
554                    elif statistic_slug == "dau_proportion":
555                        if "numerator" in statistic_conf:
556                            [numerator, numerator_stat] = statistic_conf[
557                                "numerator"
558                            ].split(".")
559                            measures.append(
560                                {
561                                    "name": "DAU_sampled" if sampling else "DAU",
562                                    "type": "count_distinct",
563                                    "label": "DAU",
564                                    "group_label": "Statistics",
565                                    "sql": "${TABLE}.client_id",
566                                    "hidden": "yes",
567                                }
568                            )
569
570                            if sampling:
571                                measures.append(
572                                    {
573                                        "name": "DAU",
574                                        "type": "number",
575                                        "label": "DAU",
576                                        "group_label": "Statistics",
577                                        "sql": "${DAU_sampled} *" + sampling,
578                                        "hidden": "yes",
579                                    }
580                                )
581
582                            measures.append(
583                                {
584                                    "name": f"{dimension['name']}_{statistic_slug}",
585                                    "type": "number",
586                                    "label": f"{dimension_label} DAU Proportion",
587                                    "sql": "SAFE_DIVIDE(${"
588                                    + f"{numerator}_{numerator_stat}"
589                                    + "}, ${DAU})",
590                                    "group_label": "Statistics",
591                                    "description": f"Proportion of daily active users with {dimension['name']}",
592                                }
593                            )
594                    elif statistic_slug == "ratio":
595                        if (
596                            "numerator" in statistic_conf
597                            and "denominator" in statistic_conf
598                        ):
599                            [numerator, numerator_stat] = statistic_conf[
600                                "numerator"
601                            ].split(".")
602                            [denominator, denominator_stat] = statistic_conf[
603                                "denominator"
604                            ].split(".")
605
606                            measures.append(
607                                {
608                                    "name": f"{dimension['name']}_{statistic_slug}",
609                                    "type": "number",
610                                    "label": f"{dimension_label} Ratio",
611                                    "sql": "SAFE_DIVIDE(${"
612                                    + f"{numerator}_{numerator_stat}"
613                                    + "}, ${"
614                                    + f"{denominator}_{denominator_stat}"
615                                    + "})",
616                                    "group_label": "Statistics",
617                                    "description": f"""
618                                        Ratio between {statistic_conf['numerator']} and
619                                        {statistic_conf['denominator']}""",
620                                }
621                            )
622                    elif statistic_slug == "rolling_average":
623                        # rolling averages are computed over existing statistics (e.g. sum, ratio)
624                        aggregations = statistic_conf.get("aggregations", ["sum"])
625                        for aggregation in aggregations:
626                            # find measures that match the current dimension and aggregation type
627                            matching_measures = [
628                                m
629                                for m in measures
630                                if m["name"].startswith(
631                                    f"{dimension['name']}_{aggregation}"
632                                )
633                            ]
634                            if "window_sizes" in statistic_conf:
635                                for window_size in statistic_conf["window_sizes"]:
636                                    for matching_measure in matching_measures:
637                                        # these statistics require some time dimension to be selected
638                                        measures.append(
639                                            {
640                                                "name": f"{dimension['name']}_{statistic_slug}_{aggregation}_{window_size}_day",
641                                                "type": "number",
642                                                "label": f"{matching_measure['label']} {window_size} Day Rolling Average",
643                                                "sql": f"""
644                                                    {{% if {self.name}.submission_date._is_selected or
645                                                        {self.name}.submission_week._is_selected or
646                                                        {self.name}.submission_month._is_selected or
647                                                        {self.name}.submission_quarter._is_selected or
648                                                        {self.name}.submission_year._is_selected %}}
649                                                    AVG(${{{matching_measure['name']}}}) OVER (
650                                                        {{% if date_groupby_position._parameter_value != "" %}}
651                                                        ORDER BY {{% parameter date_groupby_position %}}
652                                                        {{% elsif {self.name}.submission_date._is_selected %}}
653                                                        ORDER BY ${{TABLE}}.analysis_basis
654                                                        {{% else %}}
655                                                        ERROR("date_groupby_position needs to be set when using submission_week,
656                                                        submission_month, submission_quarter, or submission_year")
657                                                        {{% endif %}}
658                                                        ROWS BETWEEN {window_size} PRECEDING AND CURRENT ROW
659                                                    {{% else %}}
660                                                    ERROR('Please select a "submission_*" field to compute the rolling average')
661                                                    {{% endif %}}
662                                                )""",
663                                                "group_label": "Statistics",
664                                                "description": f"{window_size} day rolling average of {dimension_label}",
665                                            }
666                                        )
667
668                    # period-over-period measures compare current values with historical values
669                    if "period_over_period" in statistic_conf:
670                        # find all statistics that have period-over-period configured
671                        matching_measures = [
672                            m
673                            for m in measures
674                            if m["name"].startswith(
675                                f"{dimension['name']}_{statistic_slug}"
676                            )
677                            and "_period_over_period_" not in m["name"]
678                        ]
679
680                        # create period-over-period measures for each configured time period
681                        for period in statistic_conf["period_over_period"].get(
682                            "periods", []
683                        ):
684                            for matching_measure in matching_measures:
685                                original_sql = matching_measure["sql"]
686
687                                # rolling averages need special handling to adjust window sizes
688                                # based on the selected time granularity
689                                if statistic_slug == "rolling_average":
690                                    sql = self._create_rolling_average_period_sql(
691                                        original_sql, period
692                                    )
693                                else:
694                                    # standard measures use LAG function with time-adjusted periods
695                                    sql = self._create_lag_period_sql(
696                                        matching_measure, period
697                                    )
698
699                                # generate different types of period-over-period comparisons
700                                for kind in statistic_conf["period_over_period"].get(
701                                    "kinds", ["previous"]
702                                ):
703                                    if kind == "difference":
704                                        comparison_sql = f"({original_sql}) - ({sql})"
705                                    elif kind == "relative_change":
706                                        comparison_sql = f"SAFE_DIVIDE(({original_sql}), NULLIF(({sql}), 0)) - 1"
707                                    else:
708                                        comparison_sql = sql
709
710                                    measures.append(
711                                        {
712                                            "name": f"{matching_measure['name']}_{period}_day_period_over_period_{kind}",
713                                            "type": "number",
714                                            "label": f"{matching_measure['label']} "
715                                            + f"{period} Day Period Over Period {kind.capitalize()}",
716                                            "description": f"Period over period {kind.capitalize()} of "
717                                            + f"{matching_measure['label']} over {period} days",
718                                            "group_label": "Statistics",
719                                            "sql": comparison_sql,
720                                        }
721                                    )
722
723        return measures
724
725    def _create_rolling_average_period_sql(self, original_sql: str, period: int) -> str:
726        """
727        Create period-over-period SQL for rolling average measures.
728
729        Rolling averages require adjusting the window size based on the selected time granularity.
730        """
731        rows_match = re.search(
732            r"ROWS BETWEEN (\d+) PRECEDING AND CURRENT ROW",
733            original_sql,
734        )
735
736        if not rows_match:
737            return original_sql
738
739        original_window_size = int(rows_match.group(1))
740        time_conditions = []
741
742        for unit, divisor in self.TIME_UNITS:
743            # calculate adjusted window size for this time granularity
744            adjusted_window = (
745                (original_window_size + period) // divisor
746                if unit != "date"
747                else original_window_size + period
748            )
749
750            condition = (
751                f"{{% {'if' if unit == 'date' else 'elsif'} "
752                + f"{self.name}.submission_{unit}._is_selected %}}"
753            )
754
755            # modify the ROWS clause to extend the window by the period
756            modified_sql = re.sub(
757                r"ROWS BETWEEN \d+ PRECEDING AND CURRENT ROW",
758                f"ROWS BETWEEN {adjusted_window} PRECEDING AND "
759                + f"{adjusted_window - original_window_size} PRECEDING",
760                original_sql,
761            )
762            time_conditions.append(f"{condition}\n{modified_sql}")
763
764        return (
765            "\n".join(time_conditions)
766            + f"\n{{% else %}}\n{original_sql}\n{{% endif %}}"
767        )
768
769    def _create_lag_period_sql(self, matching_measure: dict, period: int) -> str:
770        """
771        Create period-over-period SQL using LAG function for standard measures.
772
773        The LAG function looks back N periods to get historical values. The period is adjusted
774        based on the selected time granularity (daily, weekly, monthly, etc.).
775        """
776        time_conditions = []
777
778        for unit, divisor in self.TIME_UNITS:
779            # calculate adjusted period for this time granularity
780            adjusted_period = period // divisor if unit != "date" else period
781
782            order_by = (
783                f"${{submission_{unit}}}" if unit != "date" else "${submission_date}"
784            )
785
786            condition = (
787                f"{{% {'if' if unit == 'date' else 'elsif'} "
788                + f"{self.name}.submission_{unit}._is_selected %}}"
789            )
790
791            lag_sql = f"""LAG(${{{matching_measure['name']}}}, {adjusted_period}) OVER (
792                            {{% if date_groupby_position._parameter_value != "" %}}
793                            ORDER BY {{% parameter date_groupby_position %}}
794                            {{% else %}}
795                            ORDER BY {order_by}
796                            {{% endif %}}
797                        )"""
798            time_conditions.append(f"{condition}\n{lag_sql}")
799
800        return (
801            "\n".join(time_conditions)
802            + f"\n{{% else %}}\nLAG({matching_measure['name']}, {period}) "
803            + "OVER (ORDER BY ${submission_date})\n{% endif %}"
804        )
class MetricDefinitionsView(generator.views.view.View):
 15class MetricDefinitionsView(View):
 16    """A view for metric-hub metrics that come from the same data source."""
 17
 18    type: str = "metric_definitions_view"
 19
 20    # Time unit divisors for converting days to different granularities
 21    TIME_UNITS = [
 22        ("date", 1),  # Daily: no conversion needed
 23        ("week", 7),  # Weekly: divide by 7
 24        ("month", 30),  # Monthly: approximate 30 days per month
 25        ("quarter", 90),  # Quarterly: approximate 90 days per quarter
 26        ("year", 365),  # Yearly: approximate 365 days per year
 27    ]
 28
 29    def __init__(self, namespace: str, name: str, tables: List[Dict[str, str]]):
 30        """Get an instance of an MetricDefinitionsView."""
 31        super().__init__(namespace, name, MetricDefinitionsView.type, tables)
 32
 33    @classmethod
 34    def from_db_views(
 35        klass,
 36        namespace: str,
 37        is_glean: bool,
 38        channels: List[Dict[str, str]],
 39        db_views: dict,
 40    ) -> Iterator[MetricDefinitionsView]:
 41        """Get Metric Definition Views from db views and app variants."""
 42        return iter(())
 43
 44    @classmethod
 45    def from_dict(
 46        klass, namespace: str, name: str, definition: ViewDict
 47    ) -> MetricDefinitionsView:
 48        """Get a MetricDefinitionsView from a dict representation."""
 49        return klass(namespace, name, definition.get("tables", []))
 50
 51    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 52        """Get this view as LookML."""
 53        namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
 54            self.namespace
 55        )
 56        if namespace_definitions is None:
 57            return {}
 58
 59        # get all metric definitions that depend on the data source represented by this view
 60        data_source_name = re.sub("^metric_definitions_", "", self.name)
 61        data_source_definition = MetricsConfigLoader.configs.get_data_source_definition(
 62            data_source_name, self.namespace
 63        )
 64
 65        if data_source_definition is None:
 66            return {}
 67
 68        # todo: hide deprecated metrics?
 69        metric_definitions = [
 70            f"""{
 71                MetricsConfigLoader.configs.get_env().from_string(metric.select_expression).render()
 72            } AS {metric_slug},\n"""
 73            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
 74            if metric.select_expression
 75            and metric.data_source.name == data_source_name
 76            and metric.type != "histogram"
 77        ]
 78
 79        if metric_definitions == []:
 80            return {}
 81
 82        # Metric definitions are intended to aggregated by client per date.
 83        # A derived table is needed to do these aggregations, instead of defining them as measures
 84        # we want to have them available as dimensions (which don't allow aggregations in their definitions)
 85        # to allow for custom measures to be later defined in Looker that aggregate these per client metrics.
 86        view_defn: Dict[str, Any] = {"name": self.name}
 87
 88        ignore_base_fields = [
 89            "client_id",
 90            "submission_date",
 91            "submission",
 92            "first_run",
 93        ] + [
 94            metric_slug
 95            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
 96            if metric.select_expression
 97            and metric.data_source.name == data_source_name
 98            and metric.type != "histogram"
 99        ]
100
101        base_view_dimensions = {}
102        joined_data_sources = []
103
104        # check if the metric data source has joins
105        # joined data sources are generally used for creating the "Base Fields"
106        if data_source_definition.joins:
107            # determine the dimensions selected by the joined data sources
108            for joined_data_source_slug, join in data_source_definition.joins.items():
109                joined_data_source = (
110                    MetricsConfigLoader.configs.get_data_source_definition(
111                        joined_data_source_slug, self.namespace
112                    )
113                )
114
115                if joined_data_source.columns_as_dimensions:
116                    joined_data_sources.append(joined_data_source)
117
118                    date_filter = None
119                    if joined_data_source.submission_date_column != "NULL":
120                        date_filter = (
121                            None
122                            if joined_data_source.submission_date_column is None
123                            or joined_data_source.submission_date_column == "NULL"
124                            else f"{joined_data_source.submission_date_column} = '2023-01-01'"
125                        )
126
127                    # create Looker dimensions by doing a dryrun
128                    query = MetricsConfigLoader.configs.get_data_source_sql(
129                        joined_data_source_slug,
130                        self.namespace,
131                        where=date_filter,
132                    ).format(dataset=self.namespace)
133
134                    base_view_dimensions[joined_data_source_slug] = (
135                        lookml_utils._generate_dimensions_from_query(
136                            query, dryrun=dryrun
137                        )
138                    )
139
140        if (
141            data_source_definition.client_id_column == "NULL"
142            and not base_view_dimensions
143        ) or data_source_definition.columns_as_dimensions:
144            # if the metrics data source doesn't have any joins then use the dimensions
145            # of the data source itself as base fields
146            date_filter = None
147            if data_source_definition.submission_date_column != "NULL":
148                date_filter = (
149                    "submission_date = '2023-01-01'"
150                    if data_source_definition.submission_date_column is None
151                    else f"{data_source_definition.submission_date_column} = '2023-01-01'"
152                )
153
154            query = MetricsConfigLoader.configs.get_data_source_sql(
155                data_source_definition.name,
156                self.namespace,
157                where=date_filter,
158                ignore_joins=True,
159            ).format(dataset=self.namespace)
160
161            base_view_dimensions[data_source_definition.name] = (
162                lookml_utils._generate_dimensions_from_query(query, dryrun)
163            )
164
165        # to prevent duplicate dimensions, especially when working with time dimensions
166        # where names are modified potentially causing naming collisions
167        seen_dimensions = set()
168        # prepare base field data for query
169        base_view_fields = []
170        for data_source, dimensions in base_view_dimensions.items():
171            for dimension in dimensions:
172                if (
173                    dimension["name"] not in ignore_base_fields
174                    and dimension["name"] not in seen_dimensions
175                    and "hidden" not in dimension
176                ):
177                    sql = (
178                        f"{data_source}.{dimension['name'].replace('__', '.')} AS"
179                        + f" {data_source}_{dimension['name']},\n"
180                    )
181                    # date/time/timestamp suffixes are removed when generating lookml dimensions, however we
182                    # need the original field name for the derived view SQL
183                    if dimension["type"] == "time" and not dimension["sql"].endswith(
184                        dimension["name"]
185                    ):
186                        suffix = dimension["sql"].split(
187                            dimension["name"].replace("__", ".")
188                        )[-1]
189                        sql = (
190                            f"{data_source}.{(dimension['name']+suffix).replace('__', '.')} AS"
191                            + f" {data_source}_{dimension['name']},\n"
192                        )
193
194                    base_view_fields.append(
195                        {
196                            "name": f"{data_source}_{dimension['name']}",
197                            "select_sql": f"{data_source}_{dimension['name']},\n",
198                            "sql": sql,
199                        }
200                    )
201                    seen_dimensions.add(dimension["name"])
202
203        client_id_field = (
204            "NULL"
205            if data_source_definition.client_id_column == "NULL"
206            else f'{data_source_definition.client_id_column or "client_id"}'
207        )
208
209        # filters for date ranges
210        where_sql = " AND ".join(
211            [
212                f"""
213                    {data_source.name}.{data_source.submission_date_column or "submission_date"}
214                    {{% if analysis_period._is_filtered %}}
215                    BETWEEN
216                    DATE_SUB(
217                        COALESCE(
218                            SAFE_CAST(
219                                {{% date_start analysis_period %}} AS DATE
220                            ), CURRENT_DATE()),
221                        INTERVAL {{% parameter lookback_days %}} DAY
222                    ) AND
223                    COALESCE(
224                        SAFE_CAST(
225                            {{% date_end analysis_period %}} AS DATE
226                        ), CURRENT_DATE())
227                    {{% else %}}
228                    BETWEEN
229                    DATE_SUB(
230                        COALESCE(
231                            SAFE_CAST(
232                                {{% date_start submission_date %}} AS DATE
233                            ), CURRENT_DATE()),
234                        INTERVAL {{% parameter lookback_days %}} DAY
235                    ) AND
236                    COALESCE(
237                        SAFE_CAST(
238                            {{% date_end submission_date %}} AS DATE
239                        ), CURRENT_DATE())
240                    {{% endif %}}
241                """
242                for data_source in [data_source_definition] + joined_data_sources
243                if data_source.submission_date_column != "NULL"
244            ]
245        )
246
247        # filte on sample_id if such a field exists
248        for field in base_view_fields:
249            if field["name"].endswith("_sample_id"):
250                where_sql += f"""
251                    AND
252                        {field['name'].split('_sample_id')[0]}.sample_id < {{% parameter sampling %}}
253                """
254                break
255
256        view_defn["derived_table"] = {
257            "sql": f"""
258            SELECT
259                {"".join(metric_definitions)}
260                {"".join([field['select_sql'] for field in base_view_fields])}
261                {client_id_field} AS client_id,
262                {{% if aggregate_metrics_by._parameter_value == 'day' %}}
263                {data_source_definition.submission_date_column or "submission_date"} AS analysis_basis
264                {{% elsif aggregate_metrics_by._parameter_value == 'week'  %}}
265                (FORMAT_DATE(
266                    '%F',
267                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
268                    WEEK(MONDAY)))
269                ) AS analysis_basis
270                {{% elsif aggregate_metrics_by._parameter_value == 'month'  %}}
271                (FORMAT_DATE(
272                    '%Y-%m',
273                    {data_source_definition.submission_date_column or "submission_date"})
274                ) AS analysis_basis
275                {{% elsif aggregate_metrics_by._parameter_value == 'quarter'  %}}
276                (FORMAT_DATE(
277                    '%Y-%m',
278                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
279                    QUARTER))
280                ) AS analysis_basis
281                {{% elsif aggregate_metrics_by._parameter_value == 'year'  %}}
282                (EXTRACT(
283                    YEAR FROM {data_source_definition.submission_date_column or "submission_date"})
284                ) AS analysis_basis
285                {{% else %}}
286                NULL as analysis_basis
287                {{% endif %}}
288            FROM
289                (
290                    SELECT
291                        {data_source_name}.*,
292                        {"".join([field['sql'] for field in base_view_fields])}
293                    FROM
294                    {
295                        MetricsConfigLoader.configs.get_data_source_sql(
296                            data_source_name,
297                            self.namespace,
298                            select_fields=False
299                        ).format(dataset=self.namespace)
300                    }
301                    WHERE {where_sql}
302                )
303            GROUP BY
304                {"".join([field['select_sql'] for field in base_view_fields])}
305                client_id,
306                analysis_basis
307            """
308        }
309
310        view_defn["dimensions"] = self.get_dimensions()
311        view_defn["dimension_groups"] = self.get_dimension_groups()
312
313        # add the Looker dimensions
314        for data_source, dimensions in base_view_dimensions.items():
315            for dimension in dimensions:
316                if dimension["name"] not in ignore_base_fields:
317                    dimension["sql"] = (
318                        "${TABLE}." + f"{data_source}_{dimension['name']}"
319                    )
320                    dimension["group_label"] = "Base Fields"
321                    if not lookml_utils._is_dimension_group(dimension):
322                        view_defn["dimensions"].append(dimension)
323                    else:
324                        view_defn["dimension_groups"].append(dimension)
325                    # avoid duplicate dimensions
326                    ignore_base_fields.append(dimension["name"])
327
328        view_defn["measures"] = self.get_measures(
329            view_defn["dimensions"],
330        )
331        view_defn["sets"] = self._get_sets()
332        view_defn["parameters"] = self._get_parameters(view_defn["dimensions"])
333        view_defn["filters"] = self._get_filters()
334
335        return {"views": [view_defn]}
336
337    def get_dimensions(
338        self,
339        _table=None,
340        _v1_name: Optional[str] = None,
341        _dryrun=None,
342    ) -> List[Dict[str, Any]]:
343        """Get the set of dimensions for this view based on the metric definitions in metric-hub."""
344        namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
345            self.namespace
346        )
347        metric_definitions = namespace_definitions.metrics.definitions
348        data_source_name = re.sub("^metric_definitions_", "", self.name)
349
350        return [
351            {
352                "name": "client_id",
353                "type": "string",
354                "sql": "SAFE_CAST(${TABLE}.client_id AS STRING)",
355                "label": "Client ID",
356                "primary_key": "yes",
357                "group_label": "Base Fields",
358                "description": "Unique client identifier",
359            },
360        ] + [  # add a dimension for each metric definition
361            {
362                "name": metric_slug,
363                "group_label": "Metrics",
364                "label": metric.friendly_name
365                or lookml_utils.slug_to_title(metric_slug),
366                "description": metric.description or "",
367                "type": "number",
368                "sql": "${TABLE}." + metric_slug,
369            }
370            for metric_slug, metric in metric_definitions.items()
371            if metric.select_expression
372            and metric.data_source.name == data_source_name
373            and metric.type != "histogram"
374        ]
375
376    def get_dimension_groups(self) -> List[Dict[str, Any]]:
377        """Get dimension groups for this view."""
378        return [
379            {
380                "name": "submission",
381                "type": "time",
382                "datatype": "date",
383                "group_label": "Base Fields",
384                "sql": "${TABLE}.analysis_basis",
385                "label": "Submission",
386                "timeframes": [
387                    "raw",
388                    "date",
389                    "week",
390                    "month",
391                    "quarter",
392                    "year",
393                ],
394            }
395        ]
396
397    def _get_sets(self) -> List[Dict[str, Any]]:
398        """Generate metric sets."""
399        # group all the metric dimensions into a set
400        dimensions = self.get_dimensions()
401        measures = self.get_measures(dimensions)
402
403        return [
404            {
405                "name": "metrics",
406                "fields": [
407                    dimension["name"]
408                    for dimension in dimensions
409                    if dimension["name"] != "client_id"
410                ]
411                + [measure["name"] for measure in measures],
412            }
413        ]
414
415    def _get_parameters(self, dimensions: List[dict]):
416        hide_sampling = "yes"
417
418        for dim in dimensions:
419            if dim["name"] == "sample_id":
420                hide_sampling = "no"
421                break
422
423        return [
424            {
425                "name": "aggregate_metrics_by",
426                "label": "Aggregate Client Metrics Per",
427                "type": "unquoted",
428                "default_value": "day",
429                "allowed_values": [
430                    {"label": "Per Day", "value": "day"},
431                    {"label": "Per Week", "value": "week"},
432                    {"label": "Per Month", "value": "month"},
433                    {"label": "Per Quarter", "value": "quarter"},
434                    {"label": "Per Year", "value": "year"},
435                    {"label": "Overall", "value": "overall"},
436                ],
437            },
438            {
439                "name": "sampling",
440                "label": "Sample of source data in %",
441                "type": "unquoted",
442                "default_value": "100",
443                "hidden": hide_sampling,
444            },
445            {
446                "name": "lookback_days",
447                "label": "Lookback (Days)",
448                "type": "unquoted",
449                "description": "Number of days added before the filtered date range. "
450                + "Useful for period-over-period comparisons.",
451                "default_value": "0",
452            },
453            {
454                "name": "date_groupby_position",
455                "label": "Date Group By Position",
456                "type": "unquoted",
457                "description": "Position of the date field in the GROUP BY clause. "
458                + "Required when submission_week, submission_month, submission_quarter, "
459                + "or submission_year is selected, as BigQuery can't otherwise resolve the GROUP BY correctly.",
460                "default_value": "",
461            },
462        ]
463
464    def _get_filters(self):
465        return [
466            {
467                "name": "analysis_period",
468                "type": "date",
469                "label": "Analysis Period (with Lookback)",
470                "description": "Use this filter to define the main analysis period. "
471                + "The results will include the selected date range plus any additional "
472                + "days specified by the 'Lookback days' setting.",
473            }
474        ]
475
476    def get_measures(
477        self, dimensions: List[dict]
478    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
479        """Get statistics as measures."""
480        measures = []
481        sampling = "1"
482
483        for dim in dimensions:
484            if dim["name"] == "sample_id":
485                sampling = "100 / {% parameter sampling %}"
486                break
487
488        for dimension in dimensions:
489            metric = MetricsConfigLoader.configs.get_metric_definition(
490                dimension["name"], self.namespace
491            )
492            if metric and metric.statistics:
493                for statistic_slug, statistic_conf in metric.statistics.items():
494                    dimension_label = dimension.get("label") or dimension.get("name")
495                    if statistic_slug in [
496                        "average",
497                        "max",
498                        "min",
499                        "median",
500                    ]:
501                        measures.append(
502                            {
503                                "name": f"{dimension['name']}_{statistic_slug}",
504                                "type": statistic_slug,
505                                "sql": "${TABLE}." + dimension["name"],
506                                "label": f"{dimension_label} {statistic_slug.title()}",
507                                "group_label": "Statistics",
508                                "description": f"{statistic_slug.title()} of {dimension_label}",
509                            }
510                        )
511                    elif statistic_slug == "sum":
512                        measures.append(
513                            {
514                                "name": f"{dimension['name']}_{statistic_slug}",
515                                "type": "sum",
516                                "sql": "${TABLE}." + dimension["name"] + "*" + sampling,
517                                "label": f"{dimension_label} Sum",
518                                "group_label": "Statistics",
519                                "description": f"Sum of {dimension_label}",
520                            }
521                        )
522                    elif statistic_slug == "client_count":
523                        measures.append(
524                            {
525                                "name": (
526                                    f"{dimension['name']}_{statistic_slug}_sampled"
527                                    if sampling
528                                    else f"{dimension['name']}_{statistic_slug}"
529                                ),
530                                "type": "count_distinct",
531                                "label": f"{dimension_label} Client Count",
532                                "group_label": "Statistics",
533                                "sql": "IF(${TABLE}."
534                                + f"{dimension['name']} > 0, "
535                                + "${TABLE}.client_id, SAFE_CAST(NULL AS STRING))",
536                                "description": f"Number of clients with {dimension_label}",
537                                "hidden": "yes" if sampling else "no",
538                            }
539                        )
540
541                        if sampling:
542                            measures.append(
543                                {
544                                    "name": f"{dimension['name']}_{statistic_slug}",
545                                    "type": "number",
546                                    "label": f"{dimension_label} Client Count",
547                                    "group_label": "Statistics",
548                                    "sql": "${"
549                                    + f"{dimension['name']}_{statistic_slug}_sampled"
550                                    + "} *"
551                                    + sampling,
552                                    "description": f"Number of clients with {dimension_label}",
553                                }
554                            )
555                    elif statistic_slug == "dau_proportion":
556                        if "numerator" in statistic_conf:
557                            [numerator, numerator_stat] = statistic_conf[
558                                "numerator"
559                            ].split(".")
560                            measures.append(
561                                {
562                                    "name": "DAU_sampled" if sampling else "DAU",
563                                    "type": "count_distinct",
564                                    "label": "DAU",
565                                    "group_label": "Statistics",
566                                    "sql": "${TABLE}.client_id",
567                                    "hidden": "yes",
568                                }
569                            )
570
571                            if sampling:
572                                measures.append(
573                                    {
574                                        "name": "DAU",
575                                        "type": "number",
576                                        "label": "DAU",
577                                        "group_label": "Statistics",
578                                        "sql": "${DAU_sampled} *" + sampling,
579                                        "hidden": "yes",
580                                    }
581                                )
582
583                            measures.append(
584                                {
585                                    "name": f"{dimension['name']}_{statistic_slug}",
586                                    "type": "number",
587                                    "label": f"{dimension_label} DAU Proportion",
588                                    "sql": "SAFE_DIVIDE(${"
589                                    + f"{numerator}_{numerator_stat}"
590                                    + "}, ${DAU})",
591                                    "group_label": "Statistics",
592                                    "description": f"Proportion of daily active users with {dimension['name']}",
593                                }
594                            )
595                    elif statistic_slug == "ratio":
596                        if (
597                            "numerator" in statistic_conf
598                            and "denominator" in statistic_conf
599                        ):
600                            [numerator, numerator_stat] = statistic_conf[
601                                "numerator"
602                            ].split(".")
603                            [denominator, denominator_stat] = statistic_conf[
604                                "denominator"
605                            ].split(".")
606
607                            measures.append(
608                                {
609                                    "name": f"{dimension['name']}_{statistic_slug}",
610                                    "type": "number",
611                                    "label": f"{dimension_label} Ratio",
612                                    "sql": "SAFE_DIVIDE(${"
613                                    + f"{numerator}_{numerator_stat}"
614                                    + "}, ${"
615                                    + f"{denominator}_{denominator_stat}"
616                                    + "})",
617                                    "group_label": "Statistics",
618                                    "description": f"Ratio between "
619                                    + f"{statistic_conf['numerator']} and "
620                                    + f"{statistic_conf['denominator']}",
621                                }
622                            )
623                    elif statistic_slug == "rolling_average":
624                        # rolling averages are computed over existing statistics (e.g. sum, ratio)
625                        aggregations = statistic_conf.get("aggregations", ["sum"])
626                        for aggregation in aggregations:
627                            # find measures that match the current dimension and aggregation type
628                            matching_measures = [
629                                m
630                                for m in measures
631                                if m["name"].startswith(
632                                    f"{dimension['name']}_{aggregation}"
633                                )
634                            ]
635                            if "window_sizes" in statistic_conf:
636                                for window_size in statistic_conf["window_sizes"]:
637                                    for matching_measure in matching_measures:
638                                        # these statistics require some time dimension to be selected
639                                        measures.append(
640                                            {
641                                                "name": f"{dimension['name']}_{statistic_slug}_{aggregation}_{window_size}_day",
642                                                "type": "number",
643                                                "label": f"{matching_measure['label']} {window_size} Day Rolling Average",
644                                                "sql": f"""
645                                                    {{% if {self.name}.submission_date._is_selected or
646                                                        {self.name}.submission_week._is_selected or
647                                                        {self.name}.submission_month._is_selected or
648                                                        {self.name}.submission_quarter._is_selected or
649                                                        {self.name}.submission_year._is_selected %}}
650                                                    AVG(${{{matching_measure['name']}}}) OVER (
651                                                        {{% if date_groupby_position._parameter_value != "" %}}
652                                                        ORDER BY {{% parameter date_groupby_position %}}
653                                                        {{% elsif {self.name}.submission_date._is_selected %}}
654                                                        ORDER BY ${{TABLE}}.analysis_basis
655                                                        {{% else %}}
656                                                        ERROR("date_groupby_position needs to be set when using submission_week,
657                                                        submission_month, submission_quarter, or submission_year")
658                                                        {{% endif %}}
659                                                        ROWS BETWEEN {window_size} PRECEDING AND CURRENT ROW
660                                                    {{% else %}}
661                                                    ERROR('Please select a "submission_*" field to compute the rolling average')
662                                                    {{% endif %}}
663                                                )""",
664                                                "group_label": "Statistics",
665                                                "description": f"{window_size} day rolling average of {dimension_label}",
666                                            }
667                                        )
668
669                    # period-over-period measures compare current values with historical values
670                    if "period_over_period" in statistic_conf:
671                        # find all statistics that have period-over-period configured
672                        matching_measures = [
673                            m
674                            for m in measures
675                            if m["name"].startswith(
676                                f"{dimension['name']}_{statistic_slug}"
677                            )
678                            and "_period_over_period_" not in m["name"]
679                        ]
680
681                        # create period-over-period measures for each configured time period
682                        for period in statistic_conf["period_over_period"].get(
683                            "periods", []
684                        ):
685                            for matching_measure in matching_measures:
686                                original_sql = matching_measure["sql"]
687
688                                # rolling averages need special handling to adjust window sizes
689                                # based on the selected time granularity
690                                if statistic_slug == "rolling_average":
691                                    sql = self._create_rolling_average_period_sql(
692                                        original_sql, period
693                                    )
694                                else:
695                                    # standard measures use LAG function with time-adjusted periods
696                                    sql = self._create_lag_period_sql(
697                                        matching_measure, period
698                                    )
699
700                                # generate different types of period-over-period comparisons
701                                for kind in statistic_conf["period_over_period"].get(
702                                    "kinds", ["previous"]
703                                ):
704                                    if kind == "difference":
705                                        comparison_sql = f"({original_sql}) - ({sql})"
706                                    elif kind == "relative_change":
707                                        comparison_sql = f"SAFE_DIVIDE(({original_sql}), NULLIF(({sql}), 0)) - 1"
708                                    else:
709                                        comparison_sql = sql
710
711                                    measures.append(
712                                        {
713                                            "name": f"{matching_measure['name']}_{period}_day_period_over_period_{kind}",
714                                            "type": "number",
715                                            "label": f"{matching_measure['label']} "
716                                            + f"{period} Day Period Over Period {kind.capitalize()}",
717                                            "description": f"Period over period {kind.capitalize()} of "
718                                            + f"{matching_measure['label']} over {period} days",
719                                            "group_label": "Statistics",
720                                            "sql": comparison_sql,
721                                        }
722                                    )
723
724        return measures
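
The three period-over-period kinds reduce to simple arithmetic over the current and lagged values. A sketch of the equivalent Python (the helper and its name are illustrative only):

    from typing import Optional

    def compare(current: float, previous: Optional[float], kind: str) -> Optional[float]:
        """Mirror of the comparison kinds: difference, relative_change, previous."""
        if previous is None:
            return None
        if kind == "difference":  # (original) - (lagged)
            return current - previous
        if kind == "relative_change":  # SAFE_DIVIDE(original, NULLIF(lagged, 0)) - 1
            return None if previous == 0 else current / previous - 1
        return previous  # default "previous": the lagged value itself

    # e.g. 110 clients today vs. 100 a week ago:
    # compare(110, 100, "difference") == 10
    # compare(110, 100, "relative_change") -> about 0.1 (+10%)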
725
726    def _create_rolling_average_period_sql(self, original_sql: str, period: int) -> str:
727        """
728        Create period-over-period SQL for rolling average measures.
729
730        Rolling averages require adjusting the window size based on the selected time granularity.
731        """
732        rows_match = re.search(
733            r"ROWS BETWEEN (\d+) PRECEDING AND CURRENT ROW",
734            original_sql,
735        )
736
737        if not rows_match:
738            return original_sql
739
740        original_window_size = int(rows_match.group(1))
741        time_conditions = []
742
743        for unit, divisor in self.TIME_UNITS:
744            # calculate adjusted window size for this time granularity
745            adjusted_window = (
746                (original_window_size + period) // divisor
747                if unit != "date"
748                else original_window_size + period
749            )
750
751            condition = (
752                f"{{% {'if' if unit == 'date' else 'elsif'} "
753                + f"{self.name}.submission_{unit}._is_selected %}}"
754            )
755
756            # shift the ROWS window back by the comparison period, with the
757            # period converted from days to the selected granularity
758            modified_sql = re.sub(
759                r"ROWS BETWEEN \d+ PRECEDING AND CURRENT ROW",
760                f"ROWS BETWEEN {adjusted_window} PRECEDING AND "
761                + f"{period if unit == 'date' else period // divisor} PRECEDING",
761                original_sql,
762            )
763            time_conditions.append(f"{condition}\n{modified_sql}")
764
765        return (
766            "\n".join(time_conditions)
767            + f"\n{{% else %}}\n{original_sql}\n{{% endif %}}"
768        )
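
The frame arithmetic above is plain integer math over TIME_UNITS. A runnable sketch of just that calculation (the helper name is illustrative):

    TIME_UNITS = [("date", 1), ("week", 7), ("month", 30), ("quarter", 90), ("year", 365)]

    def shifted_frame(window: int, period: int) -> dict:
        """(start PRECEDING, end PRECEDING) of the shifted window per granularity."""
        return {
            unit: (
                window + period if unit == "date" else (window + period) // divisor,
                period if unit == "date" else period // divisor,
            )
            for unit, divisor in TIME_UNITS
        }

    # a 28-day rolling average compared 7 days back:
    # {'date': (35, 7), 'week': (5, 1), 'month': (1, 0), 'quarter': (0, 0), 'year': (0, 0)}
    print(shifted_frame(28, 7))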
769
770    def _create_lag_period_sql(self, matching_measure: dict, period: int) -> str:
771        """
772        Create period-over-period SQL using LAG function for standard measures.
773
774        LAG function looks back N periods to get historical values. The period is adjusted
775        based on the selected time granularity (daily, weekly, monthly, etc.).
776        """
777        time_conditions = []
778
779        for unit, divisor in self.TIME_UNITS:
780            # calculate adjusted period for this time granularity
781            adjusted_period = period // divisor if unit != "date" else period
782
783            order_by = (
784                f"${{submission_{unit}}}" if unit != "date" else "${submission_date}"
785            )
786
787            condition = (
788                f"{{% {'if' if unit == 'date' else 'elsif'} "
789                + f"{self.name}.submission_{unit}._is_selected %}}"
790            )
791
792            lag_sql = f"""LAG(${{{matching_measure['name']}}}, {adjusted_period}) OVER (
793                            {{% if date_groupby_position._parameter_value != "" %}}
794                            ORDER BY {{% parameter date_groupby_position %}}
795                            {{% else %}}
796                            ORDER BY {order_by}
797                            {{% endif %}}
798                        )"""
799            time_conditions.append(f"{condition}\n{lag_sql}")
800
801        return (
802            "\n".join(time_conditions)
803            + f"\n{{% else %}}\nLAG(${{{matching_measure['name']}}}, {period}) "
804            + "OVER (ORDER BY ${submission_date})\n{% endif %}"
805        )
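
For concreteness, the weekly branch generated for a hypothetical measure named clicks_sum on a view named metric_definitions_clients_daily, with a 28-day period (28 // 7 = 4 rows), would render roughly as:

    {% elsif metric_definitions_clients_daily.submission_week._is_selected %}
    LAG(${clicks_sum}, 4) OVER (
        {% if date_groupby_position._parameter_value != "" %}
        ORDER BY {% parameter date_groupby_position %}
        {% else %}
        ORDER BY ${submission_week}
        {% endif %}
    )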
