generator.views.metric_definitions_view
Class to describe a view with metrics from metric-hub.
1"""Class to describe a view with metrics from metric-hub.""" 2 3from __future__ import annotations 4 5import re 6from typing import Any, Dict, Iterator, List, Optional, Union 7 8from generator.metrics_utils import MetricsConfigLoader 9 10from . import lookml_utils 11from .view import View, ViewDict 12 13 14class MetricDefinitionsView(View): 15 """A view for metric-hub metrics that come from the same data source.""" 16 17 type: str = "metric_definitions_view" 18 19 # Time unit divisors for converting days to different granularities 20 TIME_UNITS = [ 21 ("date", 1), # Daily: no conversion needed 22 ("week", 7), # Weekly: divide by 7 23 ("month", 30), # Monthly: approximate 30 days per month 24 ("quarter", 90), # Quarterly: approximate 90 days per quarter 25 ("year", 365), # Yearly: approximate 365 days per year 26 ] 27 28 def __init__(self, namespace: str, name: str, tables: List[Dict[str, str]]): 29 """Get an instance of an MetricDefinitionsView.""" 30 super().__init__(namespace, name, MetricDefinitionsView.type, tables) 31 32 @classmethod 33 def from_db_views( 34 klass, 35 namespace: str, 36 is_glean: bool, 37 channels: List[Dict[str, str]], 38 db_views: dict, 39 ) -> Iterator[MetricDefinitionsView]: 40 """Get Metric Definition Views from db views and app variants.""" 41 return iter(()) 42 43 @classmethod 44 def from_dict( 45 klass, namespace: str, name: str, definition: ViewDict 46 ) -> MetricDefinitionsView: 47 """Get a MetricDefinitionsView from a dict representation.""" 48 return klass(namespace, name, definition.get("tables", [])) 49 50 def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 51 """Get this view as LookML.""" 52 namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions( 53 self.namespace 54 ) 55 if namespace_definitions is None: 56 return {} 57 58 # get all metric definitions that depend on the data source represented by this view 59 data_source_name = re.sub("^metric_definitions_", "", self.name) 60 data_source_definition = MetricsConfigLoader.configs.get_data_source_definition( 61 data_source_name, self.namespace 62 ) 63 64 if data_source_definition is None: 65 return {} 66 67 # todo: hide deprecated metrics? 68 metric_definitions = [ 69 f"""{ 70 MetricsConfigLoader.configs.get_env().from_string(metric.select_expression).render() 71 } AS {metric_slug},\n""" 72 for metric_slug, metric in namespace_definitions.metrics.definitions.items() 73 if metric.select_expression 74 and metric.data_source.name == data_source_name 75 and metric.type != "histogram" 76 ] 77 78 if metric_definitions == []: 79 return {} 80 81 # Metric definitions are intended to aggregated by client per date. 82 # A derived table is needed to do these aggregations, instead of defining them as measures 83 # we want to have them available as dimensions (which don't allow aggregations in their definitions) 84 # to allow for custom measures to be later defined in Looker that aggregate these per client metrics. 
85 view_defn: Dict[str, Any] = {"name": self.name} 86 87 ignore_base_fields = [ 88 "client_id", 89 "submission_date", 90 "submission", 91 "first_run", 92 ] + [ 93 metric_slug 94 for metric_slug, metric in namespace_definitions.metrics.definitions.items() 95 if metric.select_expression 96 and metric.data_source.name == data_source_name 97 and metric.type != "histogram" 98 ] 99 100 base_view_dimensions = {} 101 joined_data_sources = [] 102 103 # check if the metric data source has joins 104 # joined data sources are generally used for creating the "Base Fields" 105 if data_source_definition.joins: 106 # determine the dimensions selected by the joined data sources 107 for joined_data_source_slug, join in data_source_definition.joins.items(): 108 joined_data_source = ( 109 MetricsConfigLoader.configs.get_data_source_definition( 110 joined_data_source_slug, self.namespace 111 ) 112 ) 113 114 if joined_data_source.columns_as_dimensions: 115 joined_data_sources.append(joined_data_source) 116 117 date_filter = None 118 if joined_data_source.submission_date_column != "NULL": 119 date_filter = ( 120 None 121 if joined_data_source.submission_date_column is None 122 or joined_data_source.submission_date_column == "NULL" 123 else f"{joined_data_source.submission_date_column} = '2023-01-01'" 124 ) 125 126 # create Looker dimensions by doing a dryrun 127 query = MetricsConfigLoader.configs.get_data_source_sql( 128 joined_data_source_slug, 129 self.namespace, 130 where=date_filter, 131 ).format(dataset=self.namespace) 132 133 base_view_dimensions[joined_data_source_slug] = ( 134 lookml_utils._generate_dimensions_from_query( 135 query, dryrun=dryrun 136 ) 137 ) 138 139 if ( 140 data_source_definition.client_id_column == "NULL" 141 and not base_view_dimensions 142 ) or data_source_definition.columns_as_dimensions: 143 # if the metrics data source doesn't have any joins then use the dimensions 144 # of the data source itself as base fields 145 date_filter = None 146 if data_source_definition.submission_date_column != "NULL": 147 date_filter = ( 148 "submission_date = '2023-01-01'" 149 if data_source_definition.submission_date_column is None 150 else f"{data_source_definition.submission_date_column} = '2023-01-01'" 151 ) 152 153 query = MetricsConfigLoader.configs.get_data_source_sql( 154 data_source_definition.name, 155 self.namespace, 156 where=date_filter, 157 ignore_joins=True, 158 ).format(dataset=self.namespace) 159 160 base_view_dimensions[data_source_definition.name] = ( 161 lookml_utils._generate_dimensions_from_query(query, dryrun) 162 ) 163 164 # to prevent duplicate dimensions, especially when working with time dimensions 165 # where names are modified potentially causing naming collisions 166 seen_dimensions = set() 167 # prepare base field data for query 168 base_view_fields = [] 169 for data_source, dimensions in base_view_dimensions.items(): 170 for dimension in dimensions: 171 if ( 172 dimension["name"] not in ignore_base_fields 173 and dimension["name"] not in seen_dimensions 174 and "hidden" not in dimension 175 ): 176 sql = ( 177 f"{data_source}.{dimension['name'].replace('__', '.')} AS" 178 + f" {data_source}_{dimension['name']},\n" 179 ) 180 # date/time/timestamp suffixes are removed when generating lookml dimensions, however we 181 # need the original field name for the derived view SQL 182 if dimension["type"] == "time" and not dimension["sql"].endswith( 183 dimension["name"] 184 ): 185 suffix = dimension["sql"].split( 186 dimension["name"].replace("__", ".") 187 )[-1] 188 sql = ( 189 
f"{data_source}.{(dimension['name']+suffix).replace('__', '.')} AS" 190 + f" {data_source}_{dimension['name']},\n" 191 ) 192 193 base_view_fields.append( 194 { 195 "name": f"{data_source}_{dimension['name']}", 196 "select_sql": f"{data_source}_{dimension['name']},\n", 197 "sql": sql, 198 } 199 ) 200 seen_dimensions.add(dimension["name"]) 201 202 client_id_field = ( 203 "NULL" 204 if data_source_definition.client_id_column == "NULL" 205 else f'{data_source_definition.client_id_column or "client_id"}' 206 ) 207 208 # filters for date ranges 209 where_sql = " AND ".join( 210 [ 211 f""" 212 {data_source.name}.{data_source.submission_date_column or "submission_date"} 213 {{% if analysis_period._is_filtered %}} 214 BETWEEN 215 DATE_SUB( 216 COALESCE( 217 SAFE_CAST( 218 {{% date_start analysis_period %}} AS DATE 219 ), CURRENT_DATE()), 220 INTERVAL {{% parameter lookback_days %}} DAY 221 ) AND 222 COALESCE( 223 SAFE_CAST( 224 {{% date_end analysis_period %}} AS DATE 225 ), CURRENT_DATE()) 226 {{% else %}} 227 BETWEEN 228 DATE_SUB( 229 COALESCE( 230 SAFE_CAST( 231 {{% date_start submission_date %}} AS DATE 232 ), CURRENT_DATE()), 233 INTERVAL {{% parameter lookback_days %}} DAY 234 ) AND 235 COALESCE( 236 SAFE_CAST( 237 {{% date_end submission_date %}} AS DATE 238 ), CURRENT_DATE()) 239 {{% endif %}} 240 """ 241 for data_source in [data_source_definition] + joined_data_sources 242 if data_source.submission_date_column != "NULL" 243 ] 244 ) 245 246 # filte on sample_id if such a field exists 247 for field in base_view_fields: 248 if field["name"].endswith("_sample_id"): 249 where_sql += f""" 250 AND 251 {field['name'].split('_sample_id')[0]}.sample_id < {{% parameter sampling %}} 252 """ 253 break 254 255 view_defn["derived_table"] = { 256 "sql": f""" 257 SELECT 258 {"".join(metric_definitions)} 259 {"".join([field['select_sql'] for field in base_view_fields])} 260 {client_id_field} AS client_id, 261 {{% if aggregate_metrics_by._parameter_value == 'day' %}} 262 {data_source_definition.submission_date_column or "submission_date"} AS analysis_basis 263 {{% elsif aggregate_metrics_by._parameter_value == 'week' %}} 264 (FORMAT_DATE( 265 '%F', 266 DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"}, 267 WEEK(MONDAY))) 268 ) AS analysis_basis 269 {{% elsif aggregate_metrics_by._parameter_value == 'month' %}} 270 (FORMAT_DATE( 271 '%Y-%m', 272 {data_source_definition.submission_date_column or "submission_date"}) 273 ) AS analysis_basis 274 {{% elsif aggregate_metrics_by._parameter_value == 'quarter' %}} 275 (FORMAT_DATE( 276 '%Y-%m', 277 DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"}, 278 QUARTER)) 279 ) AS analysis_basis 280 {{% elsif aggregate_metrics_by._parameter_value == 'year' %}} 281 (EXTRACT( 282 YEAR FROM {data_source_definition.submission_date_column or "submission_date"}) 283 ) AS analysis_basis 284 {{% else %}} 285 NULL as analysis_basis 286 {{% endif %}} 287 FROM 288 ( 289 SELECT 290 {data_source_name}.*, 291 {"".join([field['sql'] for field in base_view_fields])} 292 FROM 293 { 294 MetricsConfigLoader.configs.get_data_source_sql( 295 data_source_name, 296 self.namespace, 297 select_fields=False 298 ).format(dataset=self.namespace) 299 } 300 WHERE {where_sql} 301 ) 302 GROUP BY 303 {"".join([field['select_sql'] for field in base_view_fields])} 304 client_id, 305 analysis_basis 306 """ 307 } 308 309 view_defn["dimensions"] = self.get_dimensions() 310 view_defn["dimension_groups"] = self.get_dimension_groups() 311 312 # add the Looker 
dimensions 313 for data_source, dimensions in base_view_dimensions.items(): 314 for dimension in dimensions: 315 if dimension["name"] not in ignore_base_fields: 316 dimension["sql"] = ( 317 "${TABLE}." + f"{data_source}_{dimension['name']}" 318 ) 319 dimension["group_label"] = "Base Fields" 320 if not lookml_utils._is_dimension_group(dimension): 321 view_defn["dimensions"].append(dimension) 322 else: 323 view_defn["dimension_groups"].append(dimension) 324 # avoid duplicate dimensions 325 ignore_base_fields.append(dimension["name"]) 326 327 view_defn["measures"] = self.get_measures( 328 view_defn["dimensions"], 329 ) 330 view_defn["sets"] = self._get_sets() 331 view_defn["parameters"] = self._get_parameters(view_defn["dimensions"]) 332 view_defn["filters"] = self._get_filters() 333 334 return {"views": [view_defn]} 335 336 def get_dimensions( 337 self, 338 _table=None, 339 _v1_name: Optional[str] = None, 340 _dryrun=None, 341 ) -> List[Dict[str, Any]]: 342 """Get the set of dimensions for this view based on the metric definitions in metric-hub.""" 343 namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions( 344 self.namespace 345 ) 346 metric_definitions = namespace_definitions.metrics.definitions 347 data_source_name = re.sub("^metric_definitions_", "", self.name) 348 349 return [ 350 { 351 "name": "client_id", 352 "type": "string", 353 "sql": "SAFE_CAST(${TABLE}.client_id AS STRING)", 354 "label": "Client ID", 355 "primary_key": "yes", 356 "group_label": "Base Fields", 357 "description": "Unique client identifier", 358 }, 359 ] + [ # add a dimension for each metric definition 360 { 361 "name": metric_slug, 362 "group_label": "Metrics", 363 "label": metric.friendly_name 364 or lookml_utils.slug_to_title(metric_slug), 365 "description": metric.description or "", 366 "type": "number", 367 "sql": "${TABLE}." 
+ metric_slug, 368 } 369 for metric_slug, metric in metric_definitions.items() 370 if metric.select_expression 371 and metric.data_source.name == data_source_name 372 and metric.type != "histogram" 373 ] 374 375 def get_dimension_groups(self) -> List[Dict[str, Any]]: 376 """Get dimension groups for this view.""" 377 return [ 378 { 379 "name": "submission", 380 "type": "time", 381 "datatype": "date", 382 "group_label": "Base Fields", 383 "sql": "${TABLE}.analysis_basis", 384 "label": "Submission", 385 "timeframes": [ 386 "raw", 387 "date", 388 "week", 389 "month", 390 "quarter", 391 "year", 392 ], 393 } 394 ] 395 396 def _get_sets(self) -> List[Dict[str, Any]]: 397 """Generate metric sets.""" 398 # group all the metric dimensions into a set 399 dimensions = self.get_dimensions() 400 measures = self.get_measures(dimensions) 401 402 return [ 403 { 404 "name": "metrics", 405 "fields": [ 406 dimension["name"] 407 for dimension in dimensions 408 if dimension["name"] != "client_id" 409 ] 410 + [measure["name"] for measure in measures], 411 } 412 ] 413 414 def _get_parameters(self, dimensions: List[dict]): 415 hide_sampling = "yes" 416 417 for dim in dimensions: 418 if dim["name"] == "sample_id": 419 hide_sampling = "no" 420 break 421 422 return [ 423 { 424 "name": "aggregate_metrics_by", 425 "label": "Aggregate Client Metrics Per", 426 "type": "unquoted", 427 "default_value": "day", 428 "allowed_values": [ 429 {"label": "Per Day", "value": "day"}, 430 {"label": "Per Week", "value": "week"}, 431 {"label": "Per Month", "value": "month"}, 432 {"label": "Per Quarter", "value": "quarter"}, 433 {"label": "Per Year", "value": "year"}, 434 {"label": "Overall", "value": "overall"}, 435 ], 436 }, 437 { 438 "name": "sampling", 439 "label": "Sample of source data in %", 440 "type": "unquoted", 441 "default_value": "100", 442 "hidden": hide_sampling, 443 }, 444 { 445 "name": "lookback_days", 446 "label": "Lookback (Days)", 447 "type": "unquoted", 448 "description": "Number of days added before the filtered date range. " 449 + "Useful for period-over-period comparisons.", 450 "default_value": "0", 451 }, 452 { 453 "name": "date_groupby_position", 454 "label": "Date Group By Position", 455 "type": "unquoted", 456 "description": "Position of the date field in the group by clause. " 457 + "Required when submission_week, submission_month, submission_quarter, submission_year " 458 + "is selected as BigQuery can't correctly resolve the GROUP BY otherwise", 459 "default_value": "", 460 }, 461 ] 462 463 def _get_filters(self): 464 return [ 465 { 466 "name": "analysis_period", 467 "type": "date", 468 "label": "Analysis Period (with Lookback)", 469 "description": "Use this filter to define the main analysis period. 
" 470 + "The results will include the selected date range plus any additional " 471 + "days specified by the 'Lookback days' setting.", 472 } 473 ] 474 475 def get_measures( 476 self, dimensions: List[dict] 477 ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]: 478 """Get statistics as measures.""" 479 measures = [] 480 sampling = "1" 481 482 for dim in dimensions: 483 if dim["name"] == "sample_id": 484 sampling = "100 / {% parameter sampling %}" 485 break 486 487 for dimension in dimensions: 488 metric = MetricsConfigLoader.configs.get_metric_definition( 489 dimension["name"], self.namespace 490 ) 491 if metric and metric.statistics: 492 for statistic_slug, statistic_conf in metric.statistics.items(): 493 dimension_label = dimension.get("label") or dimension.get("name") 494 if statistic_slug in [ 495 "average", 496 "max", 497 "min", 498 "median", 499 ]: 500 measures.append( 501 { 502 "name": f"{dimension['name']}_{statistic_slug}", 503 "type": statistic_slug, 504 "sql": "${TABLE}." + dimension["name"], 505 "label": f"{dimension_label} {statistic_slug.title()}", 506 "group_label": "Statistics", 507 "description": f"{statistic_slug.title()} of {dimension_label}", 508 } 509 ) 510 elif statistic_slug == "sum": 511 measures.append( 512 { 513 "name": f"{dimension['name']}_{statistic_slug}", 514 "type": "sum", 515 "sql": "${TABLE}." + dimension["name"] + "*" + sampling, 516 "label": f"{dimension_label} Sum", 517 "group_label": "Statistics", 518 "description": f"Sum of {dimension_label}", 519 } 520 ) 521 elif statistic_slug == "client_count": 522 measures.append( 523 { 524 "name": ( 525 f"{dimension['name']}_{statistic_slug}_sampled" 526 if sampling 527 else f"{dimension['name']}_{statistic_slug}" 528 ), 529 "type": "count_distinct", 530 "label": f"{dimension_label} Client Count", 531 "group_label": "Statistics", 532 "sql": "IF(${TABLE}." 
533 + f"{dimension['name']} > 0, " 534 + "${TABLE}.client_id, SAFE_CAST(NULL AS STRING))", 535 "description": f"Number of clients with {dimension_label}", 536 "hidden": "yes" if sampling else "no", 537 } 538 ) 539 540 if sampling: 541 measures.append( 542 { 543 "name": f"{dimension['name']}_{statistic_slug}", 544 "type": "number", 545 "label": f"{dimension_label} Client Count", 546 "group_label": "Statistics", 547 "sql": "${" 548 + f"{dimension['name']}_{statistic_slug}_sampled" 549 + "} *" 550 + sampling, 551 "description": f"Number of clients with {dimension_label}", 552 } 553 ) 554 elif statistic_slug == "dau_proportion": 555 if "numerator" in statistic_conf: 556 [numerator, numerator_stat] = statistic_conf[ 557 "numerator" 558 ].split(".") 559 measures.append( 560 { 561 "name": "DAU_sampled" if sampling else "DAU", 562 "type": "count_distinct", 563 "label": "DAU", 564 "group_label": "Statistics", 565 "sql": "${TABLE}.client_id", 566 "hidden": "yes", 567 } 568 ) 569 570 if sampling: 571 measures.append( 572 { 573 "name": "DAU", 574 "type": "number", 575 "label": "DAU", 576 "group_label": "Statistics", 577 "sql": "${DAU_sampled} *" + sampling, 578 "hidden": "yes", 579 } 580 ) 581 582 measures.append( 583 { 584 "name": f"{dimension['name']}_{statistic_slug}", 585 "type": "number", 586 "label": f"{dimension_label} DAU Proportion", 587 "sql": "SAFE_DIVIDE(${" 588 + f"{numerator}_{numerator_stat}" 589 + "}, ${DAU})", 590 "group_label": "Statistics", 591 "description": f"Proportion of daily active users with {dimension['name']}", 592 } 593 ) 594 elif statistic_slug == "ratio": 595 if ( 596 "numerator" in statistic_conf 597 and "denominator" in statistic_conf 598 ): 599 [numerator, numerator_stat] = statistic_conf[ 600 "numerator" 601 ].split(".") 602 [denominator, denominator_stat] = statistic_conf[ 603 "denominator" 604 ].split(".") 605 606 measures.append( 607 { 608 "name": f"{dimension['name']}_{statistic_slug}", 609 "type": "number", 610 "label": f"{dimension_label} Ratio", 611 "sql": "SAFE_DIVIDE(${" 612 + f"{numerator}_{numerator_stat}" 613 + "}, ${" 614 + f"{denominator}_{denominator_stat}" 615 + "})", 616 "group_label": "Statistics", 617 "description": f"""" 618 Ratio between {statistic_conf['numerator']} and 619 {statistic_conf['denominator']}""", 620 } 621 ) 622 elif statistic_slug == "rolling_average": 623 # rolling averages are computed over existing statistics (e.g. 
sum, ratio) 624 aggregations = statistic_conf.get("aggregations", ["sum"]) 625 for aggregation in aggregations: 626 # find measures that match the current dimension and aggregation type 627 matching_measures = [ 628 m 629 for m in measures 630 if m["name"].startswith( 631 f"{dimension['name']}_{aggregation}" 632 ) 633 ] 634 if "window_sizes" in statistic_conf: 635 for window_size in statistic_conf["window_sizes"]: 636 for matching_measure in matching_measures: 637 # these statistics require some time dimension to be selected 638 measures.append( 639 { 640 "name": f"{dimension['name']}_{statistic_slug}_{aggregation}_{window_size}_day", 641 "type": "number", 642 "label": f"{matching_measure['label']} {window_size} Day Rolling Average", 643 "sql": f""" 644 {{% if {self.name}.submission_date._is_selected or 645 {self.name}.submission_week._is_selected or 646 {self.name}.submission_month._is_selected or 647 {self.name}.submission_quarter._is_selected or 648 {self.name}.submission_year._is_selected %}} 649 AVG(${{{matching_measure['name']}}}) OVER ( 650 {{% if date_groupby_position._parameter_value != "" %}} 651 ORDER BY {{% parameter date_groupby_position %}} 652 {{% elsif {self.name}.submission_date._is_selected %}} 653 ORDER BY ${{TABLE}}.analysis_basis 654 {{% else %}} 655 ERROR("date_groupby_position needs to be set when using submission_week, 656 submission_month, submission_quarter, or submission_year") 657 {{% endif %}} 658 ROWS BETWEEN {window_size} PRECEDING AND CURRENT ROW 659 {{% else %}} 660 ERROR('Please select a "submission_*" field to compute the rolling average') 661 {{% endif %}} 662 )""", 663 "group_label": "Statistics", 664 "description": f"{window_size} day rolling average of {dimension_label}", 665 } 666 ) 667 668 # period-over-period measures compare current values with historical values 669 if "period_over_period" in statistic_conf: 670 # find all statistics that have period-over-period configured 671 matching_measures = [ 672 m 673 for m in measures 674 if m["name"].startswith( 675 f"{dimension['name']}_{statistic_slug}" 676 ) 677 and "_period_over_period_" not in m["name"] 678 ] 679 680 # create period-over-period measures for each configured time period 681 for period in statistic_conf["period_over_period"].get( 682 "periods", [] 683 ): 684 for matching_measure in matching_measures: 685 original_sql = matching_measure["sql"] 686 687 # rolling averages need special handling to adjust window sizes 688 # based on the selected time granularity 689 if statistic_slug == "rolling_average": 690 sql = self._create_rolling_average_period_sql( 691 original_sql, period 692 ) 693 else: 694 # standard measures use LAG function with time-adjusted periods 695 sql = self._create_lag_period_sql( 696 matching_measure, period 697 ) 698 699 # generate different types of period-over-period comparisons 700 for kind in statistic_conf["period_over_period"].get( 701 "kinds", ["previous"] 702 ): 703 if kind == "difference": 704 comparison_sql = f"({original_sql}) - ({sql})" 705 elif kind == "relative_change": 706 comparison_sql = f"SAFE_DIVIDE(({original_sql}), NULLIF(({sql}), 0)) - 1" 707 else: 708 comparison_sql = sql 709 710 measures.append( 711 { 712 "name": f"{matching_measure['name']}_{period}_day_period_over_period_{kind}", 713 "type": "number", 714 "label": f"{matching_measure['label']} " 715 + f"{period} Day Period Over Period {kind.capitalize()}", 716 "description": f"Period over period {kind.capitalize()} of " 717 + f"{matching_measure['label']} over {period} days", 718 "group_label": 
"Statistics", 719 "sql": comparison_sql, 720 } 721 ) 722 723 return measures 724 725 def _create_rolling_average_period_sql(self, original_sql: str, period: int) -> str: 726 """ 727 Create period-over-period SQL for rolling average measures. 728 729 Rolling averages require adjusting the window size based on the selected time granularity. 730 """ 731 rows_match = re.search( 732 r"ROWS BETWEEN (\d+) PRECEDING AND CURRENT ROW", 733 original_sql, 734 ) 735 736 if not rows_match: 737 return original_sql 738 739 original_window_size = int(rows_match.group(1)) 740 time_conditions = [] 741 742 for unit, divisor in self.TIME_UNITS: 743 # calculate adjusted window size for this time granularity 744 adjusted_window = ( 745 (original_window_size + period) // divisor 746 if unit != "date" 747 else original_window_size + period 748 ) 749 750 condition = ( 751 f"{{% {'if' if unit == 'date' else 'elsif'} " 752 + f"{self.name}.submission_{unit}._is_selected %}}" 753 ) 754 755 # modify the ROWS clause to extend the window by the period 756 modified_sql = re.sub( 757 r"ROWS BETWEEN \d+ PRECEDING AND CURRENT ROW", 758 f"ROWS BETWEEN {adjusted_window} PRECEDING AND " 759 + f"{adjusted_window - original_window_size} PRECEDING", 760 original_sql, 761 ) 762 time_conditions.append(f"{condition}\n{modified_sql}") 763 764 return ( 765 "\n".join(time_conditions) 766 + f"\n{{% else %}}\n{original_sql}\n{{% endif %}}" 767 ) 768 769 def _create_lag_period_sql(self, matching_measure: dict, period: int) -> str: 770 """ 771 Create period-over-period SQL using LAG function for standard measures. 772 773 LAG function looks back N periods to get historical values. The period is adjusted 774 based on the selected time granularity (daily, weekly, monthly, etc.). 775 """ 776 time_conditions = [] 777 778 for unit, divisor in self.TIME_UNITS: 779 # calculate adjusted period for this time granularity 780 adjusted_period = period // divisor if unit != "date" else period 781 782 order_by = ( 783 f"${{submission_{unit}}}" if unit != "date" else "${submission_date}" 784 ) 785 786 condition = ( 787 f"{{% {'if' if unit == 'date' else 'elsif'} " 788 + f"{self.name}.submission_{unit}._is_selected %}}" 789 ) 790 791 lag_sql = f"""LAG(${{{matching_measure['name']}}}, {adjusted_period}) OVER ( 792 {{% if date_groupby_position._parameter_value != "" %}} 793 ORDER BY {{% parameter date_groupby_position %}} 794 {{% else %}} 795 ORDER BY {order_by} 796 {{% endif %}} 797 )""" 798 time_conditions.append(f"{condition}\n{lag_sql}") 799 800 return ( 801 "\n".join(time_conditions) 802 + f"\n{{% else %}}\nLAG({matching_measure['name']}, {period}) " 803 + "OVER (ORDER BY ${submission_date})\n{% endif %}" 804 )
class MetricDefinitionsView(View):
A view for metric-hub metrics that come from the same data source.
MetricDefinitionsView(namespace: str, name: str, tables: List[Dict[str, str]])
Get an instance of a MetricDefinitionsView.
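A minimal construction sketch (the namespace and data source names are hypothetical). The view name is expected to carry the metric_definitions_ prefix, since to_lookml and get_dimensions strip it to resolve the metric-hub data source:

    view = MetricDefinitionsView(
        namespace="firefox_desktop",  # hypothetical namespace
        name="metric_definitions_clients_daily",  # resolves data source "clients_daily"
        tables=[],
    )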
@classmethod
def from_db_views(klass, namespace: str, is_glean: bool, channels: List[Dict[str, str]], db_views: dict) -> Iterator[MetricDefinitionsView]:
Get Metric Definition Views from db views and app variants.
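As implemented, metric definitions views are never derived from database views, so the returned iterator is always empty; a sketch of the observable behavior (placeholder arguments):

    views = list(MetricDefinitionsView.from_db_views("firefox_desktop", True, [], {}))
    assert views == []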
@classmethod
def from_dict(klass, namespace: str, name: str, definition: generator.views.view.ViewDict) -> MetricDefinitionsView:
Get a MetricDefinitionsView from a dict representation.
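Only the "tables" key of the definition is read here; any other keys are ignored by this method. A short sketch with a hypothetical dict:

    definition = {"tables": []}  # hypothetical minimal ViewDict
    view = MetricDefinitionsView.from_dict(
        "firefox_desktop", "metric_definitions_clients_daily", definition
    )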
def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
51 def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 52 """Get this view as LookML.""" 53 namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions( 54 self.namespace 55 ) 56 if namespace_definitions is None: 57 return {} 58 59 # get all metric definitions that depend on the data source represented by this view 60 data_source_name = re.sub("^metric_definitions_", "", self.name) 61 data_source_definition = MetricsConfigLoader.configs.get_data_source_definition( 62 data_source_name, self.namespace 63 ) 64 65 if data_source_definition is None: 66 return {} 67 68 # todo: hide deprecated metrics? 69 metric_definitions = [ 70 f"""{ 71 MetricsConfigLoader.configs.get_env().from_string(metric.select_expression).render() 72 } AS {metric_slug},\n""" 73 for metric_slug, metric in namespace_definitions.metrics.definitions.items() 74 if metric.select_expression 75 and metric.data_source.name == data_source_name 76 and metric.type != "histogram" 77 ] 78 79 if metric_definitions == []: 80 return {} 81 82 # Metric definitions are intended to aggregated by client per date. 83 # A derived table is needed to do these aggregations, instead of defining them as measures 84 # we want to have them available as dimensions (which don't allow aggregations in their definitions) 85 # to allow for custom measures to be later defined in Looker that aggregate these per client metrics. 86 view_defn: Dict[str, Any] = {"name": self.name} 87 88 ignore_base_fields = [ 89 "client_id", 90 "submission_date", 91 "submission", 92 "first_run", 93 ] + [ 94 metric_slug 95 for metric_slug, metric in namespace_definitions.metrics.definitions.items() 96 if metric.select_expression 97 and metric.data_source.name == data_source_name 98 and metric.type != "histogram" 99 ] 100 101 base_view_dimensions = {} 102 joined_data_sources = [] 103 104 # check if the metric data source has joins 105 # joined data sources are generally used for creating the "Base Fields" 106 if data_source_definition.joins: 107 # determine the dimensions selected by the joined data sources 108 for joined_data_source_slug, join in data_source_definition.joins.items(): 109 joined_data_source = ( 110 MetricsConfigLoader.configs.get_data_source_definition( 111 joined_data_source_slug, self.namespace 112 ) 113 ) 114 115 if joined_data_source.columns_as_dimensions: 116 joined_data_sources.append(joined_data_source) 117 118 date_filter = None 119 if joined_data_source.submission_date_column != "NULL": 120 date_filter = ( 121 None 122 if joined_data_source.submission_date_column is None 123 or joined_data_source.submission_date_column == "NULL" 124 else f"{joined_data_source.submission_date_column} = '2023-01-01'" 125 ) 126 127 # create Looker dimensions by doing a dryrun 128 query = MetricsConfigLoader.configs.get_data_source_sql( 129 joined_data_source_slug, 130 self.namespace, 131 where=date_filter, 132 ).format(dataset=self.namespace) 133 134 base_view_dimensions[joined_data_source_slug] = ( 135 lookml_utils._generate_dimensions_from_query( 136 query, dryrun=dryrun 137 ) 138 ) 139 140 if ( 141 data_source_definition.client_id_column == "NULL" 142 and not base_view_dimensions 143 ) or data_source_definition.columns_as_dimensions: 144 # if the metrics data source doesn't have any joins then use the dimensions 145 # of the data source itself as base fields 146 date_filter = None 147 if data_source_definition.submission_date_column != "NULL": 148 date_filter = ( 149 "submission_date = '2023-01-01'" 150 if 
data_source_definition.submission_date_column is None 151 else f"{data_source_definition.submission_date_column} = '2023-01-01'" 152 ) 153 154 query = MetricsConfigLoader.configs.get_data_source_sql( 155 data_source_definition.name, 156 self.namespace, 157 where=date_filter, 158 ignore_joins=True, 159 ).format(dataset=self.namespace) 160 161 base_view_dimensions[data_source_definition.name] = ( 162 lookml_utils._generate_dimensions_from_query(query, dryrun) 163 ) 164 165 # to prevent duplicate dimensions, especially when working with time dimensions 166 # where names are modified potentially causing naming collisions 167 seen_dimensions = set() 168 # prepare base field data for query 169 base_view_fields = [] 170 for data_source, dimensions in base_view_dimensions.items(): 171 for dimension in dimensions: 172 if ( 173 dimension["name"] not in ignore_base_fields 174 and dimension["name"] not in seen_dimensions 175 and "hidden" not in dimension 176 ): 177 sql = ( 178 f"{data_source}.{dimension['name'].replace('__', '.')} AS" 179 + f" {data_source}_{dimension['name']},\n" 180 ) 181 # date/time/timestamp suffixes are removed when generating lookml dimensions, however we 182 # need the original field name for the derived view SQL 183 if dimension["type"] == "time" and not dimension["sql"].endswith( 184 dimension["name"] 185 ): 186 suffix = dimension["sql"].split( 187 dimension["name"].replace("__", ".") 188 )[-1] 189 sql = ( 190 f"{data_source}.{(dimension['name']+suffix).replace('__', '.')} AS" 191 + f" {data_source}_{dimension['name']},\n" 192 ) 193 194 base_view_fields.append( 195 { 196 "name": f"{data_source}_{dimension['name']}", 197 "select_sql": f"{data_source}_{dimension['name']},\n", 198 "sql": sql, 199 } 200 ) 201 seen_dimensions.add(dimension["name"]) 202 203 client_id_field = ( 204 "NULL" 205 if data_source_definition.client_id_column == "NULL" 206 else f'{data_source_definition.client_id_column or "client_id"}' 207 ) 208 209 # filters for date ranges 210 where_sql = " AND ".join( 211 [ 212 f""" 213 {data_source.name}.{data_source.submission_date_column or "submission_date"} 214 {{% if analysis_period._is_filtered %}} 215 BETWEEN 216 DATE_SUB( 217 COALESCE( 218 SAFE_CAST( 219 {{% date_start analysis_period %}} AS DATE 220 ), CURRENT_DATE()), 221 INTERVAL {{% parameter lookback_days %}} DAY 222 ) AND 223 COALESCE( 224 SAFE_CAST( 225 {{% date_end analysis_period %}} AS DATE 226 ), CURRENT_DATE()) 227 {{% else %}} 228 BETWEEN 229 DATE_SUB( 230 COALESCE( 231 SAFE_CAST( 232 {{% date_start submission_date %}} AS DATE 233 ), CURRENT_DATE()), 234 INTERVAL {{% parameter lookback_days %}} DAY 235 ) AND 236 COALESCE( 237 SAFE_CAST( 238 {{% date_end submission_date %}} AS DATE 239 ), CURRENT_DATE()) 240 {{% endif %}} 241 """ 242 for data_source in [data_source_definition] + joined_data_sources 243 if data_source.submission_date_column != "NULL" 244 ] 245 ) 246 247 # filte on sample_id if such a field exists 248 for field in base_view_fields: 249 if field["name"].endswith("_sample_id"): 250 where_sql += f""" 251 AND 252 {field['name'].split('_sample_id')[0]}.sample_id < {{% parameter sampling %}} 253 """ 254 break 255 256 view_defn["derived_table"] = { 257 "sql": f""" 258 SELECT 259 {"".join(metric_definitions)} 260 {"".join([field['select_sql'] for field in base_view_fields])} 261 {client_id_field} AS client_id, 262 {{% if aggregate_metrics_by._parameter_value == 'day' %}} 263 {data_source_definition.submission_date_column or "submission_date"} AS analysis_basis 264 {{% elsif 
        view_defn["derived_table"] = {
            "sql": f"""
            SELECT
                {"".join(metric_definitions)}
                {"".join([field['select_sql'] for field in base_view_fields])}
                {client_id_field} AS client_id,
                {{% if aggregate_metrics_by._parameter_value == 'day' %}}
                {data_source_definition.submission_date_column or "submission_date"} AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'week' %}}
                (FORMAT_DATE(
                    '%F',
                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
                    WEEK(MONDAY)))
                ) AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'month' %}}
                (FORMAT_DATE(
                    '%Y-%m',
                    {data_source_definition.submission_date_column or "submission_date"})
                ) AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'quarter' %}}
                (FORMAT_DATE(
                    '%Y-%m',
                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
                    QUARTER))
                ) AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'year' %}}
                (EXTRACT(
                    YEAR FROM {data_source_definition.submission_date_column or "submission_date"})
                ) AS analysis_basis
                {{% else %}}
                NULL AS analysis_basis
                {{% endif %}}
            FROM
                (
                    SELECT
                        {data_source_name}.*,
                        {"".join([field['sql'] for field in base_view_fields])}
                    FROM
                    {
                        MetricsConfigLoader.configs.get_data_source_sql(
                            data_source_name,
                            self.namespace,
                            select_fields=False
                        ).format(dataset=self.namespace)
                    }
                    WHERE {where_sql}
                )
            GROUP BY
                {"".join([field['select_sql'] for field in base_view_fields])}
                client_id,
                analysis_basis
            """
        }

        view_defn["dimensions"] = self.get_dimensions()
        view_defn["dimension_groups"] = self.get_dimension_groups()

        # add the Looker dimensions
        for data_source, dimensions in base_view_dimensions.items():
            for dimension in dimensions:
                if dimension["name"] not in ignore_base_fields:
                    dimension["sql"] = (
                        "${TABLE}." + f"{data_source}_{dimension['name']}"
                    )
                    dimension["group_label"] = "Base Fields"
                    if not lookml_utils._is_dimension_group(dimension):
                        view_defn["dimensions"].append(dimension)
                    else:
                        view_defn["dimension_groups"].append(dimension)
                    # avoid duplicate dimensions
                    ignore_base_fields.append(dimension["name"])

        view_defn["measures"] = self.get_measures(
            view_defn["dimensions"],
        )
        view_defn["sets"] = self._get_sets()
        view_defn["parameters"] = self._get_parameters(view_defn["dimensions"])
        view_defn["filters"] = self._get_filters()

        return {"views": [view_defn]}
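    # Usage sketch (the namespace, view name, and dryrun factory are hypothetical;
    # assumes metric-hub defines a "clients_daily" data source for the namespace):
    #
    #   view = MetricDefinitionsView.from_dict(
    #       "firefox_desktop", "metric_definitions_clients_daily", {"tables": []}
    #   )
    #   lookml = view.to_lookml(v1_name=None, dryrun=dryrun_factory)
    #   derived_sql = lookml["views"][0]["derived_table"]["sql"]
    #   # derived_sql aggregates the metrics per client_id and analysis_basis and
    #   # still contains Liquid tags ({% parameter lookback_days %}, ...) that
    #   # Looker resolves at query time.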
    def get_dimensions(
        self,
        _table=None,
        _v1_name: Optional[str] = None,
        _dryrun=None,
    ) -> List[Dict[str, Any]]:
        """Get the set of dimensions for this view based on the metric definitions in metric-hub."""
        namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
            self.namespace
        )
        metric_definitions = namespace_definitions.metrics.definitions
        data_source_name = re.sub("^metric_definitions_", "", self.name)

        return [
            {
                "name": "client_id",
                "type": "string",
                "sql": "SAFE_CAST(${TABLE}.client_id AS STRING)",
                "label": "Client ID",
                "primary_key": "yes",
                "group_label": "Base Fields",
                "description": "Unique client identifier",
            },
        ] + [  # add a dimension for each metric definition
            {
                "name": metric_slug,
                "group_label": "Metrics",
                "label": metric.friendly_name
                or lookml_utils.slug_to_title(metric_slug),
                "description": metric.description or "",
                "type": "number",
                "sql": "${TABLE}." + metric_slug,
            }
            for metric_slug, metric in metric_definitions.items()
            if metric.select_expression
            and metric.data_source.name == data_source_name
            and metric.type != "histogram"
        ]
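    # For a hypothetical metric-hub metric with slug "active_hours" (friendly name
    # "Active Hours"; description text is made up here), the list returned above
    # would contain, next to the client_id dimension, an entry like:
    #
    #   {
    #       "name": "active_hours",
    #       "group_label": "Metrics",
    #       "label": "Active Hours",
    #       "description": "Total active hours per client.",
    #       "type": "number",
    #       "sql": "${TABLE}.active_hours",
    #   }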
    def get_dimension_groups(self) -> List[Dict[str, Any]]:
        """Get dimension groups for this view."""
        return [
            {
                "name": "submission",
                "type": "time",
                "datatype": "date",
                "group_label": "Base Fields",
                "sql": "${TABLE}.analysis_basis",
                "label": "Submission",
                "timeframes": [
                    "raw",
                    "date",
                    "week",
                    "month",
                    "quarter",
                    "year",
                ],
            }
        ]
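    # When serialized to LookML, the dimension group above renders roughly as
    # (a sketch, not verbatim generator output):
    #
    #   dimension_group: submission {
    #     type: time
    #     datatype: date
    #     timeframes: [raw, date, week, month, quarter, year]
    #     sql: ${TABLE}.analysis_basis ;;
    #     label: "Submission"
    #     group_label: "Base Fields"
    #   }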
    def get_measures(
        self, dimensions: List[dict]
    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
        """Get statistics as measures."""
        measures = []
        sampling = "1"

        for dim in dimensions:
            if dim["name"] == "sample_id":
                sampling = "100 / {% parameter sampling %}"
                break

        for dimension in dimensions:
            metric = MetricsConfigLoader.configs.get_metric_definition(
                dimension["name"], self.namespace
            )
            if metric and metric.statistics:
                for statistic_slug, statistic_conf in metric.statistics.items():
                    dimension_label = dimension.get("label") or dimension.get("name")
                    if statistic_slug in [
                        "average",
                        "max",
                        "min",
                        "median",
                    ]:
                        measures.append(
                            {
                                "name": f"{dimension['name']}_{statistic_slug}",
                                "type": statistic_slug,
                                "sql": "${TABLE}." + dimension["name"],
                                "label": f"{dimension_label} {statistic_slug.title()}",
                                "group_label": "Statistics",
                                "description": f"{statistic_slug.title()} of {dimension_label}",
                            }
                        )
                    elif statistic_slug == "sum":
                        measures.append(
                            {
                                "name": f"{dimension['name']}_{statistic_slug}",
                                "type": "sum",
                                "sql": "${TABLE}." + dimension["name"] + "*" + sampling,
                                "label": f"{dimension_label} Sum",
                                "group_label": "Statistics",
                                "description": f"Sum of {dimension_label}",
                            }
                        )
                    elif statistic_slug == "client_count":
                        measures.append(
                            {
                                "name": (
                                    f"{dimension['name']}_{statistic_slug}_sampled"
                                    if sampling
                                    else f"{dimension['name']}_{statistic_slug}"
                                ),
                                "type": "count_distinct",
                                "label": f"{dimension_label} Client Count",
                                "group_label": "Statistics",
                                "sql": "IF(${TABLE}."
                                + f"{dimension['name']} > 0, "
                                + "${TABLE}.client_id, SAFE_CAST(NULL AS STRING))",
                                "description": f"Number of clients with {dimension_label}",
                                "hidden": "yes" if sampling else "no",
                            }
                        )

                        if sampling:
                            measures.append(
                                {
                                    "name": f"{dimension['name']}_{statistic_slug}",
                                    "type": "number",
                                    "label": f"{dimension_label} Client Count",
                                    "group_label": "Statistics",
                                    "sql": "${"
                                    + f"{dimension['name']}_{statistic_slug}_sampled"
                                    + "} *"
                                    + sampling,
                                    "description": f"Number of clients with {dimension_label}",
                                }
                            )
                    elif statistic_slug == "dau_proportion":
                        if "numerator" in statistic_conf:
                            [numerator, numerator_stat] = statistic_conf[
                                "numerator"
                            ].split(".")
                            measures.append(
                                {
                                    "name": "DAU_sampled" if sampling else "DAU",
                                    "type": "count_distinct",
                                    "label": "DAU",
                                    "group_label": "Statistics",
                                    "sql": "${TABLE}.client_id",
                                    "hidden": "yes",
                                }
                            )

                            if sampling:
                                measures.append(
                                    {
                                        "name": "DAU",
                                        "type": "number",
                                        "label": "DAU",
                                        "group_label": "Statistics",
                                        "sql": "${DAU_sampled} *" + sampling,
                                        "hidden": "yes",
                                    }
                                )

                            measures.append(
                                {
                                    "name": f"{dimension['name']}_{statistic_slug}",
                                    "type": "number",
                                    "label": f"{dimension_label} DAU Proportion",
                                    "sql": "SAFE_DIVIDE(${"
                                    + f"{numerator}_{numerator_stat}"
                                    + "}, ${DAU})",
                                    "group_label": "Statistics",
                                    "description": f"Proportion of daily active users with {dimension['name']}",
                                }
                            )
                    elif statistic_slug == "ratio":
                        if (
                            "numerator" in statistic_conf
                            and "denominator" in statistic_conf
                        ):
                            [numerator, numerator_stat] = statistic_conf[
                                "numerator"
                            ].split(".")
"denominator" 605 ].split(".") 606 607 measures.append( 608 { 609 "name": f"{dimension['name']}_{statistic_slug}", 610 "type": "number", 611 "label": f"{dimension_label} Ratio", 612 "sql": "SAFE_DIVIDE(${" 613 + f"{numerator}_{numerator_stat}" 614 + "}, ${" 615 + f"{denominator}_{denominator_stat}" 616 + "})", 617 "group_label": "Statistics", 618 "description": f"""" 619 Ratio between {statistic_conf['numerator']} and 620 {statistic_conf['denominator']}""", 621 } 622 ) 623 elif statistic_slug == "rolling_average": 624 # rolling averages are computed over existing statistics (e.g. sum, ratio) 625 aggregations = statistic_conf.get("aggregations", ["sum"]) 626 for aggregation in aggregations: 627 # find measures that match the current dimension and aggregation type 628 matching_measures = [ 629 m 630 for m in measures 631 if m["name"].startswith( 632 f"{dimension['name']}_{aggregation}" 633 ) 634 ] 635 if "window_sizes" in statistic_conf: 636 for window_size in statistic_conf["window_sizes"]: 637 for matching_measure in matching_measures: 638 # these statistics require some time dimension to be selected 639 measures.append( 640 { 641 "name": f"{dimension['name']}_{statistic_slug}_{aggregation}_{window_size}_day", 642 "type": "number", 643 "label": f"{matching_measure['label']} {window_size} Day Rolling Average", 644 "sql": f""" 645 {{% if {self.name}.submission_date._is_selected or 646 {self.name}.submission_week._is_selected or 647 {self.name}.submission_month._is_selected or 648 {self.name}.submission_quarter._is_selected or 649 {self.name}.submission_year._is_selected %}} 650 AVG(${{{matching_measure['name']}}}) OVER ( 651 {{% if date_groupby_position._parameter_value != "" %}} 652 ORDER BY {{% parameter date_groupby_position %}} 653 {{% elsif {self.name}.submission_date._is_selected %}} 654 ORDER BY ${{TABLE}}.analysis_basis 655 {{% else %}} 656 ERROR("date_groupby_position needs to be set when using submission_week, 657 submission_month, submission_quarter, or submission_year") 658 {{% endif %}} 659 ROWS BETWEEN {window_size} PRECEDING AND CURRENT ROW 660 {{% else %}} 661 ERROR('Please select a "submission_*" field to compute the rolling average') 662 {{% endif %}} 663 )""", 664 "group_label": "Statistics", 665 "description": f"{window_size} day rolling average of {dimension_label}", 666 } 667 ) 668 669 # period-over-period measures compare current values with historical values 670 if "period_over_period" in statistic_conf: 671 # find all statistics that have period-over-period configured 672 matching_measures = [ 673 m 674 for m in measures 675 if m["name"].startswith( 676 f"{dimension['name']}_{statistic_slug}" 677 ) 678 and "_period_over_period_" not in m["name"] 679 ] 680 681 # create period-over-period measures for each configured time period 682 for period in statistic_conf["period_over_period"].get( 683 "periods", [] 684 ): 685 for matching_measure in matching_measures: 686 original_sql = matching_measure["sql"] 687 688 # rolling averages need special handling to adjust window sizes 689 # based on the selected time granularity 690 if statistic_slug == "rolling_average": 691 sql = self._create_rolling_average_period_sql( 692 original_sql, period 693 ) 694 else: 695 # standard measures use LAG function with time-adjusted periods 696 sql = self._create_lag_period_sql( 697 matching_measure, period 698 ) 699 700 # generate different types of period-over-period comparisons 701 for kind in statistic_conf["period_over_period"].get( 702 "kinds", ["previous"] 703 ): 704 if kind == 
"difference": 705 comparison_sql = f"({original_sql}) - ({sql})" 706 elif kind == "relative_change": 707 comparison_sql = f"SAFE_DIVIDE(({original_sql}), NULLIF(({sql}), 0)) - 1" 708 else: 709 comparison_sql = sql 710 711 measures.append( 712 { 713 "name": f"{matching_measure['name']}_{period}_day_period_over_period_{kind}", 714 "type": "number", 715 "label": f"{matching_measure['label']} " 716 + f"{period} Day Period Over Period {kind.capitalize()}", 717 "description": f"Period over period {kind.capitalize()} of " 718 + f"{matching_measure['label']} over {period} days", 719 "group_label": "Statistics", 720 "sql": comparison_sql, 721 } 722 ) 723 724 return measures