generator.views.metric_definitions_view
Class to describe a view with metrics from metric-hub.
1"""Class to describe a view with metrics from metric-hub.""" 2 3from __future__ import annotations 4 5import re 6from typing import Any, Dict, Iterator, List, Optional, Union 7 8from generator.metrics_utils import MetricsConfigLoader 9 10from . import lookml_utils 11from .view import View, ViewDict 12 13 14class MetricDefinitionsView(View): 15 """A view for metric-hub metrics that come from the same data source.""" 16 17 type: str = "metric_definitions_view" 18 19 def __init__(self, namespace: str, name: str, tables: List[Dict[str, str]]): 20 """Get an instance of an MetricDefinitionsView.""" 21 super().__init__(namespace, name, MetricDefinitionsView.type, tables) 22 23 @classmethod 24 def from_db_views( 25 klass, 26 namespace: str, 27 is_glean: bool, 28 channels: List[Dict[str, str]], 29 db_views: dict, 30 ) -> Iterator[MetricDefinitionsView]: 31 """Get Metric Definition Views from db views and app variants.""" 32 return iter(()) 33 34 @classmethod 35 def from_dict( 36 klass, namespace: str, name: str, definition: ViewDict 37 ) -> MetricDefinitionsView: 38 """Get a MetricDefinitionsView from a dict representation.""" 39 return klass(namespace, name, definition.get("tables", [])) 40 41 def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 42 """Get this view as LookML.""" 43 namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions( 44 self.namespace 45 ) 46 if namespace_definitions is None: 47 return {} 48 49 # get all metric definitions that depend on the data source represented by this view 50 data_source_name = re.sub("^metric_definitions_", "", self.name) 51 data_source_definition = MetricsConfigLoader.configs.get_data_source_definition( 52 data_source_name, self.namespace 53 ) 54 55 if data_source_definition is None: 56 return {} 57 58 # todo: hide deprecated metrics? 
59 metric_definitions = [ 60 f"""{ 61 MetricsConfigLoader.configs.get_env().from_string(metric.select_expression).render() 62 } AS {metric_slug},\n""" 63 for metric_slug, metric in namespace_definitions.metrics.definitions.items() 64 if metric.select_expression 65 and metric.data_source.name == data_source_name 66 and metric.type != "histogram" 67 ] 68 69 if metric_definitions == []: 70 return {} 71 72 # Metric definitions are intended to aggregated by client per date. 73 # A derived table is needed to do these aggregations, instead of defining them as measures 74 # we want to have them available as dimensions (which don't allow aggregations in their definitions) 75 # to allow for custom measures to be later defined in Looker that aggregate these per client metrics. 76 view_defn: Dict[str, Any] = {"name": self.name} 77 78 ignore_base_fields = [ 79 "client_id", 80 "submission_date", 81 "submission", 82 "first_run", 83 ] + [ 84 metric_slug 85 for metric_slug, metric in namespace_definitions.metrics.definitions.items() 86 if metric.select_expression 87 and metric.data_source.name == data_source_name 88 and metric.type != "histogram" 89 ] 90 91 base_view_dimensions = {} 92 joined_data_sources = [] 93 94 # check if the metric data source has joins 95 # joined data sources are generally used for creating the "Base Fields" 96 if data_source_definition.joins: 97 # determine the dimensions selected by the joined data sources 98 for joined_data_source_slug, join in data_source_definition.joins.items(): 99 joined_data_source = ( 100 MetricsConfigLoader.configs.get_data_source_definition( 101 joined_data_source_slug, self.namespace 102 ) 103 ) 104 105 if joined_data_source.columns_as_dimensions: 106 joined_data_sources.append(joined_data_source) 107 108 date_filter = None 109 if joined_data_source.submission_date_column != "NULL": 110 date_filter = ( 111 None 112 if joined_data_source.submission_date_column is None 113 or joined_data_source.submission_date_column == "NULL" 114 
else f"{joined_data_source.submission_date_column} = '2023-01-01'" 115 ) 116 117 # create Looker dimensions by doing a dryrun 118 query = MetricsConfigLoader.configs.get_data_source_sql( 119 joined_data_source_slug, 120 self.namespace, 121 where=date_filter, 122 ).format(dataset=self.namespace) 123 124 base_view_dimensions[joined_data_source_slug] = ( 125 lookml_utils._generate_dimensions_from_query( 126 query, dryrun=dryrun 127 ) 128 ) 129 130 if ( 131 data_source_definition.client_id_column == "NULL" 132 or data_source_definition.columns_as_dimensions 133 ): 134 # if the metrics data source doesn't have any joins then use the dimensions 135 # of the data source itself as base fields 136 date_filter = None 137 if data_source_definition.submission_date_column != "NULL": 138 date_filter = ( 139 "submission_date = '2023-01-01'" 140 if data_source_definition.submission_date_column is None 141 else f"{data_source_definition.submission_date_column} = '2023-01-01'" 142 ) 143 144 query = MetricsConfigLoader.configs.get_data_source_sql( 145 data_source_definition.name, 146 self.namespace, 147 where=date_filter, 148 ignore_joins=True, 149 ).format(dataset=self.namespace) 150 151 base_view_dimensions[data_source_definition.name] = ( 152 lookml_utils._generate_dimensions_from_query(query, dryrun) 153 ) 154 155 # prepare base field data for query 156 base_view_fields = [ 157 { 158 "name": f"{data_source}_{dimension['name']}", 159 "select_sql": f"{data_source}_{dimension['name']},\n", 160 "sql": f"{data_source}.{dimension['name'].replace('__', '.')} AS {data_source}_{dimension['name']},\n", 161 } 162 for data_source, dimensions in base_view_dimensions.items() 163 for dimension in dimensions 164 if dimension["name"] not in ignore_base_fields 165 and "hidden" not in dimension 166 and dimension["type"] != "time" 167 ] 168 169 client_id_field = ( 170 "NULL" 171 if data_source_definition.client_id_column == "NULL" 172 else f'{data_source_definition.client_id_column or "client_id"}' 
173 ) 174 175 # filters for date ranges 176 where_sql = " AND ".join( 177 [ 178 f""" 179 {data_source.name}.{data_source.submission_date_column or "submission_date"} 180 BETWEEN 181 COALESCE( 182 SAFE_CAST( 183 {{% date_start submission_date %}} AS DATE 184 ), CURRENT_DATE()) AND 185 COALESCE( 186 SAFE_CAST( 187 {{% date_end submission_date %}} AS DATE 188 ), CURRENT_DATE()) 189 """ 190 for data_source in [data_source_definition] + joined_data_sources 191 if data_source.submission_date_column != "NULL" 192 ] 193 ) 194 195 # filte on sample_id if such a field exists 196 for field in base_view_fields: 197 if field["name"].endswith("_sample_id"): 198 where_sql += f""" 199 AND 200 {field['name'].split('_sample_id')[0]}.sample_id < {{% parameter sampling %}} 201 """ 202 break 203 204 view_defn["derived_table"] = { 205 "sql": f""" 206 SELECT 207 {"".join(metric_definitions)} 208 {"".join([field['select_sql'] for field in base_view_fields])} 209 {client_id_field} AS client_id, 210 {{% if aggregate_metrics_by._parameter_value == 'day' %}} 211 {data_source_definition.submission_date_column or "submission_date"} AS analysis_basis 212 {{% elsif aggregate_metrics_by._parameter_value == 'week' %}} 213 (FORMAT_DATE( 214 '%F', 215 DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"}, 216 WEEK(MONDAY))) 217 ) AS analysis_basis 218 {{% elsif aggregate_metrics_by._parameter_value == 'month' %}} 219 (FORMAT_DATE( 220 '%Y-%m', 221 {data_source_definition.submission_date_column or "submission_date"}) 222 ) AS analysis_basis 223 {{% elsif aggregate_metrics_by._parameter_value == 'quarter' %}} 224 (FORMAT_DATE( 225 '%Y-%m', 226 DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"}, 227 QUARTER)) 228 ) AS analysis_basis 229 {{% elsif aggregate_metrics_by._parameter_value == 'year' %}} 230 (EXTRACT( 231 YEAR FROM {data_source_definition.submission_date_column or "submission_date"}) 232 ) AS analysis_basis 233 {{% else %}} 234 NULL as 
analysis_basis 235 {{% endif %}} 236 FROM 237 ( 238 SELECT 239 {data_source_name}.*, 240 {"".join([field['sql'] for field in base_view_fields])} 241 FROM 242 { 243 MetricsConfigLoader.configs.get_data_source_sql( 244 data_source_name, 245 self.namespace, 246 select_fields=False 247 ).format(dataset=self.namespace) 248 } 249 WHERE {where_sql} 250 ) 251 GROUP BY 252 {"".join([field['select_sql'] for field in base_view_fields])} 253 client_id, 254 analysis_basis 255 """ 256 } 257 258 view_defn["dimensions"] = self.get_dimensions() 259 view_defn["dimension_groups"] = self.get_dimension_groups() 260 261 # add the Looker dimensions 262 for data_source, dimensions in base_view_dimensions.items(): 263 for dimension in dimensions: 264 if ( 265 dimension["name"] not in ignore_base_fields 266 and dimension.get("type", "") != "time" 267 ): 268 dimension["sql"] = ( 269 "${TABLE}." + f"{data_source}_{dimension['name']}" 270 ) 271 dimension["group_label"] = "Base Fields" 272 if not lookml_utils._is_dimension_group(dimension): 273 view_defn["dimensions"].append(dimension) 274 else: 275 view_defn["dimension_groups"].append(dimension) 276 # avoid duplicate dimensions 277 ignore_base_fields.append(dimension["name"]) 278 279 view_defn["measures"] = self.get_measures( 280 view_defn["dimensions"], 281 ) 282 view_defn["sets"] = self._get_sets() 283 view_defn["parameters"] = self._get_parameters(view_defn["dimensions"]) 284 285 return {"views": [view_defn]} 286 287 def get_dimensions( 288 self, 289 _table=None, 290 _v1_name: Optional[str] = None, 291 _dryrun=None, 292 ) -> List[Dict[str, Any]]: 293 """Get the set of dimensions for this view based on the metric definitions in metric-hub.""" 294 namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions( 295 self.namespace 296 ) 297 metric_definitions = namespace_definitions.metrics.definitions 298 data_source_name = re.sub("^metric_definitions_", "", self.name) 299 300 return [ 301 { 302 "name": "client_id", 303 "type": 
"string", 304 "sql": "SAFE_CAST(${TABLE}.client_id AS STRING)", 305 "label": "Client ID", 306 "primary_key": "yes", 307 "group_label": "Base Fields", 308 "description": "Unique client identifier", 309 }, 310 ] + [ # add a dimension for each metric definition 311 { 312 "name": metric_slug, 313 "group_label": "Metrics", 314 "label": metric.friendly_name 315 or lookml_utils.slug_to_title(metric_slug), 316 "description": metric.description or "", 317 "type": "number", 318 "sql": "${TABLE}." + metric_slug, 319 } 320 for metric_slug, metric in metric_definitions.items() 321 if metric.select_expression 322 and metric.data_source.name == data_source_name 323 and metric.type != "histogram" 324 ] 325 326 def get_dimension_groups(self) -> List[Dict[str, Any]]: 327 """Get dimension groups for this view.""" 328 return [ 329 { 330 "name": "submission", 331 "type": "time", 332 "group_label": "Base Fields", 333 "sql": "CAST(${TABLE}.analysis_basis AS TIMESTAMP)", 334 "label": "Submission", 335 "timeframes": [ 336 "raw", 337 "date", 338 "week", 339 "month", 340 "quarter", 341 "year", 342 ], 343 } 344 ] 345 346 def _get_sets(self) -> List[Dict[str, Any]]: 347 """Generate metric sets.""" 348 # group all the metric dimensions into a set 349 dimensions = self.get_dimensions() 350 measures = self.get_measures(dimensions) 351 352 return [ 353 { 354 "name": "metrics", 355 "fields": [ 356 dimension["name"] 357 for dimension in dimensions 358 if dimension["name"] != "client_id" 359 ] 360 + [measure["name"] for measure in measures], 361 } 362 ] 363 364 def _get_parameters(self, dimensions: List[dict]): 365 hide_sampling = "yes" 366 367 for dim in dimensions: 368 if dim["name"] == "sample_id": 369 hide_sampling = "no" 370 break 371 372 return [ 373 { 374 "name": "aggregate_metrics_by", 375 "label": "Aggregate Client Metrics Per", 376 "type": "unquoted", 377 "default_value": "day", 378 "allowed_values": [ 379 {"label": "Per Day", "value": "day"}, 380 {"label": "Per Week", "value": "week"}, 381 
{"label": "Per Month", "value": "month"}, 382 {"label": "Per Quarter", "value": "quarter"}, 383 {"label": "Per Year", "value": "year"}, 384 {"label": "Overall", "value": "overall"}, 385 ], 386 }, 387 { 388 "name": "sampling", 389 "label": "Sample of source data in %", 390 "type": "unquoted", 391 "default_value": "100", 392 "hidden": hide_sampling, 393 }, 394 ] 395 396 def get_measures( 397 self, dimensions: List[dict] 398 ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]: 399 """Get statistics as measures.""" 400 measures = [] 401 sampling = "1" 402 403 for dim in dimensions: 404 if dim["name"] == "sample_id": 405 sampling = "100 / {% parameter sampling %}" 406 break 407 408 for dimension in dimensions: 409 metric = MetricsConfigLoader.configs.get_metric_definition( 410 dimension["name"], self.namespace 411 ) 412 if metric and metric.statistics: 413 for statistic_slug, statistic_conf in metric.statistics.items(): 414 dimension_label = dimension.get("label") or dimension.get("name") 415 if statistic_slug in [ 416 "average", 417 "max", 418 "min", 419 "median", 420 ]: 421 measures.append( 422 { 423 "name": f"{dimension['name']}_{statistic_slug}", 424 "type": statistic_slug, 425 "sql": "${TABLE}." + dimension["name"], 426 "label": f"{dimension_label} {statistic_slug.title()}", 427 "group_label": "Statistics", 428 "description": f"{statistic_slug.title()} of {dimension_label}", 429 } 430 ) 431 elif statistic_slug == "sum": 432 measures.append( 433 { 434 "name": f"{dimension['name']}_{statistic_slug}", 435 "type": "sum", 436 "sql": "${TABLE}." 
+ dimension["name"] + "*" + sampling, 437 "label": f"{dimension_label} Sum", 438 "group_label": "Statistics", 439 "description": f"Sum of {dimension_label}", 440 } 441 ) 442 elif statistic_slug == "client_count": 443 measures.append( 444 { 445 "name": ( 446 f"{dimension['name']}_{statistic_slug}_sampled" 447 if sampling 448 else f"{dimension['name']}_{statistic_slug}" 449 ), 450 "type": "count_distinct", 451 "label": f"{dimension_label} Client Count", 452 "group_label": "Statistics", 453 "sql": "IF(${TABLE}." 454 + f"{dimension['name']} > 0, " 455 + "${TABLE}.client_id, SAFE_CAST(NULL AS STRING))", 456 "description": f"Number of clients with {dimension_label}", 457 "hidden": "yes" if sampling else "no", 458 } 459 ) 460 461 if sampling: 462 measures.append( 463 { 464 "name": f"{dimension['name']}_{statistic_slug}", 465 "type": "number", 466 "label": f"{dimension_label} Client Count", 467 "group_label": "Statistics", 468 "sql": "${" 469 + f"{dimension['name']}_{statistic_slug}_sampled" 470 + "} *" 471 + sampling, 472 "description": f"Number of clients with {dimension_label}", 473 } 474 ) 475 elif statistic_slug == "dau_proportion": 476 if "numerator" in statistic_conf: 477 [numerator, numerator_stat] = statistic_conf[ 478 "numerator" 479 ].split(".") 480 measures.append( 481 { 482 "name": "DAU_sampled" if sampling else "DAU", 483 "type": "count_distinct", 484 "label": "DAU", 485 "group_label": "Statistics", 486 "sql": "${TABLE}.client_id", 487 "hidden": "yes", 488 } 489 ) 490 491 if sampling: 492 measures.append( 493 { 494 "name": "DAU", 495 "type": "number", 496 "label": "DAU", 497 "group_label": "Statistics", 498 "sql": "${DAU_sampled} *" + sampling, 499 "hidden": "yes", 500 } 501 ) 502 503 measures.append( 504 { 505 "name": f"{dimension['name']}_{statistic_slug}", 506 "type": "number", 507 "label": f"{dimension_label} DAU Proportion", 508 "sql": "SAFE_DIVIDE(${" 509 + f"{numerator}_{numerator_stat}" 510 + "}, ${DAU})", 511 "group_label": "Statistics", 512 
"description": f"Proportion of daily active users with {dimension['name']}", 513 } 514 ) 515 elif statistic_slug == "ratio": 516 if ( 517 "numerator" in statistic_conf 518 and "denominator" in statistic_conf 519 ): 520 [numerator, numerator_stat] = statistic_conf[ 521 "numerator" 522 ].split(".") 523 [denominator, denominator_stat] = statistic_conf[ 524 "denominator" 525 ].split(".") 526 527 measures.append( 528 { 529 "name": f"{dimension['name']}_{statistic_slug}", 530 "type": "number", 531 "label": f"{dimension_label} Ratio", 532 "sql": "SAFE_DIVIDE(${" 533 + f"{numerator}_{numerator_stat}" 534 + "}, ${" 535 + f"{denominator}_{denominator_stat}" 536 + "})", 537 "group_label": "Statistics", 538 "description": f"""" 539 Ratio between {statistic_conf['numerator']} and 540 {statistic_conf['denominator']}""", 541 } 542 ) 543 elif statistic_slug == "rolling_average": 544 aggregation = statistic_conf.get("aggregation", "sum") 545 if "window_sizes" in statistic_conf: 546 for window_size in statistic_conf["window_sizes"]: 547 measures.append( 548 { 549 "name": f"{dimension['name']}_{window_size}_day_{statistic_slug}", 550 "type": "number", 551 "label": f"{dimension_label} {window_size} Day Rolling Average", 552 "sql": f""" 553 AVG({aggregation}(${{TABLE}}.{dimension["name"]} * {sampling})) OVER ( 554 ROWS {window_size} PRECEDING 555 )""", 556 "group_label": "Statistics", 557 "description": f"{window_size} day rolling average of {dimension_label}", 558 } 559 ) 560 561 return measures
15class MetricDefinitionsView(View): 16 """A view for metric-hub metrics that come from the same data source.""" 17 18 type: str = "metric_definitions_view" 19 20 def __init__(self, namespace: str, name: str, tables: List[Dict[str, str]]): 21 """Get an instance of an MetricDefinitionsView.""" 22 super().__init__(namespace, name, MetricDefinitionsView.type, tables) 23 24 @classmethod 25 def from_db_views( 26 klass, 27 namespace: str, 28 is_glean: bool, 29 channels: List[Dict[str, str]], 30 db_views: dict, 31 ) -> Iterator[MetricDefinitionsView]: 32 """Get Metric Definition Views from db views and app variants.""" 33 return iter(()) 34 35 @classmethod 36 def from_dict( 37 klass, namespace: str, name: str, definition: ViewDict 38 ) -> MetricDefinitionsView: 39 """Get a MetricDefinitionsView from a dict representation.""" 40 return klass(namespace, name, definition.get("tables", [])) 41 42 def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 43 """Get this view as LookML.""" 44 namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions( 45 self.namespace 46 ) 47 if namespace_definitions is None: 48 return {} 49 50 # get all metric definitions that depend on the data source represented by this view 51 data_source_name = re.sub("^metric_definitions_", "", self.name) 52 data_source_definition = MetricsConfigLoader.configs.get_data_source_definition( 53 data_source_name, self.namespace 54 ) 55 56 if data_source_definition is None: 57 return {} 58 59 # todo: hide deprecated metrics? 
60 metric_definitions = [ 61 f"""{ 62 MetricsConfigLoader.configs.get_env().from_string(metric.select_expression).render() 63 } AS {metric_slug},\n""" 64 for metric_slug, metric in namespace_definitions.metrics.definitions.items() 65 if metric.select_expression 66 and metric.data_source.name == data_source_name 67 and metric.type != "histogram" 68 ] 69 70 if metric_definitions == []: 71 return {} 72 73 # Metric definitions are intended to aggregated by client per date. 74 # A derived table is needed to do these aggregations, instead of defining them as measures 75 # we want to have them available as dimensions (which don't allow aggregations in their definitions) 76 # to allow for custom measures to be later defined in Looker that aggregate these per client metrics. 77 view_defn: Dict[str, Any] = {"name": self.name} 78 79 ignore_base_fields = [ 80 "client_id", 81 "submission_date", 82 "submission", 83 "first_run", 84 ] + [ 85 metric_slug 86 for metric_slug, metric in namespace_definitions.metrics.definitions.items() 87 if metric.select_expression 88 and metric.data_source.name == data_source_name 89 and metric.type != "histogram" 90 ] 91 92 base_view_dimensions = {} 93 joined_data_sources = [] 94 95 # check if the metric data source has joins 96 # joined data sources are generally used for creating the "Base Fields" 97 if data_source_definition.joins: 98 # determine the dimensions selected by the joined data sources 99 for joined_data_source_slug, join in data_source_definition.joins.items(): 100 joined_data_source = ( 101 MetricsConfigLoader.configs.get_data_source_definition( 102 joined_data_source_slug, self.namespace 103 ) 104 ) 105 106 if joined_data_source.columns_as_dimensions: 107 joined_data_sources.append(joined_data_source) 108 109 date_filter = None 110 if joined_data_source.submission_date_column != "NULL": 111 date_filter = ( 112 None 113 if joined_data_source.submission_date_column is None 114 or joined_data_source.submission_date_column == "NULL" 
115 else f"{joined_data_source.submission_date_column} = '2023-01-01'" 116 ) 117 118 # create Looker dimensions by doing a dryrun 119 query = MetricsConfigLoader.configs.get_data_source_sql( 120 joined_data_source_slug, 121 self.namespace, 122 where=date_filter, 123 ).format(dataset=self.namespace) 124 125 base_view_dimensions[joined_data_source_slug] = ( 126 lookml_utils._generate_dimensions_from_query( 127 query, dryrun=dryrun 128 ) 129 ) 130 131 if ( 132 data_source_definition.client_id_column == "NULL" 133 or data_source_definition.columns_as_dimensions 134 ): 135 # if the metrics data source doesn't have any joins then use the dimensions 136 # of the data source itself as base fields 137 date_filter = None 138 if data_source_definition.submission_date_column != "NULL": 139 date_filter = ( 140 "submission_date = '2023-01-01'" 141 if data_source_definition.submission_date_column is None 142 else f"{data_source_definition.submission_date_column} = '2023-01-01'" 143 ) 144 145 query = MetricsConfigLoader.configs.get_data_source_sql( 146 data_source_definition.name, 147 self.namespace, 148 where=date_filter, 149 ignore_joins=True, 150 ).format(dataset=self.namespace) 151 152 base_view_dimensions[data_source_definition.name] = ( 153 lookml_utils._generate_dimensions_from_query(query, dryrun) 154 ) 155 156 # prepare base field data for query 157 base_view_fields = [ 158 { 159 "name": f"{data_source}_{dimension['name']}", 160 "select_sql": f"{data_source}_{dimension['name']},\n", 161 "sql": f"{data_source}.{dimension['name'].replace('__', '.')} AS {data_source}_{dimension['name']},\n", 162 } 163 for data_source, dimensions in base_view_dimensions.items() 164 for dimension in dimensions 165 if dimension["name"] not in ignore_base_fields 166 and "hidden" not in dimension 167 and dimension["type"] != "time" 168 ] 169 170 client_id_field = ( 171 "NULL" 172 if data_source_definition.client_id_column == "NULL" 173 else f'{data_source_definition.client_id_column or 
"client_id"}' 174 ) 175 176 # filters for date ranges 177 where_sql = " AND ".join( 178 [ 179 f""" 180 {data_source.name}.{data_source.submission_date_column or "submission_date"} 181 BETWEEN 182 COALESCE( 183 SAFE_CAST( 184 {{% date_start submission_date %}} AS DATE 185 ), CURRENT_DATE()) AND 186 COALESCE( 187 SAFE_CAST( 188 {{% date_end submission_date %}} AS DATE 189 ), CURRENT_DATE()) 190 """ 191 for data_source in [data_source_definition] + joined_data_sources 192 if data_source.submission_date_column != "NULL" 193 ] 194 ) 195 196 # filte on sample_id if such a field exists 197 for field in base_view_fields: 198 if field["name"].endswith("_sample_id"): 199 where_sql += f""" 200 AND 201 {field['name'].split('_sample_id')[0]}.sample_id < {{% parameter sampling %}} 202 """ 203 break 204 205 view_defn["derived_table"] = { 206 "sql": f""" 207 SELECT 208 {"".join(metric_definitions)} 209 {"".join([field['select_sql'] for field in base_view_fields])} 210 {client_id_field} AS client_id, 211 {{% if aggregate_metrics_by._parameter_value == 'day' %}} 212 {data_source_definition.submission_date_column or "submission_date"} AS analysis_basis 213 {{% elsif aggregate_metrics_by._parameter_value == 'week' %}} 214 (FORMAT_DATE( 215 '%F', 216 DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"}, 217 WEEK(MONDAY))) 218 ) AS analysis_basis 219 {{% elsif aggregate_metrics_by._parameter_value == 'month' %}} 220 (FORMAT_DATE( 221 '%Y-%m', 222 {data_source_definition.submission_date_column or "submission_date"}) 223 ) AS analysis_basis 224 {{% elsif aggregate_metrics_by._parameter_value == 'quarter' %}} 225 (FORMAT_DATE( 226 '%Y-%m', 227 DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"}, 228 QUARTER)) 229 ) AS analysis_basis 230 {{% elsif aggregate_metrics_by._parameter_value == 'year' %}} 231 (EXTRACT( 232 YEAR FROM {data_source_definition.submission_date_column or "submission_date"}) 233 ) AS analysis_basis 234 {{% else %}} 
235 NULL as analysis_basis 236 {{% endif %}} 237 FROM 238 ( 239 SELECT 240 {data_source_name}.*, 241 {"".join([field['sql'] for field in base_view_fields])} 242 FROM 243 { 244 MetricsConfigLoader.configs.get_data_source_sql( 245 data_source_name, 246 self.namespace, 247 select_fields=False 248 ).format(dataset=self.namespace) 249 } 250 WHERE {where_sql} 251 ) 252 GROUP BY 253 {"".join([field['select_sql'] for field in base_view_fields])} 254 client_id, 255 analysis_basis 256 """ 257 } 258 259 view_defn["dimensions"] = self.get_dimensions() 260 view_defn["dimension_groups"] = self.get_dimension_groups() 261 262 # add the Looker dimensions 263 for data_source, dimensions in base_view_dimensions.items(): 264 for dimension in dimensions: 265 if ( 266 dimension["name"] not in ignore_base_fields 267 and dimension.get("type", "") != "time" 268 ): 269 dimension["sql"] = ( 270 "${TABLE}." + f"{data_source}_{dimension['name']}" 271 ) 272 dimension["group_label"] = "Base Fields" 273 if not lookml_utils._is_dimension_group(dimension): 274 view_defn["dimensions"].append(dimension) 275 else: 276 view_defn["dimension_groups"].append(dimension) 277 # avoid duplicate dimensions 278 ignore_base_fields.append(dimension["name"]) 279 280 view_defn["measures"] = self.get_measures( 281 view_defn["dimensions"], 282 ) 283 view_defn["sets"] = self._get_sets() 284 view_defn["parameters"] = self._get_parameters(view_defn["dimensions"]) 285 286 return {"views": [view_defn]} 287 288 def get_dimensions( 289 self, 290 _table=None, 291 _v1_name: Optional[str] = None, 292 _dryrun=None, 293 ) -> List[Dict[str, Any]]: 294 """Get the set of dimensions for this view based on the metric definitions in metric-hub.""" 295 namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions( 296 self.namespace 297 ) 298 metric_definitions = namespace_definitions.metrics.definitions 299 data_source_name = re.sub("^metric_definitions_", "", self.name) 300 301 return [ 302 { 303 "name": "client_id", 
304 "type": "string", 305 "sql": "SAFE_CAST(${TABLE}.client_id AS STRING)", 306 "label": "Client ID", 307 "primary_key": "yes", 308 "group_label": "Base Fields", 309 "description": "Unique client identifier", 310 }, 311 ] + [ # add a dimension for each metric definition 312 { 313 "name": metric_slug, 314 "group_label": "Metrics", 315 "label": metric.friendly_name 316 or lookml_utils.slug_to_title(metric_slug), 317 "description": metric.description or "", 318 "type": "number", 319 "sql": "${TABLE}." + metric_slug, 320 } 321 for metric_slug, metric in metric_definitions.items() 322 if metric.select_expression 323 and metric.data_source.name == data_source_name 324 and metric.type != "histogram" 325 ] 326 327 def get_dimension_groups(self) -> List[Dict[str, Any]]: 328 """Get dimension groups for this view.""" 329 return [ 330 { 331 "name": "submission", 332 "type": "time", 333 "group_label": "Base Fields", 334 "sql": "CAST(${TABLE}.analysis_basis AS TIMESTAMP)", 335 "label": "Submission", 336 "timeframes": [ 337 "raw", 338 "date", 339 "week", 340 "month", 341 "quarter", 342 "year", 343 ], 344 } 345 ] 346 347 def _get_sets(self) -> List[Dict[str, Any]]: 348 """Generate metric sets.""" 349 # group all the metric dimensions into a set 350 dimensions = self.get_dimensions() 351 measures = self.get_measures(dimensions) 352 353 return [ 354 { 355 "name": "metrics", 356 "fields": [ 357 dimension["name"] 358 for dimension in dimensions 359 if dimension["name"] != "client_id" 360 ] 361 + [measure["name"] for measure in measures], 362 } 363 ] 364 365 def _get_parameters(self, dimensions: List[dict]): 366 hide_sampling = "yes" 367 368 for dim in dimensions: 369 if dim["name"] == "sample_id": 370 hide_sampling = "no" 371 break 372 373 return [ 374 { 375 "name": "aggregate_metrics_by", 376 "label": "Aggregate Client Metrics Per", 377 "type": "unquoted", 378 "default_value": "day", 379 "allowed_values": [ 380 {"label": "Per Day", "value": "day"}, 381 {"label": "Per Week", "value": 
"week"}, 382 {"label": "Per Month", "value": "month"}, 383 {"label": "Per Quarter", "value": "quarter"}, 384 {"label": "Per Year", "value": "year"}, 385 {"label": "Overall", "value": "overall"}, 386 ], 387 }, 388 { 389 "name": "sampling", 390 "label": "Sample of source data in %", 391 "type": "unquoted", 392 "default_value": "100", 393 "hidden": hide_sampling, 394 }, 395 ] 396 397 def get_measures( 398 self, dimensions: List[dict] 399 ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]: 400 """Get statistics as measures.""" 401 measures = [] 402 sampling = "1" 403 404 for dim in dimensions: 405 if dim["name"] == "sample_id": 406 sampling = "100 / {% parameter sampling %}" 407 break 408 409 for dimension in dimensions: 410 metric = MetricsConfigLoader.configs.get_metric_definition( 411 dimension["name"], self.namespace 412 ) 413 if metric and metric.statistics: 414 for statistic_slug, statistic_conf in metric.statistics.items(): 415 dimension_label = dimension.get("label") or dimension.get("name") 416 if statistic_slug in [ 417 "average", 418 "max", 419 "min", 420 "median", 421 ]: 422 measures.append( 423 { 424 "name": f"{dimension['name']}_{statistic_slug}", 425 "type": statistic_slug, 426 "sql": "${TABLE}." + dimension["name"], 427 "label": f"{dimension_label} {statistic_slug.title()}", 428 "group_label": "Statistics", 429 "description": f"{statistic_slug.title()} of {dimension_label}", 430 } 431 ) 432 elif statistic_slug == "sum": 433 measures.append( 434 { 435 "name": f"{dimension['name']}_{statistic_slug}", 436 "type": "sum", 437 "sql": "${TABLE}." 
+ dimension["name"] + "*" + sampling, 438 "label": f"{dimension_label} Sum", 439 "group_label": "Statistics", 440 "description": f"Sum of {dimension_label}", 441 } 442 ) 443 elif statistic_slug == "client_count": 444 measures.append( 445 { 446 "name": ( 447 f"{dimension['name']}_{statistic_slug}_sampled" 448 if sampling 449 else f"{dimension['name']}_{statistic_slug}" 450 ), 451 "type": "count_distinct", 452 "label": f"{dimension_label} Client Count", 453 "group_label": "Statistics", 454 "sql": "IF(${TABLE}." 455 + f"{dimension['name']} > 0, " 456 + "${TABLE}.client_id, SAFE_CAST(NULL AS STRING))", 457 "description": f"Number of clients with {dimension_label}", 458 "hidden": "yes" if sampling else "no", 459 } 460 ) 461 462 if sampling: 463 measures.append( 464 { 465 "name": f"{dimension['name']}_{statistic_slug}", 466 "type": "number", 467 "label": f"{dimension_label} Client Count", 468 "group_label": "Statistics", 469 "sql": "${" 470 + f"{dimension['name']}_{statistic_slug}_sampled" 471 + "} *" 472 + sampling, 473 "description": f"Number of clients with {dimension_label}", 474 } 475 ) 476 elif statistic_slug == "dau_proportion": 477 if "numerator" in statistic_conf: 478 [numerator, numerator_stat] = statistic_conf[ 479 "numerator" 480 ].split(".") 481 measures.append( 482 { 483 "name": "DAU_sampled" if sampling else "DAU", 484 "type": "count_distinct", 485 "label": "DAU", 486 "group_label": "Statistics", 487 "sql": "${TABLE}.client_id", 488 "hidden": "yes", 489 } 490 ) 491 492 if sampling: 493 measures.append( 494 { 495 "name": "DAU", 496 "type": "number", 497 "label": "DAU", 498 "group_label": "Statistics", 499 "sql": "${DAU_sampled} *" + sampling, 500 "hidden": "yes", 501 } 502 ) 503 504 measures.append( 505 { 506 "name": f"{dimension['name']}_{statistic_slug}", 507 "type": "number", 508 "label": f"{dimension_label} DAU Proportion", 509 "sql": "SAFE_DIVIDE(${" 510 + f"{numerator}_{numerator_stat}" 511 + "}, ${DAU})", 512 "group_label": "Statistics", 513 
"description": f"Proportion of daily active users with {dimension['name']}", 514 } 515 ) 516 elif statistic_slug == "ratio": 517 if ( 518 "numerator" in statistic_conf 519 and "denominator" in statistic_conf 520 ): 521 [numerator, numerator_stat] = statistic_conf[ 522 "numerator" 523 ].split(".") 524 [denominator, denominator_stat] = statistic_conf[ 525 "denominator" 526 ].split(".") 527 528 measures.append( 529 { 530 "name": f"{dimension['name']}_{statistic_slug}", 531 "type": "number", 532 "label": f"{dimension_label} Ratio", 533 "sql": "SAFE_DIVIDE(${" 534 + f"{numerator}_{numerator_stat}" 535 + "}, ${" 536 + f"{denominator}_{denominator_stat}" 537 + "})", 538 "group_label": "Statistics", 539 "description": f"""" 540 Ratio between {statistic_conf['numerator']} and 541 {statistic_conf['denominator']}""", 542 } 543 ) 544 elif statistic_slug == "rolling_average": 545 aggregation = statistic_conf.get("aggregation", "sum") 546 if "window_sizes" in statistic_conf: 547 for window_size in statistic_conf["window_sizes"]: 548 measures.append( 549 { 550 "name": f"{dimension['name']}_{window_size}_day_{statistic_slug}", 551 "type": "number", 552 "label": f"{dimension_label} {window_size} Day Rolling Average", 553 "sql": f""" 554 AVG({aggregation}(${{TABLE}}.{dimension["name"]} * {sampling})) OVER ( 555 ROWS {window_size} PRECEDING 556 )""", 557 "group_label": "Statistics", 558 "description": f"{window_size} day rolling average of {dimension_label}", 559 } 560 ) 561 562 return measures
A view for metric-hub metrics that come from the same data source.
MetricDefinitionsView(namespace: str, name: str, tables: List[Dict[str, str]])
20 def __init__(self, namespace: str, name: str, tables: List[Dict[str, str]]): 21 """Get an instance of an MetricDefinitionsView.""" 22 super().__init__(namespace, name, MetricDefinitionsView.type, tables)
Get an instance of a MetricDefinitionsView.
@classmethod
def
from_db_views( klass, namespace: str, is_glean: bool, channels: List[Dict[str, str]], db_views: dict) -> Iterator[MetricDefinitionsView]:
24 @classmethod 25 def from_db_views( 26 klass, 27 namespace: str, 28 is_glean: bool, 29 channels: List[Dict[str, str]], 30 db_views: dict, 31 ) -> Iterator[MetricDefinitionsView]: 32 """Get Metric Definition Views from db views and app variants.""" 33 return iter(())
Get Metric Definition Views from db views and app variants.
@classmethod
def
from_dict( klass, namespace: str, name: str, definition: generator.views.view.ViewDict) -> MetricDefinitionsView:
35 @classmethod 36 def from_dict( 37 klass, namespace: str, name: str, definition: ViewDict 38 ) -> MetricDefinitionsView: 39 """Get a MetricDefinitionsView from a dict representation.""" 40 return klass(namespace, name, definition.get("tables", []))
Get a MetricDefinitionsView from a dict representation.
    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
        """Get this view as LookML.

        Builds one LookML view whose derived table computes each metric-hub
        metric of this view's data source per client, aggregated by the
        ``aggregate_metrics_by`` Looker parameter (day/week/month/quarter/year),
        plus "Base Field" dimensions discovered via dryrun from the data source
        and its joined data sources.

        Returns an empty dict when the namespace has no metric-hub definitions,
        the data source is unknown, or no non-histogram metric uses it.
        """
        namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
            self.namespace
        )
        if namespace_definitions is None:
            return {}

        # get all metric definitions that depend on the data source represented
        # by this view; the data source slug is the view name minus its prefix
        data_source_name = re.sub("^metric_definitions_", "", self.name)
        data_source_definition = MetricsConfigLoader.configs.get_data_source_definition(
            data_source_name, self.namespace
        )

        if data_source_definition is None:
            return {}

        # todo: hide deprecated metrics?
        # Render each metric's select expression through the metric-hub Jinja
        # environment; histogram metrics are excluded (not representable here).
        metric_definitions = [
            f"""{
                MetricsConfigLoader.configs.get_env().from_string(metric.select_expression).render()
            } AS {metric_slug},\n"""
            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
            if metric.select_expression
            and metric.data_source.name == data_source_name
            and metric.type != "histogram"
        ]

        if metric_definitions == []:
            return {}

        # Metric definitions are intended to be aggregated by client per date.
        # A derived table is needed to do these aggregations; instead of defining
        # them as measures we want them available as dimensions (which don't allow
        # aggregations in their definitions) so custom measures can later be
        # defined in Looker that aggregate these per-client metrics.
        view_defn: Dict[str, Any] = {"name": self.name}

        # Fields never exposed as "Base Fields", plus every metric slug
        # (metrics get their own dimensions from get_dimensions()).
        ignore_base_fields = [
            "client_id",
            "submission_date",
            "submission",
            "first_run",
        ] + [
            metric_slug
            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
            if metric.select_expression
            and metric.data_source.name == data_source_name
            and metric.type != "histogram"
        ]

        # data source slug -> dimensions discovered for it via dryrun
        base_view_dimensions = {}
        joined_data_sources = []

        # check if the metric data source has joins
        # joined data sources are generally used for creating the "Base Fields"
        if data_source_definition.joins:
            # determine the dimensions selected by the joined data sources
            for joined_data_source_slug, join in data_source_definition.joins.items():
                joined_data_source = (
                    MetricsConfigLoader.configs.get_data_source_definition(
                        joined_data_source_slug, self.namespace
                    )
                )

                if joined_data_source.columns_as_dimensions:
                    joined_data_sources.append(joined_data_source)

                    # NOTE(review): the "== 'NULL'" re-check below is redundant
                    # under the enclosing "!= 'NULL'" guard; only the "is None"
                    # branch can keep date_filter as None here.
                    date_filter = None
                    if joined_data_source.submission_date_column != "NULL":
                        date_filter = (
                            None
                            if joined_data_source.submission_date_column is None
                            or joined_data_source.submission_date_column == "NULL"
                            else f"{joined_data_source.submission_date_column} = '2023-01-01'"
                        )

                    # create Looker dimensions by doing a dryrun
                    # (filtered to an arbitrary fixed date so the dryrun stays
                    # cheap; only the result schema is used)
                    query = MetricsConfigLoader.configs.get_data_source_sql(
                        joined_data_source_slug,
                        self.namespace,
                        where=date_filter,
                    ).format(dataset=self.namespace)

                    base_view_dimensions[joined_data_source_slug] = (
                        lookml_utils._generate_dimensions_from_query(
                            query, dryrun=dryrun
                        )
                    )

        if (
            data_source_definition.client_id_column == "NULL"
            or data_source_definition.columns_as_dimensions
        ):
            # if the metrics data source doesn't have any joins then use the
            # dimensions of the data source itself as base fields
            date_filter = None
            if data_source_definition.submission_date_column != "NULL":
                date_filter = (
                    "submission_date = '2023-01-01'"
                    if data_source_definition.submission_date_column is None
                    else f"{data_source_definition.submission_date_column} = '2023-01-01'"
                )

            query = MetricsConfigLoader.configs.get_data_source_sql(
                data_source_definition.name,
                self.namespace,
                where=date_filter,
                ignore_joins=True,
            ).format(dataset=self.namespace)

            base_view_dimensions[data_source_definition.name] = (
                lookml_utils._generate_dimensions_from_query(query, dryrun)
            )

        # prepare base field data for query
        # "__" in dryrun dimension names encodes nesting, hence the "." rewrite
        base_view_fields = [
            {
                "name": f"{data_source}_{dimension['name']}",
                "select_sql": f"{data_source}_{dimension['name']},\n",
                "sql": f"{data_source}.{dimension['name'].replace('__', '.')} AS {data_source}_{dimension['name']},\n",
            }
            for data_source, dimensions in base_view_dimensions.items()
            for dimension in dimensions
            if dimension["name"] not in ignore_base_fields
            and "hidden" not in dimension
            and dimension["type"] != "time"
        ]

        # "NULL" means the data source explicitly has no client id column;
        # otherwise fall back to the conventional "client_id" name
        client_id_field = (
            "NULL"
            if data_source_definition.client_id_column == "NULL"
            else f'{data_source_definition.client_id_column or "client_id"}'
        )

        # filters for date ranges
        where_sql = " AND ".join(
            [
                f"""
                    {data_source.name}.{data_source.submission_date_column or "submission_date"}
                    BETWEEN
                    COALESCE(
                        SAFE_CAST(
                            {{% date_start submission_date %}} AS DATE
                        ), CURRENT_DATE()) AND
                    COALESCE(
                        SAFE_CAST(
                            {{% date_end submission_date %}} AS DATE
                        ), CURRENT_DATE())
                """
                for data_source in [data_source_definition] + joined_data_sources
                if data_source.submission_date_column != "NULL"
            ]
        )

        # filter on sample_id if such a field exists
        for field in base_view_fields:
            if field["name"].endswith("_sample_id"):
                where_sql += f"""
                    AND
                        {field['name'].split('_sample_id')[0]}.sample_id < {{% parameter sampling %}}
                """
                break

        view_defn["derived_table"] = {
            "sql": f"""
            SELECT
                {"".join(metric_definitions)}
                {"".join([field['select_sql'] for field in base_view_fields])}
                {client_id_field} AS client_id,
                {{% if aggregate_metrics_by._parameter_value == 'day' %}}
                {data_source_definition.submission_date_column or "submission_date"} AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'week' %}}
                (FORMAT_DATE(
                    '%F',
                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
                    WEEK(MONDAY)))
                ) AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'month' %}}
                (FORMAT_DATE(
                    '%Y-%m',
                    {data_source_definition.submission_date_column or "submission_date"})
                ) AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'quarter' %}}
                (FORMAT_DATE(
                    '%Y-%m',
                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
                    QUARTER))
                ) AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'year' %}}
                (EXTRACT(
                    YEAR FROM {data_source_definition.submission_date_column or "submission_date"})
                ) AS analysis_basis
                {{% else %}}
                NULL as analysis_basis
                {{% endif %}}
            FROM
                (
                    SELECT
                        {data_source_name}.*,
                        {"".join([field['sql'] for field in base_view_fields])}
                    FROM
                    {
                        MetricsConfigLoader.configs.get_data_source_sql(
                            data_source_name,
                            self.namespace,
                            select_fields=False
                        ).format(dataset=self.namespace)
                    }
                    WHERE {where_sql}
                )
            GROUP BY
                {"".join([field['select_sql'] for field in base_view_fields])}
                client_id,
                analysis_basis
            """
        }

        view_defn["dimensions"] = self.get_dimensions()
        view_defn["dimension_groups"] = self.get_dimension_groups()

        # add the Looker dimensions
        for data_source, dimensions in base_view_dimensions.items():
            for dimension in dimensions:
                if (
                    dimension["name"] not in ignore_base_fields
                    and dimension.get("type", "") != "time"
                ):
                    dimension["sql"] = (
                        "${TABLE}." + f"{data_source}_{dimension['name']}"
                    )
                    dimension["group_label"] = "Base Fields"
                    if not lookml_utils._is_dimension_group(dimension):
                        view_defn["dimensions"].append(dimension)
                    else:
                        view_defn["dimension_groups"].append(dimension)
                    # avoid duplicate dimensions
                    ignore_base_fields.append(dimension["name"])

        view_defn["measures"] = self.get_measures(
            view_defn["dimensions"],
        )
        view_defn["sets"] = self._get_sets()
        view_defn["parameters"] = self._get_parameters(view_defn["dimensions"])

        return {"views": [view_defn]}
Get this view as LookML.
def
get_dimensions( self, _table=None, _v1_name: Optional[str] = None, _dryrun=None) -> List[Dict[str, Any]]:
288 def get_dimensions( 289 self, 290 _table=None, 291 _v1_name: Optional[str] = None, 292 _dryrun=None, 293 ) -> List[Dict[str, Any]]: 294 """Get the set of dimensions for this view based on the metric definitions in metric-hub.""" 295 namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions( 296 self.namespace 297 ) 298 metric_definitions = namespace_definitions.metrics.definitions 299 data_source_name = re.sub("^metric_definitions_", "", self.name) 300 301 return [ 302 { 303 "name": "client_id", 304 "type": "string", 305 "sql": "SAFE_CAST(${TABLE}.client_id AS STRING)", 306 "label": "Client ID", 307 "primary_key": "yes", 308 "group_label": "Base Fields", 309 "description": "Unique client identifier", 310 }, 311 ] + [ # add a dimension for each metric definition 312 { 313 "name": metric_slug, 314 "group_label": "Metrics", 315 "label": metric.friendly_name 316 or lookml_utils.slug_to_title(metric_slug), 317 "description": metric.description or "", 318 "type": "number", 319 "sql": "${TABLE}." + metric_slug, 320 } 321 for metric_slug, metric in metric_definitions.items() 322 if metric.select_expression 323 and metric.data_source.name == data_source_name 324 and metric.type != "histogram" 325 ]
Get the set of dimensions for this view based on the metric definitions in metric-hub.
def
get_dimension_groups(self) -> List[Dict[str, Any]]:
327 def get_dimension_groups(self) -> List[Dict[str, Any]]: 328 """Get dimension groups for this view.""" 329 return [ 330 { 331 "name": "submission", 332 "type": "time", 333 "group_label": "Base Fields", 334 "sql": "CAST(${TABLE}.analysis_basis AS TIMESTAMP)", 335 "label": "Submission", 336 "timeframes": [ 337 "raw", 338 "date", 339 "week", 340 "month", 341 "quarter", 342 "year", 343 ], 344 } 345 ]
Get dimension groups for this view.
def
get_measures( self, dimensions: List[dict]) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
397 def get_measures( 398 self, dimensions: List[dict] 399 ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]: 400 """Get statistics as measures.""" 401 measures = [] 402 sampling = "1" 403 404 for dim in dimensions: 405 if dim["name"] == "sample_id": 406 sampling = "100 / {% parameter sampling %}" 407 break 408 409 for dimension in dimensions: 410 metric = MetricsConfigLoader.configs.get_metric_definition( 411 dimension["name"], self.namespace 412 ) 413 if metric and metric.statistics: 414 for statistic_slug, statistic_conf in metric.statistics.items(): 415 dimension_label = dimension.get("label") or dimension.get("name") 416 if statistic_slug in [ 417 "average", 418 "max", 419 "min", 420 "median", 421 ]: 422 measures.append( 423 { 424 "name": f"{dimension['name']}_{statistic_slug}", 425 "type": statistic_slug, 426 "sql": "${TABLE}." + dimension["name"], 427 "label": f"{dimension_label} {statistic_slug.title()}", 428 "group_label": "Statistics", 429 "description": f"{statistic_slug.title()} of {dimension_label}", 430 } 431 ) 432 elif statistic_slug == "sum": 433 measures.append( 434 { 435 "name": f"{dimension['name']}_{statistic_slug}", 436 "type": "sum", 437 "sql": "${TABLE}." + dimension["name"] + "*" + sampling, 438 "label": f"{dimension_label} Sum", 439 "group_label": "Statistics", 440 "description": f"Sum of {dimension_label}", 441 } 442 ) 443 elif statistic_slug == "client_count": 444 measures.append( 445 { 446 "name": ( 447 f"{dimension['name']}_{statistic_slug}_sampled" 448 if sampling 449 else f"{dimension['name']}_{statistic_slug}" 450 ), 451 "type": "count_distinct", 452 "label": f"{dimension_label} Client Count", 453 "group_label": "Statistics", 454 "sql": "IF(${TABLE}." 
455 + f"{dimension['name']} > 0, " 456 + "${TABLE}.client_id, SAFE_CAST(NULL AS STRING))", 457 "description": f"Number of clients with {dimension_label}", 458 "hidden": "yes" if sampling else "no", 459 } 460 ) 461 462 if sampling: 463 measures.append( 464 { 465 "name": f"{dimension['name']}_{statistic_slug}", 466 "type": "number", 467 "label": f"{dimension_label} Client Count", 468 "group_label": "Statistics", 469 "sql": "${" 470 + f"{dimension['name']}_{statistic_slug}_sampled" 471 + "} *" 472 + sampling, 473 "description": f"Number of clients with {dimension_label}", 474 } 475 ) 476 elif statistic_slug == "dau_proportion": 477 if "numerator" in statistic_conf: 478 [numerator, numerator_stat] = statistic_conf[ 479 "numerator" 480 ].split(".") 481 measures.append( 482 { 483 "name": "DAU_sampled" if sampling else "DAU", 484 "type": "count_distinct", 485 "label": "DAU", 486 "group_label": "Statistics", 487 "sql": "${TABLE}.client_id", 488 "hidden": "yes", 489 } 490 ) 491 492 if sampling: 493 measures.append( 494 { 495 "name": "DAU", 496 "type": "number", 497 "label": "DAU", 498 "group_label": "Statistics", 499 "sql": "${DAU_sampled} *" + sampling, 500 "hidden": "yes", 501 } 502 ) 503 504 measures.append( 505 { 506 "name": f"{dimension['name']}_{statistic_slug}", 507 "type": "number", 508 "label": f"{dimension_label} DAU Proportion", 509 "sql": "SAFE_DIVIDE(${" 510 + f"{numerator}_{numerator_stat}" 511 + "}, ${DAU})", 512 "group_label": "Statistics", 513 "description": f"Proportion of daily active users with {dimension['name']}", 514 } 515 ) 516 elif statistic_slug == "ratio": 517 if ( 518 "numerator" in statistic_conf 519 and "denominator" in statistic_conf 520 ): 521 [numerator, numerator_stat] = statistic_conf[ 522 "numerator" 523 ].split(".") 524 [denominator, denominator_stat] = statistic_conf[ 525 "denominator" 526 ].split(".") 527 528 measures.append( 529 { 530 "name": f"{dimension['name']}_{statistic_slug}", 531 "type": "number", 532 "label": 
f"{dimension_label} Ratio", 533 "sql": "SAFE_DIVIDE(${" 534 + f"{numerator}_{numerator_stat}" 535 + "}, ${" 536 + f"{denominator}_{denominator_stat}" 537 + "})", 538 "group_label": "Statistics", 539 "description": f"""" 540 Ratio between {statistic_conf['numerator']} and 541 {statistic_conf['denominator']}""", 542 } 543 ) 544 elif statistic_slug == "rolling_average": 545 aggregation = statistic_conf.get("aggregation", "sum") 546 if "window_sizes" in statistic_conf: 547 for window_size in statistic_conf["window_sizes"]: 548 measures.append( 549 { 550 "name": f"{dimension['name']}_{window_size}_day_{statistic_slug}", 551 "type": "number", 552 "label": f"{dimension_label} {window_size} Day Rolling Average", 553 "sql": f""" 554 AVG({aggregation}(${{TABLE}}.{dimension["name"]} * {sampling})) OVER ( 555 ROWS {window_size} PRECEDING 556 )""", 557 "group_label": "Statistics", 558 "description": f"{window_size} day rolling average of {dimension_label}", 559 } 560 ) 561 562 return measures
Get statistics as measures.