generator.views.metric_definitions_view
Class to describe a view with metrics from metric-hub.
1"""Class to describe a view with metrics from metric-hub.""" 2 3from __future__ import annotations 4 5import re 6from typing import Any, Dict, Iterator, List, Optional, Union 7 8from generator.metrics_utils import MetricsConfigLoader 9 10from . import lookml_utils 11from .view import View, ViewDict 12 13 14class MetricDefinitionsView(View): 15 """A view for metric-hub metrics that come from the same data source.""" 16 17 type: str = "metric_definitions_view" 18 19 def __init__(self, namespace: str, name: str, tables: List[Dict[str, str]]): 20 """Get an instance of an MetricDefinitionsView.""" 21 super().__init__(namespace, name, MetricDefinitionsView.type, tables) 22 23 @classmethod 24 def from_db_views( 25 klass, 26 namespace: str, 27 is_glean: bool, 28 channels: List[Dict[str, str]], 29 db_views: dict, 30 ) -> Iterator[MetricDefinitionsView]: 31 """Get Metric Definition Views from db views and app variants.""" 32 return iter(()) 33 34 @classmethod 35 def from_dict( 36 klass, namespace: str, name: str, definition: ViewDict 37 ) -> MetricDefinitionsView: 38 """Get a MetricDefinitionsView from a dict representation.""" 39 return klass(namespace, name, definition.get("tables", [])) 40 41 def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 42 """Get this view as LookML.""" 43 namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions( 44 self.namespace 45 ) 46 if namespace_definitions is None: 47 return {} 48 49 # get all metric definitions that depend on the data source represented by this view 50 data_source_name = re.sub("^metric_definitions_", "", self.name) 51 data_source_definition = MetricsConfigLoader.configs.get_data_source_definition( 52 data_source_name, self.namespace 53 ) 54 55 if data_source_definition is None: 56 return {} 57 58 # todo: hide deprecated metrics? 
59 metric_definitions = [ 60 f"""{ 61 MetricsConfigLoader.configs.get_env().from_string(metric.select_expression).render() 62 } AS {metric_slug},\n""" 63 for metric_slug, metric in namespace_definitions.metrics.definitions.items() 64 if metric.select_expression 65 and metric.data_source.name == data_source_name 66 and metric.type != "histogram" 67 ] 68 69 if metric_definitions == []: 70 return {} 71 72 # Metric definitions are intended to aggregated by client per date. 73 # A derived table is needed to do these aggregations, instead of defining them as measures 74 # we want to have them available as dimensions (which don't allow aggregations in their definitions) 75 # to allow for custom measures to be later defined in Looker that aggregate these per client metrics. 76 view_defn: Dict[str, Any] = {"name": self.name} 77 78 ignore_base_fields = [ 79 "client_id", 80 "submission_date", 81 "submission", 82 "first_run", 83 ] + [ 84 metric_slug 85 for metric_slug, metric in namespace_definitions.metrics.definitions.items() 86 if metric.select_expression 87 and metric.data_source.name == data_source_name 88 and metric.type != "histogram" 89 ] 90 91 base_view_dimensions = {} 92 joined_data_sources = [] 93 94 # check if the metric data source has joins 95 # joined data sources are generally used for creating the "Base Fields" 96 if data_source_definition.joins: 97 # determine the dimensions selected by the joined data sources 98 for joined_data_source_slug, join in data_source_definition.joins.items(): 99 joined_data_source = ( 100 MetricsConfigLoader.configs.get_data_source_definition( 101 joined_data_source_slug, self.namespace 102 ) 103 ) 104 105 if joined_data_source.columns_as_dimensions: 106 joined_data_sources.append(joined_data_source) 107 108 date_filter = None 109 if joined_data_source.submission_date_column != "NULL": 110 date_filter = ( 111 None 112 if joined_data_source.submission_date_column is None 113 or joined_data_source.submission_date_column == "NULL" 114 
else f"{joined_data_source.submission_date_column} = '2023-01-01'" 115 ) 116 117 # create Looker dimensions by doing a dryrun 118 query = MetricsConfigLoader.configs.get_data_source_sql( 119 joined_data_source_slug, 120 self.namespace, 121 where=date_filter, 122 ).format(dataset=self.namespace) 123 124 base_view_dimensions[joined_data_source_slug] = ( 125 lookml_utils._generate_dimensions_from_query( 126 query, dryrun=dryrun 127 ) 128 ) 129 130 if ( 131 data_source_definition.client_id_column == "NULL" 132 and not base_view_dimensions 133 ) or data_source_definition.columns_as_dimensions: 134 # if the metrics data source doesn't have any joins then use the dimensions 135 # of the data source itself as base fields 136 date_filter = None 137 if data_source_definition.submission_date_column != "NULL": 138 date_filter = ( 139 "submission_date = '2023-01-01'" 140 if data_source_definition.submission_date_column is None 141 else f"{data_source_definition.submission_date_column} = '2023-01-01'" 142 ) 143 144 query = MetricsConfigLoader.configs.get_data_source_sql( 145 data_source_definition.name, 146 self.namespace, 147 where=date_filter, 148 ignore_joins=True, 149 ).format(dataset=self.namespace) 150 151 base_view_dimensions[data_source_definition.name] = ( 152 lookml_utils._generate_dimensions_from_query(query, dryrun) 153 ) 154 155 # to prevent duplicate dimensions, especially when working with time dimensions 156 # where names are modified potentially causing naming collisions 157 seen_dimensions = set() 158 # prepare base field data for query 159 base_view_fields = [] 160 for data_source, dimensions in base_view_dimensions.items(): 161 for dimension in dimensions: 162 if ( 163 dimension["name"] not in ignore_base_fields 164 and dimension["name"] not in seen_dimensions 165 and "hidden" not in dimension 166 ): 167 sql = ( 168 f"{data_source}.{dimension['name'].replace('__', '.')} AS" 169 + f" {data_source}_{dimension['name']},\n" 170 ) 171 # date/time/timestamp 
suffixes are removed when generating lookml dimensions, however we 172 # need the original field name for the derived view SQL 173 if dimension["type"] == "time" and not dimension["sql"].endswith( 174 dimension["name"] 175 ): 176 suffix = dimension["sql"].split( 177 dimension["name"].replace("__", ".") 178 )[-1] 179 sql = ( 180 f"{data_source}.{(dimension['name']+suffix).replace('__', '.')} AS" 181 + f" {data_source}_{dimension['name']},\n" 182 ) 183 184 base_view_fields.append( 185 { 186 "name": f"{data_source}_{dimension['name']}", 187 "select_sql": f"{data_source}_{dimension['name']},\n", 188 "sql": sql, 189 } 190 ) 191 seen_dimensions.add(dimension["name"]) 192 193 client_id_field = ( 194 "NULL" 195 if data_source_definition.client_id_column == "NULL" 196 else f'{data_source_definition.client_id_column or "client_id"}' 197 ) 198 199 # filters for date ranges 200 where_sql = " AND ".join( 201 [ 202 f""" 203 {data_source.name}.{data_source.submission_date_column or "submission_date"} 204 BETWEEN 205 COALESCE( 206 SAFE_CAST( 207 {{% date_start submission_date %}} AS DATE 208 ), CURRENT_DATE()) AND 209 COALESCE( 210 SAFE_CAST( 211 {{% date_end submission_date %}} AS DATE 212 ), CURRENT_DATE()) 213 """ 214 for data_source in [data_source_definition] + joined_data_sources 215 if data_source.submission_date_column != "NULL" 216 ] 217 ) 218 219 # filte on sample_id if such a field exists 220 for field in base_view_fields: 221 if field["name"].endswith("_sample_id"): 222 where_sql += f""" 223 AND 224 {field['name'].split('_sample_id')[0]}.sample_id < {{% parameter sampling %}} 225 """ 226 break 227 228 view_defn["derived_table"] = { 229 "sql": f""" 230 SELECT 231 {"".join(metric_definitions)} 232 {"".join([field['select_sql'] for field in base_view_fields])} 233 {client_id_field} AS client_id, 234 {{% if aggregate_metrics_by._parameter_value == 'day' %}} 235 {data_source_definition.submission_date_column or "submission_date"} AS analysis_basis 236 {{% elsif 
aggregate_metrics_by._parameter_value == 'week' %}} 237 (FORMAT_DATE( 238 '%F', 239 DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"}, 240 WEEK(MONDAY))) 241 ) AS analysis_basis 242 {{% elsif aggregate_metrics_by._parameter_value == 'month' %}} 243 (FORMAT_DATE( 244 '%Y-%m', 245 {data_source_definition.submission_date_column or "submission_date"}) 246 ) AS analysis_basis 247 {{% elsif aggregate_metrics_by._parameter_value == 'quarter' %}} 248 (FORMAT_DATE( 249 '%Y-%m', 250 DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"}, 251 QUARTER)) 252 ) AS analysis_basis 253 {{% elsif aggregate_metrics_by._parameter_value == 'year' %}} 254 (EXTRACT( 255 YEAR FROM {data_source_definition.submission_date_column or "submission_date"}) 256 ) AS analysis_basis 257 {{% else %}} 258 NULL as analysis_basis 259 {{% endif %}} 260 FROM 261 ( 262 SELECT 263 {data_source_name}.*, 264 {"".join([field['sql'] for field in base_view_fields])} 265 FROM 266 { 267 MetricsConfigLoader.configs.get_data_source_sql( 268 data_source_name, 269 self.namespace, 270 select_fields=False 271 ).format(dataset=self.namespace) 272 } 273 WHERE {where_sql} 274 ) 275 GROUP BY 276 {"".join([field['select_sql'] for field in base_view_fields])} 277 client_id, 278 analysis_basis 279 """ 280 } 281 282 view_defn["dimensions"] = self.get_dimensions() 283 view_defn["dimension_groups"] = self.get_dimension_groups() 284 285 # add the Looker dimensions 286 for data_source, dimensions in base_view_dimensions.items(): 287 for dimension in dimensions: 288 if dimension["name"] not in ignore_base_fields: 289 dimension["sql"] = ( 290 "${TABLE}." 
+ f"{data_source}_{dimension['name']}" 291 ) 292 dimension["group_label"] = "Base Fields" 293 if not lookml_utils._is_dimension_group(dimension): 294 view_defn["dimensions"].append(dimension) 295 else: 296 view_defn["dimension_groups"].append(dimension) 297 # avoid duplicate dimensions 298 ignore_base_fields.append(dimension["name"]) 299 300 view_defn["measures"] = self.get_measures( 301 view_defn["dimensions"], 302 ) 303 view_defn["sets"] = self._get_sets() 304 view_defn["parameters"] = self._get_parameters(view_defn["dimensions"]) 305 306 return {"views": [view_defn]} 307 308 def get_dimensions( 309 self, 310 _table=None, 311 _v1_name: Optional[str] = None, 312 _dryrun=None, 313 ) -> List[Dict[str, Any]]: 314 """Get the set of dimensions for this view based on the metric definitions in metric-hub.""" 315 namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions( 316 self.namespace 317 ) 318 metric_definitions = namespace_definitions.metrics.definitions 319 data_source_name = re.sub("^metric_definitions_", "", self.name) 320 321 return [ 322 { 323 "name": "client_id", 324 "type": "string", 325 "sql": "SAFE_CAST(${TABLE}.client_id AS STRING)", 326 "label": "Client ID", 327 "primary_key": "yes", 328 "group_label": "Base Fields", 329 "description": "Unique client identifier", 330 }, 331 ] + [ # add a dimension for each metric definition 332 { 333 "name": metric_slug, 334 "group_label": "Metrics", 335 "label": metric.friendly_name 336 or lookml_utils.slug_to_title(metric_slug), 337 "description": metric.description or "", 338 "type": "number", 339 "sql": "${TABLE}." 
+ metric_slug, 340 } 341 for metric_slug, metric in metric_definitions.items() 342 if metric.select_expression 343 and metric.data_source.name == data_source_name 344 and metric.type != "histogram" 345 ] 346 347 def get_dimension_groups(self) -> List[Dict[str, Any]]: 348 """Get dimension groups for this view.""" 349 return [ 350 { 351 "name": "submission", 352 "type": "time", 353 "group_label": "Base Fields", 354 "sql": "CAST(${TABLE}.analysis_basis AS TIMESTAMP)", 355 "label": "Submission", 356 "timeframes": [ 357 "raw", 358 "date", 359 "week", 360 "month", 361 "quarter", 362 "year", 363 ], 364 } 365 ] 366 367 def _get_sets(self) -> List[Dict[str, Any]]: 368 """Generate metric sets.""" 369 # group all the metric dimensions into a set 370 dimensions = self.get_dimensions() 371 measures = self.get_measures(dimensions) 372 373 return [ 374 { 375 "name": "metrics", 376 "fields": [ 377 dimension["name"] 378 for dimension in dimensions 379 if dimension["name"] != "client_id" 380 ] 381 + [measure["name"] for measure in measures], 382 } 383 ] 384 385 def _get_parameters(self, dimensions: List[dict]): 386 hide_sampling = "yes" 387 388 for dim in dimensions: 389 if dim["name"] == "sample_id": 390 hide_sampling = "no" 391 break 392 393 return [ 394 { 395 "name": "aggregate_metrics_by", 396 "label": "Aggregate Client Metrics Per", 397 "type": "unquoted", 398 "default_value": "day", 399 "allowed_values": [ 400 {"label": "Per Day", "value": "day"}, 401 {"label": "Per Week", "value": "week"}, 402 {"label": "Per Month", "value": "month"}, 403 {"label": "Per Quarter", "value": "quarter"}, 404 {"label": "Per Year", "value": "year"}, 405 {"label": "Overall", "value": "overall"}, 406 ], 407 }, 408 { 409 "name": "sampling", 410 "label": "Sample of source data in %", 411 "type": "unquoted", 412 "default_value": "100", 413 "hidden": hide_sampling, 414 }, 415 ] 416 417 def get_measures( 418 self, dimensions: List[dict] 419 ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]: 420 
"""Get statistics as measures.""" 421 measures = [] 422 sampling = "1" 423 424 for dim in dimensions: 425 if dim["name"] == "sample_id": 426 sampling = "100 / {% parameter sampling %}" 427 break 428 429 for dimension in dimensions: 430 metric = MetricsConfigLoader.configs.get_metric_definition( 431 dimension["name"], self.namespace 432 ) 433 if metric and metric.statistics: 434 for statistic_slug, statistic_conf in metric.statistics.items(): 435 dimension_label = dimension.get("label") or dimension.get("name") 436 if statistic_slug in [ 437 "average", 438 "max", 439 "min", 440 "median", 441 ]: 442 measures.append( 443 { 444 "name": f"{dimension['name']}_{statistic_slug}", 445 "type": statistic_slug, 446 "sql": "${TABLE}." + dimension["name"], 447 "label": f"{dimension_label} {statistic_slug.title()}", 448 "group_label": "Statistics", 449 "description": f"{statistic_slug.title()} of {dimension_label}", 450 } 451 ) 452 elif statistic_slug == "sum": 453 measures.append( 454 { 455 "name": f"{dimension['name']}_{statistic_slug}", 456 "type": "sum", 457 "sql": "${TABLE}." + dimension["name"] + "*" + sampling, 458 "label": f"{dimension_label} Sum", 459 "group_label": "Statistics", 460 "description": f"Sum of {dimension_label}", 461 } 462 ) 463 elif statistic_slug == "client_count": 464 measures.append( 465 { 466 "name": ( 467 f"{dimension['name']}_{statistic_slug}_sampled" 468 if sampling 469 else f"{dimension['name']}_{statistic_slug}" 470 ), 471 "type": "count_distinct", 472 "label": f"{dimension_label} Client Count", 473 "group_label": "Statistics", 474 "sql": "IF(${TABLE}." 
475 + f"{dimension['name']} > 0, " 476 + "${TABLE}.client_id, SAFE_CAST(NULL AS STRING))", 477 "description": f"Number of clients with {dimension_label}", 478 "hidden": "yes" if sampling else "no", 479 } 480 ) 481 482 if sampling: 483 measures.append( 484 { 485 "name": f"{dimension['name']}_{statistic_slug}", 486 "type": "number", 487 "label": f"{dimension_label} Client Count", 488 "group_label": "Statistics", 489 "sql": "${" 490 + f"{dimension['name']}_{statistic_slug}_sampled" 491 + "} *" 492 + sampling, 493 "description": f"Number of clients with {dimension_label}", 494 } 495 ) 496 elif statistic_slug == "dau_proportion": 497 if "numerator" in statistic_conf: 498 [numerator, numerator_stat] = statistic_conf[ 499 "numerator" 500 ].split(".") 501 measures.append( 502 { 503 "name": "DAU_sampled" if sampling else "DAU", 504 "type": "count_distinct", 505 "label": "DAU", 506 "group_label": "Statistics", 507 "sql": "${TABLE}.client_id", 508 "hidden": "yes", 509 } 510 ) 511 512 if sampling: 513 measures.append( 514 { 515 "name": "DAU", 516 "type": "number", 517 "label": "DAU", 518 "group_label": "Statistics", 519 "sql": "${DAU_sampled} *" + sampling, 520 "hidden": "yes", 521 } 522 ) 523 524 measures.append( 525 { 526 "name": f"{dimension['name']}_{statistic_slug}", 527 "type": "number", 528 "label": f"{dimension_label} DAU Proportion", 529 "sql": "SAFE_DIVIDE(${" 530 + f"{numerator}_{numerator_stat}" 531 + "}, ${DAU})", 532 "group_label": "Statistics", 533 "description": f"Proportion of daily active users with {dimension['name']}", 534 } 535 ) 536 elif statistic_slug == "ratio": 537 if ( 538 "numerator" in statistic_conf 539 and "denominator" in statistic_conf 540 ): 541 [numerator, numerator_stat] = statistic_conf[ 542 "numerator" 543 ].split(".") 544 [denominator, denominator_stat] = statistic_conf[ 545 "denominator" 546 ].split(".") 547 548 measures.append( 549 { 550 "name": f"{dimension['name']}_{statistic_slug}", 551 "type": "number", 552 "label": 
f"{dimension_label} Ratio", 553 "sql": "SAFE_DIVIDE(${" 554 + f"{numerator}_{numerator_stat}" 555 + "}, ${" 556 + f"{denominator}_{denominator_stat}" 557 + "})", 558 "group_label": "Statistics", 559 "description": f"""" 560 Ratio between {statistic_conf['numerator']} and 561 {statistic_conf['denominator']}""", 562 } 563 ) 564 elif statistic_slug == "rolling_average": 565 aggregation = statistic_conf.get("aggregation", "sum") 566 if "window_sizes" in statistic_conf: 567 for window_size in statistic_conf["window_sizes"]: 568 measures.append( 569 { 570 "name": f"{dimension['name']}_{window_size}_day_{statistic_slug}", 571 "type": "number", 572 "label": f"{dimension_label} {window_size} Day Rolling Average", 573 "sql": f""" 574 AVG({aggregation}(${{TABLE}}.{dimension["name"]} * {sampling})) OVER ( 575 ROWS {window_size} PRECEDING 576 )""", 577 "group_label": "Statistics", 578 "description": f"{window_size} day rolling average of {dimension_label}", 579 } 580 ) 581 582 return measures
15class MetricDefinitionsView(View): 16 """A view for metric-hub metrics that come from the same data source.""" 17 18 type: str = "metric_definitions_view" 19 20 def __init__(self, namespace: str, name: str, tables: List[Dict[str, str]]): 21 """Get an instance of an MetricDefinitionsView.""" 22 super().__init__(namespace, name, MetricDefinitionsView.type, tables) 23 24 @classmethod 25 def from_db_views( 26 klass, 27 namespace: str, 28 is_glean: bool, 29 channels: List[Dict[str, str]], 30 db_views: dict, 31 ) -> Iterator[MetricDefinitionsView]: 32 """Get Metric Definition Views from db views and app variants.""" 33 return iter(()) 34 35 @classmethod 36 def from_dict( 37 klass, namespace: str, name: str, definition: ViewDict 38 ) -> MetricDefinitionsView: 39 """Get a MetricDefinitionsView from a dict representation.""" 40 return klass(namespace, name, definition.get("tables", [])) 41 42 def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 43 """Get this view as LookML.""" 44 namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions( 45 self.namespace 46 ) 47 if namespace_definitions is None: 48 return {} 49 50 # get all metric definitions that depend on the data source represented by this view 51 data_source_name = re.sub("^metric_definitions_", "", self.name) 52 data_source_definition = MetricsConfigLoader.configs.get_data_source_definition( 53 data_source_name, self.namespace 54 ) 55 56 if data_source_definition is None: 57 return {} 58 59 # todo: hide deprecated metrics? 
60 metric_definitions = [ 61 f"""{ 62 MetricsConfigLoader.configs.get_env().from_string(metric.select_expression).render() 63 } AS {metric_slug},\n""" 64 for metric_slug, metric in namespace_definitions.metrics.definitions.items() 65 if metric.select_expression 66 and metric.data_source.name == data_source_name 67 and metric.type != "histogram" 68 ] 69 70 if metric_definitions == []: 71 return {} 72 73 # Metric definitions are intended to aggregated by client per date. 74 # A derived table is needed to do these aggregations, instead of defining them as measures 75 # we want to have them available as dimensions (which don't allow aggregations in their definitions) 76 # to allow for custom measures to be later defined in Looker that aggregate these per client metrics. 77 view_defn: Dict[str, Any] = {"name": self.name} 78 79 ignore_base_fields = [ 80 "client_id", 81 "submission_date", 82 "submission", 83 "first_run", 84 ] + [ 85 metric_slug 86 for metric_slug, metric in namespace_definitions.metrics.definitions.items() 87 if metric.select_expression 88 and metric.data_source.name == data_source_name 89 and metric.type != "histogram" 90 ] 91 92 base_view_dimensions = {} 93 joined_data_sources = [] 94 95 # check if the metric data source has joins 96 # joined data sources are generally used for creating the "Base Fields" 97 if data_source_definition.joins: 98 # determine the dimensions selected by the joined data sources 99 for joined_data_source_slug, join in data_source_definition.joins.items(): 100 joined_data_source = ( 101 MetricsConfigLoader.configs.get_data_source_definition( 102 joined_data_source_slug, self.namespace 103 ) 104 ) 105 106 if joined_data_source.columns_as_dimensions: 107 joined_data_sources.append(joined_data_source) 108 109 date_filter = None 110 if joined_data_source.submission_date_column != "NULL": 111 date_filter = ( 112 None 113 if joined_data_source.submission_date_column is None 114 or joined_data_source.submission_date_column == "NULL" 
115 else f"{joined_data_source.submission_date_column} = '2023-01-01'" 116 ) 117 118 # create Looker dimensions by doing a dryrun 119 query = MetricsConfigLoader.configs.get_data_source_sql( 120 joined_data_source_slug, 121 self.namespace, 122 where=date_filter, 123 ).format(dataset=self.namespace) 124 125 base_view_dimensions[joined_data_source_slug] = ( 126 lookml_utils._generate_dimensions_from_query( 127 query, dryrun=dryrun 128 ) 129 ) 130 131 if ( 132 data_source_definition.client_id_column == "NULL" 133 and not base_view_dimensions 134 ) or data_source_definition.columns_as_dimensions: 135 # if the metrics data source doesn't have any joins then use the dimensions 136 # of the data source itself as base fields 137 date_filter = None 138 if data_source_definition.submission_date_column != "NULL": 139 date_filter = ( 140 "submission_date = '2023-01-01'" 141 if data_source_definition.submission_date_column is None 142 else f"{data_source_definition.submission_date_column} = '2023-01-01'" 143 ) 144 145 query = MetricsConfigLoader.configs.get_data_source_sql( 146 data_source_definition.name, 147 self.namespace, 148 where=date_filter, 149 ignore_joins=True, 150 ).format(dataset=self.namespace) 151 152 base_view_dimensions[data_source_definition.name] = ( 153 lookml_utils._generate_dimensions_from_query(query, dryrun) 154 ) 155 156 # to prevent duplicate dimensions, especially when working with time dimensions 157 # where names are modified potentially causing naming collisions 158 seen_dimensions = set() 159 # prepare base field data for query 160 base_view_fields = [] 161 for data_source, dimensions in base_view_dimensions.items(): 162 for dimension in dimensions: 163 if ( 164 dimension["name"] not in ignore_base_fields 165 and dimension["name"] not in seen_dimensions 166 and "hidden" not in dimension 167 ): 168 sql = ( 169 f"{data_source}.{dimension['name'].replace('__', '.')} AS" 170 + f" {data_source}_{dimension['name']},\n" 171 ) 172 # date/time/timestamp 
suffixes are removed when generating lookml dimensions, however we 173 # need the original field name for the derived view SQL 174 if dimension["type"] == "time" and not dimension["sql"].endswith( 175 dimension["name"] 176 ): 177 suffix = dimension["sql"].split( 178 dimension["name"].replace("__", ".") 179 )[-1] 180 sql = ( 181 f"{data_source}.{(dimension['name']+suffix).replace('__', '.')} AS" 182 + f" {data_source}_{dimension['name']},\n" 183 ) 184 185 base_view_fields.append( 186 { 187 "name": f"{data_source}_{dimension['name']}", 188 "select_sql": f"{data_source}_{dimension['name']},\n", 189 "sql": sql, 190 } 191 ) 192 seen_dimensions.add(dimension["name"]) 193 194 client_id_field = ( 195 "NULL" 196 if data_source_definition.client_id_column == "NULL" 197 else f'{data_source_definition.client_id_column or "client_id"}' 198 ) 199 200 # filters for date ranges 201 where_sql = " AND ".join( 202 [ 203 f""" 204 {data_source.name}.{data_source.submission_date_column or "submission_date"} 205 BETWEEN 206 COALESCE( 207 SAFE_CAST( 208 {{% date_start submission_date %}} AS DATE 209 ), CURRENT_DATE()) AND 210 COALESCE( 211 SAFE_CAST( 212 {{% date_end submission_date %}} AS DATE 213 ), CURRENT_DATE()) 214 """ 215 for data_source in [data_source_definition] + joined_data_sources 216 if data_source.submission_date_column != "NULL" 217 ] 218 ) 219 220 # filte on sample_id if such a field exists 221 for field in base_view_fields: 222 if field["name"].endswith("_sample_id"): 223 where_sql += f""" 224 AND 225 {field['name'].split('_sample_id')[0]}.sample_id < {{% parameter sampling %}} 226 """ 227 break 228 229 view_defn["derived_table"] = { 230 "sql": f""" 231 SELECT 232 {"".join(metric_definitions)} 233 {"".join([field['select_sql'] for field in base_view_fields])} 234 {client_id_field} AS client_id, 235 {{% if aggregate_metrics_by._parameter_value == 'day' %}} 236 {data_source_definition.submission_date_column or "submission_date"} AS analysis_basis 237 {{% elsif 
aggregate_metrics_by._parameter_value == 'week' %}} 238 (FORMAT_DATE( 239 '%F', 240 DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"}, 241 WEEK(MONDAY))) 242 ) AS analysis_basis 243 {{% elsif aggregate_metrics_by._parameter_value == 'month' %}} 244 (FORMAT_DATE( 245 '%Y-%m', 246 {data_source_definition.submission_date_column or "submission_date"}) 247 ) AS analysis_basis 248 {{% elsif aggregate_metrics_by._parameter_value == 'quarter' %}} 249 (FORMAT_DATE( 250 '%Y-%m', 251 DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"}, 252 QUARTER)) 253 ) AS analysis_basis 254 {{% elsif aggregate_metrics_by._parameter_value == 'year' %}} 255 (EXTRACT( 256 YEAR FROM {data_source_definition.submission_date_column or "submission_date"}) 257 ) AS analysis_basis 258 {{% else %}} 259 NULL as analysis_basis 260 {{% endif %}} 261 FROM 262 ( 263 SELECT 264 {data_source_name}.*, 265 {"".join([field['sql'] for field in base_view_fields])} 266 FROM 267 { 268 MetricsConfigLoader.configs.get_data_source_sql( 269 data_source_name, 270 self.namespace, 271 select_fields=False 272 ).format(dataset=self.namespace) 273 } 274 WHERE {where_sql} 275 ) 276 GROUP BY 277 {"".join([field['select_sql'] for field in base_view_fields])} 278 client_id, 279 analysis_basis 280 """ 281 } 282 283 view_defn["dimensions"] = self.get_dimensions() 284 view_defn["dimension_groups"] = self.get_dimension_groups() 285 286 # add the Looker dimensions 287 for data_source, dimensions in base_view_dimensions.items(): 288 for dimension in dimensions: 289 if dimension["name"] not in ignore_base_fields: 290 dimension["sql"] = ( 291 "${TABLE}." 
+ f"{data_source}_{dimension['name']}" 292 ) 293 dimension["group_label"] = "Base Fields" 294 if not lookml_utils._is_dimension_group(dimension): 295 view_defn["dimensions"].append(dimension) 296 else: 297 view_defn["dimension_groups"].append(dimension) 298 # avoid duplicate dimensions 299 ignore_base_fields.append(dimension["name"]) 300 301 view_defn["measures"] = self.get_measures( 302 view_defn["dimensions"], 303 ) 304 view_defn["sets"] = self._get_sets() 305 view_defn["parameters"] = self._get_parameters(view_defn["dimensions"]) 306 307 return {"views": [view_defn]} 308 309 def get_dimensions( 310 self, 311 _table=None, 312 _v1_name: Optional[str] = None, 313 _dryrun=None, 314 ) -> List[Dict[str, Any]]: 315 """Get the set of dimensions for this view based on the metric definitions in metric-hub.""" 316 namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions( 317 self.namespace 318 ) 319 metric_definitions = namespace_definitions.metrics.definitions 320 data_source_name = re.sub("^metric_definitions_", "", self.name) 321 322 return [ 323 { 324 "name": "client_id", 325 "type": "string", 326 "sql": "SAFE_CAST(${TABLE}.client_id AS STRING)", 327 "label": "Client ID", 328 "primary_key": "yes", 329 "group_label": "Base Fields", 330 "description": "Unique client identifier", 331 }, 332 ] + [ # add a dimension for each metric definition 333 { 334 "name": metric_slug, 335 "group_label": "Metrics", 336 "label": metric.friendly_name 337 or lookml_utils.slug_to_title(metric_slug), 338 "description": metric.description or "", 339 "type": "number", 340 "sql": "${TABLE}." 
+ metric_slug, 341 } 342 for metric_slug, metric in metric_definitions.items() 343 if metric.select_expression 344 and metric.data_source.name == data_source_name 345 and metric.type != "histogram" 346 ] 347 348 def get_dimension_groups(self) -> List[Dict[str, Any]]: 349 """Get dimension groups for this view.""" 350 return [ 351 { 352 "name": "submission", 353 "type": "time", 354 "group_label": "Base Fields", 355 "sql": "CAST(${TABLE}.analysis_basis AS TIMESTAMP)", 356 "label": "Submission", 357 "timeframes": [ 358 "raw", 359 "date", 360 "week", 361 "month", 362 "quarter", 363 "year", 364 ], 365 } 366 ] 367 368 def _get_sets(self) -> List[Dict[str, Any]]: 369 """Generate metric sets.""" 370 # group all the metric dimensions into a set 371 dimensions = self.get_dimensions() 372 measures = self.get_measures(dimensions) 373 374 return [ 375 { 376 "name": "metrics", 377 "fields": [ 378 dimension["name"] 379 for dimension in dimensions 380 if dimension["name"] != "client_id" 381 ] 382 + [measure["name"] for measure in measures], 383 } 384 ] 385 386 def _get_parameters(self, dimensions: List[dict]): 387 hide_sampling = "yes" 388 389 for dim in dimensions: 390 if dim["name"] == "sample_id": 391 hide_sampling = "no" 392 break 393 394 return [ 395 { 396 "name": "aggregate_metrics_by", 397 "label": "Aggregate Client Metrics Per", 398 "type": "unquoted", 399 "default_value": "day", 400 "allowed_values": [ 401 {"label": "Per Day", "value": "day"}, 402 {"label": "Per Week", "value": "week"}, 403 {"label": "Per Month", "value": "month"}, 404 {"label": "Per Quarter", "value": "quarter"}, 405 {"label": "Per Year", "value": "year"}, 406 {"label": "Overall", "value": "overall"}, 407 ], 408 }, 409 { 410 "name": "sampling", 411 "label": "Sample of source data in %", 412 "type": "unquoted", 413 "default_value": "100", 414 "hidden": hide_sampling, 415 }, 416 ] 417 418 def get_measures( 419 self, dimensions: List[dict] 420 ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]: 421 
"""Get statistics as measures.""" 422 measures = [] 423 sampling = "1" 424 425 for dim in dimensions: 426 if dim["name"] == "sample_id": 427 sampling = "100 / {% parameter sampling %}" 428 break 429 430 for dimension in dimensions: 431 metric = MetricsConfigLoader.configs.get_metric_definition( 432 dimension["name"], self.namespace 433 ) 434 if metric and metric.statistics: 435 for statistic_slug, statistic_conf in metric.statistics.items(): 436 dimension_label = dimension.get("label") or dimension.get("name") 437 if statistic_slug in [ 438 "average", 439 "max", 440 "min", 441 "median", 442 ]: 443 measures.append( 444 { 445 "name": f"{dimension['name']}_{statistic_slug}", 446 "type": statistic_slug, 447 "sql": "${TABLE}." + dimension["name"], 448 "label": f"{dimension_label} {statistic_slug.title()}", 449 "group_label": "Statistics", 450 "description": f"{statistic_slug.title()} of {dimension_label}", 451 } 452 ) 453 elif statistic_slug == "sum": 454 measures.append( 455 { 456 "name": f"{dimension['name']}_{statistic_slug}", 457 "type": "sum", 458 "sql": "${TABLE}." + dimension["name"] + "*" + sampling, 459 "label": f"{dimension_label} Sum", 460 "group_label": "Statistics", 461 "description": f"Sum of {dimension_label}", 462 } 463 ) 464 elif statistic_slug == "client_count": 465 measures.append( 466 { 467 "name": ( 468 f"{dimension['name']}_{statistic_slug}_sampled" 469 if sampling 470 else f"{dimension['name']}_{statistic_slug}" 471 ), 472 "type": "count_distinct", 473 "label": f"{dimension_label} Client Count", 474 "group_label": "Statistics", 475 "sql": "IF(${TABLE}." 
476 + f"{dimension['name']} > 0, " 477 + "${TABLE}.client_id, SAFE_CAST(NULL AS STRING))", 478 "description": f"Number of clients with {dimension_label}", 479 "hidden": "yes" if sampling else "no", 480 } 481 ) 482 483 if sampling: 484 measures.append( 485 { 486 "name": f"{dimension['name']}_{statistic_slug}", 487 "type": "number", 488 "label": f"{dimension_label} Client Count", 489 "group_label": "Statistics", 490 "sql": "${" 491 + f"{dimension['name']}_{statistic_slug}_sampled" 492 + "} *" 493 + sampling, 494 "description": f"Number of clients with {dimension_label}", 495 } 496 ) 497 elif statistic_slug == "dau_proportion": 498 if "numerator" in statistic_conf: 499 [numerator, numerator_stat] = statistic_conf[ 500 "numerator" 501 ].split(".") 502 measures.append( 503 { 504 "name": "DAU_sampled" if sampling else "DAU", 505 "type": "count_distinct", 506 "label": "DAU", 507 "group_label": "Statistics", 508 "sql": "${TABLE}.client_id", 509 "hidden": "yes", 510 } 511 ) 512 513 if sampling: 514 measures.append( 515 { 516 "name": "DAU", 517 "type": "number", 518 "label": "DAU", 519 "group_label": "Statistics", 520 "sql": "${DAU_sampled} *" + sampling, 521 "hidden": "yes", 522 } 523 ) 524 525 measures.append( 526 { 527 "name": f"{dimension['name']}_{statistic_slug}", 528 "type": "number", 529 "label": f"{dimension_label} DAU Proportion", 530 "sql": "SAFE_DIVIDE(${" 531 + f"{numerator}_{numerator_stat}" 532 + "}, ${DAU})", 533 "group_label": "Statistics", 534 "description": f"Proportion of daily active users with {dimension['name']}", 535 } 536 ) 537 elif statistic_slug == "ratio": 538 if ( 539 "numerator" in statistic_conf 540 and "denominator" in statistic_conf 541 ): 542 [numerator, numerator_stat] = statistic_conf[ 543 "numerator" 544 ].split(".") 545 [denominator, denominator_stat] = statistic_conf[ 546 "denominator" 547 ].split(".") 548 549 measures.append( 550 { 551 "name": f"{dimension['name']}_{statistic_slug}", 552 "type": "number", 553 "label": 
f"{dimension_label} Ratio", 554 "sql": "SAFE_DIVIDE(${" 555 + f"{numerator}_{numerator_stat}" 556 + "}, ${" 557 + f"{denominator}_{denominator_stat}" 558 + "})", 559 "group_label": "Statistics", 560 "description": f"""" 561 Ratio between {statistic_conf['numerator']} and 562 {statistic_conf['denominator']}""", 563 } 564 ) 565 elif statistic_slug == "rolling_average": 566 aggregation = statistic_conf.get("aggregation", "sum") 567 if "window_sizes" in statistic_conf: 568 for window_size in statistic_conf["window_sizes"]: 569 measures.append( 570 { 571 "name": f"{dimension['name']}_{window_size}_day_{statistic_slug}", 572 "type": "number", 573 "label": f"{dimension_label} {window_size} Day Rolling Average", 574 "sql": f""" 575 AVG({aggregation}(${{TABLE}}.{dimension["name"]} * {sampling})) OVER ( 576 ROWS {window_size} PRECEDING 577 )""", 578 "group_label": "Statistics", 579 "description": f"{window_size} day rolling average of {dimension_label}", 580 } 581 ) 582 583 return measures
A view for metric-hub metrics that come from the same data source.
MetricDefinitionsView(namespace: str, name: str, tables: List[Dict[str, str]])
20 def __init__(self, namespace: str, name: str, tables: List[Dict[str, str]]): 21 """Get an instance of an MetricDefinitionsView.""" 22 super().__init__(namespace, name, MetricDefinitionsView.type, tables)
Get an instance of a MetricDefinitionsView.
@classmethod
def
from_db_views( klass, namespace: str, is_glean: bool, channels: List[Dict[str, str]], db_views: dict) -> Iterator[MetricDefinitionsView]:
24 @classmethod 25 def from_db_views( 26 klass, 27 namespace: str, 28 is_glean: bool, 29 channels: List[Dict[str, str]], 30 db_views: dict, 31 ) -> Iterator[MetricDefinitionsView]: 32 """Get Metric Definition Views from db views and app variants.""" 33 return iter(())
Get Metric Definition Views from db views and app variants.
@classmethod
def
from_dict( klass, namespace: str, name: str, definition: generator.views.view.ViewDict) -> MetricDefinitionsView:
35 @classmethod 36 def from_dict( 37 klass, namespace: str, name: str, definition: ViewDict 38 ) -> MetricDefinitionsView: 39 """Get a MetricDefinitionsView from a dict representation.""" 40 return klass(namespace, name, definition.get("tables", []))
Get a MetricDefinitionsView from a dict representation.
def
to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
        """Get this view as LookML.

        Builds a derived-table view that renders every metric-hub metric for
        this view's data source as a per-client, per-analysis-basis SQL select,
        then exposes those metrics as dimensions plus "Base Fields" dimensions
        taken from the data source (and any joined data sources).

        Args:
            v1_name: unused in this subclass; kept for the View interface.
            dryrun: dryrun helper passed through to derive Looker dimensions
                from a SQL query.

        Returns:
            ``{"views": [view_defn]}`` or ``{}`` when the namespace has no
            definitions or no metrics reference this data source.
        """
        namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
            self.namespace
        )
        if namespace_definitions is None:
            return {}

        # get all metric definitions that depend on the data source represented by this view;
        # the data source slug is the view name without its "metric_definitions_" prefix
        data_source_name = re.sub("^metric_definitions_", "", self.name)
        data_source_definition = MetricsConfigLoader.configs.get_data_source_definition(
            data_source_name, self.namespace
        )

        if data_source_definition is None:
            return {}

        # todo: hide deprecated metrics?
        # render each metric's select expression through the metric-hub Jinja env
        metric_definitions = [
            f"""{
                MetricsConfigLoader.configs.get_env().from_string(metric.select_expression).render()
            } AS {metric_slug},\n"""
            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
            if metric.select_expression
            and metric.data_source.name == data_source_name
            and metric.type != "histogram"
        ]

        if metric_definitions == []:
            return {}

        # Metric definitions are intended to be aggregated by client per date.
        # A derived table is needed to do these aggregations; instead of defining them as measures
        # we want to have them available as dimensions (which don't allow aggregations in their definitions)
        # to allow for custom measures to be later defined in Looker that aggregate these per client metrics.
        view_defn: Dict[str, Any] = {"name": self.name}

        # fields that must not be duplicated as "Base Fields" dimensions:
        # identifiers/date columns plus every metric slug exposed above
        ignore_base_fields = [
            "client_id",
            "submission_date",
            "submission",
            "first_run",
        ] + [
            metric_slug
            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
            if metric.select_expression
            and metric.data_source.name == data_source_name
            and metric.type != "histogram"
        ]

        base_view_dimensions = {}
        joined_data_sources = []

        # check if the metric data source has joins
        # joined data sources are generally used for creating the "Base Fields"
        if data_source_definition.joins:
            # determine the dimensions selected by the joined data sources
            for joined_data_source_slug, join in data_source_definition.joins.items():
                joined_data_source = (
                    MetricsConfigLoader.configs.get_data_source_definition(
                        joined_data_source_slug, self.namespace
                    )
                )

                if joined_data_source.columns_as_dimensions:
                    joined_data_sources.append(joined_data_source)

                    # a fixed date filter keeps the dryrun query cheap;
                    # "NULL" (the string) marks "no submission date column"
                    date_filter = None
                    if joined_data_source.submission_date_column != "NULL":
                        date_filter = (
                            None
                            if joined_data_source.submission_date_column is None
                            or joined_data_source.submission_date_column == "NULL"
                            else f"{joined_data_source.submission_date_column} = '2023-01-01'"
                        )

                    # create Looker dimensions by doing a dryrun
                    query = MetricsConfigLoader.configs.get_data_source_sql(
                        joined_data_source_slug,
                        self.namespace,
                        where=date_filter,
                    ).format(dataset=self.namespace)

                    base_view_dimensions[joined_data_source_slug] = (
                        lookml_utils._generate_dimensions_from_query(
                            query, dryrun=dryrun
                        )
                    )

        if (
            data_source_definition.client_id_column == "NULL"
            and not base_view_dimensions
        ) or data_source_definition.columns_as_dimensions:
            # if the metrics data source doesn't have any joins then use the dimensions
            # of the data source itself as base fields
            date_filter = None
            if data_source_definition.submission_date_column != "NULL":
                date_filter = (
                    "submission_date = '2023-01-01'"
                    if data_source_definition.submission_date_column is None
                    else f"{data_source_definition.submission_date_column} = '2023-01-01'"
                )

            query = MetricsConfigLoader.configs.get_data_source_sql(
                data_source_definition.name,
                self.namespace,
                where=date_filter,
                ignore_joins=True,
            ).format(dataset=self.namespace)

            base_view_dimensions[data_source_definition.name] = (
                lookml_utils._generate_dimensions_from_query(query, dryrun)
            )

        # to prevent duplicate dimensions, especially when working with time dimensions
        # where names are modified potentially causing naming collisions
        seen_dimensions = set()
        # prepare base field data for query
        base_view_fields = []
        for data_source, dimensions in base_view_dimensions.items():
            for dimension in dimensions:
                if (
                    dimension["name"] not in ignore_base_fields
                    and dimension["name"] not in seen_dimensions
                    and "hidden" not in dimension
                ):
                    # "__" in dimension names encodes nesting; restore "." for SQL
                    sql = (
                        f"{data_source}.{dimension['name'].replace('__', '.')} AS"
                        + f" {data_source}_{dimension['name']},\n"
                    )
                    # date/time/timestamp suffixes are removed when generating lookml dimensions, however we
                    # need the original field name for the derived view SQL
                    if dimension["type"] == "time" and not dimension["sql"].endswith(
                        dimension["name"]
                    ):
                        suffix = dimension["sql"].split(
                            dimension["name"].replace("__", ".")
                        )[-1]
                        sql = (
                            f"{data_source}.{(dimension['name']+suffix).replace('__', '.')} AS"
                            + f" {data_source}_{dimension['name']},\n"
                        )

                    base_view_fields.append(
                        {
                            "name": f"{data_source}_{dimension['name']}",
                            "select_sql": f"{data_source}_{dimension['name']},\n",
                            "sql": sql,
                        }
                    )
                    seen_dimensions.add(dimension["name"])

        client_id_field = (
            "NULL"
            if data_source_definition.client_id_column == "NULL"
            else f'{data_source_definition.client_id_column or "client_id"}'
        )

        # filters for date ranges
        where_sql = " AND ".join(
            [
                f"""
                {data_source.name}.{data_source.submission_date_column or "submission_date"}
                BETWEEN
                COALESCE(
                    SAFE_CAST(
                        {{% date_start submission_date %}} AS DATE
                    ), CURRENT_DATE()) AND
                COALESCE(
                    SAFE_CAST(
                        {{% date_end submission_date %}} AS DATE
                    ), CURRENT_DATE())
                """
                for data_source in [data_source_definition] + joined_data_sources
                if data_source.submission_date_column != "NULL"
            ]
        )

        # filter on sample_id if such a field exists
        for field in base_view_fields:
            if field["name"].endswith("_sample_id"):
                where_sql += f"""
                AND
                    {field['name'].split('_sample_id')[0]}.sample_id < {{% parameter sampling %}}
                """
                break

        # the aggregate_metrics_by Looker parameter picks the analysis basis
        # (day/week/month/quarter/year) used for grouping
        view_defn["derived_table"] = {
            "sql": f"""
            SELECT
                {"".join(metric_definitions)}
                {"".join([field['select_sql'] for field in base_view_fields])}
                {client_id_field} AS client_id,
                {{% if aggregate_metrics_by._parameter_value == 'day' %}}
                {data_source_definition.submission_date_column or "submission_date"} AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'week' %}}
                (FORMAT_DATE(
                    '%F',
                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
                    WEEK(MONDAY)))
                ) AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'month' %}}
                (FORMAT_DATE(
                    '%Y-%m',
                    {data_source_definition.submission_date_column or "submission_date"})
                ) AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'quarter' %}}
                (FORMAT_DATE(
                    '%Y-%m',
                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
                    QUARTER))
                ) AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'year' %}}
                (EXTRACT(
                    YEAR FROM {data_source_definition.submission_date_column or "submission_date"})
                ) AS analysis_basis
                {{% else %}}
                NULL as analysis_basis
                {{% endif %}}
            FROM
                (
                    SELECT
                        {data_source_name}.*,
                        {"".join([field['sql'] for field in base_view_fields])}
                    FROM
                    {
                        MetricsConfigLoader.configs.get_data_source_sql(
                            data_source_name,
                            self.namespace,
                            select_fields=False
                        ).format(dataset=self.namespace)
                    }
                    WHERE {where_sql}
                )
            GROUP BY
                {"".join([field['select_sql'] for field in base_view_fields])}
                client_id,
                analysis_basis
            """
        }

        view_defn["dimensions"] = self.get_dimensions()
        view_defn["dimension_groups"] = self.get_dimension_groups()

        # add the Looker dimensions
        for data_source, dimensions in base_view_dimensions.items():
            for dimension in dimensions:
                if dimension["name"] not in ignore_base_fields:
                    dimension["sql"] = (
                        "${TABLE}." + f"{data_source}_{dimension['name']}"
                    )
                    dimension["group_label"] = "Base Fields"
                    if not lookml_utils._is_dimension_group(dimension):
                        view_defn["dimensions"].append(dimension)
                    else:
                        view_defn["dimension_groups"].append(dimension)
                    # avoid duplicate dimensions
                    ignore_base_fields.append(dimension["name"])

        view_defn["measures"] = self.get_measures(
            view_defn["dimensions"],
        )
        view_defn["sets"] = self._get_sets()
        view_defn["parameters"] = self._get_parameters(view_defn["dimensions"])

        return {"views": [view_defn]}
Get this view as LookML.
def
get_dimensions( self, _table=None, _v1_name: Optional[str] = None, _dryrun=None) -> List[Dict[str, Any]]:
309 def get_dimensions( 310 self, 311 _table=None, 312 _v1_name: Optional[str] = None, 313 _dryrun=None, 314 ) -> List[Dict[str, Any]]: 315 """Get the set of dimensions for this view based on the metric definitions in metric-hub.""" 316 namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions( 317 self.namespace 318 ) 319 metric_definitions = namespace_definitions.metrics.definitions 320 data_source_name = re.sub("^metric_definitions_", "", self.name) 321 322 return [ 323 { 324 "name": "client_id", 325 "type": "string", 326 "sql": "SAFE_CAST(${TABLE}.client_id AS STRING)", 327 "label": "Client ID", 328 "primary_key": "yes", 329 "group_label": "Base Fields", 330 "description": "Unique client identifier", 331 }, 332 ] + [ # add a dimension for each metric definition 333 { 334 "name": metric_slug, 335 "group_label": "Metrics", 336 "label": metric.friendly_name 337 or lookml_utils.slug_to_title(metric_slug), 338 "description": metric.description or "", 339 "type": "number", 340 "sql": "${TABLE}." + metric_slug, 341 } 342 for metric_slug, metric in metric_definitions.items() 343 if metric.select_expression 344 and metric.data_source.name == data_source_name 345 and metric.type != "histogram" 346 ]
Get the set of dimensions for this view based on the metric definitions in metric-hub.
def
get_dimension_groups(self) -> List[Dict[str, Any]]:
348 def get_dimension_groups(self) -> List[Dict[str, Any]]: 349 """Get dimension groups for this view.""" 350 return [ 351 { 352 "name": "submission", 353 "type": "time", 354 "group_label": "Base Fields", 355 "sql": "CAST(${TABLE}.analysis_basis AS TIMESTAMP)", 356 "label": "Submission", 357 "timeframes": [ 358 "raw", 359 "date", 360 "week", 361 "month", 362 "quarter", 363 "year", 364 ], 365 } 366 ]
Get dimension groups for this view.
def
get_measures( self, dimensions: List[dict]) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
418 def get_measures( 419 self, dimensions: List[dict] 420 ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]: 421 """Get statistics as measures.""" 422 measures = [] 423 sampling = "1" 424 425 for dim in dimensions: 426 if dim["name"] == "sample_id": 427 sampling = "100 / {% parameter sampling %}" 428 break 429 430 for dimension in dimensions: 431 metric = MetricsConfigLoader.configs.get_metric_definition( 432 dimension["name"], self.namespace 433 ) 434 if metric and metric.statistics: 435 for statistic_slug, statistic_conf in metric.statistics.items(): 436 dimension_label = dimension.get("label") or dimension.get("name") 437 if statistic_slug in [ 438 "average", 439 "max", 440 "min", 441 "median", 442 ]: 443 measures.append( 444 { 445 "name": f"{dimension['name']}_{statistic_slug}", 446 "type": statistic_slug, 447 "sql": "${TABLE}." + dimension["name"], 448 "label": f"{dimension_label} {statistic_slug.title()}", 449 "group_label": "Statistics", 450 "description": f"{statistic_slug.title()} of {dimension_label}", 451 } 452 ) 453 elif statistic_slug == "sum": 454 measures.append( 455 { 456 "name": f"{dimension['name']}_{statistic_slug}", 457 "type": "sum", 458 "sql": "${TABLE}." + dimension["name"] + "*" + sampling, 459 "label": f"{dimension_label} Sum", 460 "group_label": "Statistics", 461 "description": f"Sum of {dimension_label}", 462 } 463 ) 464 elif statistic_slug == "client_count": 465 measures.append( 466 { 467 "name": ( 468 f"{dimension['name']}_{statistic_slug}_sampled" 469 if sampling 470 else f"{dimension['name']}_{statistic_slug}" 471 ), 472 "type": "count_distinct", 473 "label": f"{dimension_label} Client Count", 474 "group_label": "Statistics", 475 "sql": "IF(${TABLE}." 
476 + f"{dimension['name']} > 0, " 477 + "${TABLE}.client_id, SAFE_CAST(NULL AS STRING))", 478 "description": f"Number of clients with {dimension_label}", 479 "hidden": "yes" if sampling else "no", 480 } 481 ) 482 483 if sampling: 484 measures.append( 485 { 486 "name": f"{dimension['name']}_{statistic_slug}", 487 "type": "number", 488 "label": f"{dimension_label} Client Count", 489 "group_label": "Statistics", 490 "sql": "${" 491 + f"{dimension['name']}_{statistic_slug}_sampled" 492 + "} *" 493 + sampling, 494 "description": f"Number of clients with {dimension_label}", 495 } 496 ) 497 elif statistic_slug == "dau_proportion": 498 if "numerator" in statistic_conf: 499 [numerator, numerator_stat] = statistic_conf[ 500 "numerator" 501 ].split(".") 502 measures.append( 503 { 504 "name": "DAU_sampled" if sampling else "DAU", 505 "type": "count_distinct", 506 "label": "DAU", 507 "group_label": "Statistics", 508 "sql": "${TABLE}.client_id", 509 "hidden": "yes", 510 } 511 ) 512 513 if sampling: 514 measures.append( 515 { 516 "name": "DAU", 517 "type": "number", 518 "label": "DAU", 519 "group_label": "Statistics", 520 "sql": "${DAU_sampled} *" + sampling, 521 "hidden": "yes", 522 } 523 ) 524 525 measures.append( 526 { 527 "name": f"{dimension['name']}_{statistic_slug}", 528 "type": "number", 529 "label": f"{dimension_label} DAU Proportion", 530 "sql": "SAFE_DIVIDE(${" 531 + f"{numerator}_{numerator_stat}" 532 + "}, ${DAU})", 533 "group_label": "Statistics", 534 "description": f"Proportion of daily active users with {dimension['name']}", 535 } 536 ) 537 elif statistic_slug == "ratio": 538 if ( 539 "numerator" in statistic_conf 540 and "denominator" in statistic_conf 541 ): 542 [numerator, numerator_stat] = statistic_conf[ 543 "numerator" 544 ].split(".") 545 [denominator, denominator_stat] = statistic_conf[ 546 "denominator" 547 ].split(".") 548 549 measures.append( 550 { 551 "name": f"{dimension['name']}_{statistic_slug}", 552 "type": "number", 553 "label": 
f"{dimension_label} Ratio", 554 "sql": "SAFE_DIVIDE(${" 555 + f"{numerator}_{numerator_stat}" 556 + "}, ${" 557 + f"{denominator}_{denominator_stat}" 558 + "})", 559 "group_label": "Statistics", 560 "description": f"""" 561 Ratio between {statistic_conf['numerator']} and 562 {statistic_conf['denominator']}""", 563 } 564 ) 565 elif statistic_slug == "rolling_average": 566 aggregation = statistic_conf.get("aggregation", "sum") 567 if "window_sizes" in statistic_conf: 568 for window_size in statistic_conf["window_sizes"]: 569 measures.append( 570 { 571 "name": f"{dimension['name']}_{window_size}_day_{statistic_slug}", 572 "type": "number", 573 "label": f"{dimension_label} {window_size} Day Rolling Average", 574 "sql": f""" 575 AVG({aggregation}(${{TABLE}}.{dimension["name"]} * {sampling})) OVER ( 576 ROWS {window_size} PRECEDING 577 )""", 578 "group_label": "Statistics", 579 "description": f"{window_size} day rolling average of {dimension_label}", 580 } 581 ) 582 583 return measures
Get statistics as measures.