generator.views.glean_ping_view
Class to describe a Glean Ping View.
"""Class to describe a Glean Ping View."""

import logging
import re
from collections import Counter
from textwrap import dedent
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

import click
from mozilla_schema_generator.glean_ping import GleanPing
from mozilla_schema_generator.probes import GleanProbe

from . import lookml_utils
from .lookml_utils import slug_to_title
from .ping_view import PingView

# Metric types whose dimension is looked up with a "sum" suffix.
DISTRIBUTION_TYPES = {
    "timing_distribution",
    "memory_distribution",
    "custom_distribution",
}


# Metric types for which dimensions are generated at all.
ALLOWED_TYPES = DISTRIBUTION_TYPES | {
    "boolean",
    "labeled_boolean",
    "counter",
    "labeled_counter",
    "datetime",
    "jwe",
    "quantity",
    "string",
    "labeled_string",
    "rate",
    "timespan",
    "uuid",
    "url",
    "text",
    "labeled_quantity",
}

# Bug 1737656 - some metric types are exposed under different names
# We need to map to the new name when building dimensions.
RENAMED_METRIC_TYPES = {
    "jwe": "jwe2",
    "text": "text2",
    "url": "url2",
}


# View names that never become a GleanPingView.
DISALLOWED_PINGS = {"events"}

# List of labeled counter names for which a suggest explore should be generated.
# Generating suggest explores for all labeled counters slows down Looker.
SUGGESTS_FOR_LABELED_COUNTERS = {"metrics__labeled_counter__glean_error_invalid_label"}


class GleanPingView(PingView):
    """A view on a ping table for an application using the Glean SDK."""

    type: str = "glean_ping_view"
    allow_glean: bool = True

    @classmethod
    def from_db_views(klass, *args, **kwargs):
        """Generate GleanPingViews from db views.

        Yields every view produced by the parent implementation except the
        ones whose name is listed in DISALLOWED_PINGS.
        """
        for view in super().from_db_views(*args, **kwargs):
            if view.name not in DISALLOWED_PINGS:
                yield view

    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
        """Generate LookML for this view.

        The Glean views include labeled metrics, which need to be joined
        against the view in the explore.

        The returned dictionary is the parent's LookML with its "views" list
        replaced by: the parent's first view, one join view per labeled
        counter that has a matching dimension (plus a suggest view for the
        counters in SUGGESTS_FOR_LABELED_COUNTERS), and the nested dimension
        views derived from the table schema.
        """
        lookml = super().to_lookml(v1_name, dryrun=dryrun)
        # ignore nested join views
        lookml["views"] = [lookml["views"][0]]

        # Prefer the release-channel table; fall back to the first table.
        table = next(
            (table for table in self.tables if table.get("channel") == "release"),
            self.tables[0],
        )["table"]
        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
        dimension_names = {dimension["name"] for dimension in dimensions}

        client_id_field = self.get_client_id(dimensions, table)

        # iterate over all of the glean metrics and generate views for unnested
        # fields as necessary. Append them to the list of existing view
        # definitions.
        view_definitions: List[Dict[str, Any]] = []
        for metric in self._get_glean_metrics(v1_name):
            looker_name = self._to_looker_name(metric)
            if looker_name not in dimension_names:
                continue  # skip metrics with no matching dimension
            if metric.type == "labeled_counter":
                view_definitions += self._labeled_counter_views(
                    metric, looker_name, table, client_id_field
                )

        # deduplicate view definitions, because somehow a few entries make it in
        # twice e.g. metrics__metrics__labeled_counter__media_audio_init_failure
        view_definitions = sorted(
            {v["name"]: v for v in view_definitions}.values(), key=lambda x: x["name"]  # type: ignore
        )

        # `table` is a dotted "project.dataset.table" identifier.
        project, dataset, table_id = table.split(".")
        table_schema = dryrun.create(
            project=project,
            dataset=dataset,
            table=table_id,
        ).get_table_schema()
        nested_views = lookml_utils._generate_nested_dimension_views(
            table_schema, self.name
        )

        lookml["views"] += view_definitions + nested_views

        return lookml

    def _labeled_counter_views(
        self,
        metric: GleanProbe,
        looker_name: str,
        table: str,
        client_id_field: Optional[str],
    ) -> List[Dict[str, Any]]:
        """Build the join view (and optional suggest view) for a labeled counter.

        Returns a list holding the join view, followed by a suggest view when
        `looker_name` is listed in SUGGESTS_FOR_LABELED_COUNTERS.
        """
        view_name = f"{self.name}__{looker_name}"
        suggest_name = f"suggest__{view_name}"

        category, name = [
            slug_to_title(v) for v in self._get_category_and_name(metric)
        ]
        view_label = f"{category} - {name}"
        metric_hidden = "no" if metric.is_in_source() else "yes"

        measures = [
            {
                "name": "count",
                "type": "sum",
                "sql": "${value}",
                "hidden": metric_hidden,
            }
        ]

        if client_id_field is not None:
            # client_id field is missing for pings with minimal Glean schema
            measures.append(
                {
                    "name": "client_count",
                    "type": "count_distinct",
                    "sql": f"case when ${{value}} > 0 then ${{{self.name}.{client_id_field}}} end",
                    "hidden": metric_hidden,
                }
            )

        join_view: Dict[str, Any] = {
            "name": view_name,
            "label": view_label,
            "dimensions": [
                {
                    "name": "document_id",
                    "type": "string",
                    "sql": f"${{{self.name}.document_id}}",
                    "hidden": "yes",
                },
                # labeled counters need a primary key that incorporates
                # their labels, otherwise we get jumbled results:
                # https://github.com/mozilla/lookml-generator/issues/171
                {
                    "name": "document_label_id",
                    "type": "string",
                    "sql": f"${{{self.name}.document_id}}-${{label}}",
                    "primary_key": "yes",
                    "hidden": "yes",
                },
                {
                    "name": "value",
                    "type": "number",
                    "sql": "${TABLE}.value",
                    "hidden": "yes",
                },
            ],
            "measures": measures,
        }

        # The label dimension is the same in both cases except for the two
        # suggest_* keys; keys are inserted in the order they are emitted.
        label_dimension: Dict[str, str] = {
            "name": "label",
            "type": "string",
            "sql": "${TABLE}.key",
        }
        views = [join_view]

        if looker_name in SUGGESTS_FOR_LABELED_COUNTERS:
            label_dimension["suggest_explore"] = suggest_name
            label_dimension["suggest_dimension"] = f"{suggest_name}.key"
            views.append(
                {
                    "name": suggest_name,
                    "derived_table": {
                        "sql": dedent(
                            f"""
                            select
                                m.key,
                                count(*) as n
                            from {table} as t,
                            unnest(metrics.{metric.type}.{metric.id.replace(".", "_")}) as m
                            where date(submission_timestamp) > date_sub(current_date, interval 30 day)
                                and sample_id = 0
                            group by key
                            order by n desc
                            """
                        )
                    },
                    "dimensions": [
                        {"name": "key", "type": "string", "sql": "${TABLE}.key"}
                    ],
                }
            )

        label_dimension["hidden"] = metric_hidden
        join_view["dimensions"].append(label_dimension)
        return views

    def _get_links(self, dimension: dict) -> List[Dict[str, str]]:
        """Get a link annotation given a metric name."""
        name = self._get_name(dimension)
        title = slug_to_title(name)
        return [
            {
                "label": (f"Glean Dictionary reference for {title}"),
                "url": (
                    f"https://dictionary.telemetry.mozilla.org"
                    f"/apps/{self.namespace}/metrics/{name}"
                ),
                "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
            }
        ]

    def _get_name(self, dimension: dict) -> str:
        """Return the last "__"-separated segment of the dimension name."""
        return dimension["name"].split("__")[-1]

    def _get_metric_type(self, dimension: dict) -> str:
        """Return the second "__"-separated segment of the dimension name."""
        return dimension["name"].split("__")[1]

    def _is_metric(self, dimension) -> bool:
        """Return True when the dimension name starts with "metrics__"."""
        return dimension["name"].startswith("metrics__")

    def _get_glean_metrics(self, v1_name: Optional[str]) -> List[GleanProbe]:
        """Return the probes of app `v1_name` that are sent in this ping.

        Probes are matched by comparing this view's name with the probe's
        `send_in_pings` entries (dashes replaced by underscores); repeated
        probe ids are kept only once. An error is logged and an empty list
        returned when `v1_name` is missing or names no known repository.
        """
        if v1_name is None:
            logging.error(
                f"Error: Missing v1 name for ping {self.name} in namespace {self.namespace}"
            )
            return []

        # A default is required here: a bare next() on an exhausted generator
        # would leak StopIteration out of this method.
        repo = next((r for r in GleanPing.get_repos() if r["name"] == v1_name), None)
        if repo is None:
            logging.error(
                f"Error: No Glean repository named {v1_name} "
                f"for ping {self.name} in namespace {self.namespace}"
            )
            return []
        glean_app = GleanPing(repo)

        ping_probes = []
        probe_ids = set()
        for probe in glean_app.get_probes():
            send_in_pings_snakecase = [
                ping.replace("-", "_") for ping in probe.definition["send_in_pings"]
            ]
            if self.name not in send_in_pings_snakecase:
                continue
            if probe.id in probe_ids:
                # Some ids are duplicated, ignore them
                continue

            ping_probes.append(probe)
            probe_ids.add(probe.id)

        return ping_probes

    def _get_category_and_name(self, metric: GleanProbe) -> Tuple[str, str]:
        """Split a dotted metric id into (category, name).

        Everything before the last dot forms the category, joined with
        underscores; the category is empty when the id contains no dot.
        """
        *category_parts, name = metric.id.split(".")
        return "_".join(category_parts), name

    def _to_looker_name(self, metric: GleanProbe, suffix: str = "") -> str:
        """Convert a glean probe into a looker name."""
        category, name = self._get_category_and_name(metric)

        sep = "" if not category else "_"
        looker_name = f"metrics__{metric.type}__{category}{sep}{name}"
        if suffix:
            looker_name = f"{looker_name}__{suffix}"
        return looker_name

    def _make_dimension(
        self, metric: GleanProbe, suffix: str, sql_map: Dict[str, Dict[str, str]]
    ) -> Optional[Dict[str, Union[str, List[Dict[str, str]]]]]:
        """Build the LookML dimension for `metric`, or None if it has no column.

        `sql_map` maps a dimension name to its "sql" and "type"; a metric
        whose looker name (including `suffix`) is not a key of `sql_map` is
        skipped by returning None.
        """
        category, name = self._get_category_and_name(metric)

        sep = "" if not category else "_"
        label = name
        metric_type = RENAMED_METRIC_TYPES.get(metric.type, metric.type)
        looker_name = f"metrics__{metric_type}__{category}{sep}{name}"
        if suffix:
            label = f"{name}_{suffix}"
            looker_name = f"{looker_name}__{suffix}"

        if looker_name not in sql_map:
            return None

        group_label = slug_to_title(category)
        group_item_label = slug_to_title(label)

        if not group_label:
            group_label = "Glean"

        friendly_name = f"{group_label} {group_item_label}"

        lookml: Dict[str, Any] = {
            "name": looker_name,
            "label": friendly_name,
            # metrics that are no longer in the source are hidden by default
            "hidden": "no" if metric.is_in_source() else "yes",
            "sql": sql_map[looker_name]["sql"],
            "type": sql_map[looker_name]["type"],
            "group_label": group_label,
            "group_item_label": group_item_label,
            "links": [
                {
                    "label": (f"Glean Dictionary reference for {friendly_name}"),
                    "url": (
                        f"https://dictionary.telemetry.mozilla.org"
                        f"/apps/{self.namespace}/metrics/{category}{sep}{name}"
                    ),
                    "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
                },
            ],
        }

        if lookml["type"] == "time":
            # Remove any _{type} suffix from the dimension group name because each timeframe
            # will add a _{type} suffix to its individual dimension name.
            lookml["name"] = re.sub(r"_(date|time(stamp)?)$", "", looker_name)
            lookml["timeframes"] = [
                "raw",
                "time",
                "date",
                "week",
                "month",
                "quarter",
                "year",
            ]
            # Dimension groups should not be nested (see issue #82).
            del lookml["group_label"]
            del lookml["group_item_label"]
            # Links are not supported for dimension groups.
            del lookml["links"]

        # remove some elements from the definition if we're handling a labeled
        # counter, as an initial join dimension
        if metric.type == "labeled_counter":
            # this field is not used since labeled counters are maps
            del lookml["type"]
            lookml["hidden"] = "yes"

        if metric.description:
            lookml["description"] = metric.description

        return lookml

    def _get_metric_dimensions(
        self, metric: GleanProbe, sql_map: Dict[str, Dict[str, str]]
    ) -> Iterable[Optional[Dict[str, Union[str, List[Dict[str, str]]]]]]:
        """Yield the dimension(s) for `metric`, chosen by its metric type.

        Rates yield a numerator and a denominator, distributions a "sum",
        timespans a "value", and the remaining ALLOWED_TYPES one unsuffixed
        dimension. Other types yield nothing. Entries may be None.
        """
        if metric.type == "rate":
            for suffix in ("numerator", "denominator"):
                yield self._make_dimension(metric, suffix, sql_map)
        elif metric.type in DISTRIBUTION_TYPES:
            yield self._make_dimension(metric, "sum", sql_map)
        elif metric.type == "timespan":
            yield self._make_dimension(metric, "value", sql_map)
        elif metric.type in ALLOWED_TYPES:
            yield self._make_dimension(metric, "", sql_map)

    def _get_glean_metric_dimensions(
        self, all_fields: List[dict], v1_name: Optional[str]
    ):
        """Return the non-None dimensions of every Glean metric in this ping."""
        # Fields without an explicit type are treated as strings.
        sql_map = {
            f["name"]: {"sql": f["sql"], "type": f.get("type", "string")}
            for f in all_fields
        }
        metrics = self._get_glean_metrics(v1_name)
        return [
            dimension
            for metric in metrics
            for dimension in self._get_metric_dimensions(metric, sql_map)
            if dimension is not None
        ]

    def _add_link(self, dimension):
        """Return a copy of `dimension`, with links added for unlabeled metrics."""
        annotations = {}
        if self._is_metric(dimension) and not self._get_metric_type(
            dimension
        ).startswith("labeled"):
            annotations["links"] = self._get_links(dimension)

        return dict(dimension, **annotations)

    def get_dimensions(
        self, table, v1_name: Optional[str], dryrun
    ) -> List[Dict[str, Any]]:
        """Get the set of dimensions for this view."""
        all_fields = super().get_dimensions(table, v1_name, dryrun=dryrun)
        fields = self._get_glean_metric_dimensions(all_fields, v1_name) + [
            self._add_link(d)
            for d in all_fields
            if not d["name"].startswith("metrics__")
        ]
        # later entries will override earlier entries, if there are duplicates
        field_dict = {f["name"]: f for f in fields}
        return list(field_dict.values())

    def get_measures(
        self, dimensions: List[dict], table: str, v1_name: Optional[str]
    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
        """Generate measures from a list of dimensions.

        Starts from the parent's measures and adds, for every counter metric
        dimension, a "sum" measure plus (when a client id field exists) a
        distinct client count filtered to values above zero.

        Raise ClickException if dimensions result in duplicate measures.
        """
        measures = super().get_measures(dimensions, table, v1_name)
        client_id_field = self.get_client_id(dimensions, table)

        for dimension in dimensions:
            if (
                self._is_metric(dimension)
                and self._get_metric_type(dimension) == "counter"
            ):
                # handle the counters in the metric ping
                name = self._get_name(dimension)
                dimension_name = dimension["name"]
                measures += [
                    {
                        "name": name,
                        "type": "sum",
                        "sql": f"${{{dimension_name}}}",
                        "links": self._get_links(dimension),
                    },
                ]

                if client_id_field is not None:
                    measures += [
                        {
                            "name": f"{name}_client_count",
                            "type": "count_distinct",
                            "filters": [{dimension_name: ">0"}],
                            "sql": f"${{{client_id_field}}}",
                            "links": self._get_links(dimension),
                        },
                    ]

        # check if there are any duplicate values
        names = [measure["name"] for measure in measures]
        duplicates = [k for k, v in Counter(names).items() if v > 1]
        if duplicates:
            raise click.ClickException(
                f"duplicate measures {duplicates!r} for table {table!r}"
            )

        return measures
DISTRIBUTION_TYPES =
{'custom_distribution', 'memory_distribution', 'timing_distribution'}
ALLOWED_TYPES =
{'string', 'text', 'labeled_boolean', 'custom_distribution', 'url', 'labeled_quantity', 'timing_distribution', 'quantity', 'datetime', 'uuid', 'rate', 'boolean', 'counter', 'labeled_counter', 'jwe', 'labeled_string', 'memory_distribution', 'timespan'}
RENAMED_METRIC_TYPES =
{'jwe': 'jwe2', 'text': 'text2', 'url': 'url2'}
DISALLOWED_PINGS =
{'events'}
SUGGESTS_FOR_LABELED_COUNTERS =
{'metrics__labeled_counter__glean_error_invalid_label'}
class GleanPingView(PingView):
    """A view on a ping table for an application using the Glean SDK."""

    type: str = "glean_ping_view"
    allow_glean: bool = True

    @classmethod
    def from_db_views(klass, *args, **kwargs):
        """Generate GleanPingViews from db views.

        Yields every view produced by the parent implementation except the
        ones whose name is listed in DISALLOWED_PINGS.
        """
        for view in super().from_db_views(*args, **kwargs):
            if view.name not in DISALLOWED_PINGS:
                yield view

    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
        """Generate LookML for this view.

        The Glean views include labeled metrics, which need to be joined
        against the view in the explore.

        The returned dictionary is the parent's LookML with its "views" list
        replaced by: the parent's first view, one join view per labeled
        counter that has a matching dimension (plus a suggest view for the
        counters in SUGGESTS_FOR_LABELED_COUNTERS), and the nested dimension
        views derived from the table schema.
        """
        lookml = super().to_lookml(v1_name, dryrun=dryrun)
        # ignore nested join views
        lookml["views"] = [lookml["views"][0]]

        # Prefer the release-channel table; fall back to the first table.
        table = next(
            (table for table in self.tables if table.get("channel") == "release"),
            self.tables[0],
        )["table"]
        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
        dimension_names = {dimension["name"] for dimension in dimensions}

        client_id_field = self.get_client_id(dimensions, table)

        # iterate over all of the glean metrics and generate views for unnested
        # fields as necessary. Append them to the list of existing view
        # definitions.
        view_definitions: List[Dict[str, Any]] = []
        for metric in self._get_glean_metrics(v1_name):
            looker_name = self._to_looker_name(metric)
            if looker_name not in dimension_names:
                continue  # skip metrics with no matching dimension
            if metric.type == "labeled_counter":
                view_definitions += self._labeled_counter_views(
                    metric, looker_name, table, client_id_field
                )

        # deduplicate view definitions, because somehow a few entries make it in
        # twice e.g. metrics__metrics__labeled_counter__media_audio_init_failure
        view_definitions = sorted(
            {v["name"]: v for v in view_definitions}.values(), key=lambda x: x["name"]  # type: ignore
        )

        # `table` is a dotted "project.dataset.table" identifier.
        project, dataset, table_id = table.split(".")
        table_schema = dryrun.create(
            project=project,
            dataset=dataset,
            table=table_id,
        ).get_table_schema()
        nested_views = lookml_utils._generate_nested_dimension_views(
            table_schema, self.name
        )

        lookml["views"] += view_definitions + nested_views

        return lookml

    def _labeled_counter_views(
        self,
        metric: GleanProbe,
        looker_name: str,
        table: str,
        client_id_field: Optional[str],
    ) -> List[Dict[str, Any]]:
        """Build the join view (and optional suggest view) for a labeled counter.

        Returns a list holding the join view, followed by a suggest view when
        `looker_name` is listed in SUGGESTS_FOR_LABELED_COUNTERS.
        """
        view_name = f"{self.name}__{looker_name}"
        suggest_name = f"suggest__{view_name}"

        category, name = [
            slug_to_title(v) for v in self._get_category_and_name(metric)
        ]
        view_label = f"{category} - {name}"
        metric_hidden = "no" if metric.is_in_source() else "yes"

        measures = [
            {
                "name": "count",
                "type": "sum",
                "sql": "${value}",
                "hidden": metric_hidden,
            }
        ]

        if client_id_field is not None:
            # client_id field is missing for pings with minimal Glean schema
            measures.append(
                {
                    "name": "client_count",
                    "type": "count_distinct",
                    "sql": f"case when ${{value}} > 0 then ${{{self.name}.{client_id_field}}} end",
                    "hidden": metric_hidden,
                }
            )

        join_view: Dict[str, Any] = {
            "name": view_name,
            "label": view_label,
            "dimensions": [
                {
                    "name": "document_id",
                    "type": "string",
                    "sql": f"${{{self.name}.document_id}}",
                    "hidden": "yes",
                },
                # labeled counters need a primary key that incorporates
                # their labels, otherwise we get jumbled results:
                # https://github.com/mozilla/lookml-generator/issues/171
                {
                    "name": "document_label_id",
                    "type": "string",
                    "sql": f"${{{self.name}.document_id}}-${{label}}",
                    "primary_key": "yes",
                    "hidden": "yes",
                },
                {
                    "name": "value",
                    "type": "number",
                    "sql": "${TABLE}.value",
                    "hidden": "yes",
                },
            ],
            "measures": measures,
        }

        # The label dimension is the same in both cases except for the two
        # suggest_* keys; keys are inserted in the order they are emitted.
        label_dimension: Dict[str, str] = {
            "name": "label",
            "type": "string",
            "sql": "${TABLE}.key",
        }
        views = [join_view]

        if looker_name in SUGGESTS_FOR_LABELED_COUNTERS:
            label_dimension["suggest_explore"] = suggest_name
            label_dimension["suggest_dimension"] = f"{suggest_name}.key"
            views.append(
                {
                    "name": suggest_name,
                    "derived_table": {
                        "sql": dedent(
                            f"""
                            select
                                m.key,
                                count(*) as n
                            from {table} as t,
                            unnest(metrics.{metric.type}.{metric.id.replace(".", "_")}) as m
                            where date(submission_timestamp) > date_sub(current_date, interval 30 day)
                                and sample_id = 0
                            group by key
                            order by n desc
                            """
                        )
                    },
                    "dimensions": [
                        {"name": "key", "type": "string", "sql": "${TABLE}.key"}
                    ],
                }
            )

        label_dimension["hidden"] = metric_hidden
        join_view["dimensions"].append(label_dimension)
        return views

    def _get_links(self, dimension: dict) -> List[Dict[str, str]]:
        """Get a link annotation given a metric name."""
        name = self._get_name(dimension)
        title = slug_to_title(name)
        return [
            {
                "label": (f"Glean Dictionary reference for {title}"),
                "url": (
                    f"https://dictionary.telemetry.mozilla.org"
                    f"/apps/{self.namespace}/metrics/{name}"
                ),
                "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
            }
        ]

    def _get_name(self, dimension: dict) -> str:
        """Return the last "__"-separated segment of the dimension name."""
        return dimension["name"].split("__")[-1]

    def _get_metric_type(self, dimension: dict) -> str:
        """Return the second "__"-separated segment of the dimension name."""
        return dimension["name"].split("__")[1]

    def _is_metric(self, dimension) -> bool:
        """Return True when the dimension name starts with "metrics__"."""
        return dimension["name"].startswith("metrics__")

    def _get_glean_metrics(self, v1_name: Optional[str]) -> List[GleanProbe]:
        """Return the probes of app `v1_name` that are sent in this ping.

        Probes are matched by comparing this view's name with the probe's
        `send_in_pings` entries (dashes replaced by underscores); repeated
        probe ids are kept only once. An error is logged and an empty list
        returned when `v1_name` is missing or names no known repository.
        """
        if v1_name is None:
            logging.error(
                f"Error: Missing v1 name for ping {self.name} in namespace {self.namespace}"
            )
            return []

        # A default is required here: a bare next() on an exhausted generator
        # would leak StopIteration out of this method.
        repo = next((r for r in GleanPing.get_repos() if r["name"] == v1_name), None)
        if repo is None:
            logging.error(
                f"Error: No Glean repository named {v1_name} "
                f"for ping {self.name} in namespace {self.namespace}"
            )
            return []
        glean_app = GleanPing(repo)

        ping_probes = []
        probe_ids = set()
        for probe in glean_app.get_probes():
            send_in_pings_snakecase = [
                ping.replace("-", "_") for ping in probe.definition["send_in_pings"]
            ]
            if self.name not in send_in_pings_snakecase:
                continue
            if probe.id in probe_ids:
                # Some ids are duplicated, ignore them
                continue

            ping_probes.append(probe)
            probe_ids.add(probe.id)

        return ping_probes

    def _get_category_and_name(self, metric: GleanProbe) -> Tuple[str, str]:
        """Split a dotted metric id into (category, name).

        Everything before the last dot forms the category, joined with
        underscores; the category is empty when the id contains no dot.
        """
        *category_parts, name = metric.id.split(".")
        return "_".join(category_parts), name

    def _to_looker_name(self, metric: GleanProbe, suffix: str = "") -> str:
        """Convert a glean probe into a looker name."""
        category, name = self._get_category_and_name(metric)

        sep = "" if not category else "_"
        looker_name = f"metrics__{metric.type}__{category}{sep}{name}"
        if suffix:
            looker_name = f"{looker_name}__{suffix}"
        return looker_name

    def _make_dimension(
        self, metric: GleanProbe, suffix: str, sql_map: Dict[str, Dict[str, str]]
    ) -> Optional[Dict[str, Union[str, List[Dict[str, str]]]]]:
        """Build the LookML dimension for `metric`, or None if it has no column.

        `sql_map` maps a dimension name to its "sql" and "type"; a metric
        whose looker name (including `suffix`) is not a key of `sql_map` is
        skipped by returning None.
        """
        category, name = self._get_category_and_name(metric)

        sep = "" if not category else "_"
        label = name
        metric_type = RENAMED_METRIC_TYPES.get(metric.type, metric.type)
        looker_name = f"metrics__{metric_type}__{category}{sep}{name}"
        if suffix:
            label = f"{name}_{suffix}"
            looker_name = f"{looker_name}__{suffix}"

        if looker_name not in sql_map:
            return None

        group_label = slug_to_title(category)
        group_item_label = slug_to_title(label)

        if not group_label:
            group_label = "Glean"

        friendly_name = f"{group_label} {group_item_label}"

        lookml: Dict[str, Any] = {
            "name": looker_name,
            "label": friendly_name,
            # metrics that are no longer in the source are hidden by default
            "hidden": "no" if metric.is_in_source() else "yes",
            "sql": sql_map[looker_name]["sql"],
            "type": sql_map[looker_name]["type"],
            "group_label": group_label,
            "group_item_label": group_item_label,
            "links": [
                {
                    "label": (f"Glean Dictionary reference for {friendly_name}"),
                    "url": (
                        f"https://dictionary.telemetry.mozilla.org"
                        f"/apps/{self.namespace}/metrics/{category}{sep}{name}"
                    ),
                    "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
                },
            ],
        }

        if lookml["type"] == "time":
            # Remove any _{type} suffix from the dimension group name because each timeframe
            # will add a _{type} suffix to its individual dimension name.
            lookml["name"] = re.sub(r"_(date|time(stamp)?)$", "", looker_name)
            lookml["timeframes"] = [
                "raw",
                "time",
                "date",
                "week",
                "month",
                "quarter",
                "year",
            ]
            # Dimension groups should not be nested (see issue #82).
            del lookml["group_label"]
            del lookml["group_item_label"]
            # Links are not supported for dimension groups.
            del lookml["links"]

        # remove some elements from the definition if we're handling a labeled
        # counter, as an initial join dimension
        if metric.type == "labeled_counter":
            # this field is not used since labeled counters are maps
            del lookml["type"]
            lookml["hidden"] = "yes"

        if metric.description:
            lookml["description"] = metric.description

        return lookml

    def _get_metric_dimensions(
        self, metric: GleanProbe, sql_map: Dict[str, Dict[str, str]]
    ) -> Iterable[Optional[Dict[str, Union[str, List[Dict[str, str]]]]]]:
        """Yield the dimension(s) for `metric`, chosen by its metric type.

        Rates yield a numerator and a denominator, distributions a "sum",
        timespans a "value", and the remaining ALLOWED_TYPES one unsuffixed
        dimension. Other types yield nothing. Entries may be None.
        """
        if metric.type == "rate":
            for suffix in ("numerator", "denominator"):
                yield self._make_dimension(metric, suffix, sql_map)
        elif metric.type in DISTRIBUTION_TYPES:
            yield self._make_dimension(metric, "sum", sql_map)
        elif metric.type == "timespan":
            yield self._make_dimension(metric, "value", sql_map)
        elif metric.type in ALLOWED_TYPES:
            yield self._make_dimension(metric, "", sql_map)

    def _get_glean_metric_dimensions(
        self, all_fields: List[dict], v1_name: Optional[str]
    ):
        """Return the non-None dimensions of every Glean metric in this ping."""
        # Fields without an explicit type are treated as strings.
        sql_map = {
            f["name"]: {"sql": f["sql"], "type": f.get("type", "string")}
            for f in all_fields
        }
        metrics = self._get_glean_metrics(v1_name)
        return [
            dimension
            for metric in metrics
            for dimension in self._get_metric_dimensions(metric, sql_map)
            if dimension is not None
        ]

    def _add_link(self, dimension):
        """Return a copy of `dimension`, with links added for unlabeled metrics."""
        annotations = {}
        if self._is_metric(dimension) and not self._get_metric_type(
            dimension
        ).startswith("labeled"):
            annotations["links"] = self._get_links(dimension)

        return dict(dimension, **annotations)

    def get_dimensions(
        self, table, v1_name: Optional[str], dryrun
    ) -> List[Dict[str, Any]]:
        """Get the set of dimensions for this view."""
        all_fields = super().get_dimensions(table, v1_name, dryrun=dryrun)
        fields = self._get_glean_metric_dimensions(all_fields, v1_name) + [
            self._add_link(d)
            for d in all_fields
            if not d["name"].startswith("metrics__")
        ]
        # later entries will override earlier entries, if there are duplicates
        field_dict = {f["name"]: f for f in fields}
        return list(field_dict.values())

    def get_measures(
        self, dimensions: List[dict], table: str, v1_name: Optional[str]
    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
        """Generate measures from a list of dimensions.

        Starts from the parent's measures and adds, for every counter metric
        dimension, a "sum" measure plus (when a client id field exists) a
        distinct client count filtered to values above zero.

        Raise ClickException if dimensions result in duplicate measures.
        """
        measures = super().get_measures(dimensions, table, v1_name)
        client_id_field = self.get_client_id(dimensions, table)

        for dimension in dimensions:
            if (
                self._is_metric(dimension)
                and self._get_metric_type(dimension) == "counter"
            ):
                # handle the counters in the metric ping
                name = self._get_name(dimension)
                dimension_name = dimension["name"]
                measures += [
                    {
                        "name": name,
                        "type": "sum",
                        "sql": f"${{{dimension_name}}}",
                        "links": self._get_links(dimension),
                    },
                ]

                if client_id_field is not None:
                    measures += [
                        {
                            "name": f"{name}_client_count",
                            "type": "count_distinct",
                            "filters": [{dimension_name: ">0"}],
                            "sql": f"${{{client_id_field}}}",
                            "links": self._get_links(dimension),
                        },
                    ]

        # check if there are any duplicate values
        names = [measure["name"] for measure in measures]
        duplicates = [k for k, v in Counter(names).items() if v > 1]
        if duplicates:
            raise click.ClickException(
                f"duplicate measures {duplicates!r} for table {table!r}"
            )

        return measures
A view on a ping table for an application using the Glean SDK.
@classmethod
def
from_db_views(klass, *args, **kwargs):
@classmethod
def from_db_views(klass, *args, **kwargs):
    """Generate GleanPingViews from db views.

    Delegates to the parent implementation and passes through only the views
    whose name is not listed in DISALLOWED_PINGS.
    """
    # super() is resolved here, outside the generator expression, because the
    # zero-argument form is not usable inside a nested comprehension scope.
    parent_views = super().from_db_views(*args, **kwargs)
    yield from (
        candidate
        for candidate in parent_views
        if candidate.name not in DISALLOWED_PINGS
    )
Generate GleanPingViews from db views.
def
to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
    """Generate LookML for this view.

    The Glean views include labeled metrics, which need to be joined
    against the view in the explore.

    The returned dictionary is the parent's LookML with its "views" list
    replaced by: the parent's first view, one join view per labeled counter
    that has a matching dimension (plus a suggest view for the counters in
    SUGGESTS_FOR_LABELED_COUNTERS), and the nested dimension views derived
    from the table schema.
    """
    lookml = super().to_lookml(v1_name, dryrun=dryrun)
    # ignore nested join views
    lookml["views"] = [lookml["views"][0]]

    # Prefer the release-channel table; fall back to the first table.
    table = next(
        (table for table in self.tables if table.get("channel") == "release"),
        self.tables[0],
    )["table"]
    dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
    dimension_names = {dimension["name"] for dimension in dimensions}

    client_id_field = self.get_client_id(dimensions, table)

    # iterate over all of the glean metrics and generate views for unnested
    # fields as necessary. Append them to the list of existing view
    # definitions.
    view_definitions: List[Dict[str, Any]] = []
    for metric in self._get_glean_metrics(v1_name):
        looker_name = self._to_looker_name(metric)
        if looker_name not in dimension_names:
            continue  # skip metrics with no matching dimension
        if metric.type == "labeled_counter":
            view_definitions += self._labeled_counter_views(
                metric, looker_name, table, client_id_field
            )

    # deduplicate view definitions, because somehow a few entries make it in
    # twice e.g. metrics__metrics__labeled_counter__media_audio_init_failure
    view_definitions = sorted(
        {v["name"]: v for v in view_definitions}.values(), key=lambda x: x["name"]  # type: ignore
    )

    # `table` is a dotted "project.dataset.table" identifier.
    project, dataset, table_id = table.split(".")
    table_schema = dryrun.create(
        project=project,
        dataset=dataset,
        table=table_id,
    ).get_table_schema()
    nested_views = lookml_utils._generate_nested_dimension_views(
        table_schema, self.name
    )

    lookml["views"] += view_definitions + nested_views

    return lookml

def _labeled_counter_views(
    self,
    metric: GleanProbe,
    looker_name: str,
    table: str,
    client_id_field: Optional[str],
) -> List[Dict[str, Any]]:
    """Build the join view (and optional suggest view) for a labeled counter.

    Returns a list holding the join view, followed by a suggest view when
    `looker_name` is listed in SUGGESTS_FOR_LABELED_COUNTERS.
    """
    view_name = f"{self.name}__{looker_name}"
    suggest_name = f"suggest__{view_name}"

    category, name = [
        slug_to_title(v) for v in self._get_category_and_name(metric)
    ]
    view_label = f"{category} - {name}"
    metric_hidden = "no" if metric.is_in_source() else "yes"

    measures = [
        {
            "name": "count",
            "type": "sum",
            "sql": "${value}",
            "hidden": metric_hidden,
        }
    ]

    if client_id_field is not None:
        # client_id field is missing for pings with minimal Glean schema
        measures.append(
            {
                "name": "client_count",
                "type": "count_distinct",
                "sql": f"case when ${{value}} > 0 then ${{{self.name}.{client_id_field}}} end",
                "hidden": metric_hidden,
            }
        )

    join_view: Dict[str, Any] = {
        "name": view_name,
        "label": view_label,
        "dimensions": [
            {
                "name": "document_id",
                "type": "string",
                "sql": f"${{{self.name}.document_id}}",
                "hidden": "yes",
            },
            # labeled counters need a primary key that incorporates
            # their labels, otherwise we get jumbled results:
            # https://github.com/mozilla/lookml-generator/issues/171
            {
                "name": "document_label_id",
                "type": "string",
                "sql": f"${{{self.name}.document_id}}-${{label}}",
                "primary_key": "yes",
                "hidden": "yes",
            },
            {
                "name": "value",
                "type": "number",
                "sql": "${TABLE}.value",
                "hidden": "yes",
            },
        ],
        "measures": measures,
    }

    # The label dimension is the same in both cases except for the two
    # suggest_* keys; keys are inserted in the order they are emitted.
    label_dimension: Dict[str, str] = {
        "name": "label",
        "type": "string",
        "sql": "${TABLE}.key",
    }
    views = [join_view]

    if looker_name in SUGGESTS_FOR_LABELED_COUNTERS:
        label_dimension["suggest_explore"] = suggest_name
        label_dimension["suggest_dimension"] = f"{suggest_name}.key"
        views.append(
            {
                "name": suggest_name,
                "derived_table": {
                    "sql": dedent(
                        f"""
                        select
                            m.key,
                            count(*) as n
                        from {table} as t,
                        unnest(metrics.{metric.type}.{metric.id.replace(".", "_")}) as m
                        where date(submission_timestamp) > date_sub(current_date, interval 30 day)
                            and sample_id = 0
                        group by key
                        order by n desc
                        """
                    )
                },
                "dimensions": [
                    {"name": "key", "type": "string", "sql": "${TABLE}.key"}
                ],
            }
        )

    label_dimension["hidden"] = metric_hidden
    join_view["dimensions"].append(label_dimension)
    return views
Generate LookML for this view.
The Glean views include labeled metrics, which need to be joined against the view in the explore.
def
get_dimensions(self, table, v1_name: Optional[str], dryrun) -> List[Dict[str, Any]]:
def get_dimensions(
    self, table, v1_name: Optional[str], dryrun
) -> List[Dict[str, Any]]:
    """Get the set of dimensions for this view.

    The result combines the dimensions built from the Glean metric
    definitions with the parent's dimensions whose name does not start with
    "metrics__", keeping a single entry per dimension name.
    """
    parent_fields = super().get_dimensions(table, v1_name, dryrun=dryrun)

    metric_dimensions = self._get_glean_metric_dimensions(parent_fields, v1_name)
    other_dimensions = [
        self._add_link(field)
        for field in parent_fields
        if not field["name"].startswith("metrics__")
    ]

    # later entries will override earlier entries, if there are duplicates
    by_name: Dict[str, Any] = {}
    for field in metric_dimensions + other_dimensions:
        by_name[field["name"]] = field
    return list(by_name.values())
Get the set of dimensions for this view.
def
get_measures( self, dimensions: List[dict], table: str, v1_name: Optional[str]) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
def get_measures(
    self, dimensions: List[dict], table: str, v1_name: Optional[str]
) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
    """Generate measures from a list of dimensions.

    Extends the parent's measures with, for each counter metric dimension, a
    "sum" measure and (if a client id field is available) a distinct client
    count restricted to rows where the counter is above zero.

    Raise ClickException if dimensions result in duplicate measures.
    """
    measures = super().get_measures(dimensions, table, v1_name)
    client_id_field = self.get_client_id(dimensions, table)

    for dimension in dimensions:
        if not self._is_metric(dimension):
            continue
        if self._get_metric_type(dimension) != "counter":
            continue

        # handle the counters in the metric ping
        name = self._get_name(dimension)
        dimension_name = dimension["name"]
        measures.append(
            {
                "name": name,
                "type": "sum",
                "sql": f"${{{dimension_name}}}",
                "links": self._get_links(dimension),
            }
        )

        if client_id_field is None:
            continue
        measures.append(
            {
                "name": f"{name}_client_count",
                "type": "count_distinct",
                "filters": [{dimension_name: ">0"}],
                "sql": f"${{{client_id_field}}}",
                "links": self._get_links(dimension),
            }
        )

    # check if there are any duplicate values
    occurrences = Counter(measure["name"] for measure in measures)
    duplicates = [
        measure_name for measure_name, seen in occurrences.items() if seen > 1
    ]
    if duplicates:
        raise click.ClickException(
            f"duplicate measures {duplicates!r} for table {table!r}"
        )

    return measures
Generate measures from a list of dimensions.
When no dimension-specific measures are found, return a single "count" measure.
Raise ClickException if dimensions result in duplicate measures.