generator.views.glean_ping_view
Class to describe a Glean Ping View.
"""Class to describe a Glean Ping View."""

import logging
import re
from collections import Counter
from textwrap import dedent
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

import click
from mozilla_schema_generator.glean_ping import GleanPing
from mozilla_schema_generator.probes import GleanProbe

from . import lookml_utils
from .lookml_utils import slug_to_title
from .ping_view import PingView

DISTRIBUTION_TYPES = {
    "timing_distribution",
    "memory_distribution",
    "custom_distribution",
}


ALLOWED_TYPES = DISTRIBUTION_TYPES | {
    "boolean",
    "labeled_boolean",
    "counter",
    "labeled_counter",
    "datetime",
    "jwe",
    "quantity",
    "string",
    "labeled_string",
    "rate",
    "timespan",
    "uuid",
    "url",
    "text",
}

# Bug 1737656 - some metric types are exposed under different names
# We need to map to the new name when building dimensions.
RENAMED_METRIC_TYPES = {
    "jwe": "jwe2",
    "text": "text2",
    "url": "url2",
}


DISALLOWED_PINGS = {"events"}

# List of labeled counter names for which a suggest explore should be generated.
# Generating suggest explores for all labeled counters slows down Looker.
SUGGESTS_FOR_LABELED_COUNTERS = {"metrics__labeled_counter__glean_error_invalid_label"}


class GleanPingView(PingView):
    """A view on a ping table for an application using the Glean SDK."""

    type: str = "glean_ping_view"
    allow_glean: bool = True

    @classmethod
    def from_db_views(klass, *args, **kwargs):
        """Generate GleanPingViews from db views.

        Yields every view produced by the parent class except those whose
        name is listed in DISALLOWED_PINGS.
        """
        for view in super().from_db_views(*args, **kwargs):
            if view.name not in DISALLOWED_PINGS:
                yield view

    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
        """Generate LookML for this view.

        The Glean views include labeled metrics, which need to be joined
        against the view in the explore.
        """
        lookml = super().to_lookml(v1_name, dryrun=dryrun)
        # ignore nested join views
        lookml["views"] = [lookml["views"][0]]

        # iterate over all of the glean metrics and generate views for unnested
        # fields as necessary. Append them to the list of existing view
        # definitions.
        # Prefer the release-channel table; fall back to the first table.
        table = next(
            (table for table in self.tables if table.get("channel") == "release"),
            self.tables[0],
        )["table"]
        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
        dimension_names = {dimension["name"] for dimension in dimensions}

        client_id_field = self.get_client_id(dimensions, table)

        view_definitions = []
        metrics = self._get_glean_metrics(v1_name)
        for metric in metrics:
            looker_name = self._to_looker_name(metric)
            if looker_name not in dimension_names:
                continue  # skip metrics with no matching dimension
            if metric.type == "labeled_counter":
                view_name = f"{self.name}__{looker_name}"
                suggest_name = f"suggest__{view_name}"

                category, name = [
                    slug_to_title(v) for v in self._get_category_and_name(metric)
                ]
                view_label = f"{category} - {name}"
                metric_hidden = "no" if metric.is_in_source() else "yes"

                measures = [
                    {
                        "name": "count",
                        "type": "sum",
                        "sql": "${value}",
                        "hidden": metric_hidden,
                    }
                ]

                if client_id_field is not None:
                    # client_id field is missing for pings with minimal Glean schema
                    measures.append(
                        {
                            "name": "client_count",
                            "type": "count_distinct",
                            "sql": f"case when ${{value}} > 0 then ${{{self.name}.{client_id_field}}} end",
                            "hidden": metric_hidden,
                        }
                    )

                join_view: Dict[str, Any] = {
                    "name": view_name,
                    "label": view_label,
                    "dimensions": [
                        {
                            "name": "document_id",
                            "type": "string",
                            "sql": f"${{{self.name}.document_id}}",
                            "hidden": "yes",
                        },
                        # labeled counters need a primary key that incorporates
                        # their labels, otherwise we get jumbled results:
                        # https://github.com/mozilla/lookml-generator/issues/171
                        {
                            "name": "document_label_id",
                            "type": "string",
                            "sql": f"${{{self.name}.document_id}}-${{label}}",
                            "primary_key": "yes",
                            "hidden": "yes",
                        },
                        {
                            "name": "value",
                            "type": "number",
                            "sql": "${TABLE}.value",
                            "hidden": "yes",
                        },
                    ],
                    "measures": measures,
                }

                if looker_name in SUGGESTS_FOR_LABELED_COUNTERS:
                    join_view["dimensions"].append(
                        {
                            "name": "label",
                            "type": "string",
                            "sql": "${TABLE}.key",
                            "suggest_explore": suggest_name,
                            "suggest_dimension": f"{suggest_name}.key",
                            "hidden": metric_hidden,
                        },
                    )

                    suggest_view = {
                        "name": suggest_name,
                        "derived_table": {
                            "sql": dedent(
                                f"""
                                select
                                    m.key,
                                    count(*) as n
                                from {table} as t,
                                unnest(metrics.{metric.type}.{metric.id.replace(".", "_")}) as m
                                where date(submission_timestamp) > date_sub(current_date, interval 30 day)
                                    and sample_id = 0
                                group by key
                                order by n desc
                                """
                            )
                        },
                        "dimensions": [
                            {"name": "key", "type": "string", "sql": "${TABLE}.key"}
                        ],
                    }
                    view_definitions += [join_view, suggest_view]
                else:
                    join_view["dimensions"].append(
                        {
                            "name": "label",
                            "type": "string",
                            "sql": "${TABLE}.key",
                            "hidden": metric_hidden,
                        },
                    )
                    view_definitions += [join_view]

        # deduplicate view definitions, because somehow a few entries make it in
        # twice e.g. metrics__metrics__labeled_counter__media_audio_init_failure
        view_definitions = sorted(
            {v["name"]: v for v in view_definitions}.values(), key=lambda x: x["name"]  # type: ignore
        )

        [project, dataset, table] = table.split(".")
        table_schema = dryrun.create(
            project=project,
            dataset=dataset,
            table=table,
        ).get_table_schema()
        nested_views = lookml_utils._generate_nested_dimension_views(
            table_schema, self.name
        )

        lookml["views"] += view_definitions + nested_views

        return lookml

    def _get_links(self, dimension: dict) -> List[Dict[str, str]]:
        """Get a link annotation given a metric name.

        Returns a one-element list holding the Glean Dictionary link built
        from this view's namespace and the dimension's short name.
        """
        name = self._get_name(dimension)
        title = slug_to_title(name)
        return [
            {
                "label": (f"Glean Dictionary reference for {title}"),
                "url": (
                    f"https://dictionary.telemetry.mozilla.org"
                    f"/apps/{self.namespace}/metrics/{name}"
                ),
                "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
            }
        ]

    def _get_name(self, dimension: dict) -> str:
        """Return the last ``__``-separated segment of the dimension name."""
        return dimension["name"].split("__")[-1]

    def _get_metric_type(self, dimension: dict) -> str:
        """Return the second ``__``-separated segment of the dimension name."""
        return dimension["name"].split("__")[1]

    def _is_metric(self, dimension) -> bool:
        """Return whether the dimension name starts with ``metrics__``."""
        return dimension["name"].startswith("metrics__")

    def _get_glean_metrics(self, v1_name: Optional[str]) -> List[GleanProbe]:
        """Return the probes of the ``v1_name`` repository sent in this ping.

        Probes are de-duplicated by id, keeping the first occurrence. When
        ``v1_name`` is None an error is logged and an empty list is returned.

        Raises:
            click.ClickException: if no repository is named ``v1_name``.
        """
        if v1_name is None:
            logging.error(
                f"Error: Missing v1 name for ping {self.name} in namespace {self.namespace}"
            )
            return []

        repo = next(
            (r for r in GleanPing.get_repos() if r["name"] == v1_name), None
        )
        if repo is None:
            # A bare next() would leak StopIteration here, which surfaces as an
            # opaque error (or a RuntimeError inside a generator); fail with a
            # message that names the missing repository instead.
            raise click.ClickException(
                f"No Glean repository named {v1_name!r} found for ping "
                f"{self.name} in namespace {self.namespace}"
            )
        glean_app = GleanPing(repo)

        ping_probes = []
        probe_ids = set()
        for probe in glean_app.get_probes():
            # ping names use dashes in the probe definition, underscores here
            send_in_pings_snakecase = [
                ping.replace("-", "_") for ping in probe.definition["send_in_pings"]
            ]
            if self.name not in send_in_pings_snakecase:
                continue
            if probe.id in probe_ids:
                # Some ids are duplicated, ignore them
                continue

            ping_probes.append(probe)
            probe_ids.add(probe.id)

        return ping_probes

    def _get_category_and_name(self, metric: GleanProbe) -> Tuple[str, str]:
        """Split a probe id on ``.`` into ``(category, name)``.

        The last segment is the name; the remaining segments are joined with
        ``_`` to form the category (empty when the id has a single segment).
        """
        *category, name = metric.id.split(".")
        category = "_".join(category)

        return category, name

    def _to_looker_name(self, metric: GleanProbe, suffix: str = "") -> str:
        """Convert a glean probe into a looker name."""
        category, name = self._get_category_and_name(metric)

        sep = "" if not category else "_"
        looker_name = f"metrics__{metric.type}__{category}{sep}{name}"
        if suffix:
            looker_name = f"{looker_name}__{suffix}"
        return looker_name

    def _make_dimension(
        self, metric: GleanProbe, suffix: str, sql_map: Dict[str, Dict[str, str]]
    ) -> Optional[Dict[str, Union[str, List[Dict[str, str]]]]]:
        """Build the LookML dimension for a metric, or None if it has no field.

        ``sql_map`` maps looker names to their ``sql`` and ``type``; a metric
        whose looker name (after type renaming and suffixing) is absent from
        it yields None.
        """
        category, name = self._get_category_and_name(metric)

        sep = "" if not category else "_"
        label = name
        # some metric types are exposed under a different name (Bug 1737656)
        metric_type = RENAMED_METRIC_TYPES.get(metric.type, metric.type)
        looker_name = f"metrics__{metric_type}__{category}{sep}{name}"
        if suffix:
            label = f"{name}_{suffix}"
            looker_name = f"{looker_name}__{suffix}"

        if looker_name not in sql_map:
            return None

        group_label = slug_to_title(category)
        group_item_label = slug_to_title(label)

        if not group_label:
            group_label = "Glean"

        friendly_name = f"{group_label} {group_item_label}"

        lookml = {
            "name": looker_name,
            "label": friendly_name,
            # metrics that are no longer in the source are hidden by default
            "hidden": "no" if metric.is_in_source() else "yes",
            "sql": sql_map[looker_name]["sql"],
            "type": sql_map[looker_name]["type"],
            "group_label": group_label,
            "group_item_label": group_item_label,
            "links": [
                {
                    "label": (f"Glean Dictionary reference for {friendly_name}"),
                    "url": (
                        f"https://dictionary.telemetry.mozilla.org"
                        f"/apps/{self.namespace}/metrics/{category}{sep}{name}"
                    ),
                    "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
                },
            ],
        }

        if lookml["type"] == "time":
            # Remove any _{type} suffix from the dimension group name because each timeframe
            # will add a _{type} suffix to its individual dimension name.
            lookml["name"] = re.sub(r"_(date|time(stamp)?)$", "", looker_name)
            lookml["timeframes"] = [
                "raw",
                "time",
                "date",
                "week",
                "month",
                "quarter",
                "year",
            ]
            # Dimension groups should not be nested (see issue #82).
            del lookml["group_label"]
            del lookml["group_item_label"]
            # Links are not supported for dimension groups.
            del lookml["links"]

        # remove some elements from the definition if we're handling a labeled
        # counter, as an initial join dimension
        if metric.type == "labeled_counter":
            # this field is not used since labeled counters are maps
            del lookml["type"]
            lookml["hidden"] = "yes"

        if metric.description:
            lookml["description"] = metric.description

        return lookml

    def _get_metric_dimensions(
        self, metric: GleanProbe, sql_map: Dict[str, Dict[str, str]]
    ) -> Iterable[Optional[Dict[str, Union[str, List[Dict[str, str]]]]]]:
        """Yield the dimension(s) for a metric according to its type.

        Rates yield a numerator and a denominator, distributions a ``sum``,
        timespans a ``value``, and other allowed types one unsuffixed
        dimension. Types outside ALLOWED_TYPES yield nothing. Items may be
        None when no matching field exists.
        """
        if metric.type == "rate":
            for suffix in ("numerator", "denominator"):
                yield self._make_dimension(metric, suffix, sql_map)
        elif metric.type in DISTRIBUTION_TYPES:
            yield self._make_dimension(metric, "sum", sql_map)
        elif metric.type == "timespan":
            yield self._make_dimension(metric, "value", sql_map)
        elif metric.type in ALLOWED_TYPES:
            yield self._make_dimension(metric, "", sql_map)

    def _get_glean_metric_dimensions(
        self, all_fields: List[dict], v1_name: Optional[str]
    ):
        """Return the non-None dimensions of every Glean metric in this ping."""
        # fields without an explicit type are treated as strings
        sql_map = {
            f["name"]: {"sql": f["sql"], "type": f.get("type", "string")}
            for f in all_fields
        }
        metrics = self._get_glean_metrics(v1_name)
        return [
            dimension
            for metric in metrics
            for dimension in self._get_metric_dimensions(metric, sql_map)
            if dimension is not None
        ]

    def _add_link(self, dimension):
        """Return a copy of the dimension, with links for unlabeled metrics."""
        annotations = {}
        if self._is_metric(dimension) and not self._get_metric_type(
            dimension
        ).startswith("labeled"):
            annotations["links"] = self._get_links(dimension)

        return dict(dimension, **annotations)

    def get_dimensions(
        self, table, v1_name: Optional[str], dryrun
    ) -> List[Dict[str, Any]]:
        """Get the set of dimensions for this view."""
        all_fields = super().get_dimensions(table, v1_name, dryrun=dryrun)
        fields = self._get_glean_metric_dimensions(all_fields, v1_name) + [
            self._add_link(d)
            for d in all_fields
            if not d["name"].startswith("metrics__")
        ]
        # later entries will override earlier entries, if there are duplicates
        field_dict = {f["name"]: f for f in fields}
        return list(field_dict.values())

    def get_measures(
        self, dimensions: List[dict], table: str, v1_name: Optional[str]
    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
        """Generate measures from a list of dimensions.

        When no dimension-specific measures are found, return a single "count" measure.

        Raise ClickException if dimensions result in duplicate measures.
        """
        measures = super().get_measures(dimensions, table, v1_name)
        client_id_field = self.get_client_id(dimensions, table)

        for dimension in dimensions:
            if (
                self._is_metric(dimension)
                and self._get_metric_type(dimension) == "counter"
            ):
                # handle the counters in the metric ping
                name = self._get_name(dimension)
                dimension_name = dimension["name"]
                measures += [
                    {
                        "name": name,
                        "type": "sum",
                        "sql": f"${{{dimension_name}}}",
                        "links": self._get_links(dimension),
                    },
                ]

                if client_id_field is not None:
                    measures += [
                        {
                            "name": f"{name}_client_count",
                            "type": "count_distinct",
                            "filters": [{dimension_name: ">0"}],
                            "sql": f"${{{client_id_field}}}",
                            "links": self._get_links(dimension),
                        },
                    ]

        # check if there are any duplicate values
        names = [measure["name"] for measure in measures]
        duplicates = [k for k, v in Counter(names).items() if v > 1]
        if duplicates:
            raise click.ClickException(
                f"duplicate measures {duplicates!r} for table {table!r}"
            )

        return measures
DISTRIBUTION_TYPES =
{'timing_distribution', 'memory_distribution', 'custom_distribution'}
ALLOWED_TYPES =
{'rate', 'datetime', 'labeled_counter', 'jwe', 'labeled_boolean', 'labeled_string', 'timing_distribution', 'quantity', 'custom_distribution', 'boolean', 'counter', 'timespan', 'text', 'string', 'url', 'memory_distribution', 'uuid'}
RENAMED_METRIC_TYPES =
{'jwe': 'jwe2', 'text': 'text2', 'url': 'url2'}
DISALLOWED_PINGS =
{'events'}
SUGGESTS_FOR_LABELED_COUNTERS =
{'metrics__labeled_counter__glean_error_invalid_label'}
class GleanPingView(PingView):
    """A view on a ping table for an application using the Glean SDK."""

    type: str = "glean_ping_view"
    allow_glean: bool = True

    @classmethod
    def from_db_views(klass, *args, **kwargs):
        """Generate GleanPingViews from db views.

        Yields every view produced by the parent class except those whose
        name is listed in DISALLOWED_PINGS.
        """
        for view in super().from_db_views(*args, **kwargs):
            if view.name not in DISALLOWED_PINGS:
                yield view

    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
        """Generate LookML for this view.

        The Glean views include labeled metrics, which need to be joined
        against the view in the explore.
        """
        lookml = super().to_lookml(v1_name, dryrun=dryrun)
        # ignore nested join views
        lookml["views"] = [lookml["views"][0]]

        # iterate over all of the glean metrics and generate views for unnested
        # fields as necessary. Append them to the list of existing view
        # definitions.
        # Prefer the release-channel table; fall back to the first table.
        table = next(
            (table for table in self.tables if table.get("channel") == "release"),
            self.tables[0],
        )["table"]
        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
        dimension_names = {dimension["name"] for dimension in dimensions}

        client_id_field = self.get_client_id(dimensions, table)

        view_definitions = []
        metrics = self._get_glean_metrics(v1_name)
        for metric in metrics:
            looker_name = self._to_looker_name(metric)
            if looker_name not in dimension_names:
                continue  # skip metrics with no matching dimension
            if metric.type == "labeled_counter":
                view_name = f"{self.name}__{looker_name}"
                suggest_name = f"suggest__{view_name}"

                category, name = [
                    slug_to_title(v) for v in self._get_category_and_name(metric)
                ]
                view_label = f"{category} - {name}"
                metric_hidden = "no" if metric.is_in_source() else "yes"

                measures = [
                    {
                        "name": "count",
                        "type": "sum",
                        "sql": "${value}",
                        "hidden": metric_hidden,
                    }
                ]

                if client_id_field is not None:
                    # client_id field is missing for pings with minimal Glean schema
                    measures.append(
                        {
                            "name": "client_count",
                            "type": "count_distinct",
                            "sql": f"case when ${{value}} > 0 then ${{{self.name}.{client_id_field}}} end",
                            "hidden": metric_hidden,
                        }
                    )

                join_view: Dict[str, Any] = {
                    "name": view_name,
                    "label": view_label,
                    "dimensions": [
                        {
                            "name": "document_id",
                            "type": "string",
                            "sql": f"${{{self.name}.document_id}}",
                            "hidden": "yes",
                        },
                        # labeled counters need a primary key that incorporates
                        # their labels, otherwise we get jumbled results:
                        # https://github.com/mozilla/lookml-generator/issues/171
                        {
                            "name": "document_label_id",
                            "type": "string",
                            "sql": f"${{{self.name}.document_id}}-${{label}}",
                            "primary_key": "yes",
                            "hidden": "yes",
                        },
                        {
                            "name": "value",
                            "type": "number",
                            "sql": "${TABLE}.value",
                            "hidden": "yes",
                        },
                    ],
                    "measures": measures,
                }

                if looker_name in SUGGESTS_FOR_LABELED_COUNTERS:
                    join_view["dimensions"].append(
                        {
                            "name": "label",
                            "type": "string",
                            "sql": "${TABLE}.key",
                            "suggest_explore": suggest_name,
                            "suggest_dimension": f"{suggest_name}.key",
                            "hidden": metric_hidden,
                        },
                    )

                    suggest_view = {
                        "name": suggest_name,
                        "derived_table": {
                            "sql": dedent(
                                f"""
                                select
                                    m.key,
                                    count(*) as n
                                from {table} as t,
                                unnest(metrics.{metric.type}.{metric.id.replace(".", "_")}) as m
                                where date(submission_timestamp) > date_sub(current_date, interval 30 day)
                                    and sample_id = 0
                                group by key
                                order by n desc
                                """
                            )
                        },
                        "dimensions": [
                            {"name": "key", "type": "string", "sql": "${TABLE}.key"}
                        ],
                    }
                    view_definitions += [join_view, suggest_view]
                else:
                    join_view["dimensions"].append(
                        {
                            "name": "label",
                            "type": "string",
                            "sql": "${TABLE}.key",
                            "hidden": metric_hidden,
                        },
                    )
                    view_definitions += [join_view]

        # deduplicate view definitions, because somehow a few entries make it in
        # twice e.g. metrics__metrics__labeled_counter__media_audio_init_failure
        view_definitions = sorted(
            {v["name"]: v for v in view_definitions}.values(), key=lambda x: x["name"]  # type: ignore
        )

        [project, dataset, table] = table.split(".")
        table_schema = dryrun.create(
            project=project,
            dataset=dataset,
            table=table,
        ).get_table_schema()
        nested_views = lookml_utils._generate_nested_dimension_views(
            table_schema, self.name
        )

        lookml["views"] += view_definitions + nested_views

        return lookml

    def _get_links(self, dimension: dict) -> List[Dict[str, str]]:
        """Get a link annotation given a metric name.

        Returns a one-element list holding the Glean Dictionary link built
        from this view's namespace and the dimension's short name.
        """
        name = self._get_name(dimension)
        title = slug_to_title(name)
        return [
            {
                "label": (f"Glean Dictionary reference for {title}"),
                "url": (
                    f"https://dictionary.telemetry.mozilla.org"
                    f"/apps/{self.namespace}/metrics/{name}"
                ),
                "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
            }
        ]

    def _get_name(self, dimension: dict) -> str:
        """Return the last ``__``-separated segment of the dimension name."""
        return dimension["name"].split("__")[-1]

    def _get_metric_type(self, dimension: dict) -> str:
        """Return the second ``__``-separated segment of the dimension name."""
        return dimension["name"].split("__")[1]

    def _is_metric(self, dimension) -> bool:
        """Return whether the dimension name starts with ``metrics__``."""
        return dimension["name"].startswith("metrics__")

    def _get_glean_metrics(self, v1_name: Optional[str]) -> List[GleanProbe]:
        """Return the probes of the ``v1_name`` repository sent in this ping.

        Probes are de-duplicated by id, keeping the first occurrence. When
        ``v1_name`` is None an error is logged and an empty list is returned.

        Raises:
            click.ClickException: if no repository is named ``v1_name``.
        """
        if v1_name is None:
            logging.error(
                f"Error: Missing v1 name for ping {self.name} in namespace {self.namespace}"
            )
            return []

        repo = next(
            (r for r in GleanPing.get_repos() if r["name"] == v1_name), None
        )
        if repo is None:
            # A bare next() would leak StopIteration here, which surfaces as an
            # opaque error (or a RuntimeError inside a generator); fail with a
            # message that names the missing repository instead.
            raise click.ClickException(
                f"No Glean repository named {v1_name!r} found for ping "
                f"{self.name} in namespace {self.namespace}"
            )
        glean_app = GleanPing(repo)

        ping_probes = []
        probe_ids = set()
        for probe in glean_app.get_probes():
            # ping names use dashes in the probe definition, underscores here
            send_in_pings_snakecase = [
                ping.replace("-", "_") for ping in probe.definition["send_in_pings"]
            ]
            if self.name not in send_in_pings_snakecase:
                continue
            if probe.id in probe_ids:
                # Some ids are duplicated, ignore them
                continue

            ping_probes.append(probe)
            probe_ids.add(probe.id)

        return ping_probes

    def _get_category_and_name(self, metric: GleanProbe) -> Tuple[str, str]:
        """Split a probe id on ``.`` into ``(category, name)``.

        The last segment is the name; the remaining segments are joined with
        ``_`` to form the category (empty when the id has a single segment).
        """
        *category, name = metric.id.split(".")
        category = "_".join(category)

        return category, name

    def _to_looker_name(self, metric: GleanProbe, suffix: str = "") -> str:
        """Convert a glean probe into a looker name."""
        category, name = self._get_category_and_name(metric)

        sep = "" if not category else "_"
        looker_name = f"metrics__{metric.type}__{category}{sep}{name}"
        if suffix:
            looker_name = f"{looker_name}__{suffix}"
        return looker_name

    def _make_dimension(
        self, metric: GleanProbe, suffix: str, sql_map: Dict[str, Dict[str, str]]
    ) -> Optional[Dict[str, Union[str, List[Dict[str, str]]]]]:
        """Build the LookML dimension for a metric, or None if it has no field.

        ``sql_map`` maps looker names to their ``sql`` and ``type``; a metric
        whose looker name (after type renaming and suffixing) is absent from
        it yields None.
        """
        category, name = self._get_category_and_name(metric)

        sep = "" if not category else "_"
        label = name
        # some metric types are exposed under a different name (Bug 1737656)
        metric_type = RENAMED_METRIC_TYPES.get(metric.type, metric.type)
        looker_name = f"metrics__{metric_type}__{category}{sep}{name}"
        if suffix:
            label = f"{name}_{suffix}"
            looker_name = f"{looker_name}__{suffix}"

        if looker_name not in sql_map:
            return None

        group_label = slug_to_title(category)
        group_item_label = slug_to_title(label)

        if not group_label:
            group_label = "Glean"

        friendly_name = f"{group_label} {group_item_label}"

        lookml = {
            "name": looker_name,
            "label": friendly_name,
            # metrics that are no longer in the source are hidden by default
            "hidden": "no" if metric.is_in_source() else "yes",
            "sql": sql_map[looker_name]["sql"],
            "type": sql_map[looker_name]["type"],
            "group_label": group_label,
            "group_item_label": group_item_label,
            "links": [
                {
                    "label": (f"Glean Dictionary reference for {friendly_name}"),
                    "url": (
                        f"https://dictionary.telemetry.mozilla.org"
                        f"/apps/{self.namespace}/metrics/{category}{sep}{name}"
                    ),
                    "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
                },
            ],
        }

        if lookml["type"] == "time":
            # Remove any _{type} suffix from the dimension group name because each timeframe
            # will add a _{type} suffix to its individual dimension name.
            lookml["name"] = re.sub(r"_(date|time(stamp)?)$", "", looker_name)
            lookml["timeframes"] = [
                "raw",
                "time",
                "date",
                "week",
                "month",
                "quarter",
                "year",
            ]
            # Dimension groups should not be nested (see issue #82).
            del lookml["group_label"]
            del lookml["group_item_label"]
            # Links are not supported for dimension groups.
            del lookml["links"]

        # remove some elements from the definition if we're handling a labeled
        # counter, as an initial join dimension
        if metric.type == "labeled_counter":
            # this field is not used since labeled counters are maps
            del lookml["type"]
            lookml["hidden"] = "yes"

        if metric.description:
            lookml["description"] = metric.description

        return lookml

    def _get_metric_dimensions(
        self, metric: GleanProbe, sql_map: Dict[str, Dict[str, str]]
    ) -> Iterable[Optional[Dict[str, Union[str, List[Dict[str, str]]]]]]:
        """Yield the dimension(s) for a metric according to its type.

        Rates yield a numerator and a denominator, distributions a ``sum``,
        timespans a ``value``, and other allowed types one unsuffixed
        dimension. Types outside ALLOWED_TYPES yield nothing. Items may be
        None when no matching field exists.
        """
        if metric.type == "rate":
            for suffix in ("numerator", "denominator"):
                yield self._make_dimension(metric, suffix, sql_map)
        elif metric.type in DISTRIBUTION_TYPES:
            yield self._make_dimension(metric, "sum", sql_map)
        elif metric.type == "timespan":
            yield self._make_dimension(metric, "value", sql_map)
        elif metric.type in ALLOWED_TYPES:
            yield self._make_dimension(metric, "", sql_map)

    def _get_glean_metric_dimensions(
        self, all_fields: List[dict], v1_name: Optional[str]
    ):
        """Return the non-None dimensions of every Glean metric in this ping."""
        # fields without an explicit type are treated as strings
        sql_map = {
            f["name"]: {"sql": f["sql"], "type": f.get("type", "string")}
            for f in all_fields
        }
        metrics = self._get_glean_metrics(v1_name)
        return [
            dimension
            for metric in metrics
            for dimension in self._get_metric_dimensions(metric, sql_map)
            if dimension is not None
        ]

    def _add_link(self, dimension):
        """Return a copy of the dimension, with links for unlabeled metrics."""
        annotations = {}
        if self._is_metric(dimension) and not self._get_metric_type(
            dimension
        ).startswith("labeled"):
            annotations["links"] = self._get_links(dimension)

        return dict(dimension, **annotations)

    def get_dimensions(
        self, table, v1_name: Optional[str], dryrun
    ) -> List[Dict[str, Any]]:
        """Get the set of dimensions for this view."""
        all_fields = super().get_dimensions(table, v1_name, dryrun=dryrun)
        fields = self._get_glean_metric_dimensions(all_fields, v1_name) + [
            self._add_link(d)
            for d in all_fields
            if not d["name"].startswith("metrics__")
        ]
        # later entries will override earlier entries, if there are duplicates
        field_dict = {f["name"]: f for f in fields}
        return list(field_dict.values())

    def get_measures(
        self, dimensions: List[dict], table: str, v1_name: Optional[str]
    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
        """Generate measures from a list of dimensions.

        When no dimension-specific measures are found, return a single "count" measure.

        Raise ClickException if dimensions result in duplicate measures.
        """
        measures = super().get_measures(dimensions, table, v1_name)
        client_id_field = self.get_client_id(dimensions, table)

        for dimension in dimensions:
            if (
                self._is_metric(dimension)
                and self._get_metric_type(dimension) == "counter"
            ):
                # handle the counters in the metric ping
                name = self._get_name(dimension)
                dimension_name = dimension["name"]
                measures += [
                    {
                        "name": name,
                        "type": "sum",
                        "sql": f"${{{dimension_name}}}",
                        "links": self._get_links(dimension),
                    },
                ]

                if client_id_field is not None:
                    measures += [
                        {
                            "name": f"{name}_client_count",
                            "type": "count_distinct",
                            "filters": [{dimension_name: ">0"}],
                            "sql": f"${{{client_id_field}}}",
                            "links": self._get_links(dimension),
                        },
                    ]

        # check if there are any duplicate values
        names = [measure["name"] for measure in measures]
        duplicates = [k for k, v in Counter(names).items() if v > 1]
        if duplicates:
            raise click.ClickException(
                f"duplicate measures {duplicates!r} for table {table!r}"
            )

        return measures
A view on a ping table for an application using the Glean SDK.
@classmethod
def
from_db_views(klass, *args, **kwargs):
@classmethod
def from_db_views(klass, *args, **kwargs):
    """Yield the parent's db views, leaving out the disallowed pings."""
    candidates = super().from_db_views(*args, **kwargs)
    yield from (
        candidate
        for candidate in candidates
        if candidate.name not in DISALLOWED_PINGS
    )
Generate GleanPingViews from db views.
def
to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
    """Generate LookML for this view.

    The Glean views include labeled metrics, which need to be joined
    against the view in the explore.

    The result keeps the first view produced by the parent class, then
    appends one join view per labeled counter that has a matching
    dimension (plus a suggest view for the counters listed in
    SUGGESTS_FOR_LABELED_COUNTERS), then the nested dimension views
    generated from the table schema.
    """
    lookml = super().to_lookml(v1_name, dryrun=dryrun)
    # ignore nested join views
    lookml["views"] = [lookml["views"][0]]

    # iterate over all of the glean metrics and generate views for unnested
    # fields as necessary. Append them to the list of existing view
    # definitions.
    # Prefer the release-channel table; fall back to the first table.
    table = next(
        (table for table in self.tables if table.get("channel") == "release"),
        self.tables[0],
    )["table"]
    dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
    dimension_names = {dimension["name"] for dimension in dimensions}

    client_id_field = self.get_client_id(dimensions, table)

    view_definitions = []
    metrics = self._get_glean_metrics(v1_name)
    for metric in metrics:
        looker_name = self._to_looker_name(metric)
        if looker_name not in dimension_names:
            continue  # skip metrics with no matching dimension
        if metric.type == "labeled_counter":
            view_name = f"{self.name}__{looker_name}"
            suggest_name = f"suggest__{view_name}"

            category, name = [
                slug_to_title(v) for v in self._get_category_and_name(metric)
            ]
            view_label = f"{category} - {name}"
            # metrics that are no longer in the source are hidden
            metric_hidden = "no" if metric.is_in_source() else "yes"

            measures = [
                {
                    "name": "count",
                    "type": "sum",
                    "sql": "${value}",
                    "hidden": metric_hidden,
                }
            ]

            if client_id_field is not None:
                # client_id field is missing for pings with minimal Glean schema
                measures.append(
                    {
                        "name": "client_count",
                        "type": "count_distinct",
                        "sql": f"case when ${{value}} > 0 then ${{{self.name}.{client_id_field}}} end",
                        "hidden": metric_hidden,
                    }
                )

            join_view: Dict[str, Any] = {
                "name": view_name,
                "label": view_label,
                "dimensions": [
                    {
                        "name": "document_id",
                        "type": "string",
                        "sql": f"${{{self.name}.document_id}}",
                        "hidden": "yes",
                    },
                    # labeled counters need a primary key that incorporates
                    # their labels, otherwise we get jumbled results:
                    # https://github.com/mozilla/lookml-generator/issues/171
                    {
                        "name": "document_label_id",
                        "type": "string",
                        "sql": f"${{{self.name}.document_id}}-${{label}}",
                        "primary_key": "yes",
                        "hidden": "yes",
                    },
                    {
                        "name": "value",
                        "type": "number",
                        "sql": "${TABLE}.value",
                        "hidden": "yes",
                    },
                ],
                "measures": measures,
            }

            if looker_name in SUGGESTS_FOR_LABELED_COUNTERS:
                # the label dimension points at a dedicated suggest view
                join_view["dimensions"].append(
                    {
                        "name": "label",
                        "type": "string",
                        "sql": "${TABLE}.key",
                        "suggest_explore": suggest_name,
                        "suggest_dimension": f"{suggest_name}.key",
                        "hidden": metric_hidden,
                    },
                )

                # derived table listing the label keys seen in the last
                # 30 days for sample_id 0, with their row counts
                suggest_view = {
                    "name": suggest_name,
                    "derived_table": {
                        "sql": dedent(
                            f"""
                            select
                                m.key,
                                count(*) as n
                            from {table} as t,
                            unnest(metrics.{metric.type}.{metric.id.replace(".", "_")}) as m
                            where date(submission_timestamp) > date_sub(current_date, interval 30 day)
                                and sample_id = 0
                            group by key
                            order by n desc
                            """
                        )
                    },
                    "dimensions": [
                        {"name": "key", "type": "string", "sql": "${TABLE}.key"}
                    ],
                }
                view_definitions += [join_view, suggest_view]
            else:
                join_view["dimensions"].append(
                    {
                        "name": "label",
                        "type": "string",
                        "sql": "${TABLE}.key",
                        "hidden": metric_hidden,
                    },
                )
                view_definitions += [join_view]

    # deduplicate view definitions, because somehow a few entries make it in
    # twice e.g. metrics__metrics__labeled_counter__media_audio_init_failure
    # (the last definition for a given name wins; output is sorted by name)
    view_definitions = sorted(
        {v["name"]: v for v in view_definitions}.values(), key=lambda x: x["name"]  # type: ignore
    )

    # NOTE: `table` is rebound here from the full dotted id to its last part
    [project, dataset, table] = table.split(".")
    table_schema = dryrun.create(
        project=project,
        dataset=dataset,
        table=table,
    ).get_table_schema()
    nested_views = lookml_utils._generate_nested_dimension_views(
        table_schema, self.name
    )

    lookml["views"] += view_definitions + nested_views

    return lookml
Generate LookML for this view.
The Glean views include labeled metrics, which need to be joined against the view in the explore.
def
get_dimensions(self, table, v1_name: Optional[str], dryrun) -> List[Dict[str, Any]]:
def get_dimensions(
    self, table, v1_name: Optional[str], dryrun
) -> List[Dict[str, Any]]:
    """Return this view's dimensions, one per distinct name.

    Glean metric dimensions come first, followed by every parent field
    whose name does not start with ``metrics__``.
    """
    parent_fields = super().get_dimensions(table, v1_name, dryrun=dryrun)
    glean_fields = self._get_glean_metric_dimensions(parent_fields, v1_name)
    other_fields = [
        self._add_link(field)
        for field in parent_fields
        if not field["name"].startswith("metrics__")
    ]

    # on a name clash the later field replaces the earlier one, while the
    # position of the first occurrence is kept
    by_name = {}
    for field in glean_fields + other_fields:
        by_name[field["name"]] = field
    return list(by_name.values())
Get the set of dimensions for this view.
def
get_measures( self, dimensions: List[dict], table: str, v1_name: Optional[str]) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
def get_measures(
    self, dimensions: List[dict], table: str, v1_name: Optional[str]
) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
    """Build the measures for this view from its dimensions.

    Starts from the parent's measures, then adds a ``sum`` measure for each
    counter metric dimension and, when a client id field is available, a
    ``count_distinct`` client-count measure as well.

    Raise ClickException if two measures end up with the same name.
    """
    measures = super().get_measures(dimensions, table, v1_name)
    client_id_field = self.get_client_id(dimensions, table)

    for dim in dimensions:
        # only counter metrics get dedicated measures
        if not self._is_metric(dim) or self._get_metric_type(dim) != "counter":
            continue

        short_name = self._get_name(dim)
        full_name = dim["name"]
        measures.append(
            {
                "name": short_name,
                "type": "sum",
                "sql": f"${{{full_name}}}",
                "links": self._get_links(dim),
            }
        )

        if client_id_field is None:
            continue
        measures.append(
            {
                "name": f"{short_name}_client_count",
                "type": "count_distinct",
                "filters": [{full_name: ">0"}],
                "sql": f"${{{client_id_field}}}",
                "links": self._get_links(dim),
            }
        )

    # measure names must be unique within the view
    tally = Counter(measure["name"] for measure in measures)
    duplicates = [measure_name for measure_name, seen in tally.items() if seen > 1]
    if duplicates:
        raise click.ClickException(
            f"duplicate measures {duplicates!r} for table {table!r}"
        )

    return measures
Generate measures from a list of dimensions.
When no dimension-specific measures are found, return a single "count" measure.
Raise ClickException if dimensions result in duplicate measures.