generator.views.glean_ping_view
Class to describe a Glean Ping View.
"""Class to describe a Glean Ping View."""

import logging
import re
from collections import Counter
from textwrap import dedent
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union

import click
from mozilla_schema_generator.glean_ping import GleanPing
from mozilla_schema_generator.probes import GleanProbe

from . import lookml_utils
from .lookml_utils import slug_to_title
from .ping_view import PingView

# Metric types for which only the ``sum`` sub-field is turned into a dimension.
DISTRIBUTION_TYPES = {
    "timing_distribution",
    "memory_distribution",
    "custom_distribution",
}


# Metric types for which dimensions are generated at all.
ALLOWED_TYPES = DISTRIBUTION_TYPES | {
    "boolean",
    "labeled_boolean",
    "counter",
    "labeled_counter",
    "datetime",
    "jwe",
    "quantity",
    "string",
    "labeled_string",
    "rate",
    "timespan",
    "uuid",
    "url",
    "text",
    "labeled_quantity",
}

# Bug 1737656 - some metric types are exposed under different names
# We need to map to the new name when building dimensions.
RENAMED_METRIC_TYPES = {
    "jwe": "jwe2",
    "text": "text2",
    "url": "url2",
}


# Pings for which no GleanPingView is generated.
DISALLOWED_PINGS = {"events", "events_stream"}

# List of labeled counter names for which a suggest explore should be generated.
# Generating suggest explores for all labeled counters slows down Looker.
SUGGESTS_FOR_LABELED_COUNTERS: Set[str] = set()


class GleanPingView(PingView):
    """A view on a ping table for an application using the Glean SDK."""

    type: str = "glean_ping_view"
    allow_glean: bool = True

    @classmethod
    def from_db_views(klass, *args, **kwargs):
        """Generate GleanPingViews from db views.

        Yields every view produced by the parent implementation except those
        whose name is listed in ``DISALLOWED_PINGS``.
        """
        candidates = super().from_db_views(*args, **kwargs)
        yield from (
            view for view in candidates if view.name not in DISALLOWED_PINGS
        )

    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
        """Generate LookML for this view.

        The Glean views include labeled metrics, which need to be joined
        against the view in the explore.
        """
        lookml = super().to_lookml(v1_name, dryrun=dryrun)
        # ignore nested join views: only the first view is kept
        main_view = lookml["views"][0]
        lookml["views"] = [main_view]

        # iterate over all of the glean metrics and generate views for unnested
        # fields as necessary. Append them to the list of existing view
        # definitions.
        # The release-channel table is preferred; otherwise the first table.
        release_entry = next(
            (entry for entry in self.tables if entry.get("channel") == "release"),
            self.tables[0],
        )
        table = release_entry["table"]

        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
        dimension_names = {dimension["name"] for dimension in dimensions}
        client_id_field = self.get_client_id(dimensions, table)

        # Keyed by view name so that a later duplicate replaces an earlier one,
        # because somehow a few entries make it in twice e.g.
        # metrics__metrics__labeled_counter__media_audio_init_failure
        views_by_name: Dict[str, Dict[str, Any]] = {}
        for metric in self._get_glean_metrics(v1_name):
            looker_name = self._to_looker_name(metric)
            if looker_name not in dimension_names:
                continue  # skip metrics with no matching dimension
            if metric.type != "labeled_counter":
                continue  # only labeled counters get a join view

            view_name = f"{self.name}__{looker_name}"
            suggest_name = f"suggest__{view_name}"

            category, name = [
                slug_to_title(part) for part in self._get_category_and_name(metric)
            ]
            view_label = f"{category}: {name}"
            metric_hidden = "no" if metric.is_in_source() else "yes"

            measures = [
                {
                    "name": "count",
                    "type": "sum",
                    "sql": "${value}",
                    "hidden": metric_hidden,
                }
            ]
            if client_id_field is not None:
                # client_id field is missing for pings with minimal Glean schema
                measures.append(
                    {
                        "name": "client_count",
                        "type": "count_distinct",
                        "sql": f"case when ${{value}} > 0 then ${{{self.name}.{client_id_field}}} end",
                        "hidden": metric_hidden,
                    }
                )

            join_view: Dict[str, Any] = {
                "name": view_name,
                "label": view_label,
                "dimensions": [
                    {
                        "name": "document_id",
                        "type": "string",
                        "sql": f"${{{self.name}.document_id}}",
                        "hidden": "yes",
                    },
                    # labeled counters need a primary key that incorporates
                    # their labels, otherwise we get jumbled results:
                    # https://github.com/mozilla/lookml-generator/issues/171
                    {
                        "name": "document_label_id",
                        "type": "string",
                        "sql": f"${{{self.name}.document_id}}-${{label}}",
                        "primary_key": "yes",
                        "hidden": "yes",
                    },
                    {
                        "name": "value",
                        "type": "number",
                        "sql": "${TABLE}.value",
                        "hidden": "yes",
                    },
                ],
                "measures": measures,
            }
            views_by_name[view_name] = join_view

            if looker_name not in SUGGESTS_FOR_LABELED_COUNTERS:
                join_view["dimensions"].append(
                    {
                        "name": "label",
                        "type": "string",
                        "sql": "${TABLE}.key",
                        "hidden": metric_hidden,
                    },
                )
                continue

            join_view["dimensions"].append(
                {
                    "name": "label",
                    "type": "string",
                    "sql": "${TABLE}.key",
                    "suggest_explore": suggest_name,
                    "suggest_dimension": f"{suggest_name}.key",
                    "hidden": metric_hidden,
                },
            )
            metric_column = metric.id.replace(".", "_")
            suggest_sql = dedent(
                f"""
                select
                    m.key,
                    count(*) as n
                from {table} as t,
                unnest(metrics.{metric.type}.{metric_column}) as m
                where date(submission_timestamp) > date_sub(current_date, interval 30 day)
                    and sample_id = 0
                group by key
                order by n desc
                """
            )
            views_by_name[suggest_name] = {
                "name": suggest_name,
                "derived_table": {"sql": suggest_sql},
                "dimensions": [
                    {"name": "key", "type": "string", "sql": "${TABLE}.key"}
                ],
            }

        # The dict keys are the view names, so sorting the keys sorts by name.
        view_definitions = [views_by_name[key] for key in sorted(views_by_name)]

        project_id, dataset_id, table_id = table.split(".")
        table_schema = dryrun.create(
            project=project_id,
            dataset=dataset_id,
            table=table_id,
        ).get_table_schema()
        nested_views = lookml_utils._generate_nested_dimension_views(
            table_schema, self.name
        )

        lookml["views"] += view_definitions + nested_views

        return lookml

    def _get_links(self, dimension: dict) -> List[Dict[str, str]]:
        """Get a link annotation given a metric name."""
        name = self._get_name(dimension)
        title = slug_to_title(name)
        return [
            {
                "label": f"Glean Dictionary reference for {title}",
                "url": (
                    "https://dictionary.telemetry.mozilla.org"
                    f"/apps/{self.namespace}/metrics/{name}"
                ),
                "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
            }
        ]

    def _get_name(self, dimension: dict) -> str:
        """Return the last ``__``-separated segment of the dimension name."""
        return dimension["name"].split("__")[-1]

    def _get_metric_type(self, dimension: dict) -> str:
        """Return the second ``__``-separated segment of the dimension name."""
        return dimension["name"].split("__")[1]

    def _is_metric(self, dimension) -> bool:
        """Return whether the dimension name starts with ``metrics__``."""
        return dimension["name"].startswith("metrics__")

    def _get_glean_metrics(self, v1_name: Optional[str]) -> List[GleanProbe]:
        """Return the probes of app ``v1_name`` that are sent in this ping.

        Probes are matched on their ``send_in_pings`` definition (with ``-``
        replaced by ``_``) and de-duplicated by id, keeping the first one.

        Returns an empty list (after logging an error) when ``v1_name`` is None.

        Raises click.ClickException if no repository is named ``v1_name``.
        """
        if v1_name is None:
            logging.error(
                "Error: Missing v1 name for ping %s in namespace %s",
                self.name,
                self.namespace,
            )
            return []

        # A bare ``next()`` would leak StopIteration when nothing matches, which
        # can silently end an enclosing iteration; fail explicitly instead.
        repo = next(
            (r for r in GleanPing.get_repos() if r["name"] == v1_name), None
        )
        if repo is None:
            raise click.ClickException(
                f"no Glean repository named {v1_name!r} found for ping "
                f"{self.name!r} in namespace {self.namespace!r}"
            )
        glean_app = GleanPing(repo)

        ping_probes = []
        probe_ids = set()
        for probe in glean_app.get_probes():
            send_in_pings_snakecase = [
                ping.replace("-", "_") for ping in probe.definition["send_in_pings"]
            ]
            if self.name not in send_in_pings_snakecase:
                continue
            if probe.id in probe_ids:
                # Some ids are duplicated, ignore them
                continue

            ping_probes.append(probe)
            probe_ids.add(probe.id)

        return ping_probes

    def _get_category_and_name(self, metric: GleanProbe) -> Tuple[str, str]:
        """Split a metric id on ``.`` into (category, name).

        The name is the last segment; the category is the remaining segments
        joined with ``_`` (empty when the id has a single segment).
        """
        *category_parts, name = metric.id.split(".")
        return "_".join(category_parts), name

    def _to_looker_name(self, metric: GleanProbe, suffix: str = "") -> str:
        """Convert a glean probe into a looker name."""
        category, name = self._get_category_and_name(metric)

        sep = "_" if category else ""
        looker_name = f"metrics__{metric.type}__{category}{sep}{name}"
        if suffix:
            looker_name = f"{looker_name}__{suffix}"
        return looker_name

    def _make_dimension(
        self, metric: GleanProbe, suffix: str, sql_map: Dict[str, Dict[str, str]]
    ) -> Optional[Dict[str, Union[str, List[Dict[str, str]]]]]:
        """Build the LookML dimension for ``metric``.

        ``suffix`` selects a sub-field of the metric (empty for none) and
        ``sql_map`` maps field names to their ``sql`` and ``type``.

        Returns None when the resulting field name is not in ``sql_map``.
        """
        category, name = self._get_category_and_name(metric)
        sep = "_" if category else ""
        metric_type = RENAMED_METRIC_TYPES.get(metric.type, metric.type)

        label = name
        looker_name = f"metrics__{metric_type}__{category}{sep}{name}"
        if suffix:
            label = f"{name}_{suffix}"
            looker_name = f"{looker_name}__{suffix}"

        if looker_name not in sql_map:
            return None
        field = sql_map[looker_name]

        # Metrics without a category are grouped under "Glean".
        group_label = slug_to_title(category) or "Glean"
        group_item_label = slug_to_title(label)
        friendly_name = f"{group_label}: {group_item_label}"

        lookml: Dict[str, Any] = {
            "name": looker_name,
            "label": friendly_name,
            # metrics that are no longer in the source are hidden by default
            "hidden": "no" if metric.is_in_source() else "yes",
            "sql": field["sql"],
            "type": field["type"],
            "group_label": group_label,
            "group_item_label": group_item_label,
            "links": [
                {
                    "label": f"Glean Dictionary reference for {friendly_name}",
                    "url": (
                        "https://dictionary.telemetry.mozilla.org"
                        f"/apps/{self.namespace}/metrics/{category}{sep}{name}"
                    ),
                    "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
                },
            ],
        }

        if lookml["type"] == "time":
            # Remove any _{type} suffix from the dimension group name because each timeframe
            # will add a _{type} suffix to its individual dimension name.
            group_name = re.sub("_(date|time(stamp)?)$", "", looker_name)
            lookml["name"] = group_name
            lookml["timeframes"] = [
                timeframe
                for timeframe in (
                    "raw",
                    "time",
                    "date",
                    "week",
                    "month",
                    "quarter",
                    "year",
                )
                # Exclude timeframes where the resulting dimension would conflict with an existing dimension.
                if f"{group_name}_{timeframe}" not in sql_map
            ]
            # Dimension groups should not be nested (see issue #82).
            del lookml["group_label"]
            del lookml["group_item_label"]
            # Links are not supported for dimension groups.
            del lookml["links"]

        # remove some elements from the definition if we're handling a labeled
        # counter, as an initial join dimension
        if metric.type == "labeled_counter":
            # this field is not used since labeled counters are maps
            del lookml["type"]
            lookml["hidden"] = "yes"

        if metric.description:
            lookml["description"] = metric.description

        return lookml

    def _get_metric_dimensions(
        self, metric: GleanProbe, sql_map: Dict[str, Dict[str, str]]
    ) -> Iterable[Optional[Dict[str, Union[str, List[Dict[str, str]]]]]]:
        """Yield one dimension (or None) per sub-field of ``metric``.

        Metric types outside ``ALLOWED_TYPES`` yield nothing.
        """
        suffixes: Tuple[str, ...]
        if metric.type == "rate":
            suffixes = ("numerator", "denominator")
        elif metric.type in DISTRIBUTION_TYPES:
            suffixes = ("sum",)
        elif metric.type == "timespan":
            suffixes = ("value",)
        elif metric.type in ALLOWED_TYPES:
            suffixes = ("",)
        else:
            suffixes = ()

        for suffix in suffixes:
            yield self._make_dimension(metric, suffix, sql_map)

    def _get_glean_metric_dimensions(
        self, all_fields: List[dict], v1_name: Optional[str]
    ):
        """Return the dimensions of all Glean metrics that have a matching field."""
        sql_map = {
            f["name"]: {"sql": f["sql"], "type": f.get("type", "string")}
            for f in all_fields
        }
        metrics = self._get_glean_metrics(v1_name)
        return [
            dimension
            for metric in metrics
            for dimension in self._get_metric_dimensions(metric, sql_map)
            if dimension is not None
        ]

    def _add_link(self, dimension):
        """Return a copy of ``dimension``, with links added for unlabeled metrics."""
        annotations = {}
        if self._is_metric(dimension) and not self._get_metric_type(
            dimension
        ).startswith("labeled"):
            annotations["links"] = self._get_links(dimension)

        return dict(dimension, **annotations)

    def get_dimensions(
        self, table, v1_name: Optional[str], dryrun
    ) -> List[Dict[str, Any]]:
        """Get the set of dimensions for this view."""
        all_fields = super().get_dimensions(table, v1_name, dryrun=dryrun)
        metric_fields = self._get_glean_metric_dimensions(all_fields, v1_name)
        other_fields = [
            self._add_link(field)
            for field in all_fields
            if not field["name"].startswith("metrics__")
        ]

        # later entries will override earlier entries, if there are duplicates
        fields_by_name: Dict[str, Dict[str, Any]] = {}
        for field in metric_fields + other_fields:
            fields_by_name[field["name"]] = field
        return list(fields_by_name.values())

    def get_measures(
        self, dimensions: List[dict], table: str, v1_name: Optional[str]
    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
        """Generate measures from a list of dimensions.

        When no dimension-specific measures are found, return a single "count" measure.

        Raise ClickException if dimensions result in duplicate measures.
        """
        measures = super().get_measures(dimensions, table, v1_name)
        client_id_field = self.get_client_id(dimensions, table)

        for dimension in dimensions:
            if not self._is_metric(dimension):
                continue
            if self._get_metric_type(dimension) != "counter":
                continue

            # handle the counters in the metric ping
            name = self._get_name(dimension)
            dimension_name = dimension["name"]
            counter_measures: List[Dict[str, Any]] = [
                {
                    "name": name,
                    "type": "sum",
                    "sql": f"${{{dimension_name}}}",
                    "links": self._get_links(dimension),
                },
            ]
            if client_id_field is not None:
                counter_measures.append(
                    {
                        "name": f"{name}_client_count",
                        "type": "count_distinct",
                        "filters": [{dimension_name: ">0"}],
                        "sql": f"${{{client_id_field}}}",
                        "links": self._get_links(dimension),
                    }
                )
            measures += counter_measures

        # check if there are any duplicate values
        name_counts = Counter(measure["name"] for measure in measures)
        duplicates = [name for name, seen in name_counts.items() if seen > 1]
        if duplicates:
            raise click.ClickException(
                f"duplicate measures {duplicates!r} for table {table!r}"
            )

        return measures
DISTRIBUTION_TYPES =
{'timing_distribution', 'memory_distribution', 'custom_distribution'}
ALLOWED_TYPES =
{'boolean', 'url', 'labeled_counter', 'labeled_quantity', 'text', 'timing_distribution', 'quantity', 'counter', 'labeled_boolean', 'timespan', 'datetime', 'labeled_string', 'rate', 'memory_distribution', 'string', 'jwe', 'custom_distribution', 'uuid'}
RENAMED_METRIC_TYPES =
{'jwe': 'jwe2', 'text': 'text2', 'url': 'url2'}
DISALLOWED_PINGS =
{'events', 'events_stream'}
SUGGESTS_FOR_LABELED_COUNTERS: Set[str] =
set()
class GleanPingView(PingView):
    """A view on a ping table for an application using the Glean SDK."""

    type: str = "glean_ping_view"
    allow_glean: bool = True

    @classmethod
    def from_db_views(klass, *args, **kwargs):
        """Generate GleanPingViews from db views.

        Yields every view produced by the parent implementation except those
        whose name is listed in ``DISALLOWED_PINGS``.
        """
        candidates = super().from_db_views(*args, **kwargs)
        yield from (
            view for view in candidates if view.name not in DISALLOWED_PINGS
        )

    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
        """Generate LookML for this view.

        The Glean views include labeled metrics, which need to be joined
        against the view in the explore.
        """
        lookml = super().to_lookml(v1_name, dryrun=dryrun)
        # ignore nested join views: only the first view is kept
        main_view = lookml["views"][0]
        lookml["views"] = [main_view]

        # iterate over all of the glean metrics and generate views for unnested
        # fields as necessary. Append them to the list of existing view
        # definitions.
        # The release-channel table is preferred; otherwise the first table.
        release_entry = next(
            (entry for entry in self.tables if entry.get("channel") == "release"),
            self.tables[0],
        )
        table = release_entry["table"]

        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
        dimension_names = {dimension["name"] for dimension in dimensions}
        client_id_field = self.get_client_id(dimensions, table)

        # Keyed by view name so that a later duplicate replaces an earlier one,
        # because somehow a few entries make it in twice e.g.
        # metrics__metrics__labeled_counter__media_audio_init_failure
        views_by_name: Dict[str, Dict[str, Any]] = {}
        for metric in self._get_glean_metrics(v1_name):
            looker_name = self._to_looker_name(metric)
            if looker_name not in dimension_names:
                continue  # skip metrics with no matching dimension
            if metric.type != "labeled_counter":
                continue  # only labeled counters get a join view

            view_name = f"{self.name}__{looker_name}"
            suggest_name = f"suggest__{view_name}"

            category, name = [
                slug_to_title(part) for part in self._get_category_and_name(metric)
            ]
            view_label = f"{category}: {name}"
            metric_hidden = "no" if metric.is_in_source() else "yes"

            measures = [
                {
                    "name": "count",
                    "type": "sum",
                    "sql": "${value}",
                    "hidden": metric_hidden,
                }
            ]
            if client_id_field is not None:
                # client_id field is missing for pings with minimal Glean schema
                measures.append(
                    {
                        "name": "client_count",
                        "type": "count_distinct",
                        "sql": f"case when ${{value}} > 0 then ${{{self.name}.{client_id_field}}} end",
                        "hidden": metric_hidden,
                    }
                )

            join_view: Dict[str, Any] = {
                "name": view_name,
                "label": view_label,
                "dimensions": [
                    {
                        "name": "document_id",
                        "type": "string",
                        "sql": f"${{{self.name}.document_id}}",
                        "hidden": "yes",
                    },
                    # labeled counters need a primary key that incorporates
                    # their labels, otherwise we get jumbled results:
                    # https://github.com/mozilla/lookml-generator/issues/171
                    {
                        "name": "document_label_id",
                        "type": "string",
                        "sql": f"${{{self.name}.document_id}}-${{label}}",
                        "primary_key": "yes",
                        "hidden": "yes",
                    },
                    {
                        "name": "value",
                        "type": "number",
                        "sql": "${TABLE}.value",
                        "hidden": "yes",
                    },
                ],
                "measures": measures,
            }
            views_by_name[view_name] = join_view

            if looker_name not in SUGGESTS_FOR_LABELED_COUNTERS:
                join_view["dimensions"].append(
                    {
                        "name": "label",
                        "type": "string",
                        "sql": "${TABLE}.key",
                        "hidden": metric_hidden,
                    },
                )
                continue

            join_view["dimensions"].append(
                {
                    "name": "label",
                    "type": "string",
                    "sql": "${TABLE}.key",
                    "suggest_explore": suggest_name,
                    "suggest_dimension": f"{suggest_name}.key",
                    "hidden": metric_hidden,
                },
            )
            metric_column = metric.id.replace(".", "_")
            suggest_sql = dedent(
                f"""
                select
                    m.key,
                    count(*) as n
                from {table} as t,
                unnest(metrics.{metric.type}.{metric_column}) as m
                where date(submission_timestamp) > date_sub(current_date, interval 30 day)
                    and sample_id = 0
                group by key
                order by n desc
                """
            )
            views_by_name[suggest_name] = {
                "name": suggest_name,
                "derived_table": {"sql": suggest_sql},
                "dimensions": [
                    {"name": "key", "type": "string", "sql": "${TABLE}.key"}
                ],
            }

        # The dict keys are the view names, so sorting the keys sorts by name.
        view_definitions = [views_by_name[key] for key in sorted(views_by_name)]

        project_id, dataset_id, table_id = table.split(".")
        table_schema = dryrun.create(
            project=project_id,
            dataset=dataset_id,
            table=table_id,
        ).get_table_schema()
        nested_views = lookml_utils._generate_nested_dimension_views(
            table_schema, self.name
        )

        lookml["views"] += view_definitions + nested_views

        return lookml

    def _get_links(self, dimension: dict) -> List[Dict[str, str]]:
        """Get a link annotation given a metric name."""
        name = self._get_name(dimension)
        title = slug_to_title(name)
        return [
            {
                "label": f"Glean Dictionary reference for {title}",
                "url": (
                    "https://dictionary.telemetry.mozilla.org"
                    f"/apps/{self.namespace}/metrics/{name}"
                ),
                "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
            }
        ]

    def _get_name(self, dimension: dict) -> str:
        """Return the last ``__``-separated segment of the dimension name."""
        return dimension["name"].split("__")[-1]

    def _get_metric_type(self, dimension: dict) -> str:
        """Return the second ``__``-separated segment of the dimension name."""
        return dimension["name"].split("__")[1]

    def _is_metric(self, dimension) -> bool:
        """Return whether the dimension name starts with ``metrics__``."""
        return dimension["name"].startswith("metrics__")

    def _get_glean_metrics(self, v1_name: Optional[str]) -> List[GleanProbe]:
        """Return the probes of app ``v1_name`` that are sent in this ping.

        Probes are matched on their ``send_in_pings`` definition (with ``-``
        replaced by ``_``) and de-duplicated by id, keeping the first one.

        Returns an empty list (after logging an error) when ``v1_name`` is None.

        Raises click.ClickException if no repository is named ``v1_name``.
        """
        if v1_name is None:
            logging.error(
                "Error: Missing v1 name for ping %s in namespace %s",
                self.name,
                self.namespace,
            )
            return []

        # A bare ``next()`` would leak StopIteration when nothing matches, which
        # can silently end an enclosing iteration; fail explicitly instead.
        repo = next(
            (r for r in GleanPing.get_repos() if r["name"] == v1_name), None
        )
        if repo is None:
            raise click.ClickException(
                f"no Glean repository named {v1_name!r} found for ping "
                f"{self.name!r} in namespace {self.namespace!r}"
            )
        glean_app = GleanPing(repo)

        ping_probes = []
        probe_ids = set()
        for probe in glean_app.get_probes():
            send_in_pings_snakecase = [
                ping.replace("-", "_") for ping in probe.definition["send_in_pings"]
            ]
            if self.name not in send_in_pings_snakecase:
                continue
            if probe.id in probe_ids:
                # Some ids are duplicated, ignore them
                continue

            ping_probes.append(probe)
            probe_ids.add(probe.id)

        return ping_probes

    def _get_category_and_name(self, metric: GleanProbe) -> Tuple[str, str]:
        """Split a metric id on ``.`` into (category, name).

        The name is the last segment; the category is the remaining segments
        joined with ``_`` (empty when the id has a single segment).
        """
        *category_parts, name = metric.id.split(".")
        return "_".join(category_parts), name

    def _to_looker_name(self, metric: GleanProbe, suffix: str = "") -> str:
        """Convert a glean probe into a looker name."""
        category, name = self._get_category_and_name(metric)

        sep = "_" if category else ""
        looker_name = f"metrics__{metric.type}__{category}{sep}{name}"
        if suffix:
            looker_name = f"{looker_name}__{suffix}"
        return looker_name

    def _make_dimension(
        self, metric: GleanProbe, suffix: str, sql_map: Dict[str, Dict[str, str]]
    ) -> Optional[Dict[str, Union[str, List[Dict[str, str]]]]]:
        """Build the LookML dimension for ``metric``.

        ``suffix`` selects a sub-field of the metric (empty for none) and
        ``sql_map`` maps field names to their ``sql`` and ``type``.

        Returns None when the resulting field name is not in ``sql_map``.
        """
        category, name = self._get_category_and_name(metric)
        sep = "_" if category else ""
        metric_type = RENAMED_METRIC_TYPES.get(metric.type, metric.type)

        label = name
        looker_name = f"metrics__{metric_type}__{category}{sep}{name}"
        if suffix:
            label = f"{name}_{suffix}"
            looker_name = f"{looker_name}__{suffix}"

        if looker_name not in sql_map:
            return None
        field = sql_map[looker_name]

        # Metrics without a category are grouped under "Glean".
        group_label = slug_to_title(category) or "Glean"
        group_item_label = slug_to_title(label)
        friendly_name = f"{group_label}: {group_item_label}"

        lookml: Dict[str, Any] = {
            "name": looker_name,
            "label": friendly_name,
            # metrics that are no longer in the source are hidden by default
            "hidden": "no" if metric.is_in_source() else "yes",
            "sql": field["sql"],
            "type": field["type"],
            "group_label": group_label,
            "group_item_label": group_item_label,
            "links": [
                {
                    "label": f"Glean Dictionary reference for {friendly_name}",
                    "url": (
                        "https://dictionary.telemetry.mozilla.org"
                        f"/apps/{self.namespace}/metrics/{category}{sep}{name}"
                    ),
                    "icon_url": "https://dictionary.telemetry.mozilla.org/favicon.png",
                },
            ],
        }

        if lookml["type"] == "time":
            # Remove any _{type} suffix from the dimension group name because each timeframe
            # will add a _{type} suffix to its individual dimension name.
            group_name = re.sub("_(date|time(stamp)?)$", "", looker_name)
            lookml["name"] = group_name
            lookml["timeframes"] = [
                timeframe
                for timeframe in (
                    "raw",
                    "time",
                    "date",
                    "week",
                    "month",
                    "quarter",
                    "year",
                )
                # Exclude timeframes where the resulting dimension would conflict with an existing dimension.
                if f"{group_name}_{timeframe}" not in sql_map
            ]
            # Dimension groups should not be nested (see issue #82).
            del lookml["group_label"]
            del lookml["group_item_label"]
            # Links are not supported for dimension groups.
            del lookml["links"]

        # remove some elements from the definition if we're handling a labeled
        # counter, as an initial join dimension
        if metric.type == "labeled_counter":
            # this field is not used since labeled counters are maps
            del lookml["type"]
            lookml["hidden"] = "yes"

        if metric.description:
            lookml["description"] = metric.description

        return lookml

    def _get_metric_dimensions(
        self, metric: GleanProbe, sql_map: Dict[str, Dict[str, str]]
    ) -> Iterable[Optional[Dict[str, Union[str, List[Dict[str, str]]]]]]:
        """Yield one dimension (or None) per sub-field of ``metric``.

        Metric types outside ``ALLOWED_TYPES`` yield nothing.
        """
        suffixes: Tuple[str, ...]
        if metric.type == "rate":
            suffixes = ("numerator", "denominator")
        elif metric.type in DISTRIBUTION_TYPES:
            suffixes = ("sum",)
        elif metric.type == "timespan":
            suffixes = ("value",)
        elif metric.type in ALLOWED_TYPES:
            suffixes = ("",)
        else:
            suffixes = ()

        for suffix in suffixes:
            yield self._make_dimension(metric, suffix, sql_map)

    def _get_glean_metric_dimensions(
        self, all_fields: List[dict], v1_name: Optional[str]
    ):
        """Return the dimensions of all Glean metrics that have a matching field."""
        sql_map = {
            f["name"]: {"sql": f["sql"], "type": f.get("type", "string")}
            for f in all_fields
        }
        metrics = self._get_glean_metrics(v1_name)
        return [
            dimension
            for metric in metrics
            for dimension in self._get_metric_dimensions(metric, sql_map)
            if dimension is not None
        ]

    def _add_link(self, dimension):
        """Return a copy of ``dimension``, with links added for unlabeled metrics."""
        annotations = {}
        if self._is_metric(dimension) and not self._get_metric_type(
            dimension
        ).startswith("labeled"):
            annotations["links"] = self._get_links(dimension)

        return dict(dimension, **annotations)

    def get_dimensions(
        self, table, v1_name: Optional[str], dryrun
    ) -> List[Dict[str, Any]]:
        """Get the set of dimensions for this view."""
        all_fields = super().get_dimensions(table, v1_name, dryrun=dryrun)
        metric_fields = self._get_glean_metric_dimensions(all_fields, v1_name)
        other_fields = [
            self._add_link(field)
            for field in all_fields
            if not field["name"].startswith("metrics__")
        ]

        # later entries will override earlier entries, if there are duplicates
        fields_by_name: Dict[str, Dict[str, Any]] = {}
        for field in metric_fields + other_fields:
            fields_by_name[field["name"]] = field
        return list(fields_by_name.values())

    def get_measures(
        self, dimensions: List[dict], table: str, v1_name: Optional[str]
    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
        """Generate measures from a list of dimensions.

        When no dimension-specific measures are found, return a single "count" measure.

        Raise ClickException if dimensions result in duplicate measures.
        """
        measures = super().get_measures(dimensions, table, v1_name)
        client_id_field = self.get_client_id(dimensions, table)

        for dimension in dimensions:
            if not self._is_metric(dimension):
                continue
            if self._get_metric_type(dimension) != "counter":
                continue

            # handle the counters in the metric ping
            name = self._get_name(dimension)
            dimension_name = dimension["name"]
            counter_measures: List[Dict[str, Any]] = [
                {
                    "name": name,
                    "type": "sum",
                    "sql": f"${{{dimension_name}}}",
                    "links": self._get_links(dimension),
                },
            ]
            if client_id_field is not None:
                counter_measures.append(
                    {
                        "name": f"{name}_client_count",
                        "type": "count_distinct",
                        "filters": [{dimension_name: ">0"}],
                        "sql": f"${{{client_id_field}}}",
                        "links": self._get_links(dimension),
                    }
                )
            measures += counter_measures

        # check if there are any duplicate values
        name_counts = Counter(measure["name"] for measure in measures)
        duplicates = [name for name, seen in name_counts.items() if seen > 1]
        if duplicates:
            raise click.ClickException(
                f"duplicate measures {duplicates!r} for table {table!r}"
            )

        return measures
A view on a ping table for an application using the Glean SDK.
@classmethod
def
from_db_views(klass, *args, **kwargs):
@classmethod
def from_db_views(klass, *args, **kwargs):
    """Generate GleanPingViews from db views.

    Yields every view produced by the parent implementation except those
    whose name is listed in ``DISALLOWED_PINGS``.
    """
    candidates = super().from_db_views(*args, **kwargs)
    yield from (
        view for view in candidates if view.name not in DISALLOWED_PINGS
    )
Generate GleanPingViews from db views.
def
to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
    """Generate LookML for this view.

    The Glean views include labeled metrics, which need to be joined
    against the view in the explore.
    """
    lookml = super().to_lookml(v1_name, dryrun=dryrun)
    # ignore nested join views: only the first view is kept
    main_view = lookml["views"][0]
    lookml["views"] = [main_view]

    # iterate over all of the glean metrics and generate views for unnested
    # fields as necessary. Append them to the list of existing view
    # definitions.
    # The release-channel table is preferred; otherwise the first table.
    release_entry = next(
        (entry for entry in self.tables if entry.get("channel") == "release"),
        self.tables[0],
    )
    table = release_entry["table"]

    dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
    dimension_names = {dimension["name"] for dimension in dimensions}
    client_id_field = self.get_client_id(dimensions, table)

    # Keyed by view name so that a later duplicate replaces an earlier one,
    # because somehow a few entries make it in twice e.g.
    # metrics__metrics__labeled_counter__media_audio_init_failure
    views_by_name: Dict[str, Dict[str, Any]] = {}
    for metric in self._get_glean_metrics(v1_name):
        looker_name = self._to_looker_name(metric)
        if looker_name not in dimension_names:
            continue  # skip metrics with no matching dimension
        if metric.type != "labeled_counter":
            continue  # only labeled counters get a join view

        view_name = f"{self.name}__{looker_name}"
        suggest_name = f"suggest__{view_name}"

        category, name = [
            slug_to_title(part) for part in self._get_category_and_name(metric)
        ]
        view_label = f"{category}: {name}"
        metric_hidden = "no" if metric.is_in_source() else "yes"

        measures = [
            {
                "name": "count",
                "type": "sum",
                "sql": "${value}",
                "hidden": metric_hidden,
            }
        ]
        if client_id_field is not None:
            # client_id field is missing for pings with minimal Glean schema
            measures.append(
                {
                    "name": "client_count",
                    "type": "count_distinct",
                    "sql": f"case when ${{value}} > 0 then ${{{self.name}.{client_id_field}}} end",
                    "hidden": metric_hidden,
                }
            )

        join_view: Dict[str, Any] = {
            "name": view_name,
            "label": view_label,
            "dimensions": [
                {
                    "name": "document_id",
                    "type": "string",
                    "sql": f"${{{self.name}.document_id}}",
                    "hidden": "yes",
                },
                # labeled counters need a primary key that incorporates
                # their labels, otherwise we get jumbled results:
                # https://github.com/mozilla/lookml-generator/issues/171
                {
                    "name": "document_label_id",
                    "type": "string",
                    "sql": f"${{{self.name}.document_id}}-${{label}}",
                    "primary_key": "yes",
                    "hidden": "yes",
                },
                {
                    "name": "value",
                    "type": "number",
                    "sql": "${TABLE}.value",
                    "hidden": "yes",
                },
            ],
            "measures": measures,
        }
        views_by_name[view_name] = join_view

        if looker_name not in SUGGESTS_FOR_LABELED_COUNTERS:
            join_view["dimensions"].append(
                {
                    "name": "label",
                    "type": "string",
                    "sql": "${TABLE}.key",
                    "hidden": metric_hidden,
                },
            )
            continue

        join_view["dimensions"].append(
            {
                "name": "label",
                "type": "string",
                "sql": "${TABLE}.key",
                "suggest_explore": suggest_name,
                "suggest_dimension": f"{suggest_name}.key",
                "hidden": metric_hidden,
            },
        )
        metric_column = metric.id.replace(".", "_")
        suggest_sql = dedent(
            f"""
            select
                m.key,
                count(*) as n
            from {table} as t,
            unnest(metrics.{metric.type}.{metric_column}) as m
            where date(submission_timestamp) > date_sub(current_date, interval 30 day)
                and sample_id = 0
            group by key
            order by n desc
            """
        )
        views_by_name[suggest_name] = {
            "name": suggest_name,
            "derived_table": {"sql": suggest_sql},
            "dimensions": [
                {"name": "key", "type": "string", "sql": "${TABLE}.key"}
            ],
        }

    # The dict keys are the view names, so sorting the keys sorts by name.
    view_definitions = [views_by_name[key] for key in sorted(views_by_name)]

    project_id, dataset_id, table_id = table.split(".")
    table_schema = dryrun.create(
        project=project_id,
        dataset=dataset_id,
        table=table_id,
    ).get_table_schema()
    nested_views = lookml_utils._generate_nested_dimension_views(
        table_schema, self.name
    )

    lookml["views"] += view_definitions + nested_views

    return lookml
Generate LookML for this view.
The Glean views include labeled metrics, which need to be joined against the view in the explore.
def
get_dimensions(self, table, v1_name: Optional[str], dryrun) -> List[Dict[str, Any]]:
def get_dimensions(
    self, table, v1_name: Optional[str], dryrun
) -> List[Dict[str, Any]]:
    """Get the set of dimensions for this view.

    Combines the Glean metric dimensions with the parent's fields whose
    name does not start with ``metrics__``, keeping one entry per name.
    """
    all_fields = super().get_dimensions(table, v1_name, dryrun=dryrun)
    metric_fields = self._get_glean_metric_dimensions(all_fields, v1_name)
    other_fields = [
        self._add_link(field)
        for field in all_fields
        if not field["name"].startswith("metrics__")
    ]

    # later entries will override earlier entries, if there are duplicates
    fields_by_name: Dict[str, Dict[str, Any]] = {}
    for field in metric_fields + other_fields:
        fields_by_name[field["name"]] = field
    return list(fields_by_name.values())
Get the set of dimensions for this view.
def
get_measures( self, dimensions: List[dict], table: str, v1_name: Optional[str]) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
def get_measures(
    self, dimensions: List[dict], table: str, v1_name: Optional[str]
) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
    """Generate measures from a list of dimensions.

    When no dimension-specific measures are found, return a single "count" measure.

    Raise ClickException if dimensions result in duplicate measures.
    """
    measures = super().get_measures(dimensions, table, v1_name)
    client_id_field = self.get_client_id(dimensions, table)

    for dimension in dimensions:
        if not self._is_metric(dimension):
            continue
        if self._get_metric_type(dimension) != "counter":
            continue

        # handle the counters in the metric ping
        name = self._get_name(dimension)
        dimension_name = dimension["name"]
        counter_measures: List[Dict[str, Any]] = [
            {
                "name": name,
                "type": "sum",
                "sql": f"${{{dimension_name}}}",
                "links": self._get_links(dimension),
            },
        ]
        if client_id_field is not None:
            counter_measures.append(
                {
                    "name": f"{name}_client_count",
                    "type": "count_distinct",
                    "filters": [{dimension_name: ">0"}],
                    "sql": f"${{{client_id_field}}}",
                    "links": self._get_links(dimension),
                }
            )
        measures += counter_measures

    # check if there are any duplicate values
    name_counts = Counter(measure["name"] for measure in measures)
    duplicates = [name for name, seen in name_counts.items() if seen > 1]
    if duplicates:
        raise click.ClickException(
            f"duplicate measures {duplicates!r} for table {table!r}"
        )

    return measures
Generate measures from a list of dimensions.
When no dimension-specific measures are found, return a single "count" measure.
Raise ClickException if dimensions result in duplicate measures.