generator.namespaces
Generate namespaces.yaml.
1"""Generate namespaces.yaml.""" 2 3import fnmatch 4import json 5import re 6import urllib.request 7import warnings 8from collections.abc import Mapping 9from datetime import datetime 10from itertools import groupby 11from operator import itemgetter 12from pathlib import Path 13from typing import Any, Dict, List, Union 14 15import click 16import yaml 17from google.cloud import bigquery 18 19from generator import operational_monitoring_utils 20 21from .explores import EXPLORE_TYPES 22from .metrics_utils import LOOKER_METRIC_HUB_REPO, METRIC_HUB_REPO, MetricsConfigLoader 23from .views import VIEW_TYPES, View, lookml_utils 24 25DEFAULT_GENERATED_SQL_URI = ( 26 "https://github.com/mozilla/bigquery-etl/archive/generated-sql.tar.gz" 27) 28 29PROBE_INFO_BASE_URI = "https://probeinfo.telemetry.mozilla.org" 30DEFAULT_SPOKE = "looker-spoke-default" 31OPMON_DATASET = "operational_monitoring" 32PROD_PROJECT = "moz-fx-data-shared-prod" 33 34 35def _normalize_slug(name): 36 return re.sub(r"[^a-zA-Z0-9_]", "_", name) 37 38 39def _merge_namespaces(dct, merge_dct): 40 """Recursively merge namespaces.""" 41 for k, _ in merge_dct.items(): 42 if k in dct and isinstance(dct[k], dict) and isinstance(merge_dct[k], Mapping): 43 if "glean_app" in merge_dct[k] and merge_dct[k]["glean_app"] is False: 44 # if glean_app gets set to False, Glean views and explores should not be generated 45 dct[k] = merge_dct[k] 46 else: 47 _merge_namespaces(dct[k], merge_dct[k]) 48 else: 49 if k == "owners" and "owners" in dct: 50 # combine owners 51 dct[k] += merge_dct[k] 52 else: 53 dct[k] = merge_dct[k] 54 55 56def _get_opmon(bq_client: bigquery.Client, namespaces: Dict[str, Any]): 57 om_content: Dict[str, Any] = {"views": {}, "explores": {}, "dashboards": {}} 58 # get operational monitoring namespace information 59 60 opmon_namespace = namespaces["operational_monitoring"] 61 views = opmon_namespace.get("views") 62 63 if views is None: 64 print("No views defined for operational monitoring") 65 return {} 66 67 projects_view = views.get("projects") 68 69 if projects_view is None: 70 print("No projects view defined for operational monitoring") 71 return {} 72 73 projects_table = projects_view["tables"][0]["table"] 74 projects = operational_monitoring_utils.get_active_projects( 75 bq_client, project_table=projects_table 76 ) 77 78 # Iterating over all defined operational monitoring projects 79 for project in projects: 80 table_prefix = _normalize_slug(project["slug"]) 81 project_name = lookml_utils.slug_to_title( 82 re.sub("[^0-9a-zA-Z_]+", "_", "_".join(project["name"].lower().split(" "))) 83 ) 84 branches = project.get("branches", ["enabled", "disabled"]) 85 86 # append view and explore for data type 87 table = f"{PROD_PROJECT}.{OPMON_DATASET}.{table_prefix}_statistics" 88 dimensions = operational_monitoring_utils.get_dimension_defaults( 89 bq_client, table, project["dimensions"] 90 ) 91 om_content["views"][table_prefix] = { 92 "type": "operational_monitoring_view", 93 "tables": [ 94 { 95 "table": table, 96 "xaxis": project["xaxis"], 97 "dimensions": dimensions, 98 } 99 ], 100 } 101 om_content["explores"][table_prefix] = { 102 "type": "operational_monitoring_explore", 103 "views": {"base_view": f"{table_prefix}"}, 104 "branches": branches, 105 "xaxis": project["xaxis"], 106 "dimensions": dimensions, 107 "summaries": project["summaries"], 108 } 109 110 if "alerting" in project and project["alerting"]: 111 # create an alerting view if available 112 om_content["views"][f"{table_prefix}_alerts"] = { 113 "type": 
"operational_monitoring_alerting_view", 114 "tables": [ 115 { 116 "table": f"{PROD_PROJECT}.{OPMON_DATASET}.{table_prefix}_alerts", 117 } 118 ], 119 } 120 om_content["explores"][f"{table_prefix}_alerts"] = { 121 "type": "operational_monitoring_alerting_explore", 122 "views": {"base_view": f"{table_prefix}_alerts"}, 123 } 124 125 om_content["dashboards"][table_prefix] = { 126 "type": "operational_monitoring_dashboard", 127 "title": project_name, 128 "tables": [ 129 { 130 "explore": f"{table_prefix}", 131 "table": f"{PROD_PROJECT}.{OPMON_DATASET}.{table_prefix}_statistics", 132 "branches": branches, 133 "xaxis": project["xaxis"], 134 "compact_visualization": project.get( 135 "compact_visualization", False 136 ), 137 "dimensions": dimensions, 138 "group_by_dimension": project.get("group_by_dimension", None), 139 "summaries": project["summaries"], 140 } 141 ], 142 } 143 144 if "alerting" in project and project["alerting"]: 145 om_content["dashboards"][table_prefix]["tables"].append( 146 { 147 "explore": f"{table_prefix}_alerts", 148 "table": f"{PROD_PROJECT}.{OPMON_DATASET}.{table_prefix}_alerts", 149 } 150 ) 151 152 return om_content 153 154 155def _get_metric_hub_namespaces(existing_namespaces): 156 metric_hub_data_sources = _get_metric_hub_data_sources() 157 158 metric_hub_namespaces = {} 159 for namespace, metric_hub_data_sources in metric_hub_data_sources.items(): 160 # each data source definition is represented by a view and an explore 161 explores = {} 162 views = {} 163 for data_source in sorted(metric_hub_data_sources): 164 views[f"metric_definitions_{data_source}"] = { 165 "type": "metric_definitions_view" 166 } 167 168 explores[f"metric_definitions_{data_source}"] = { 169 "type": "metric_definitions_explore", 170 "views": {"base_view": f"metric_definitions_{data_source}"}, 171 } 172 173 metric_hub_namespaces[namespace] = { 174 "pretty_name": lookml_utils.slug_to_title(namespace), 175 "views": views, 176 "explores": explores, 177 } 178 179 return metric_hub_namespaces 180 181 182def _get_glean_apps( 183 app_listings_uri: str, 184) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]: 185 # define key function and reuse it for sorted and groupby 186 if app_listings_uri.startswith(PROBE_INFO_BASE_URI): 187 # For probe-info-service requests, add query param to bypass cloudfront cache 188 app_listings_uri += f"?t={datetime.utcnow().isoformat()}" 189 190 get_app_name = itemgetter("app_name") 191 with urllib.request.urlopen(app_listings_uri) as f: 192 # groupby requires input be sorted by key to produce one result per key 193 app_listings = sorted(json.loads(f.read()), key=get_app_name) 194 195 apps = [] 196 for app_name, group in groupby(app_listings, get_app_name): 197 variants = list(group) 198 199 # use canonical_app_name where channel=="release" or the first one 200 release_variant = next( 201 ( 202 channel 203 for channel in variants 204 if channel.get("app_channel") == "release" 205 ), 206 variants[0], 207 ) 208 209 canonical_app_name = release_variant["canonical_app_name"] 210 v1_name = release_variant["v1_name"] 211 emails = release_variant["notification_emails"] 212 213 # we use the `source_dataset` concept to figure out what reference 214 # we should be looking for inside bigquery-etl 215 # For release we are currently using an app-level dataset which 216 # references the app id specific one (so we look for that view as 217 # a reference). 
218 # For other channels, we refer to the stable tables 219 channels = [ 220 { 221 "channel": channel.get("app_channel"), 222 "dataset": ( 223 channel.get("app_name").replace("-", "_") 224 if channel.get("app_channel") == "release" 225 else channel.get("bq_dataset_family") 226 ), 227 "source_dataset": ( 228 channel.get("bq_dataset_family") 229 if channel.get("app_channel") == "release" 230 else channel.get("bq_dataset_family") + "_stable" 231 ), 232 } 233 for channel in variants 234 if not channel.get("deprecated") 235 ] 236 237 # If all channels are deprecated, don't include this app 238 if channels: 239 apps.append( 240 { 241 "name": app_name, 242 "pretty_name": canonical_app_name, 243 "channels": channels, 244 "owners": emails, 245 "glean_app": True, 246 "v1_name": v1_name, 247 } 248 ) 249 250 return apps 251 252 253def _get_looker_views( 254 app: Dict[str, Union[str, List[Dict[str, str]]]], 255 db_views: Dict[str, Dict[str, List[List[str]]]], 256) -> List[View]: 257 views, view_names = [], [] 258 259 for klass in VIEW_TYPES.values(): 260 for view in klass.from_db_views( # type: ignore 261 app["name"], app["glean_app"], app["channels"], db_views 262 ): 263 if view.name in view_names: 264 raise KeyError( 265 ( 266 f"Duplicate Looker View name {view.name} " 267 f"when generating views for namespace {app['name']}" 268 ) 269 ) 270 views.append(view) 271 view_names.append(view.name) 272 273 return views 274 275 276def _get_explores(views: List[View]) -> dict: 277 explores = {} 278 for _, klass in EXPLORE_TYPES.items(): 279 for explore in klass.from_views(views): # type: ignore 280 explores.update(explore.to_dict()) 281 282 return explores 283 284 285def _get_metric_hub_data_sources() -> Dict[str, List[str]]: 286 """Get data source definitions from metric-hub repository for each namespace.""" 287 data_sources_per_namespace: Dict[str, List[str]] = {} 288 for definition in MetricsConfigLoader.configs.definitions: 289 for data_source_slug in definition.spec.data_sources.definitions.keys(): 290 if ( 291 len( 292 MetricsConfigLoader.metrics_of_data_source( 293 data_source_slug, definition.platform 294 ) 295 ) 296 > 0 # ignore data sources that are not used for any metric definition 297 ): 298 if definition.platform in data_sources_per_namespace: 299 data_sources_per_namespace[definition.platform].append( 300 data_source_slug 301 ) 302 else: 303 data_sources_per_namespace[definition.platform] = [data_source_slug] 304 305 return data_sources_per_namespace 306 307 308@click.command(help=__doc__) 309@click.option( 310 "--custom-namespaces", 311 default="custom-namespaces.yaml", 312 type=click.File(), 313 help="Path to a custom namespaces file", 314) 315@click.option( 316 "--generated-sql-uri", 317 default=DEFAULT_GENERATED_SQL_URI, 318 help="URI of a tar archive of the bigquery-etl generated-sql branch, which is " 319 "used to list views and determine whether they reference stable tables", 320) 321@click.option( 322 "--app-listings-uri", 323 default="https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings", 324 help="URI for probeinfo service v2 glean app listings", 325) 326@click.option( 327 "--disallowlist", 328 type=click.File(), 329 default="namespaces-disallowlist.yaml", 330 help="Path to namespace disallow list", 331) 332@click.option( 333 "--metric-hub-repos", 334 "--metric_hub_repos", 335 multiple=True, 336 default=[METRIC_HUB_REPO, LOOKER_METRIC_HUB_REPO], 337 help="Repos to load metric configs from.", 338) 339@click.option( 340 "--ignore", 341 multiple=True, 342 default=[], 343 
help="Namespaces to ignore during generation.", 344) 345@click.option( 346 "--use_cloud_function", 347 "--use-cloud-function", 348 help="Use the Cloud Function to run dry runs during LookML generation.", 349 type=bool, 350) 351def namespaces( 352 custom_namespaces, 353 generated_sql_uri, 354 app_listings_uri, 355 disallowlist, 356 metric_hub_repos, 357 ignore, 358 use_cloud_function, 359): 360 """Generate namespaces.yaml.""" 361 warnings.filterwarnings("ignore", module="google.auth._default") 362 glean_apps = _get_glean_apps(app_listings_uri) 363 db_views = lookml_utils.get_bigquery_view_reference_map(generated_sql_uri) 364 365 namespaces = {} 366 for app in glean_apps: 367 if app["name"] not in ignore: 368 looker_views = _get_looker_views(app, db_views) 369 explores = _get_explores(looker_views) 370 views_as_dict = {view.name: view.as_dict() for view in looker_views} 371 372 namespaces[app["name"]] = { 373 "owners": app["owners"], 374 "pretty_name": app["pretty_name"], 375 "views": views_as_dict, 376 "explores": explores, 377 "glean_app": True, 378 } 379 380 if custom_namespaces is not None: 381 custom_namespaces = yaml.safe_load(custom_namespaces.read()) or {} 382 # remove namespaces that should be ignored 383 for ignored_namespace in ignore: 384 if ignored_namespace in custom_namespaces: 385 del custom_namespaces[ignored_namespace] 386 387 # generating operational monitoring namespace, if available 388 if "operational_monitoring" in custom_namespaces: 389 if use_cloud_function: 390 raise Exception("Cannot generate OpMon using dry run Cloud Function") 391 392 client = bigquery.Client() 393 opmon = _get_opmon(bq_client=client, namespaces=custom_namespaces) 394 custom_namespaces["operational_monitoring"].update(opmon) 395 396 _merge_namespaces(namespaces, custom_namespaces) 397 398 if metric_hub_repos: 399 MetricsConfigLoader.update_repos(metric_hub_repos) 400 401 _merge_namespaces(namespaces, _get_metric_hub_namespaces(namespaces)) 402 403 disallowed_namespaces = yaml.safe_load(disallowlist.read()) or {} 404 disallowed_regex = [ 405 fnmatch.translate(namespace) for namespace in disallowed_namespaces 406 ] 407 disallowed_namespaces_pattern = re.compile("|".join(disallowed_regex)) 408 409 updated_namespaces = {} 410 for namespace, _ in namespaces.items(): 411 if ( 412 not disallowed_namespaces_pattern.fullmatch(namespace) 413 and namespace not in ignore 414 ): 415 if "spoke" not in namespaces[namespace]: 416 namespaces[namespace]["spoke"] = DEFAULT_SPOKE 417 if "glean_app" not in namespaces[namespace]: 418 namespaces[namespace]["glean_app"] = False 419 updated_namespaces[namespace] = namespaces[namespace] 420 421 Path("namespaces.yaml").write_text(yaml.safe_dump(updated_namespaces))
DEFAULT_GENERATED_SQL_URI = 'https://github.com/mozilla/bigquery-etl/archive/generated-sql.tar.gz'
PROBE_INFO_BASE_URI = 'https://probeinfo.telemetry.mozilla.org'
DEFAULT_SPOKE = 'looker-spoke-default'
OPMON_DATASET = 'operational_monitoring'
PROD_PROJECT = 'moz-fx-data-shared-prod'
namespaces = <Command namespaces>
Generate namespaces.yaml.
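Because namespaces is a click command, it can also be exercised in-process with click's test runner. This is a minimal sketch, assuming the generator package is installed, network access to the probeinfo service and the generated-sql archive is available, and the default custom-namespaces.yaml and namespaces-disallowlist.yaml files exist in the working directory; the --ignore value is a hypothetical namespace name.

from click.testing import CliRunner

from generator.namespaces import namespaces

runner = CliRunner()
result = runner.invoke(
    namespaces,
    [
        "--app-listings-uri",
        "https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings",
        "--ignore",
        "some_unwanted_namespace",  # hypothetical namespace to skip
    ],
)
print(result.exit_code)  # 0 on success; namespaces.yaml is written to the current directory

In normal use the same command is run from the command line; the test runner here simply keeps the example self-contained.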