generator.namespaces
Generate namespaces.yaml.
"""Generate namespaces.yaml."""

import fnmatch
import json
import re
import urllib.request
import warnings
from collections.abc import Mapping
from datetime import datetime, timezone
from itertools import groupby
from operator import itemgetter
from pathlib import Path
from typing import Any, Dict, List, Union

import click
import yaml
from google.cloud import bigquery

from generator import operational_monitoring_utils

from .explores import EXPLORE_TYPES
from .metrics_utils import LOOKER_METRIC_HUB_REPO, METRIC_HUB_REPO, MetricsConfigLoader
from .views import VIEW_TYPES, View, lookml_utils

DEFAULT_GENERATED_SQL_URI = (
    "https://github.com/mozilla/bigquery-etl/archive/generated-sql.tar.gz"
)

PROBE_INFO_BASE_URI = "https://probeinfo.telemetry.mozilla.org"
DEFAULT_SPOKE = "looker-spoke-default"
OPMON_DATASET = "operational_monitoring"
PROD_PROJECT = "moz-fx-data-shared-prod"


def _normalize_slug(name):
    """Replace every character that is not alphanumeric or `_` with `_`."""
    return re.sub(r"[^a-zA-Z0-9_]", "_", name)


def _merge_namespaces(dct, merge_dct):
    """Recursively merge `merge_dct` into `dct`, modifying `dct` in place.

    Nested mappings are merged key by key, with two special cases:

    * a nested mapping that sets `glean_app` to `False` replaces the existing
      entry entirely instead of being merged into it;
    * an `owners` value is appended to the existing `owners` rather than
      overwriting it.

    Every other value in `merge_dct` overwrites the value in `dct`.
    """
    for k, value in merge_dct.items():
        if k in dct and isinstance(dct[k], dict) and isinstance(value, Mapping):
            if value.get("glean_app") is False:
                # if glean_app gets set to False, Glean views and explores
                # should not be generated
                dct[k] = value
            else:
                _merge_namespaces(dct[k], value)
        elif k == "owners" and "owners" in dct:
            # combine owners
            dct[k] += value
        else:
            dct[k] = value


def _get_opmon(bq_client: bigquery.Client, namespaces: Dict[str, Any]):
    """Build views, explores and dashboards for operational monitoring projects.

    The table listing the projects is read from the `projects` view of the
    `operational_monitoring` namespace in `namespaces`. Returns an empty dict
    (after printing a message) when that namespace defines no views or no
    `projects` view; otherwise returns a dict with the keys `views`,
    `explores` and `dashboards`.
    """
    om_content: Dict[str, Any] = {"views": {}, "explores": {}, "dashboards": {}}

    # get operational monitoring namespace information
    opmon_namespace = namespaces["operational_monitoring"]
    views = opmon_namespace.get("views")

    if views is None:
        print("No views defined for operational monitoring")
        return {}

    projects_view = views.get("projects")

    if projects_view is None:
        print("No projects view defined for operational monitoring")
        return {}

    projects_table = projects_view["tables"][0]["table"]
    projects = operational_monitoring_utils.get_active_projects(
        bq_client, project_table=projects_table
    )

    # Iterating over all defined operational monitoring projects
    for project in projects:
        table_prefix = _normalize_slug(project["slug"])
        project_name = lookml_utils.slug_to_title(
            re.sub(r"[^0-9a-zA-Z_]+", "_", "_".join(project["name"].lower().split(" ")))
        )
        branches = project.get("branches", ["enabled", "disabled"])
        has_alerting = bool(project.get("alerting"))
        alerts_name = f"{table_prefix}_alerts"
        alerts_table = f"{PROD_PROJECT}.{OPMON_DATASET}.{alerts_name}"

        # append view and explore for data type
        table = f"{PROD_PROJECT}.{OPMON_DATASET}.{table_prefix}_statistics"
        dimensions = operational_monitoring_utils.get_dimension_defaults(
            bq_client, table, project["dimensions"]
        )
        om_content["views"][table_prefix] = {
            "type": "operational_monitoring_view",
            "tables": [
                {
                    "table": table,
                    "xaxis": project["xaxis"],
                    "dimensions": dimensions,
                }
            ],
        }
        om_content["explores"][table_prefix] = {
            "type": "operational_monitoring_explore",
            "views": {"base_view": f"{table_prefix}"},
            "branches": branches,
            "xaxis": project["xaxis"],
            "dimensions": dimensions,
            "summaries": project["summaries"],
        }

        if has_alerting:
            # create an alerting view if available
            om_content["views"][alerts_name] = {
                "type": "operational_monitoring_alerting_view",
                "tables": [
                    {
                        "table": alerts_table,
                    }
                ],
            }
            om_content["explores"][alerts_name] = {
                "type": "operational_monitoring_alerting_explore",
                "views": {"base_view": alerts_name},
            }

        dashboard_tables = [
            {
                "explore": f"{table_prefix}",
                "table": table,
                "branches": branches,
                "xaxis": project["xaxis"],
                "compact_visualization": project.get("compact_visualization", False),
                "dimensions": dimensions,
                "group_by_dimension": project.get("group_by_dimension", None),
                "summaries": project["summaries"],
            }
        ]

        if has_alerting:
            dashboard_tables.append(
                {
                    "explore": alerts_name,
                    "table": alerts_table,
                }
            )

        om_content["dashboards"][table_prefix] = {
            "type": "operational_monitoring_dashboard",
            "title": project_name,
            "tables": dashboard_tables,
        }

    return om_content


def _get_metric_hub_namespaces(existing_namespaces):
    """Build a namespace entry for every platform that has metric-hub data sources.

    `existing_namespaces` is currently unused; it is kept so existing callers
    keep working.
    """
    metric_hub_namespaces = {}
    for namespace, data_sources in _get_metric_hub_data_sources().items():
        # each data source definition is represented by a view and an explore
        explores = {}
        views = {}
        for data_source in sorted(data_sources):
            definition_name = f"metric_definitions_{data_source}"
            views[definition_name] = {"type": "metric_definitions_view"}
            explores[definition_name] = {
                "type": "metric_definitions_explore",
                "views": {"base_view": definition_name},
            }

        metric_hub_namespaces[namespace] = {
            "pretty_name": lookml_utils.slug_to_title(namespace),
            "views": views,
            "explores": explores,
        }

    return metric_hub_namespaces


def _get_glean_apps(
    app_listings_uri: str,
) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
    """Fetch the app listings from `app_listings_uri` and group them by app.

    Returns one dict per `app_name` with the keys `name`, `pretty_name`,
    `channels`, `owners`, `glean_app` and `v1_name`. Apps whose listings are
    all marked `deprecated` are left out.
    """
    if app_listings_uri.startswith(PROBE_INFO_BASE_URI):
        # For probe-info-service requests, add query param to bypass cloudfront cache.
        # The timestamp is a naive UTC value, formatted exactly as the
        # deprecated `datetime.utcnow()` would have produced it.
        cache_buster = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        separator = "&" if "?" in app_listings_uri else "?"
        app_listings_uri += f"{separator}t={cache_buster}"

    # define key function and reuse it for sorted and groupby
    get_app_name = itemgetter("app_name")
    with urllib.request.urlopen(app_listings_uri) as f:
        # groupby requires input be sorted by key to produce one result per key
        app_listings = sorted(json.loads(f.read()), key=get_app_name)

    apps = []
    for app_name, group in groupby(app_listings, get_app_name):
        variants = list(group)

        # use canonical_app_name where channel=="release" or the first one
        release_variant = next(
            (
                channel
                for channel in variants
                if channel.get("app_channel") == "release"
            ),
            variants[0],
        )

        canonical_app_name = release_variant["canonical_app_name"]
        v1_name = release_variant["v1_name"]
        emails = release_variant["notification_emails"]

        # we use the `source_dataset` concept to figure out what reference
        # we should be looking for inside bigquery-etl
        # For release we are currently using an app-level dataset which
        # references the app id specific one (so we look for that view as
        # a reference).
        # For other channels, we refer to the stable tables
        channels = [
            {
                "channel": channel.get("app_channel"),
                "dataset": (
                    channel.get("app_name").replace("-", "_")
                    if channel.get("app_channel") == "release"
                    else channel.get("bq_dataset_family")
                ),
                "source_dataset": (
                    channel.get("bq_dataset_family")
                    if channel.get("app_channel") == "release"
                    else channel.get("bq_dataset_family") + "_stable"
                ),
            }
            for channel in variants
            if not channel.get("deprecated")
        ]

        # If all channels are deprecated, don't include this app
        if channels:
            apps.append(
                {
                    "name": app_name,
                    "pretty_name": canonical_app_name,
                    "channels": channels,
                    "owners": emails,
                    "glean_app": True,
                    "v1_name": v1_name,
                }
            )

    return apps


def _get_looker_views(
    app: Dict[str, Union[str, List[Dict[str, str]]]],
    db_views: Dict[str, Dict[str, List[List[str]]]],
) -> List[View]:
    """Collect the views every registered view type produces for `app`.

    Raises:
        KeyError: if two generated views share the same name.
    """
    views = []
    seen_names = set()

    for klass in VIEW_TYPES.values():
        for view in klass.from_db_views(  # type: ignore
            app["name"], app["glean_app"], app["channels"], db_views
        ):
            if view.name in seen_names:
                raise KeyError(
                    (
                        f"Duplicate Looker View name {view.name} "
                        f"when generating views for namespace {app['name']}"
                    )
                )
            views.append(view)
            seen_names.add(view.name)

    return views


def _get_explores(views: List[View]) -> dict:
    """Merge the dict form of every explore the registered explore types build from `views`."""
    explores = {}
    for klass in EXPLORE_TYPES.values():
        for explore in klass.from_views(views):  # type: ignore
            explores.update(explore.to_dict())

    return explores


def _get_metric_hub_data_sources() -> Dict[str, List[str]]:
    """Get data source definitions from metric-hub repository for each namespace."""
    data_sources_per_namespace: Dict[str, List[str]] = {}
    for definition in MetricsConfigLoader.configs.definitions:
        for data_source_slug in definition.spec.data_sources.definitions.keys():
            if (
                len(
                    MetricsConfigLoader.metrics_of_data_source(
                        data_source_slug, definition.platform
                    )
                )
                > 0  # ignore data sources that are not used for any metric definition
            ):
                data_sources_per_namespace.setdefault(definition.platform, []).append(
                    data_source_slug
                )

    return data_sources_per_namespace


@click.command(help=__doc__)
@click.option(
    "--custom-namespaces",
    default="custom-namespaces.yaml",
    type=click.File(),
    help="Path to a custom namespaces file",
)
@click.option(
    "--generated-sql-uri",
    default=DEFAULT_GENERATED_SQL_URI,
    help="URI of a tar archive of the bigquery-etl generated-sql branch, which is "
    "used to list views and determine whether they reference stable tables",
)
@click.option(
    "--app-listings-uri",
    default="https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings",
    help="URI for probeinfo service v2 glean app listings",
)
@click.option(
    "--disallowlist",
    type=click.File(),
    default="namespaces-disallowlist.yaml",
    help="Path to namespace disallow list",
)
@click.option(
    "--metric-hub-repos",
    "--metric_hub_repos",
    multiple=True,
    default=[METRIC_HUB_REPO, LOOKER_METRIC_HUB_REPO],
    help="Repos to load metric configs from.",
)
def namespaces(
    custom_namespaces,
    generated_sql_uri,
    app_listings_uri,
    disallowlist,
    metric_hub_repos,
):
    """Generate namespaces.yaml."""
    warnings.filterwarnings("ignore", module="google.auth._default")
    glean_apps = _get_glean_apps(app_listings_uri)
    db_views = lookml_utils.get_bigquery_view_reference_map(generated_sql_uri)

    # start with one namespace per Glean app
    all_namespaces = {}
    for app in glean_apps:
        looker_views = _get_looker_views(app, db_views)
        explores = _get_explores(looker_views)
        views_as_dict = {view.name: view.as_dict() for view in looker_views}

        all_namespaces[app["name"]] = {
            "owners": app["owners"],
            "pretty_name": app["pretty_name"],
            "views": views_as_dict,
            "explores": explores,
            "glean_app": True,
        }

    if custom_namespaces is not None:
        custom_namespaces = yaml.safe_load(custom_namespaces.read()) or {}

        # generating operational monitoring namespace, if available
        if "operational_monitoring" in custom_namespaces:
            client = bigquery.Client()
            opmon = _get_opmon(bq_client=client, namespaces=custom_namespaces)
            custom_namespaces["operational_monitoring"].update(opmon)

        _merge_namespaces(all_namespaces, custom_namespaces)

    if metric_hub_repos:
        MetricsConfigLoader.update_repos(metric_hub_repos)

    _merge_namespaces(all_namespaces, _get_metric_hub_namespaces(all_namespaces))

    # disallow-list entries are shell-style patterns; combine them into one regex
    disallowed_namespaces = yaml.safe_load(disallowlist.read()) or {}
    disallowed_regex = [
        fnmatch.translate(namespace) for namespace in disallowed_namespaces
    ]
    disallowed_namespaces_pattern = re.compile("|".join(disallowed_regex))

    updated_namespaces = {}
    for namespace, config in all_namespaces.items():
        if disallowed_namespaces_pattern.fullmatch(namespace):
            continue
        # fill in defaults for namespaces that do not set them explicitly
        config.setdefault("spoke", DEFAULT_SPOKE)
        config.setdefault("glean_app", False)
        updated_namespaces[namespace] = config

    Path("namespaces.yaml").write_text(yaml.safe_dump(updated_namespaces))
DEFAULT_GENERATED_SQL_URI =
'https://github.com/mozilla/bigquery-etl/archive/generated-sql.tar.gz'
PROBE_INFO_BASE_URI =
'https://probeinfo.telemetry.mozilla.org'
DEFAULT_SPOKE =
'looker-spoke-default'
OPMON_DATASET =
'operational_monitoring'
PROD_PROJECT =
'moz-fx-data-shared-prod'
namespaces =
<Command namespaces>
Generate namespaces.yaml.