generator.lookml
Generate lookml from namespaces.
"""Generate lookml from namespaces."""

import logging
from functools import partial
from multiprocessing.pool import Pool
from pathlib import Path
from typing import Any, Dict, Iterable, Optional

import click
import lkml
import yaml

from generator.utils import get_file_from_looker_hub

from .dashboards import DASHBOARD_TYPES
from .dryrun import DryRunContext, DryRunError, Errors, credentials, id_token
from .explores import EXPLORE_TYPES
from .metrics_utils import LOOKER_METRIC_HUB_REPO, METRIC_HUB_REPO, MetricsConfigLoader
from .namespaces import _get_glean_apps
from .views import VIEW_TYPES, View, ViewDict
from .views.datagroups import generate_datagroup

# Prepended verbatim to every generated view, explore and dashboard file.
FILE_HEADER = """
# *Do not manually modify this file*
#
# This file has been generated via https://github.com/mozilla/lookml-generator
# You can extend this view in the looker-spoke-default project (https://github.com/mozilla/looker-spoke-default)

"""


def _generate_view(
    out_dir: Path,
    view: View,
    v1_name: Optional[str],
    dryrun,
) -> Optional[Path]:
    """
    Write the LookML for a single view to ``<out_dir>/<view.name>.view.lkml``.

    Returns the path of the written file, or None when the view produces an
    empty LookML dict or when the fallback described below fails.

    If generation raises a DryRunError with ``PERMISSION_DENIED`` while the
    cloud function is in use, ``get_file_from_looker_hub(path)`` is called
    instead (presumably fetching the previously generated file - confirm in
    generator.utils). Any other DryRunError is re-raised.
    """
    logging.info(
        f"Generating lookml for view {view.name} in {view.namespace} of type {view.view_type}"
    )
    path = out_dir / f"{view.name}.view.lkml"

    try:
        view_lookml = view.to_lookml(v1_name, dryrun)
        if view_lookml == {}:
            return None

        # lkml.dump may return None, in which case only the header is written
        path.write_text(FILE_HEADER + (lkml.dump(view_lookml) or ""))
        return path
    except DryRunError as e:
        if e.error == Errors.PERMISSION_DENIED and e.use_cloud_function:
            print(
                f"Permission error dry running {view.name}. Copy existing {path} file from looker-hub."
            )
            try:
                get_file_from_looker_hub(path)
                return path
            except Exception as ex:
                # Deliberate best effort: a view that can neither be generated
                # nor fetched is skipped rather than failing the whole run.
                print(f"Skip generating view for {path}: {ex}")
                return None
        else:
            raise


def _generate_explore(
    out_dir: Path,
    namespace: str,
    explore_name: str,
    explore_info: Any,
    views_dir: Path,
    # v1_name for Glean explores: see: https://mozilla.github.io/probe-scraper/#tag/library
    v1_name: Optional[str],
) -> Path:
    """
    Write the LookML for a single explore to ``<out_dir>/<explore_name>.explore.lkml``.

    The explore class is selected via ``explore_info["type"]``; the optional
    ``explore_info["hidden"]`` flag (default False) is forwarded to
    ``to_lookml``. Returns the path of the written file.
    """
    logging.info(f"Generating lookml for explore {explore_name} in {namespace}")
    explore_by_type = EXPLORE_TYPES[explore_info["type"]].from_dict(
        explore_name, explore_info, views_dir
    )

    hidden = explore_info.get("hidden", False)

    file_lookml = {
        # Looker validates all included files,
        # so if we're not explicit about files here, validation takes
        # forever as looker re-validates all views for every explore (if we used *).
        "includes": [
            f"/looker-hub/{namespace}/views/{view}.view.lkml"
            for view in explore_by_type.get_dependent_views()
        ],
        "explores": explore_by_type.to_lookml(v1_name, hidden),
    }
    path = out_dir / (explore_name + ".explore.lkml")
    # lkml.dump may return None, in which case only the header is written
    path.write_text(FILE_HEADER + (lkml.dump(file_lookml) or ""))
    return path


def _generate_dashboard(
    dash_dir: Path,
    namespace: str,
    dashboard_name: str,
    dashboard_info: Any,
):
    """
    Write a single dashboard to ``<dash_dir>/<dashboard_name>.dashboard.lookml``.

    The dashboard class is selected via ``dashboard_info["type"]``. Returns the
    path of the written file.
    """
    logging.info(f"Generating lookml for dashboard {dashboard_name} in {namespace}")
    dashboard = DASHBOARD_TYPES[dashboard_info["type"]].from_dict(
        namespace, dashboard_name, dashboard_info
    )

    dashboard_lookml = dashboard.to_lookml()
    dash_path = dash_dir / f"{dashboard_name}.dashboard.lookml"
    dash_path.write_text(FILE_HEADER + dashboard_lookml)
    return dash_path


def _get_views_from_dict(views: Dict[str, ViewDict], namespace: str) -> Iterable[View]:
    """Lazily build a View for each entry in `views`, dispatching on its "type" key."""
    for view_name, view_info in views.items():
        yield VIEW_TYPES[view_info["type"]].from_dict(  # type: ignore
            namespace, view_name, view_info
        )


def _glean_apps_to_v1_map(glean_apps):
    """Map each Glean app's "name" to its "v1_name"."""
    return {d["name"]: d["v1_name"] for d in glean_apps}


def _run_generation(func):
    """
    Run the partially applied generate function.

    For parallel execution.
    """
    return func()


def _update_metric_repos(metric_hub_repos):
    """Update metric hub repos when initializing the processes."""
    MetricsConfigLoader.update_repos(metric_hub_repos)


def _lookml(
    namespaces,
    glean_apps,
    target_dir,
    dryrun,
    namespace_filter=None,
    parallelism: int = 8,
    metric_hub_repos=None,
):
    """
    Generate views, datagroups, explores and dashboards for the given namespaces.

    Parameters:
        namespaces: readable object whose ``read()`` returns the namespaces YAML.
        glean_apps: iterable of dicts with "name" and "v1_name" keys.
        target_dir: directory the LookML is written to (created if missing).
        dryrun: dry run context forwarded to view and datagroup generation.
        namespace_filter: namespace names to generate; None or empty means all.
        parallelism: 1 runs everything sequentially in this process, any other
            value is used as the size of a multiprocessing Pool.
        metric_hub_repos: repos handed to ``MetricsConfigLoader.update_repos``
            when each worker process starts; None is treated as an empty list.
    """
    # None sentinels replace the former mutable ``[]`` defaults.
    if metric_hub_repos is None:
        metric_hub_repos = []

    namespaces_content = namespaces.read()
    _namespaces = yaml.safe_load(namespaces_content)
    target = Path(target_dir)
    target.mkdir(parents=True, exist_ok=True)

    # Write namespaces file to target directory, for use
    # by the Glean Dictionary and other tools
    with open(target / "namespaces.yaml", "w") as target_namespaces_file:
        target_namespaces_file.write(namespaces_content)

    # Each list collects zero-argument callables so the same work items can be
    # run either in-process or through the pool.
    generate_views = []
    generate_datagroups = []
    generate_explores = []
    generate_dashboards = []
    v1_mapping = _glean_apps_to_v1_map(glean_apps)

    for namespace, lookml_objects in _namespaces.items():
        # An empty (or omitted) filter selects every namespace.
        if namespace_filter and namespace not in namespace_filter:
            continue

        view_dir = target / namespace / "views"
        view_dir.mkdir(parents=True, exist_ok=True)
        views = list(_get_views_from_dict(lookml_objects.get("views", {}), namespace))

        v1_name: Optional[str] = v1_mapping.get(namespace)
        for view in views:
            generate_views.append(
                partial(
                    _generate_view,
                    view_dir,
                    view,
                    v1_name,
                    dryrun,
                )
            )
            generate_datagroups.append(
                partial(
                    generate_datagroup,
                    view,
                    target,
                    namespace,
                    dryrun,
                )
            )

        explore_dir = target / namespace / "explores"
        explore_dir.mkdir(parents=True, exist_ok=True)
        explores = lookml_objects.get("explores", {})
        generate_explores += [
            partial(
                _generate_explore,
                explore_dir,
                namespace,
                explore_name,
                explore,
                view_dir,
                v1_name,
            )
            for explore_name, explore in explores.items()
        ]

        dashboard_dir = target / namespace / "dashboards"
        dashboard_dir.mkdir(parents=True, exist_ok=True)
        dashboards = lookml_objects.get("dashboards", {})
        generate_dashboards += [
            partial(
                _generate_dashboard,
                dashboard_dir,
                namespace,
                dashboard_name,
                dashboard,
            )
            for dashboard_name, dashboard in dashboards.items()
        ]

    if parallelism == 1:
        # run without using multiprocessing
        # this is needed for the unit tests to work as mocks are not shared across processes
        logging.info(" Generating views")
        for generate_view_func in generate_views:
            generate_view_func()
        logging.info(" Generating datagroups")
        for generate_datagroup_func in generate_datagroups:
            generate_datagroup_func()
        logging.info(" Generating explores")
        for generate_explore_func in generate_explores:
            generate_explore_func()
        logging.info(" Generating dashboards")
        for generate_dashboard_func in generate_dashboards:
            generate_dashboard_func()
    else:
        with Pool(
            parallelism, initializer=partial(_update_metric_repos, metric_hub_repos)
        ) as pool:
            # Each map() call blocks until its stage is complete, so explores
            # start only after all views and datagroups have been processed.
            logging.info(" Generating views and datagroups")
            pool.map(_run_generation, generate_views + generate_datagroups)
            logging.info(" Generating explores")
            pool.map(
                _run_generation,
                generate_explores,
            )
            logging.info(" Generating dashboards")
            pool.map(
                _run_generation,
                generate_dashboards,
            )


@click.command(help=__doc__)
@click.option(
    "--namespaces",
    default="namespaces.yaml",
    type=click.File(),
    help="Path to a yaml namespaces file",
)
@click.option(
    "--app-listings-uri",
    default="https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings",
    help="URI for probeinfo service v2 glean app listings",
)
@click.option(
    "--target-dir",
    default="looker-hub/",
    type=click.Path(),
    help="Path to a directory where lookml will be written",
)
@click.option(
    # The flag was previously declared twice with the identical spelling; the
    # underscore form mirrors the --use_cloud_function/--use-cloud-function pair.
    "--metric_hub_repos",
    "--metric-hub-repos",
    multiple=True,
    default=[METRIC_HUB_REPO, LOOKER_METRIC_HUB_REPO],
    help="Repos to load metric configs from.",
)
@click.option(
    "--only",
    multiple=True,
    default=[],
    help="List of namespace names to generate lookml for.",
)
@click.option(
    "--use_cloud_function",
    "--use-cloud-function",
    help="Use the Cloud Function to run dry runs during LookML generation.",
    type=bool,
)
@click.option(
    "--parallelism",
    "-p",
    default=8,
    type=int,
    help="Number of processes to use for LookML generation",
)
def lookml(
    namespaces,
    app_listings_uri,
    target_dir,
    metric_hub_repos,
    only,
    use_cloud_function,
    parallelism,
):
    """Generate lookml from namespaces."""
    # Update the repos in this process too; worker processes repeat this in
    # their pool initializer.
    if metric_hub_repos:
        MetricsConfigLoader.update_repos(metric_hub_repos)
    glean_apps = _get_glean_apps(app_listings_uri)

    # Only one of the two is obtained: an ID token when the cloud function is
    # used, otherwise credentials.
    dry_run_id_token = None
    creds = None
    if use_cloud_function:
        dry_run_id_token = id_token()
    else:
        creds = credentials()

    dryrun = DryRunContext(
        use_cloud_function=use_cloud_function,
        id_token=dry_run_id_token,
        credentials=creds,
    )

    return _lookml(
        namespaces,
        glean_apps,
        target_dir,
        dryrun,
        only,
        parallelism,
        metric_hub_repos,
    )
FILE_HEADER =
'\n# *Do not manually modify this file*\n#\n# This file has been generated via https://github.com/mozilla/lookml-generator\n# You can extend this view in the looker-spoke-default project (https://github.com/mozilla/looker-spoke-default)\n\n'
lookml =
<Command lookml>
Generate lookml from namespaces.