generator.lookml
Generate lookml from namespaces.
"""Generate lookml from namespaces."""

import logging
from functools import partial
from multiprocessing.pool import Pool
from pathlib import Path
from typing import Any, Collection, Dict, Iterable, Optional

import click
import lkml
import yaml

from generator.utils import get_file_from_looker_hub

from .dashboards import DASHBOARD_TYPES
from .dryrun import DryRunContext, DryRunError, Errors, credentials, id_token
from .explores import EXPLORE_TYPES
from .metrics_utils import LOOKER_METRIC_HUB_REPO, METRIC_HUB_REPO, MetricsConfigLoader
from .namespaces import _get_glean_apps
from .views import VIEW_TYPES, View, ViewDict
from .views.datagroups import generate_datagroup

# Prepended verbatim to every generated view, explore and dashboard file.
FILE_HEADER = """
# *Do not manually modify this file*
#
# This file has been generated via https://github.com/mozilla/lookml-generator
# You can extend this view in the looker-spoke-default project (https://github.com/mozilla/looker-spoke-default)

"""


def _generate_view(
    out_dir: Path,
    view: View,
    v1_name: Optional[str],
    dryrun,
) -> Optional[Path]:
    """
    Write the ``<view name>.view.lkml`` file for a single view into ``out_dir``.

    Returns the path of the written file, or ``None`` when the view yields an
    empty LookML dict or when the looker-hub fallback fails.

    A ``DryRunError`` is re-raised unless it is a permission error that
    occurred while the cloud function was in use; in that case the existing
    file is fetched through ``get_file_from_looker_hub`` instead.
    """
    logging.info(
        f"Generating lookml for view {view.name} in {view.namespace} of type {view.view_type}"
    )
    path = out_dir / f"{view.name}.view.lkml"

    try:
        lookml = view.to_lookml(v1_name, dryrun)
        if lookml == {}:
            return None

        # lkml.dump may return None, in which case write an empty file
        path.write_text(FILE_HEADER + (lkml.dump(lookml) or ""))
        return path
    except DryRunError as e:
        if e.error == Errors.PERMISSION_DENIED and e.use_cloud_function:
            print(
                f"Permission error dry running {view.name}. Copy existing {path} file from looker-hub."
            )
            try:
                get_file_from_looker_hub(path)
                return path
            except Exception as ex:
                # Deliberately best-effort: a failed fallback only skips this view.
                print(f"Skip generating view for {path}: {ex}")
                return None
        else:
            raise


def _generate_explore(
    out_dir: Path,
    namespace: str,
    explore_name: str,
    explore_info: Any,
    views_dir: Path,
    v1_name: Optional[
        str
    ],  # v1_name for Glean explores: see: https://mozilla.github.io/probe-scraper/#tag/library
) -> Path:
    """
    Write the ``<explore name>.explore.lkml`` file for a single explore.

    The explore class is looked up in ``EXPLORE_TYPES`` via
    ``explore_info["type"]``. The generated file includes every dependent view
    and, when the explore has one, its datagroup. Returns the written path.
    """
    logging.info(f"Generating lookml for explore {explore_name} in {namespace}")
    explore_by_type = EXPLORE_TYPES[explore_info["type"]].from_dict(
        explore_name, explore_info, views_dir
    )

    hidden = explore_info.get("hidden", False)

    datagroup_includes = []
    if datagroup := explore_by_type.get_datagroup():
        datagroup_includes.append(
            f"/looker-hub/{namespace}/datagroups/{datagroup}.datagroup.lkml"
        )

    # Looker validates all included files,
    # so if we're not explicit about files here, validation takes
    # forever as looker re-validates all views for every explore (if we used *).
    view_includes = [
        f"/looker-hub/{namespace}/views/{view}.view.lkml"
        for view in explore_by_type.get_dependent_views()
    ]

    file_lookml = {
        "includes": view_includes + datagroup_includes,
        "explores": explore_by_type.to_lookml(v1_name, hidden),
    }
    path = out_dir / (explore_name + ".explore.lkml")
    # lkml.dump may return None, in which case write an empty file
    path.write_text(FILE_HEADER + (lkml.dump(file_lookml) or ""))
    return path


def _generate_dashboard(
    dash_dir: Path,
    namespace: str,
    dashboard_name: str,
    dashboard_info: Any,
) -> Path:
    """
    Write the ``<dashboard name>.dashboard.lookml`` file for a single dashboard.

    The dashboard class is looked up in ``DASHBOARD_TYPES`` via
    ``dashboard_info["type"]``. Returns the written path.
    """
    logging.info(f"Generating lookml for dashboard {dashboard_name} in {namespace}")
    dashboard = DASHBOARD_TYPES[dashboard_info["type"]].from_dict(
        namespace, dashboard_name, dashboard_info
    )

    dashboard_lookml = dashboard.to_lookml()
    dash_path = dash_dir / f"{dashboard_name}.dashboard.lookml"
    dash_path.write_text(FILE_HEADER + dashboard_lookml)
    return dash_path


def _get_views_from_dict(views: Dict[str, ViewDict], namespace: str) -> Iterable[View]:
    """Yield a view object for each entry of ``views``, keyed by its ``"type"``."""
    for view_name, view_info in views.items():
        yield VIEW_TYPES[view_info["type"]].from_dict(  # type: ignore
            namespace, view_name, view_info
        )


def _glean_apps_to_v1_map(glean_apps):
    """Map each Glean app's ``"name"`` to its ``"v1_name"``."""
    return {d["name"]: d["v1_name"] for d in glean_apps}


def _run_generation(func):
    """
    Run the partially applied generate function.

    For parallel execution.
    """
    return func()


def _update_metric_repos(metric_hub_repos):
    """Update metric hub repos when initializing the processes."""
    MetricsConfigLoader.update_repos(metric_hub_repos)


def _lookml(
    namespaces,
    glean_apps,
    target_dir,
    dryrun,
    namespace_filter: Optional[Collection[str]] = None,
    parallelism: int = 8,
    metric_hub_repos: Optional[Iterable[str]] = None,
):
    """
    Generate views, datagroups, explores and dashboards for all namespaces.

    ``namespaces`` is a readable file object holding the namespaces YAML; its
    content is also copied to ``<target_dir>/namespaces.yaml``. When
    ``namespace_filter`` is empty or ``None`` every namespace is generated,
    otherwise only the listed ones. With ``parallelism == 1`` everything runs
    in the current process; any other value uses a process pool of that size
    whose workers are initialized with ``metric_hub_repos``.
    """
    # None defaults avoid sharing one mutable list object between calls.
    if namespace_filter is None:
        namespace_filter = []
    if metric_hub_repos is None:
        metric_hub_repos = []

    namespaces_content = namespaces.read()
    # safe_load returns None for an empty document; treat that as "no namespaces".
    _namespaces = yaml.safe_load(namespaces_content) or {}
    target = Path(target_dir)
    target.mkdir(parents=True, exist_ok=True)

    # Write namespaces file to target directory, for use
    # by the Glean Dictionary and other tools
    (target / "namespaces.yaml").write_text(namespaces_content)

    generate_views = []
    generate_datagroups = []
    generate_explores = []
    generate_dashboards = []
    v1_mapping = _glean_apps_to_v1_map(glean_apps)

    for namespace, lookml_objects in _namespaces.items():
        if namespace_filter and namespace not in namespace_filter:
            continue

        view_dir = target / namespace / "views"
        view_dir.mkdir(parents=True, exist_ok=True)
        views = list(_get_views_from_dict(lookml_objects.get("views", {}), namespace))

        v1_name: Optional[str] = v1_mapping.get(namespace)
        for view in views:
            generate_views.append(
                partial(_generate_view, view_dir, view, v1_name, dryrun)
            )
            generate_datagroups.append(
                partial(generate_datagroup, view, target, namespace, dryrun)
            )

        explore_dir = target / namespace / "explores"
        explore_dir.mkdir(parents=True, exist_ok=True)
        explores = lookml_objects.get("explores", {})
        generate_explores += [
            partial(
                _generate_explore,
                explore_dir,
                namespace,
                explore_name,
                explore,
                view_dir,
                v1_name,
            )
            for explore_name, explore in explores.items()
        ]

        dashboard_dir = target / namespace / "dashboards"
        dashboard_dir.mkdir(parents=True, exist_ok=True)
        dashboards = lookml_objects.get("dashboards", {})
        generate_dashboards += [
            partial(
                _generate_dashboard,
                dashboard_dir,
                namespace,
                dashboard_name,
                dashboard,
            )
            for dashboard_name, dashboard in dashboards.items()
        ]

    if parallelism == 1:
        # run without using multiprocessing
        # this is needed for the unit tests to work as mocks are not shared across processes
        logging.info(" Generating views")
        for generate_view_func in generate_views:
            generate_view_func()
        logging.info(" Generating datagroups")
        for generate_datagroup_func in generate_datagroups:
            generate_datagroup_func()
        logging.info(" Generating explores")
        for generate_explore_func in generate_explores:
            generate_explore_func()
        logging.info(" Generating dashboards")
        for generate_dashboard_func in generate_dashboards:
            generate_dashboard_func()
    else:
        with Pool(
            parallelism,
            initializer=_update_metric_repos,
            initargs=(metric_hub_repos,),
        ) as pool:
            # Each stage finishes before the next starts because pool.map blocks.
            logging.info(" Generating views and datagroups")
            pool.map(_run_generation, generate_views + generate_datagroups)
            logging.info(" Generating explores")
            pool.map(_run_generation, generate_explores)
            logging.info(" Generating dashboards")
            pool.map(_run_generation, generate_dashboards)


@click.command(help=__doc__)
@click.option(
    "--namespaces",
    default="namespaces.yaml",
    type=click.File(),
    help="Path to a yaml namespaces file",
)
@click.option(
    "--app-listings-uri",
    default="https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings",
    help="URI for probeinfo service v2 glean app listings",
)
@click.option(
    "--target-dir",
    default="looker-hub/",
    type=click.Path(),
    help="Path to a directory where lookml will be written",
)
@click.option(
    # Underscore alias mirrors --use_cloud_function/--use-cloud-function;
    # the option name was previously declared twice with the same spelling.
    "--metric_hub_repos",
    "--metric-hub-repos",
    multiple=True,
    default=[METRIC_HUB_REPO, LOOKER_METRIC_HUB_REPO],
    help="Repos to load metric configs from.",
)
@click.option(
    "--only",
    multiple=True,
    default=[],
    help="List of namespace names to generate lookml for.",
)
@click.option(
    "--use_cloud_function",
    "--use-cloud-function",
    help="Use the Cloud Function to run dry runs during LookML generation.",
    type=bool,
)
@click.option(
    "--parallelism",
    "-p",
    default=8,
    type=int,
    help="Number of processes to use for LookML generation",
)
def lookml(
    namespaces,
    app_listings_uri,
    target_dir,
    metric_hub_repos,
    only,
    use_cloud_function,
    parallelism,
):
    """Generate lookml from namespaces."""
    if metric_hub_repos:
        MetricsConfigLoader.update_repos(metric_hub_repos)
    glean_apps = _get_glean_apps(app_listings_uri)

    # The dry run context gets an ID token when the cloud function is used,
    # and credentials otherwise.
    dry_run_id_token = None
    creds = None
    if use_cloud_function:
        dry_run_id_token = id_token()
    else:
        creds = credentials()

    dryrun = DryRunContext(
        use_cloud_function=use_cloud_function,
        id_token=dry_run_id_token,
        credentials=creds,
    )

    return _lookml(
        namespaces,
        glean_apps,
        target_dir,
        dryrun,
        only,
        parallelism,
        metric_hub_repos,
    )
FILE_HEADER =
'\n# *Do not manually modify this file*\n#\n# This file has been generated via https://github.com/mozilla/lookml-generator\n# You can extend this view in the looker-spoke-default project (https://github.com/mozilla/looker-spoke-default)\n\n'
lookml =
<Command lookml>
Generate lookml from namespaces.