generator.lookml

Generate lookml from namespaces.

  1"""Generate lookml from namespaces."""
  2
  3import logging
  4from functools import partial
  5from multiprocessing.pool import Pool
  6from pathlib import Path
  7from typing import Any, Dict, Iterable, Optional
  8
  9import click
 10import lkml
 11import yaml
 12
 13from generator.utils import get_file_from_looker_hub
 14
 15from .dashboards import DASHBOARD_TYPES
 16from .dryrun import DryRunContext, DryRunError, Errors, credentials, id_token
 17from .explores import EXPLORE_TYPES
 18from .metrics_utils import LOOKER_METRIC_HUB_REPO, METRIC_HUB_REPO, MetricsConfigLoader
 19from .namespaces import _get_glean_apps
 20from .views import VIEW_TYPES, View, ViewDict
 21from .views.datagroups import generate_datagroup
 22
 23FILE_HEADER = """
 24# *Do not manually modify this file*
 25#
 26# This file has been generated via https://github.com/mozilla/lookml-generator
 27# You can extend this view in the looker-spoke-default project (https://github.com/mozilla/looker-spoke-default)
 28
 29"""
 30
 31
 32def _generate_view(
 33    out_dir: Path,
 34    view: View,
 35    v1_name: Optional[str],
 36    dryrun,
 37) -> Optional[Path]:
 38    logging.info(
 39        f"Generating lookml for view {view.name} in {view.namespace} of type {view.view_type}"
 40    )
 41    path = out_dir / f"{view.name}.view.lkml"
 42
 43    try:
 44        lookml = view.to_lookml(v1_name, dryrun)
 45        if lookml == {}:
 46            return None
 47
 48        # lkml.dump may return None, in which case write an empty file
 49        path.write_text(FILE_HEADER + (lkml.dump(lookml) or ""))
 50        return path
 51    except DryRunError as e:
 52        if e.error == Errors.PERMISSION_DENIED and e.use_cloud_function:
 53            print(
 54                f"Permission error dry running {view.name}. Copy existing {path} file from looker-hub."
 55            )
 56            try:
 57                get_file_from_looker_hub(path)
 58                return path
 59            except Exception as ex:
 60                print(f"Skip generating view for {path}: {ex}")
 61                return None
 62        else:
 63            raise
 64
 65
 66def _generate_explore(
 67    out_dir: Path,
 68    namespace: str,
 69    explore_name: str,
 70    explore_info: Any,
 71    views_dir: Path,
 72    v1_name: Optional[
 73        str
 74    ],  # v1_name for Glean explores: see: https://mozilla.github.io/probe-scraper/#tag/library
 75) -> Path:
 76    logging.info(f"Generating lookml for explore {explore_name} in {namespace}")
 77    explore_by_type = EXPLORE_TYPES[explore_info["type"]].from_dict(
 78        explore_name, explore_info, views_dir
 79    )
 80
 81    hidden = explore_info.get("hidden", False)
 82
 83    datagroup_includes = []
 84    if datagroup := explore_by_type.get_datagroup():
 85        datagroup_includes += [
 86            f"/looker-hub/{namespace}/datagroups/{datagroup}.datagroup.lkml"
 87        ]
 88
 89    file_lookml = {
 90        # Looker validates all included files,
 91        # so if we're not explicit about files here, validation takes
 92        # forever as looker re-validates all views for every explore (if we used *).
 93        "includes": [
 94            f"/looker-hub/{namespace}/views/{view}.view.lkml"
 95            for view in explore_by_type.get_dependent_views()
 96        ]
 97        + datagroup_includes,
 98        "explores": explore_by_type.to_lookml(v1_name, hidden),
 99    }
100    path = out_dir / (explore_name + ".explore.lkml")
101    # lkml.dump may return None, in which case write an empty file
102    path.write_text(FILE_HEADER + (lkml.dump(file_lookml) or ""))
103    return path
104
105
def _generate_dashboard(
    dash_dir: Path,
    namespace: str,
    dashboard_name: str,
    dashboard_info: Any,
):
    """Write the ``<dashboard_name>.dashboard.lookml`` file for one dashboard.

    The dashboard class is selected from ``DASHBOARD_TYPES`` by
    ``dashboard_info["type"]``. Returns the path of the written file.
    """
    logging.info(f"Generating lookml for dashboard {dashboard_name} in {namespace}")
    dashboard_cls = DASHBOARD_TYPES[dashboard_info["type"]]
    dashboard = dashboard_cls.from_dict(namespace, dashboard_name, dashboard_info)

    rendered = dashboard.to_lookml()
    dash_path = dash_dir / f"{dashboard_name}.dashboard.lookml"
    dash_path.write_text(FILE_HEADER + rendered)
    return dash_path
121
122
def _get_views_from_dict(views: Dict[str, ViewDict], namespace: str) -> Iterable[View]:
    """Lazily yield one view object per entry of ``views``.

    Each entry's ``"type"`` key selects the class in ``VIEW_TYPES`` whose
    ``from_dict`` builds the view for ``namespace``.
    """
    for name, info in views.items():
        view_cls = VIEW_TYPES[info["type"]]  # type: ignore
        yield view_cls.from_dict(namespace, name, info)  # type: ignore
128
129
130def _glean_apps_to_v1_map(glean_apps):
131    return {d["name"]: d["v1_name"] for d in glean_apps}
132
133
134def _run_generation(func):
135    """
136    Run the partially applied generate function.
137
138    For parallel execution.
139    """
140    return func()
141
142
def _update_metric_repos(metric_hub_repos):
    """Forward ``metric_hub_repos`` to ``MetricsConfigLoader.update_repos``.

    Used as the initializer of the multiprocessing pool, so it runs once in
    every worker process.
    """
    update_repos = MetricsConfigLoader.update_repos
    update_repos(metric_hub_repos)
146
147
def _lookml(
    namespaces,
    glean_apps,
    target_dir,
    dryrun,
    namespace_filter=None,
    parallelism: int = 8,
    metric_hub_repos=None,
):
    """Generate views, datagroups, explores and dashboards for all namespaces.

    Args:
        namespaces: Readable file-like object containing the namespaces YAML.
        glean_apps: Iterable of mappings with ``"name"`` and ``"v1_name"``
            keys, used to look up the v1 name of each namespace.
        target_dir: Directory the LookML is written to; created if missing.
        dryrun: Dry run context handed to view and datagroup generation.
        namespace_filter: Namespace names to generate. ``None`` or an empty
            collection means "generate every namespace".
        parallelism: Number of worker processes. ``1`` runs everything in the
            current process without multiprocessing.
        metric_hub_repos: Repos passed to ``_update_metric_repos`` when each
            worker process starts. ``None`` is treated as an empty list.
    """
    # None defaults replace the former mutable ``[]`` defaults; normalise them
    # here so the rest of the function can rely on real collections.
    if namespace_filter is None:
        namespace_filter = []
    if metric_hub_repos is None:
        metric_hub_repos = []

    namespaces_content = namespaces.read()
    # An empty YAML document loads as None; treat it as "no namespaces".
    _namespaces = yaml.safe_load(namespaces_content) or {}
    target = Path(target_dir)
    target.mkdir(parents=True, exist_ok=True)

    # Write namespaces file to target directory, for use
    # by the Glean Dictionary and other tools
    with open(target / "namespaces.yaml", "w") as target_namespaces_file:
        target_namespaces_file.write(namespaces_content)

    generate_views = []
    generate_datagroups = []
    generate_explores = []
    generate_dashboards = []
    v1_mapping = _glean_apps_to_v1_map(glean_apps)

    for namespace, lookml_objects in _namespaces.items():
        if namespace_filter and namespace not in namespace_filter:
            continue

        view_dir = target / namespace / "views"
        view_dir.mkdir(parents=True, exist_ok=True)
        views = list(_get_views_from_dict(lookml_objects.get("views", {}), namespace))

        v1_name: Optional[str] = v1_mapping.get(namespace)
        for view in views:
            generate_views.append(
                partial(_generate_view, view_dir, view, v1_name, dryrun)
            )
            generate_datagroups.append(
                partial(generate_datagroup, view, target, namespace, dryrun)
            )

        explore_dir = target / namespace / "explores"
        explore_dir.mkdir(parents=True, exist_ok=True)
        explores = lookml_objects.get("explores", {})
        generate_explores += [
            partial(
                _generate_explore,
                explore_dir,
                namespace,
                explore_name,
                explore,
                view_dir,
                v1_name,
            )
            for explore_name, explore in explores.items()
        ]

        dashboard_dir = target / namespace / "dashboards"
        dashboard_dir.mkdir(parents=True, exist_ok=True)
        dashboards = lookml_objects.get("dashboards", {})
        generate_dashboards += [
            partial(
                _generate_dashboard,
                dashboard_dir,
                namespace,
                dashboard_name,
                dashboard,
            )
            for dashboard_name, dashboard in dashboards.items()
        ]

    if parallelism == 1:
        # run without using multiprocessing
        # this is needed for the unit tests to work as mocks are not shared across processes
        logging.info("  Generating views")
        for generate_view_func in generate_views:
            generate_view_func()
        logging.info("  Generating datagroups")
        for generate_datagroup_func in generate_datagroups:
            generate_datagroup_func()
        logging.info("  Generating explores")
        for generate_explore_func in generate_explores:
            generate_explore_func()
        logging.info("  Generating dashboards")
        for generate_dashboard_func in generate_dashboards:
            generate_dashboard_func()
    else:
        with Pool(
            parallelism, initializer=partial(_update_metric_repos, metric_hub_repos)
        ) as pool:
            # Each stage is a separate blocking map, so explores only start
            # after all views and datagroups have been generated.
            logging.info("  Generating views and datagroups")
            pool.map(_run_generation, generate_views + generate_datagroups)
            logging.info("  Generating explores")
            pool.map(_run_generation, generate_explores)
            logging.info("  Generating dashboards")
            pool.map(_run_generation, generate_dashboards)
263
264
@click.command(help=__doc__)
@click.option(
    "--namespaces",
    default="namespaces.yaml",
    type=click.File(),
    help="Path to a yaml namespaces file",
)
@click.option(
    "--app-listings-uri",
    default="https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings",
    help="URI for probeinfo service v2 glean app listings",
)
@click.option(
    "--target-dir",
    default="looker-hub/",
    type=click.Path(),
    help="Path to a directory where lookml will be written",
)
@click.option(
    # The flag used to be listed twice with the identical dashed spelling;
    # offer the underscore spelling as the alias instead, mirroring
    # --use_cloud_function / --use-cloud-function below.
    "--metric_hub_repos",
    "--metric-hub-repos",
    multiple=True,
    default=[METRIC_HUB_REPO, LOOKER_METRIC_HUB_REPO],
    help="Repos to load metric configs from.",
)
@click.option(
    "--only",
    multiple=True,
    default=[],
    help="List of namespace names to generate lookml for.",
)
@click.option(
    "--use_cloud_function",
    "--use-cloud-function",
    help="Use the Cloud Function to run dry runs during LookML generation.",
    type=bool,
)
@click.option(
    "--parallelism",
    "-p",
    default=8,
    type=int,
    help="Number of processes to use for LookML generation",
)
def lookml(
    namespaces,
    app_listings_uri,
    target_dir,
    metric_hub_repos,
    only,
    use_cloud_function,
    parallelism,
):
    """Generate lookml from namespaces.

    Command-line entry point: updates the metric config repos when any are
    given, fetches the Glean app listings, builds the dry run context and
    delegates the actual generation to ``_lookml``.
    """
    if metric_hub_repos:
        MetricsConfigLoader.update_repos(metric_hub_repos)
    glean_apps = _get_glean_apps(app_listings_uri)

    # The cloud function path uses an ID token; otherwise credentials are
    # obtained. Only one of the two is populated.
    dry_run_id_token = None
    creds = None
    if use_cloud_function:
        dry_run_id_token = id_token()
    else:
        creds = credentials()

    dryrun = DryRunContext(
        use_cloud_function=use_cloud_function,
        id_token=dry_run_id_token,
        credentials=creds,
    )

    return _lookml(
        namespaces,
        glean_apps,
        target_dir,
        dryrun,
        only,
        parallelism,
        metric_hub_repos,
    )
FILE_HEADER = '\n# *Do not manually modify this file*\n#\n# This file has been generated via https://github.com/mozilla/lookml-generator\n# You can extend this view in the looker-spoke-default project (https://github.com/mozilla/looker-spoke-default)\n\n'
lookml = <Command lookml>

Generate lookml from namespaces.