generator.lookml

Generate lookml from namespaces.

  1"""Generate lookml from namespaces."""
  2
  3import logging
  4from functools import partial
  5from multiprocessing.pool import Pool
  6from pathlib import Path
  7from typing import Any, Dict, Iterable, Optional
  8
  9import click
 10import lkml
 11import yaml
 12
 13from generator.utils import get_file_from_looker_hub
 14
 15from .dashboards import DASHBOARD_TYPES
 16from .dryrun import DryRunContext, DryRunError, Errors, credentials, id_token
 17from .explores import EXPLORE_TYPES
 18from .metrics_utils import LOOKER_METRIC_HUB_REPO, METRIC_HUB_REPO, MetricsConfigLoader
 19from .namespaces import _get_glean_apps
 20from .views import VIEW_TYPES, View, ViewDict
 21from .views.datagroups import generate_datagroup
 22
# Banner prepended to every generated file (views, explores and dashboards)
# by the _generate_* helpers in this module. Each line of the banner starts
# with "#" and the text warns readers not to edit the generated output by hand.
FILE_HEADER = """
# *Do not manually modify this file*
#
# This file has been generated via https://github.com/mozilla/lookml-generator
# You can extend this view in the looker-spoke-default project (https://github.com/mozilla/looker-spoke-default)

"""
 30
 31
 32def _generate_view(
 33    out_dir: Path,
 34    view: View,
 35    v1_name: Optional[str],
 36    dryrun,
 37) -> Optional[Path]:
 38    logging.info(
 39        f"Generating lookml for view {view.name} in {view.namespace} of type {view.view_type}"
 40    )
 41    path = out_dir / f"{view.name}.view.lkml"
 42
 43    try:
 44        lookml = view.to_lookml(v1_name, dryrun)
 45        if lookml == {}:
 46            return None
 47
 48        # lkml.dump may return None, in which case write an empty file
 49        path.write_text(FILE_HEADER + (lkml.dump(lookml) or ""))
 50        return path
 51    except DryRunError as e:
 52        if e.error == Errors.PERMISSION_DENIED and e.use_cloud_function:
 53            print(
 54                f"Permission error dry running {view.name}. Copy existing {path} file from looker-hub."
 55            )
 56            try:
 57                get_file_from_looker_hub(path)
 58                return path
 59            except Exception as ex:
 60                print(f"Skip generating view for {path}: {ex}")
 61                return None
 62        else:
 63            raise
 64
 65
 66def _generate_explore(
 67    out_dir: Path,
 68    namespace: str,
 69    explore_name: str,
 70    explore_info: Any,
 71    views_dir: Path,
 72    v1_name: Optional[
 73        str
 74    ],  # v1_name for Glean explores: see: https://mozilla.github.io/probe-scraper/#tag/library
 75) -> Path:
 76    logging.info(f"Generating lookml for explore {explore_name} in {namespace}")
 77    explore_by_type = EXPLORE_TYPES[explore_info["type"]].from_dict(
 78        explore_name, explore_info, views_dir
 79    )
 80
 81    hidden = explore_info.get("hidden", False)
 82
 83    file_lookml = {
 84        # Looker validates all included files,
 85        # so if we're not explicit about files here, validation takes
 86        # forever as looker re-validates all views for every explore (if we used *).
 87        "includes": [
 88            f"/looker-hub/{namespace}/views/{view}.view.lkml"
 89            for view in explore_by_type.get_dependent_views()
 90        ],
 91        "explores": explore_by_type.to_lookml(v1_name, hidden),
 92    }
 93    path = out_dir / (explore_name + ".explore.lkml")
 94    # lkml.dump may return None, in which case write an empty file
 95    path.write_text(FILE_HEADER + (lkml.dump(file_lookml) or ""))
 96    return path
 97
 98
 99def _generate_dashboard(
100    dash_dir: Path,
101    namespace: str,
102    dashboard_name: str,
103    dashboard_info: Any,
104):
105    logging.info(f"Generating lookml for dashboard {dashboard_name} in {namespace}")
106    dashboard = DASHBOARD_TYPES[dashboard_info["type"]].from_dict(
107        namespace, dashboard_name, dashboard_info
108    )
109
110    dashboard_lookml = dashboard.to_lookml()
111    dash_path = dash_dir / f"{dashboard_name}.dashboard.lookml"
112    dash_path.write_text(FILE_HEADER + dashboard_lookml)
113    return dash_path
114
115
def _get_views_from_dict(views: Dict[str, ViewDict], namespace: str) -> Iterable[View]:
    """Lazily yield one view object per entry of ``views``.

    The view class is looked up in ``VIEW_TYPES`` using the "type" entry of
    each view definition.
    """
    yield from (
        VIEW_TYPES[definition["type"]].from_dict(  # type: ignore
            namespace, name, definition
        )
        for name, definition in views.items()
    )
121
122
123def _glean_apps_to_v1_map(glean_apps):
124    return {d["name"]: d["v1_name"] for d in glean_apps}
125
126
127def _run_generation(func):
128    """
129    Run the partially applied generate function.
130
131    For parallel execution.
132    """
133    return func()
134
135
def _update_metric_repos(metric_hub_repos):
    """Point the metrics config loader at the given repos.

    Used as the initializer of the worker processes.
    """
    update_repos = MetricsConfigLoader.update_repos
    update_repos(metric_hub_repos)
139
140
def _lookml(
    namespaces,
    glean_apps,
    target_dir,
    dryrun,
    namespace_filter=None,
    parallelism: int = 8,
    metric_hub_repos=None,
):
    """Generate views, datagroups, explores and dashboards for all namespaces.

    Args:
        namespaces: Readable file-like object holding the namespaces YAML.
        glean_apps: Iterable of Glean app entries with "name" and "v1_name".
        target_dir: Directory the generated files are written to; it is
            created when missing.
        dryrun: Dry run context handed to the view and datagroup generators.
        namespace_filter: Names of the namespaces to generate. When empty or
            omitted, every namespace is generated.
        parallelism: Number of worker processes. With ``1`` everything runs
            in the current process without multiprocessing.
        metric_hub_repos: Repos handed to each worker process initializer.
    """
    # None replaces the former mutable ``[]`` defaults; omitted arguments
    # still behave like empty lists.
    namespace_filter = [] if namespace_filter is None else namespace_filter
    metric_hub_repos = [] if metric_hub_repos is None else metric_hub_repos

    namespaces_content = namespaces.read()
    _namespaces = yaml.safe_load(namespaces_content)
    target = Path(target_dir)
    target.mkdir(parents=True, exist_ok=True)

    # Write namespaces file to target directory, for use
    # by the Glean Dictionary and other tools
    with open(target / "namespaces.yaml", "w") as target_namespaces_file:
        target_namespaces_file.write(namespaces_content)

    generate_views = []
    generate_datagroups = []
    generate_explores = []
    generate_dashboards = []
    v1_mapping = _glean_apps_to_v1_map(glean_apps)

    for namespace, lookml_objects in _namespaces.items():
        # An empty filter selects every namespace.
        if namespace_filter and namespace not in namespace_filter:
            continue

        view_dir = target / namespace / "views"
        view_dir.mkdir(parents=True, exist_ok=True)
        views = list(_get_views_from_dict(lookml_objects.get("views", {}), namespace))

        v1_name: Optional[str] = v1_mapping.get(namespace)
        for view in views:
            generate_views.append(
                partial(_generate_view, view_dir, view, v1_name, dryrun)
            )
            generate_datagroups.append(
                partial(generate_datagroup, view, target, namespace, dryrun)
            )

        explore_dir = target / namespace / "explores"
        explore_dir.mkdir(parents=True, exist_ok=True)
        explores = lookml_objects.get("explores", {})
        generate_explores += [
            partial(
                _generate_explore,
                explore_dir,
                namespace,
                explore_name,
                explore,
                view_dir,
                v1_name,
            )
            for explore_name, explore in explores.items()
        ]

        dashboard_dir = target / namespace / "dashboards"
        dashboard_dir.mkdir(parents=True, exist_ok=True)
        dashboards = lookml_objects.get("dashboards", {})
        generate_dashboards += [
            partial(
                _generate_dashboard,
                dashboard_dir,
                namespace,
                dashboard_name,
                dashboard,
            )
            for dashboard_name, dashboard in dashboards.items()
        ]

    if parallelism == 1:
        # run without using multiprocessing
        # this is needed for the unit tests to work as mocks are not shared across processes
        logging.info("  Generating views")
        for generate_view_func in generate_views:
            generate_view_func()
        logging.info("  Generating datagroups")
        for generate_datagroup_func in generate_datagroups:
            generate_datagroup_func()
        logging.info("  Generating explores")
        for generate_explore_func in generate_explores:
            generate_explore_func()
        logging.info("  Generating dashboards")
        for generate_dashboard_func in generate_dashboards:
            generate_dashboard_func()
    else:
        # Every worker process updates its metric hub repos on start-up.
        with Pool(
            parallelism, initializer=partial(_update_metric_repos, metric_hub_repos)
        ) as pool:
            # Each stage finishes before the next one starts: views and
            # datagroups, then explores, then dashboards.
            logging.info("  Generating views and datagroups")
            pool.map(_run_generation, generate_views + generate_datagroups)
            logging.info("  Generating explores")
            pool.map(_run_generation, generate_explores)
            logging.info("  Generating dashboards")
            pool.map(_run_generation, generate_dashboards)
256
257
@click.command(help=__doc__)
@click.option(
    "--namespaces",
    default="namespaces.yaml",
    type=click.File(),
    help="Path to a yaml namespaces file",
)
@click.option(
    "--app-listings-uri",
    default="https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings",
    help="URI for probeinfo service v2 glean app listings",
)
@click.option(
    "--target-dir",
    default="looker-hub/",
    type=click.Path(),
    help="Path to a directory where lookml will be written",
)
@click.option(
    # The flag used to be declared twice with the identical spelling; one
    # declaration is enough and keeps the same option and parameter name.
    "--metric-hub-repos",
    multiple=True,
    default=[METRIC_HUB_REPO, LOOKER_METRIC_HUB_REPO],
    help="Repos to load metric configs from.",
)
@click.option(
    "--only",
    multiple=True,
    default=[],
    help="List of namespace names to generate lookml for.",
)
@click.option(
    "--use_cloud_function",
    "--use-cloud-function",
    help="Use the Cloud Function to run dry runs during LookML generation.",
    type=bool,
)
@click.option(
    "--parallelism",
    "-p",
    default=8,
    type=int,
    help="Number of processes to use for LookML generation",
)
def lookml(
    namespaces,
    app_listings_uri,
    target_dir,
    metric_hub_repos,
    only,
    use_cloud_function,
    parallelism,
):
    """Generate lookml from namespaces.

    Command line entry point: updates the metric hub repos when any are
    given, loads the Glean app listings, builds the dry run context and
    delegates the actual generation to ``_lookml``.
    """
    if metric_hub_repos:
        MetricsConfigLoader.update_repos(metric_hub_repos)
    glean_apps = _get_glean_apps(app_listings_uri)

    # The cloud function dry run uses an ID token; otherwise credentials
    # are used. Only one of the two is populated.
    dry_run_id_token = None
    creds = None
    if use_cloud_function:
        dry_run_id_token = id_token()
    else:
        creds = credentials()

    dryrun = DryRunContext(
        use_cloud_function=use_cloud_function,
        id_token=dry_run_id_token,
        credentials=creds,
    )

    return _lookml(
        namespaces,
        glean_apps,
        target_dir,
        dryrun,
        only,
        parallelism,
        metric_hub_repos,
    )
FILE_HEADER = '\n# *Do not manually modify this file*\n#\n# This file has been generated via https://github.com/mozilla/lookml-generator\n# You can extend this view in the looker-spoke-default project (https://github.com/mozilla/looker-spoke-default)\n\n'
lookml = <Command lookml>

Generate lookml from namespaces.