generator.views.ping_view

Class to describe a Ping View.

  1"""Class to describe a Ping View."""
  2
  3from __future__ import annotations
  4
  5from collections import defaultdict
  6from typing import Any, Dict, Iterator, List, Optional, Union
  7
  8from . import lookml_utils
  9from .view import OMIT_VIEWS, View, ViewDict
 10
 11
 12class PingView(View):
 13    """A view on a ping table."""
 14
 15    type: str = "ping_view"
 16    allow_glean: bool = False
 17
 18    def __init__(self, namespace: str, name: str, tables: List[Dict[str, Any]]):
 19        """Create instance of a PingView."""
 20        super().__init__(namespace, name, self.__class__.type, tables)
 21
 22    @classmethod
 23    def from_db_views(
 24        klass,
 25        namespace: str,
 26        is_glean: bool,
 27        channels: List[Dict[str, str]],
 28        db_views: dict,
 29    ) -> Iterator[PingView]:
 30        """Get Looker views for a namespace."""
 31        if (klass.allow_glean and not is_glean) or (not klass.allow_glean and is_glean):
 32            return
 33
 34        view_tables: Dict[str, Dict[str, Dict[str, str]]] = defaultdict(dict)
 35        for channel in channels:
 36            dataset = channel["dataset"]
 37
 38            for view_id, references in db_views[dataset].items():
 39                if view_id in OMIT_VIEWS:
 40                    continue
 41
 42                table_id = f"mozdata.{dataset}.{view_id}"
 43                table: Dict[str, str] = {"table": table_id}
 44                if channel.get("channel") is not None:
 45                    table["channel"] = channel["channel"]
 46
 47                # Only include those that select from a single ping source table
 48                # or union together multiple ping source tables of the same name.
 49                reference_table_names = set(r[-1] for r in references)
 50                reference_dataset_names = set(r[-2] for r in references)
 51                if (
 52                    len(reference_table_names) != 1
 53                    or channel["source_dataset"] not in reference_dataset_names
 54                ):
 55                    continue
 56
 57                view_tables[view_id][table_id] = table
 58
 59        for view_id, tables_by_id in view_tables.items():
 60            yield klass(namespace, view_id, list(tables_by_id.values()))
 61
 62    @classmethod
 63    def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> PingView:
 64        """Get a view from a name and dict definition."""
 65        return klass(namespace, name, _dict["tables"])
 66
 67    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 68        """Generate LookML for this view."""
 69        view_defn: Dict[str, Any] = {"name": self.name}
 70
 71        # use schema for the table where channel=="release" or the first one
 72        table = next(
 73            (table for table in self.tables if table.get("channel") == "release"),
 74            self.tables[0],
 75        )["table"]
 76
 77        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
 78
 79        # set document id field as a primary key for joins
 80        view_defn["dimensions"] = [
 81            d if d["name"] != "document_id" else dict(**d, primary_key="yes")
 82            for d in dimensions
 83            if not lookml_utils._is_dimension_group(d)
 84        ]
 85        view_defn["dimension_groups"] = [
 86            d for d in dimensions if lookml_utils._is_dimension_group(d)
 87        ]
 88
 89        # add measures
 90        view_defn["measures"] = self.get_measures(dimensions, table, v1_name)
 91
 92        [project, dataset, table_id] = table.split(".")
 93        table_schema = dryrun.create(
 94            project=project,
 95            dataset=dataset,
 96            table=table_id,
 97        ).get_table_schema()
 98        nested_views = lookml_utils._generate_nested_dimension_views(
 99            table_schema, self.name
100        )
101
102        # Round-tripping through a dict to get an ordered deduped list.
103        suggestions = list(
104            dict.fromkeys(
105                _table["channel"] for _table in self.tables if "channel" in _table
106            )
107        )
108
109        if len(suggestions) > 1:
110            view_defn["filters"] = [
111                {
112                    "name": "channel",
113                    "type": "string",
114                    "description": "Filter by the app's channel",
115                    "sql": "{% condition %} ${TABLE}.normalized_channel {% endcondition %}",
116                    "default_value": suggestions[0],
117                    "suggestions": suggestions,
118                }
119            ]
120
121        view_defn["sql_table_name"] = f"`{table}`"
122
123        return {"views": [view_defn] + nested_views}
124
125    def get_dimensions(
126        self, table, v1_name: Optional[str], dryrun
127    ) -> List[Dict[str, Any]]:
128        """Get the set of dimensions for this view."""
129        # add dimensions and dimension groups
130        return lookml_utils._generate_dimensions(table, dryrun=dryrun)
131
132    def get_measures(
133        self, dimensions: List[dict], table: str, v1_name: Optional[str]
134    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
135        """Generate measures from a list of dimensions.
136
137        When no dimension-specific measures are found, return a single "count" measure.
138
139        Raise ClickException if dimensions result in duplicate measures.
140        """
141        # Iterate through each of the dimensions and accumulate any measures
142        # that we want to include in the view. We pull out the client id first
143        # since we'll use it to calculate per-measure client counts.
144        measures: List[Dict[str, Union[str, List[Dict[str, str]]]]] = []
145
146        client_id_field = self.get_client_id(dimensions, table)
147        if client_id_field is not None:
148            measures.append(
149                {
150                    "name": "clients",
151                    "type": "count_distinct",
152                    "sql": f"${{{client_id_field}}}",
153                }
154            )
155
156        for dimension in dimensions:
157            dimension_name = dimension["name"]
158            if dimension_name == "document_id":
159                measures += [{"name": "ping_count", "type": "count"}]
160
161        return measures
class PingView(generator.views.view.View):
 13class PingView(View):
 14    """A view on a ping table."""
 15
 16    type: str = "ping_view"
 17    allow_glean: bool = False
 18
 19    def __init__(self, namespace: str, name: str, tables: List[Dict[str, Any]]):
 20        """Create instance of a PingView."""
 21        super().__init__(namespace, name, self.__class__.type, tables)
 22
 23    @classmethod
 24    def from_db_views(
 25        klass,
 26        namespace: str,
 27        is_glean: bool,
 28        channels: List[Dict[str, str]],
 29        db_views: dict,
 30    ) -> Iterator[PingView]:
 31        """Get Looker views for a namespace."""
 32        if (klass.allow_glean and not is_glean) or (not klass.allow_glean and is_glean):
 33            return
 34
 35        view_tables: Dict[str, Dict[str, Dict[str, str]]] = defaultdict(dict)
 36        for channel in channels:
 37            dataset = channel["dataset"]
 38
 39            for view_id, references in db_views[dataset].items():
 40                if view_id in OMIT_VIEWS:
 41                    continue
 42
 43                table_id = f"mozdata.{dataset}.{view_id}"
 44                table: Dict[str, str] = {"table": table_id}
 45                if channel.get("channel") is not None:
 46                    table["channel"] = channel["channel"]
 47
 48                # Only include those that select from a single ping source table
 49                # or union together multiple ping source tables of the same name.
 50                reference_table_names = set(r[-1] for r in references)
 51                reference_dataset_names = set(r[-2] for r in references)
 52                if (
 53                    len(reference_table_names) != 1
 54                    or channel["source_dataset"] not in reference_dataset_names
 55                ):
 56                    continue
 57
 58                view_tables[view_id][table_id] = table
 59
 60        for view_id, tables_by_id in view_tables.items():
 61            yield klass(namespace, view_id, list(tables_by_id.values()))
 62
 63    @classmethod
 64    def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> PingView:
 65        """Get a view from a name and dict definition."""
 66        return klass(namespace, name, _dict["tables"])
 67
 68    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 69        """Generate LookML for this view."""
 70        view_defn: Dict[str, Any] = {"name": self.name}
 71
 72        # use schema for the table where channel=="release" or the first one
 73        table = next(
 74            (table for table in self.tables if table.get("channel") == "release"),
 75            self.tables[0],
 76        )["table"]
 77
 78        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
 79
 80        # set document id field as a primary key for joins
 81        view_defn["dimensions"] = [
 82            d if d["name"] != "document_id" else dict(**d, primary_key="yes")
 83            for d in dimensions
 84            if not lookml_utils._is_dimension_group(d)
 85        ]
 86        view_defn["dimension_groups"] = [
 87            d for d in dimensions if lookml_utils._is_dimension_group(d)
 88        ]
 89
 90        # add measures
 91        view_defn["measures"] = self.get_measures(dimensions, table, v1_name)
 92
 93        [project, dataset, table_id] = table.split(".")
 94        table_schema = dryrun.create(
 95            project=project,
 96            dataset=dataset,
 97            table=table_id,
 98        ).get_table_schema()
 99        nested_views = lookml_utils._generate_nested_dimension_views(
100            table_schema, self.name
101        )
102
103        # Round-tripping through a dict to get an ordered deduped list.
104        suggestions = list(
105            dict.fromkeys(
106                _table["channel"] for _table in self.tables if "channel" in _table
107            )
108        )
109
110        if len(suggestions) > 1:
111            view_defn["filters"] = [
112                {
113                    "name": "channel",
114                    "type": "string",
115                    "description": "Filter by the app's channel",
116                    "sql": "{% condition %} ${TABLE}.normalized_channel {% endcondition %}",
117                    "default_value": suggestions[0],
118                    "suggestions": suggestions,
119                }
120            ]
121
122        view_defn["sql_table_name"] = f"`{table}`"
123
124        return {"views": [view_defn] + nested_views}
125
126    def get_dimensions(
127        self, table, v1_name: Optional[str], dryrun
128    ) -> List[Dict[str, Any]]:
129        """Get the set of dimensions for this view."""
130        # add dimensions and dimension groups
131        return lookml_utils._generate_dimensions(table, dryrun=dryrun)
132
133    def get_measures(
134        self, dimensions: List[dict], table: str, v1_name: Optional[str]
135    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
136        """Generate measures from a list of dimensions.
137
138        When no dimension-specific measures are found, return a single "count" measure.
139
140        Raise ClickException if dimensions result in duplicate measures.
141        """
142        # Iterate through each of the dimensions and accumulate any measures
143        # that we want to include in the view. We pull out the client id first
144        # since we'll use it to calculate per-measure client counts.
145        measures: List[Dict[str, Union[str, List[Dict[str, str]]]]] = []
146
147        client_id_field = self.get_client_id(dimensions, table)
148        if client_id_field is not None:
149            measures.append(
150                {
151                    "name": "clients",
152                    "type": "count_distinct",
153                    "sql": f"${{{client_id_field}}}",
154                }
155            )
156
157        for dimension in dimensions:
158            dimension_name = dimension["name"]
159            if dimension_name == "document_id":
160                measures += [{"name": "ping_count", "type": "count"}]
161
162        return measures

A view on a ping table.

PingView(namespace: str, name: str, tables: List[Dict[str, Any]])
19    def __init__(self, namespace: str, name: str, tables: List[Dict[str, Any]]):
20        """Create instance of a PingView."""
21        super().__init__(namespace, name, self.__class__.type, tables)

Create instance of a PingView.

type: str = 'ping_view'
allow_glean: bool = False
@classmethod
def from_db_views( klass, namespace: str, is_glean: bool, channels: List[Dict[str, str]], db_views: dict) -> Iterator[PingView]:
23    @classmethod
24    def from_db_views(
25        klass,
26        namespace: str,
27        is_glean: bool,
28        channels: List[Dict[str, str]],
29        db_views: dict,
30    ) -> Iterator[PingView]:
31        """Get Looker views for a namespace."""
32        if (klass.allow_glean and not is_glean) or (not klass.allow_glean and is_glean):
33            return
34
35        view_tables: Dict[str, Dict[str, Dict[str, str]]] = defaultdict(dict)
36        for channel in channels:
37            dataset = channel["dataset"]
38
39            for view_id, references in db_views[dataset].items():
40                if view_id in OMIT_VIEWS:
41                    continue
42
43                table_id = f"mozdata.{dataset}.{view_id}"
44                table: Dict[str, str] = {"table": table_id}
45                if channel.get("channel") is not None:
46                    table["channel"] = channel["channel"]
47
48                # Only include those that select from a single ping source table
49                # or union together multiple ping source tables of the same name.
50                reference_table_names = set(r[-1] for r in references)
51                reference_dataset_names = set(r[-2] for r in references)
52                if (
53                    len(reference_table_names) != 1
54                    or channel["source_dataset"] not in reference_dataset_names
55                ):
56                    continue
57
58                view_tables[view_id][table_id] = table
59
60        for view_id, tables_by_id in view_tables.items():
61            yield klass(namespace, view_id, list(tables_by_id.values()))

Get Looker views for a namespace.

@classmethod
def from_dict( klass, namespace: str, name: str, _dict: generator.views.view.ViewDict) -> PingView:
63    @classmethod
64    def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> PingView:
65        """Get a view from a name and dict definition."""
66        return klass(namespace, name, _dict["tables"])

Get a view from a name and dict definition.

def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 68    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 69        """Generate LookML for this view."""
 70        view_defn: Dict[str, Any] = {"name": self.name}
 71
 72        # use schema for the table where channel=="release" or the first one
 73        table = next(
 74            (table for table in self.tables if table.get("channel") == "release"),
 75            self.tables[0],
 76        )["table"]
 77
 78        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
 79
 80        # set document id field as a primary key for joins
 81        view_defn["dimensions"] = [
 82            d if d["name"] != "document_id" else dict(**d, primary_key="yes")
 83            for d in dimensions
 84            if not lookml_utils._is_dimension_group(d)
 85        ]
 86        view_defn["dimension_groups"] = [
 87            d for d in dimensions if lookml_utils._is_dimension_group(d)
 88        ]
 89
 90        # add measures
 91        view_defn["measures"] = self.get_measures(dimensions, table, v1_name)
 92
 93        [project, dataset, table_id] = table.split(".")
 94        table_schema = dryrun.create(
 95            project=project,
 96            dataset=dataset,
 97            table=table_id,
 98        ).get_table_schema()
 99        nested_views = lookml_utils._generate_nested_dimension_views(
100            table_schema, self.name
101        )
102
103        # Round-tripping through a dict to get an ordered deduped list.
104        suggestions = list(
105            dict.fromkeys(
106                _table["channel"] for _table in self.tables if "channel" in _table
107            )
108        )
109
110        if len(suggestions) > 1:
111            view_defn["filters"] = [
112                {
113                    "name": "channel",
114                    "type": "string",
115                    "description": "Filter by the app's channel",
116                    "sql": "{% condition %} ${TABLE}.normalized_channel {% endcondition %}",
117                    "default_value": suggestions[0],
118                    "suggestions": suggestions,
119                }
120            ]
121
122        view_defn["sql_table_name"] = f"`{table}`"
123
124        return {"views": [view_defn] + nested_views}

Generate LookML for this view.

def get_dimensions(self, table, v1_name: Optional[str], dryrun) -> List[Dict[str, Any]]:
126    def get_dimensions(
127        self, table, v1_name: Optional[str], dryrun
128    ) -> List[Dict[str, Any]]:
129        """Get the set of dimensions for this view."""
130        # add dimensions and dimension groups
131        return lookml_utils._generate_dimensions(table, dryrun=dryrun)

Get the set of dimensions for this view.

def get_measures( self, dimensions: List[dict], table: str, v1_name: Optional[str]) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
133    def get_measures(
134        self, dimensions: List[dict], table: str, v1_name: Optional[str]
135    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
136        """Generate measures from a list of dimensions.
137
138        When no dimension-specific measures are found, return a single "count" measure.
139
140        Raise ClickException if dimensions result in duplicate measures.
141        """
142        # Iterate through each of the dimensions and accumulate any measures
143        # that we want to include in the view. We pull out the client id first
144        # since we'll use it to calculate per-measure client counts.
145        measures: List[Dict[str, Union[str, List[Dict[str, str]]]]] = []
146
147        client_id_field = self.get_client_id(dimensions, table)
148        if client_id_field is not None:
149            measures.append(
150                {
151                    "name": "clients",
152                    "type": "count_distinct",
153                    "sql": f"${{{client_id_field}}}",
154                }
155            )
156
157        for dimension in dimensions:
158            dimension_name = dimension["name"]
159            if dimension_name == "document_id":
160                measures += [{"name": "ping_count", "type": "count"}]
161
162        return measures

Generate measures from a list of dimensions.

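Adds a "clients" count_distinct measure when a client id field is found, and a "ping_count" count measure when a "document_id" dimension is present. No fallback "count" measure is added when neither applies.

This implementation does not itself check for duplicate measures or raise ClickException.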