generator.views.ping_view

Class to describe a Ping View.

  1"""Class to describe a Ping View."""
  2
  3from __future__ import annotations
  4
  5from collections import defaultdict
  6from typing import Any, Dict, Iterator, List, Optional, Union
  7
  8from . import lookml_utils
  9from .view import OMIT_VIEWS, View, ViewDict
 10
 11
 12class PingView(View):
 13    """A view on a ping table."""
 14
 15    type: str = "ping_view"
 16    allow_glean: bool = False
 17
 18    def __init__(self, namespace: str, name: str, tables: List[Dict[str, Any]]):
 19        """Create instance of a PingView."""
 20        super().__init__(namespace, name, self.__class__.type, tables)
 21
 22    @classmethod
 23    def from_db_views(
 24        klass,
 25        namespace: str,
 26        is_glean: bool,
 27        channels: List[Dict[str, str]],
 28        db_views: dict,
 29    ) -> Iterator[PingView]:
 30        """Get Looker views for a namespace."""
 31        if (klass.allow_glean and not is_glean) or (not klass.allow_glean and is_glean):
 32            return
 33
 34        view_tables: Dict[str, Dict[str, Dict[str, str]]] = defaultdict(dict)
 35        for channel in channels:
 36            dataset = channel["dataset"]
 37
 38            for view_id, references in db_views[dataset].items():
 39                if view_id in OMIT_VIEWS:
 40                    continue
 41
 42                table_id = f"mozdata.{dataset}.{view_id}"
 43                table: Dict[str, str] = {"table": table_id}
 44                if channel.get("channel") is not None:
 45                    table["channel"] = channel["channel"]
 46
 47                # Only include those that select from a single ping source table
 48                # or union together multiple ping source tables of the same name.
 49                reference_table_names = set(r[-1] for r in references)
 50                reference_dataset_names = set(r[-2] for r in references)
 51                if len(reference_table_names) != 1 or (
 52                    channel["source_dataset"] not in reference_dataset_names
 53                    # Temporary hack to keep generating "ping views" for apps' `events_stream` union views which now
 54                    # select from `events_stream_v1` derived tables after https://github.com/mozilla/bigquery-etl/pull/8361.
 55                    # These `events_stream` "ping views" shouldn't have been generated in the first place, but they
 56                    # are currently being relied on (https://bugzilla.mozilla.org/show_bug.cgi?id=1997588).
 57                    # TODO: Remove this hack when implementing https://mozilla-hub.atlassian.net/browse/DENG-9548.
 58                    and not (
 59                        view_id == "events_stream"
 60                        and (channel["source_dataset"] + "_derived")
 61                        in reference_dataset_names
 62                    )
 63                ):
 64                    continue
 65
 66                view_tables[view_id][table_id] = table
 67
 68        for view_id, tables_by_id in view_tables.items():
 69            yield klass(namespace, view_id, list(tables_by_id.values()))
 70
 71    @classmethod
 72    def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> PingView:
 73        """Get a view from a name and dict definition."""
 74        return klass(namespace, name, _dict["tables"])
 75
 76    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 77        """Generate LookML for this view."""
 78        view_defn: Dict[str, Any] = {"name": self.name}
 79
 80        # use schema for the table where channel=="release" or the first one
 81        table = next(
 82            (table for table in self.tables if table.get("channel") == "release"),
 83            self.tables[0],
 84        )["table"]
 85
 86        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
 87
 88        # set document id field as a primary key for joins
 89        view_defn["dimensions"] = [
 90            d if d["name"] != "document_id" else dict(**d, primary_key="yes")
 91            for d in dimensions
 92            if not lookml_utils._is_dimension_group(d)
 93        ]
 94        view_defn["dimension_groups"] = [
 95            d for d in dimensions if lookml_utils._is_dimension_group(d)
 96        ]
 97
 98        # add measures
 99        view_defn["measures"] = self.get_measures(dimensions, table, v1_name)
100
101        [project, dataset, table_id] = table.split(".")
102        table_schema = dryrun.create(
103            project=project,
104            dataset=dataset,
105            table=table_id,
106        ).get_table_schema()
107        nested_views = lookml_utils._generate_nested_dimension_views(
108            table_schema, self.name
109        )
110
111        # Round-tripping through a dict to get an ordered deduped list.
112        suggestions = list(
113            dict.fromkeys(
114                _table["channel"] for _table in self.tables if "channel" in _table
115            )
116        )
117
118        if len(suggestions) > 1:
119            view_defn["filters"] = [
120                {
121                    "name": "channel",
122                    "type": "string",
123                    "description": "Filter by the app's channel",
124                    "sql": "{% condition %} ${TABLE}.normalized_channel {% endcondition %}",
125                    "default_value": suggestions[0],
126                    "suggestions": suggestions,
127                }
128            ]
129
130        view_defn["sql_table_name"] = f"`{table}`"
131
132        return {"views": [view_defn] + nested_views}
133
134    def get_dimensions(
135        self, table, v1_name: Optional[str], dryrun
136    ) -> List[Dict[str, Any]]:
137        """Get the set of dimensions for this view."""
138        # add dimensions and dimension groups
139        return lookml_utils._generate_dimensions(table, dryrun=dryrun)
140
141    def get_measures(
142        self, dimensions: List[dict], table: str, v1_name: Optional[str]
143    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
144        """Generate measures from a list of dimensions.
145
146        When no dimension-specific measures are found, return a single "count" measure.
147
148        Raise ClickException if dimensions result in duplicate measures.
149        """
150        # Iterate through each of the dimensions and accumulate any measures
151        # that we want to include in the view. We pull out the client id first
152        # since we'll use it to calculate per-measure client counts.
153        measures: List[Dict[str, Union[str, List[Dict[str, str]]]]] = []
154
155        client_id_field = self.get_client_id(dimensions, table)
156        if client_id_field is not None:
157            measures.append(
158                {
159                    "name": "clients",
160                    "type": "count_distinct",
161                    "sql": f"${{{client_id_field}}}",
162                }
163            )
164
165        for dimension in dimensions:
166            dimension_name = dimension["name"]
167            if dimension_name == "document_id":
168                measures += [{"name": "ping_count", "type": "count"}]
169
170        return measures
class PingView(generator.views.view.View):
 13class PingView(View):
 14    """A view on a ping table."""
 15
 16    type: str = "ping_view"
 17    allow_glean: bool = False
 18
 19    def __init__(self, namespace: str, name: str, tables: List[Dict[str, Any]]):
 20        """Create instance of a PingView."""
 21        super().__init__(namespace, name, self.__class__.type, tables)
 22
 23    @classmethod
 24    def from_db_views(
 25        klass,
 26        namespace: str,
 27        is_glean: bool,
 28        channels: List[Dict[str, str]],
 29        db_views: dict,
 30    ) -> Iterator[PingView]:
 31        """Get Looker views for a namespace."""
 32        if (klass.allow_glean and not is_glean) or (not klass.allow_glean and is_glean):
 33            return
 34
 35        view_tables: Dict[str, Dict[str, Dict[str, str]]] = defaultdict(dict)
 36        for channel in channels:
 37            dataset = channel["dataset"]
 38
 39            for view_id, references in db_views[dataset].items():
 40                if view_id in OMIT_VIEWS:
 41                    continue
 42
 43                table_id = f"mozdata.{dataset}.{view_id}"
 44                table: Dict[str, str] = {"table": table_id}
 45                if channel.get("channel") is not None:
 46                    table["channel"] = channel["channel"]
 47
 48                # Only include those that select from a single ping source table
 49                # or union together multiple ping source tables of the same name.
 50                reference_table_names = set(r[-1] for r in references)
 51                reference_dataset_names = set(r[-2] for r in references)
 52                if len(reference_table_names) != 1 or (
 53                    channel["source_dataset"] not in reference_dataset_names
 54                    # Temporary hack to keep generating "ping views" for apps' `events_stream` union views which now
 55                    # select from `events_stream_v1` derived tables after https://github.com/mozilla/bigquery-etl/pull/8361.
 56                    # These `events_stream` "ping views" shouldn't have been generated in the first place, but they
 57                    # are currently being relied on (https://bugzilla.mozilla.org/show_bug.cgi?id=1997588).
 58                    # TODO: Remove this hack when implementing https://mozilla-hub.atlassian.net/browse/DENG-9548.
 59                    and not (
 60                        view_id == "events_stream"
 61                        and (channel["source_dataset"] + "_derived")
 62                        in reference_dataset_names
 63                    )
 64                ):
 65                    continue
 66
 67                view_tables[view_id][table_id] = table
 68
 69        for view_id, tables_by_id in view_tables.items():
 70            yield klass(namespace, view_id, list(tables_by_id.values()))
 71
 72    @classmethod
 73    def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> PingView:
 74        """Get a view from a name and dict definition."""
 75        return klass(namespace, name, _dict["tables"])
 76
 77    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 78        """Generate LookML for this view."""
 79        view_defn: Dict[str, Any] = {"name": self.name}
 80
 81        # use schema for the table where channel=="release" or the first one
 82        table = next(
 83            (table for table in self.tables if table.get("channel") == "release"),
 84            self.tables[0],
 85        )["table"]
 86
 87        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
 88
 89        # set document id field as a primary key for joins
 90        view_defn["dimensions"] = [
 91            d if d["name"] != "document_id" else dict(**d, primary_key="yes")
 92            for d in dimensions
 93            if not lookml_utils._is_dimension_group(d)
 94        ]
 95        view_defn["dimension_groups"] = [
 96            d for d in dimensions if lookml_utils._is_dimension_group(d)
 97        ]
 98
 99        # add measures
100        view_defn["measures"] = self.get_measures(dimensions, table, v1_name)
101
102        [project, dataset, table_id] = table.split(".")
103        table_schema = dryrun.create(
104            project=project,
105            dataset=dataset,
106            table=table_id,
107        ).get_table_schema()
108        nested_views = lookml_utils._generate_nested_dimension_views(
109            table_schema, self.name
110        )
111
112        # Round-tripping through a dict to get an ordered deduped list.
113        suggestions = list(
114            dict.fromkeys(
115                _table["channel"] for _table in self.tables if "channel" in _table
116            )
117        )
118
119        if len(suggestions) > 1:
120            view_defn["filters"] = [
121                {
122                    "name": "channel",
123                    "type": "string",
124                    "description": "Filter by the app's channel",
125                    "sql": "{% condition %} ${TABLE}.normalized_channel {% endcondition %}",
126                    "default_value": suggestions[0],
127                    "suggestions": suggestions,
128                }
129            ]
130
131        view_defn["sql_table_name"] = f"`{table}`"
132
133        return {"views": [view_defn] + nested_views}
134
135    def get_dimensions(
136        self, table, v1_name: Optional[str], dryrun
137    ) -> List[Dict[str, Any]]:
138        """Get the set of dimensions for this view."""
139        # add dimensions and dimension groups
140        return lookml_utils._generate_dimensions(table, dryrun=dryrun)
141
142    def get_measures(
143        self, dimensions: List[dict], table: str, v1_name: Optional[str]
144    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
145        """Generate measures from a list of dimensions.
146
147        When no dimension-specific measures are found, return a single "count" measure.
148
149        Raise ClickException if dimensions result in duplicate measures.
150        """
151        # Iterate through each of the dimensions and accumulate any measures
152        # that we want to include in the view. We pull out the client id first
153        # since we'll use it to calculate per-measure client counts.
154        measures: List[Dict[str, Union[str, List[Dict[str, str]]]]] = []
155
156        client_id_field = self.get_client_id(dimensions, table)
157        if client_id_field is not None:
158            measures.append(
159                {
160                    "name": "clients",
161                    "type": "count_distinct",
162                    "sql": f"${{{client_id_field}}}",
163                }
164            )
165
166        for dimension in dimensions:
167            dimension_name = dimension["name"]
168            if dimension_name == "document_id":
169                measures += [{"name": "ping_count", "type": "count"}]
170
171        return measures

A view on a ping table.

PingView(namespace: str, name: str, tables: List[Dict[str, Any]])
19    def __init__(self, namespace: str, name: str, tables: List[Dict[str, Any]]):
20        """Create instance of a PingView."""
21        super().__init__(namespace, name, self.__class__.type, tables)

Create instance of a PingView.

type: str = 'ping_view'
allow_glean: bool = False
@classmethod
def from_db_views( klass, namespace: str, is_glean: bool, channels: List[Dict[str, str]], db_views: dict) -> Iterator[PingView]:
23    @classmethod
24    def from_db_views(
25        klass,
26        namespace: str,
27        is_glean: bool,
28        channels: List[Dict[str, str]],
29        db_views: dict,
30    ) -> Iterator[PingView]:
31        """Get Looker views for a namespace."""
32        if (klass.allow_glean and not is_glean) or (not klass.allow_glean and is_glean):
33            return
34
35        view_tables: Dict[str, Dict[str, Dict[str, str]]] = defaultdict(dict)
36        for channel in channels:
37            dataset = channel["dataset"]
38
39            for view_id, references in db_views[dataset].items():
40                if view_id in OMIT_VIEWS:
41                    continue
42
43                table_id = f"mozdata.{dataset}.{view_id}"
44                table: Dict[str, str] = {"table": table_id}
45                if channel.get("channel") is not None:
46                    table["channel"] = channel["channel"]
47
48                # Only include those that select from a single ping source table
49                # or union together multiple ping source tables of the same name.
50                reference_table_names = set(r[-1] for r in references)
51                reference_dataset_names = set(r[-2] for r in references)
52                if len(reference_table_names) != 1 or (
53                    channel["source_dataset"] not in reference_dataset_names
54                    # Temporary hack to keep generating "ping views" for apps' `events_stream` union views which now
55                    # select from `events_stream_v1` derived tables after https://github.com/mozilla/bigquery-etl/pull/8361.
56                    # These `events_stream` "ping views" shouldn't have been generated in the first place, but they
57                    # are currently being relied on (https://bugzilla.mozilla.org/show_bug.cgi?id=1997588).
58                    # TODO: Remove this hack when implementing https://mozilla-hub.atlassian.net/browse/DENG-9548.
59                    and not (
60                        view_id == "events_stream"
61                        and (channel["source_dataset"] + "_derived")
62                        in reference_dataset_names
63                    )
64                ):
65                    continue
66
67                view_tables[view_id][table_id] = table
68
69        for view_id, tables_by_id in view_tables.items():
70            yield klass(namespace, view_id, list(tables_by_id.values()))

Get Looker views for a namespace.

@classmethod
def from_dict( klass, namespace: str, name: str, _dict: generator.views.view.ViewDict) -> PingView:
72    @classmethod
73    def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> PingView:
74        """Get a view from a name and dict definition."""
75        return klass(namespace, name, _dict["tables"])

Get a view from a name and dict definition.

def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 77    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 78        """Generate LookML for this view."""
 79        view_defn: Dict[str, Any] = {"name": self.name}
 80
 81        # use schema for the table where channel=="release" or the first one
 82        table = next(
 83            (table for table in self.tables if table.get("channel") == "release"),
 84            self.tables[0],
 85        )["table"]
 86
 87        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
 88
 89        # set document id field as a primary key for joins
 90        view_defn["dimensions"] = [
 91            d if d["name"] != "document_id" else dict(**d, primary_key="yes")
 92            for d in dimensions
 93            if not lookml_utils._is_dimension_group(d)
 94        ]
 95        view_defn["dimension_groups"] = [
 96            d for d in dimensions if lookml_utils._is_dimension_group(d)
 97        ]
 98
 99        # add measures
100        view_defn["measures"] = self.get_measures(dimensions, table, v1_name)
101
102        [project, dataset, table_id] = table.split(".")
103        table_schema = dryrun.create(
104            project=project,
105            dataset=dataset,
106            table=table_id,
107        ).get_table_schema()
108        nested_views = lookml_utils._generate_nested_dimension_views(
109            table_schema, self.name
110        )
111
112        # Round-tripping through a dict to get an ordered deduped list.
113        suggestions = list(
114            dict.fromkeys(
115                _table["channel"] for _table in self.tables if "channel" in _table
116            )
117        )
118
119        if len(suggestions) > 1:
120            view_defn["filters"] = [
121                {
122                    "name": "channel",
123                    "type": "string",
124                    "description": "Filter by the app's channel",
125                    "sql": "{% condition %} ${TABLE}.normalized_channel {% endcondition %}",
126                    "default_value": suggestions[0],
127                    "suggestions": suggestions,
128                }
129            ]
130
131        view_defn["sql_table_name"] = f"`{table}`"
132
133        return {"views": [view_defn] + nested_views}

Generate LookML for this view.

def get_dimensions(self, table, v1_name: Optional[str], dryrun) -> List[Dict[str, Any]]:
135    def get_dimensions(
136        self, table, v1_name: Optional[str], dryrun
137    ) -> List[Dict[str, Any]]:
138        """Get the set of dimensions for this view."""
139        # add dimensions and dimension groups
140        return lookml_utils._generate_dimensions(table, dryrun=dryrun)

Get the set of dimensions for this view.

def get_measures( self, dimensions: List[dict], table: str, v1_name: Optional[str]) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
142    def get_measures(
143        self, dimensions: List[dict], table: str, v1_name: Optional[str]
144    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
145        """Generate measures from a list of dimensions.
146
147        When no dimension-specific measures are found, return a single "count" measure.
148
149        Raise ClickException if dimensions result in duplicate measures.
150        """
151        # Iterate through each of the dimensions and accumulate any measures
152        # that we want to include in the view. We pull out the client id first
153        # since we'll use it to calculate per-measure client counts.
154        measures: List[Dict[str, Union[str, List[Dict[str, str]]]]] = []
155
156        client_id_field = self.get_client_id(dimensions, table)
157        if client_id_field is not None:
158            measures.append(
159                {
160                    "name": "clients",
161                    "type": "count_distinct",
162                    "sql": f"${{{client_id_field}}}",
163                }
164            )
165
166        for dimension in dimensions:
167            dimension_name = dimension["name"]
168            if dimension_name == "document_id":
169                measures += [{"name": "ping_count", "type": "count"}]
170
171        return measures

Generate measures from a list of dimensions.

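A "clients" measure is added when a client id field is found, and a "ping_count" measure is added when a "document_id" dimension is present; when neither applies, an empty list is returned.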

Raise ClickException if dimensions result in duplicate measures.