generator.views.events_stream_view

Class to describe an `events_stream` view.

  1"""Class to describe an `events_stream` view."""
  2
  3from __future__ import annotations
  4
  5from copy import deepcopy
  6from typing import Any, Iterator, Optional
  7
  8from . import lookml_utils
  9from .view import View, ViewDict
 10
 11
 12class EventsStreamView(View):
 13    """A view for querying `events_stream` data, with one row per event."""
 14
 15    type: str = "events_stream_view"
 16
 17    default_measures: list[dict[str, str]] = [
 18        {
 19            "name": "event_count",
 20            "type": "count",
 21            "description": "The number of times the event(s) occurred.",
 22        },
 23        # GleanPingViews were previously generated for some `events_stream` views, and those had
 24        # `ping_count` measures, so we generate the same measures here to avoid breaking anything.
 25        # TODO: Remove this once dashboards have been migrated to use the proper `event_count` measures.
 26        {
 27            "name": "ping_count",
 28            "type": "count",
 29            "hidden": "yes",
 30        },
 31    ]
 32
 33    def __init__(self, namespace: str, name: str, tables: list[dict[str, str]]):
 34        """Get an instance of an EventsStreamView."""
 35        super().__init__(namespace, name, EventsStreamView.type, tables)
 36
 37    @classmethod
 38    def from_db_views(
 39        klass,
 40        namespace: str,
 41        is_glean: bool,
 42        channels: list[dict[str, str]],
 43        db_views: dict,
 44    ) -> Iterator[EventsStreamView]:
 45        """Get EventsStreamViews from db views."""
 46        for view_id in db_views[namespace]:
 47            if view_id.endswith("events_stream"):
 48                yield EventsStreamView(
 49                    namespace,
 50                    view_id,
 51                    [{"table": f"mozdata.{namespace}.{view_id}"}],
 52                )
 53
 54    @classmethod
 55    def from_dict(
 56        klass, namespace: str, name: str, _dict: ViewDict
 57    ) -> EventsStreamView:
 58        """Get EventsStreamView from a name and dict definition."""
 59        return EventsStreamView(namespace, name, _dict["tables"])
 60
 61    def to_lookml(self, v1_name: Optional[str], dryrun) -> dict[str, Any]:
 62        """Generate LookML for this view."""
 63        dimensions = lookml_utils._generate_dimensions(
 64            self.tables[0]["table"], dryrun=dryrun
 65        )
 66        for dimension in dimensions:
 67            if dimension["name"] == "event_id":
 68                # `event_id` columns were added in https://github.com/mozilla/bigquery-etl/pull/8596.
 69                dimension["sql"] = "COALESCE(${TABLE}.event_id, GENERATE_UUID())"
 70                dimension["primary_key"] = "yes"
 71
 72        measures = self.get_measures(dimensions)
 73
 74        return {
 75            "views": [
 76                {
 77                    "name": self.name,
 78                    "sql_table_name": f"`{self.tables[0]['table']}`",
 79                    "dimensions": [
 80                        d for d in dimensions if not lookml_utils._is_dimension_group(d)
 81                    ],
 82                    "dimension_groups": [
 83                        d for d in dimensions if lookml_utils._is_dimension_group(d)
 84                    ],
 85                    "measures": measures,
 86                },
 87            ],
 88        }
 89
 90    def get_measures(self, dimensions: list[dict[str, Any]]) -> list[dict[str, str]]:
 91        """Get measures for this view."""
 92        measures = deepcopy(EventsStreamView.default_measures)
 93        if client_id_dimension := self.get_client_id(
 94            dimensions, self.tables[0]["table"]
 95        ):
 96            measures.append(
 97                {
 98                    "name": "client_count",
 99                    "type": "count_distinct",
100                    "sql": f"${{{client_id_dimension}}}",
101                    "description": "The number of clients that completed the event(s).",
102                }
103            )
104            # GleanPingViews were previously generated for some `events_stream` views, and those had
105            # `clients` measures, so we generate the same measures here to avoid breaking anything.
106            # TODO: Remove this once dashboards have been migrated to use the proper `client_count` measures.
107            measures.append(
108                {
109                    "name": "clients",
110                    "type": "count_distinct",
111                    "sql": f"${{{client_id_dimension}}}",
112                    "hidden": "yes",
113                }
114            )
115        return measures
class EventsStreamView(generator.views.view.View):
 13class EventsStreamView(View):
 14    """A view for querying `events_stream` data, with one row per event."""
 15
 16    type: str = "events_stream_view"
 17
 18    default_measures: list[dict[str, str]] = [
 19        {
 20            "name": "event_count",
 21            "type": "count",
 22            "description": "The number of times the event(s) occurred.",
 23        },
 24        # GleanPingViews were previously generated for some `events_stream` views, and those had
 25        # `ping_count` measures, so we generate the same measures here to avoid breaking anything.
 26        # TODO: Remove this once dashboards have been migrated to use the proper `event_count` measures.
 27        {
 28            "name": "ping_count",
 29            "type": "count",
 30            "hidden": "yes",
 31        },
 32    ]
 33
 34    def __init__(self, namespace: str, name: str, tables: list[dict[str, str]]):
 35        """Get an instance of an EventsStreamView."""
 36        super().__init__(namespace, name, EventsStreamView.type, tables)
 37
 38    @classmethod
 39    def from_db_views(
 40        klass,
 41        namespace: str,
 42        is_glean: bool,
 43        channels: list[dict[str, str]],
 44        db_views: dict,
 45    ) -> Iterator[EventsStreamView]:
 46        """Get EventsStreamViews from db views."""
 47        for view_id in db_views[namespace]:
 48            if view_id.endswith("events_stream"):
 49                yield EventsStreamView(
 50                    namespace,
 51                    view_id,
 52                    [{"table": f"mozdata.{namespace}.{view_id}"}],
 53                )
 54
 55    @classmethod
 56    def from_dict(
 57        klass, namespace: str, name: str, _dict: ViewDict
 58    ) -> EventsStreamView:
 59        """Get EventsStreamView from a name and dict definition."""
 60        return EventsStreamView(namespace, name, _dict["tables"])
 61
 62    def to_lookml(self, v1_name: Optional[str], dryrun) -> dict[str, Any]:
 63        """Generate LookML for this view."""
 64        dimensions = lookml_utils._generate_dimensions(
 65            self.tables[0]["table"], dryrun=dryrun
 66        )
 67        for dimension in dimensions:
 68            if dimension["name"] == "event_id":
 69                # `event_id` columns were added in https://github.com/mozilla/bigquery-etl/pull/8596.
 70                dimension["sql"] = "COALESCE(${TABLE}.event_id, GENERATE_UUID())"
 71                dimension["primary_key"] = "yes"
 72
 73        measures = self.get_measures(dimensions)
 74
 75        return {
 76            "views": [
 77                {
 78                    "name": self.name,
 79                    "sql_table_name": f"`{self.tables[0]['table']}`",
 80                    "dimensions": [
 81                        d for d in dimensions if not lookml_utils._is_dimension_group(d)
 82                    ],
 83                    "dimension_groups": [
 84                        d for d in dimensions if lookml_utils._is_dimension_group(d)
 85                    ],
 86                    "measures": measures,
 87                },
 88            ],
 89        }
 90
 91    def get_measures(self, dimensions: list[dict[str, Any]]) -> list[dict[str, str]]:
 92        """Get measures for this view."""
 93        measures = deepcopy(EventsStreamView.default_measures)
 94        if client_id_dimension := self.get_client_id(
 95            dimensions, self.tables[0]["table"]
 96        ):
 97            measures.append(
 98                {
 99                    "name": "client_count",
100                    "type": "count_distinct",
101                    "sql": f"${{{client_id_dimension}}}",
102                    "description": "The number of clients that completed the event(s).",
103                }
104            )
105            # GleanPingViews were previously generated for some `events_stream` views, and those had
106            # `clients` measures, so we generate the same measures here to avoid breaking anything.
107            # TODO: Remove this once dashboards have been migrated to use the proper `client_count` measures.
108            measures.append(
109                {
110                    "name": "clients",
111                    "type": "count_distinct",
112                    "sql": f"${{{client_id_dimension}}}",
113                    "hidden": "yes",
114                }
115            )
116        return measures

A view for querying `events_stream` data, with one row per event.

EventsStreamView(namespace: str, name: str, tables: list[dict[str, str]])
34    def __init__(self, namespace: str, name: str, tables: list[dict[str, str]]):
35        """Get an instance of an EventsStreamView."""
36        super().__init__(namespace, name, EventsStreamView.type, tables)

Get an instance of an EventsStreamView.

type: str = 'events_stream_view'
default_measures: list[dict[str, str]] = [{'name': 'event_count', 'type': 'count', 'description': 'The number of times the event(s) occurred.'}, {'name': 'ping_count', 'type': 'count', 'hidden': 'yes'}]
@classmethod
def from_db_views( klass, namespace: str, is_glean: bool, channels: list[dict[str, str]], db_views: dict) -> Iterator[EventsStreamView]:
38    @classmethod
39    def from_db_views(
40        klass,
41        namespace: str,
42        is_glean: bool,
43        channels: list[dict[str, str]],
44        db_views: dict,
45    ) -> Iterator[EventsStreamView]:
46        """Get EventsStreamViews from db views."""
47        for view_id in db_views[namespace]:
48            if view_id.endswith("events_stream"):
49                yield EventsStreamView(
50                    namespace,
51                    view_id,
52                    [{"table": f"mozdata.{namespace}.{view_id}"}],
53                )

Get EventsStreamViews from db views.

@classmethod
def from_dict( klass, namespace: str, name: str, _dict: generator.views.view.ViewDict) -> EventsStreamView:
55    @classmethod
56    def from_dict(
57        klass, namespace: str, name: str, _dict: ViewDict
58    ) -> EventsStreamView:
59        """Get EventsStreamView from a name and dict definition."""
60        return EventsStreamView(namespace, name, _dict["tables"])

Get EventsStreamView from a name and dict definition.

def to_lookml(self, v1_name: Optional[str], dryrun) -> dict[str, typing.Any]:
62    def to_lookml(self, v1_name: Optional[str], dryrun) -> dict[str, Any]:
63        """Generate LookML for this view."""
64        dimensions = lookml_utils._generate_dimensions(
65            self.tables[0]["table"], dryrun=dryrun
66        )
67        for dimension in dimensions:
68            if dimension["name"] == "event_id":
69                # `event_id` columns were added in https://github.com/mozilla/bigquery-etl/pull/8596.
70                dimension["sql"] = "COALESCE(${TABLE}.event_id, GENERATE_UUID())"
71                dimension["primary_key"] = "yes"
72
73        measures = self.get_measures(dimensions)
74
75        return {
76            "views": [
77                {
78                    "name": self.name,
79                    "sql_table_name": f"`{self.tables[0]['table']}`",
80                    "dimensions": [
81                        d for d in dimensions if not lookml_utils._is_dimension_group(d)
82                    ],
83                    "dimension_groups": [
84                        d for d in dimensions if lookml_utils._is_dimension_group(d)
85                    ],
86                    "measures": measures,
87                },
88            ],
89        }

Generate LookML for this view.

def get_measures(self, dimensions: list[dict[str, typing.Any]]) -> list[dict[str, str]]:
 91    def get_measures(self, dimensions: list[dict[str, Any]]) -> list[dict[str, str]]:
 92        """Get measures for this view."""
 93        measures = deepcopy(EventsStreamView.default_measures)
 94        if client_id_dimension := self.get_client_id(
 95            dimensions, self.tables[0]["table"]
 96        ):
 97            measures.append(
 98                {
 99                    "name": "client_count",
100                    "type": "count_distinct",
101                    "sql": f"${{{client_id_dimension}}}",
102                    "description": "The number of clients that completed the event(s).",
103                }
104            )
105            # GleanPingViews were previously generated for some `events_stream` views, and those had
106            # `clients` measures, so we generate the same measures here to avoid breaking anything.
107            # TODO: Remove this once dashboards have been migrated to use the proper `client_count` measures.
108            measures.append(
109                {
110                    "name": "clients",
111                    "type": "count_distinct",
112                    "sql": f"${{{client_id_dimension}}}",
113                    "hidden": "yes",
114                }
115            )
116        return measures

Get measures for this view.