generator.views.events_stream_view

Class to describe an events_stream view.

  1"""Class to describe an `events_stream` view."""
  2
  3from __future__ import annotations
  4
  5from copy import deepcopy
  6from typing import Any, Iterator, Optional
  7
  8from . import lookml_utils
  9from .lookml_utils import DEFAULT_MAX_SUGGEST_PERSIST_FOR
 10from .view import View, ViewDict
 11
 12
 13class EventsStreamView(View):
 14    """A view for querying `events_stream` data, with one row per event."""
 15
 16    type: str = "events_stream_view"
 17
 18    default_measures: list[dict[str, str]] = [
 19        {
 20            "name": "event_count",
 21            "type": "count",
 22            "description": "The number of times the event(s) occurred.",
 23        },
 24        # GleanPingViews were previously generated for some `events_stream` views, and those had
 25        # `ping_count` measures, so we generate the same measures here to avoid breaking anything.
 26        # TODO: Remove this once dashboards have been migrated to use the proper `event_count` measures.
 27        {
 28            "name": "ping_count",
 29            "type": "count",
 30            "hidden": "yes",
 31        },
 32    ]
 33
 34    def __init__(self, namespace: str, name: str, tables: list[dict[str, str]]):
 35        """Get an instance of an EventsStreamView."""
 36        super().__init__(namespace, name, EventsStreamView.type, tables)
 37
 38    @classmethod
 39    def from_db_views(
 40        klass,
 41        namespace: str,
 42        is_glean: bool,
 43        channels: list[dict[str, str]],
 44        db_views: dict,
 45    ) -> Iterator[EventsStreamView]:
 46        """Get EventsStreamViews from db views."""
 47        for view_id in db_views[namespace]:
 48            if view_id.endswith("events_stream"):
 49                yield EventsStreamView(
 50                    namespace,
 51                    view_id,
 52                    [{"table": f"mozdata.{namespace}.{view_id}"}],
 53                )
 54
 55    @classmethod
 56    def from_dict(
 57        klass, namespace: str, name: str, _dict: ViewDict
 58    ) -> EventsStreamView:
 59        """Get EventsStreamView from a name and dict definition."""
 60        return EventsStreamView(namespace, name, _dict["tables"])
 61
 62    def to_lookml(self, v1_name: Optional[str], dryrun) -> dict[str, Any]:
 63        """Generate LookML for this view."""
 64        dimensions = lookml_utils._generate_dimensions(
 65            self.tables[0]["table"], dryrun=dryrun
 66        )
 67        for dimension in dimensions:
 68            if dimension["name"] == "event_id":
 69                # `event_id` columns were added in https://github.com/mozilla/bigquery-etl/pull/8596.
 70                dimension["sql"] = "COALESCE(${TABLE}.event_id, GENERATE_UUID())"
 71                dimension["primary_key"] = "yes"
 72            elif dimension["name"] == "experiments":
 73                dimension["sql"] = "JSON_KEYS(${TABLE}.experiments, 1)"
 74
 75        measures = self.get_measures(dimensions)
 76
 77        return {
 78            "views": [
 79                {
 80                    "name": self.name,
 81                    "sql_table_name": f"`{self.tables[0]['table']}`",
 82                    "dimensions": [
 83                        d for d in dimensions if not lookml_utils._is_dimension_group(d)
 84                    ],
 85                    "dimension_groups": [
 86                        d for d in dimensions if lookml_utils._is_dimension_group(d)
 87                    ],
 88                    "measures": measures,
 89                },
 90                {
 91                    "name": f"{self.name}__experiments",
 92                    "dimensions": [
 93                        {
 94                            "name": "id",
 95                            "type": "string",
 96                            "sql": "${TABLE}",
 97                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
 98                        },
 99                        {
100                            "name": "branch",
101                            "type": "string",
102                            "sql": "JSON_VALUE(events_stream.experiments[${TABLE}].branch)",
103                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
104                        },
105                        {
106                            "name": "enrollment_id",
107                            "type": "string",
108                            "sql": "JSON_VALUE(events_stream.experiments[${TABLE}].extra.enrollment_id)",
109                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
110                        },
111                        {
112                            "name": "type",
113                            "type": "string",
114                            "sql": "JSON_VALUE(events_stream.experiments[${TABLE}].extra.type)",
115                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
116                        },
117                    ],
118                },
119            ],
120        }
121
122    def get_measures(self, dimensions: list[dict[str, Any]]) -> list[dict[str, str]]:
123        """Get measures for this view."""
124        measures = deepcopy(EventsStreamView.default_measures)
125        if client_id_dimension := self.get_client_id(
126            dimensions, self.tables[0]["table"]
127        ):
128            measures.append(
129                {
130                    "name": "client_count",
131                    "type": "count_distinct",
132                    "sql": f"${{{client_id_dimension}}}",
133                    "description": "The number of clients that completed the event(s).",
134                }
135            )
136            # GleanPingViews were previously generated for some `events_stream` views, and those had
137            # `clients` measures, so we generate the same measures here to avoid breaking anything.
138            # TODO: Remove this once dashboards have been migrated to use the proper `client_count` measures.
139            measures.append(
140                {
141                    "name": "clients",
142                    "type": "count_distinct",
143                    "sql": f"${{{client_id_dimension}}}",
144                    "hidden": "yes",
145                }
146            )
147        return measures
class EventsStreamView(generator.views.view.View):
 14class EventsStreamView(View):
 15    """A view for querying `events_stream` data, with one row per event."""
 16
 17    type: str = "events_stream_view"
 18
 19    default_measures: list[dict[str, str]] = [
 20        {
 21            "name": "event_count",
 22            "type": "count",
 23            "description": "The number of times the event(s) occurred.",
 24        },
 25        # GleanPingViews were previously generated for some `events_stream` views, and those had
 26        # `ping_count` measures, so we generate the same measures here to avoid breaking anything.
 27        # TODO: Remove this once dashboards have been migrated to use the proper `event_count` measures.
 28        {
 29            "name": "ping_count",
 30            "type": "count",
 31            "hidden": "yes",
 32        },
 33    ]
 34
 35    def __init__(self, namespace: str, name: str, tables: list[dict[str, str]]):
 36        """Get an instance of an EventsStreamView."""
 37        super().__init__(namespace, name, EventsStreamView.type, tables)
 38
 39    @classmethod
 40    def from_db_views(
 41        klass,
 42        namespace: str,
 43        is_glean: bool,
 44        channels: list[dict[str, str]],
 45        db_views: dict,
 46    ) -> Iterator[EventsStreamView]:
 47        """Get EventsStreamViews from db views."""
 48        for view_id in db_views[namespace]:
 49            if view_id.endswith("events_stream"):
 50                yield EventsStreamView(
 51                    namespace,
 52                    view_id,
 53                    [{"table": f"mozdata.{namespace}.{view_id}"}],
 54                )
 55
 56    @classmethod
 57    def from_dict(
 58        klass, namespace: str, name: str, _dict: ViewDict
 59    ) -> EventsStreamView:
 60        """Get EventsStreamView from a name and dict definition."""
 61        return EventsStreamView(namespace, name, _dict["tables"])
 62
 63    def to_lookml(self, v1_name: Optional[str], dryrun) -> dict[str, Any]:
 64        """Generate LookML for this view."""
 65        dimensions = lookml_utils._generate_dimensions(
 66            self.tables[0]["table"], dryrun=dryrun
 67        )
 68        for dimension in dimensions:
 69            if dimension["name"] == "event_id":
 70                # `event_id` columns were added in https://github.com/mozilla/bigquery-etl/pull/8596.
 71                dimension["sql"] = "COALESCE(${TABLE}.event_id, GENERATE_UUID())"
 72                dimension["primary_key"] = "yes"
 73            elif dimension["name"] == "experiments":
 74                dimension["sql"] = "JSON_KEYS(${TABLE}.experiments, 1)"
 75
 76        measures = self.get_measures(dimensions)
 77
 78        return {
 79            "views": [
 80                {
 81                    "name": self.name,
 82                    "sql_table_name": f"`{self.tables[0]['table']}`",
 83                    "dimensions": [
 84                        d for d in dimensions if not lookml_utils._is_dimension_group(d)
 85                    ],
 86                    "dimension_groups": [
 87                        d for d in dimensions if lookml_utils._is_dimension_group(d)
 88                    ],
 89                    "measures": measures,
 90                },
 91                {
 92                    "name": f"{self.name}__experiments",
 93                    "dimensions": [
 94                        {
 95                            "name": "id",
 96                            "type": "string",
 97                            "sql": "${TABLE}",
 98                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
 99                        },
100                        {
101                            "name": "branch",
102                            "type": "string",
103                            "sql": "JSON_VALUE(events_stream.experiments[${TABLE}].branch)",
104                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
105                        },
106                        {
107                            "name": "enrollment_id",
108                            "type": "string",
109                            "sql": "JSON_VALUE(events_stream.experiments[${TABLE}].extra.enrollment_id)",
110                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
111                        },
112                        {
113                            "name": "type",
114                            "type": "string",
115                            "sql": "JSON_VALUE(events_stream.experiments[${TABLE}].extra.type)",
116                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
117                        },
118                    ],
119                },
120            ],
121        }
122
123    def get_measures(self, dimensions: list[dict[str, Any]]) -> list[dict[str, str]]:
124        """Get measures for this view."""
125        measures = deepcopy(EventsStreamView.default_measures)
126        if client_id_dimension := self.get_client_id(
127            dimensions, self.tables[0]["table"]
128        ):
129            measures.append(
130                {
131                    "name": "client_count",
132                    "type": "count_distinct",
133                    "sql": f"${{{client_id_dimension}}}",
134                    "description": "The number of clients that completed the event(s).",
135                }
136            )
137            # GleanPingViews were previously generated for some `events_stream` views, and those had
138            # `clients` measures, so we generate the same measures here to avoid breaking anything.
139            # TODO: Remove this once dashboards have been migrated to use the proper `client_count` measures.
140            measures.append(
141                {
142                    "name": "clients",
143                    "type": "count_distinct",
144                    "sql": f"${{{client_id_dimension}}}",
145                    "hidden": "yes",
146                }
147            )
148        return measures

A view for querying events_stream data, with one row per event.

EventsStreamView(namespace: str, name: str, tables: list[dict[str, str]])
35    def __init__(self, namespace: str, name: str, tables: list[dict[str, str]]):
36        """Get an instance of an EventsStreamView."""
37        super().__init__(namespace, name, EventsStreamView.type, tables)

Get an instance of an EventsStreamView.

type: str = 'events_stream_view'
default_measures: list[dict[str, str]] = [{'name': 'event_count', 'type': 'count', 'description': 'The number of times the event(s) occurred.'}, {'name': 'ping_count', 'type': 'count', 'hidden': 'yes'}]
@classmethod
def from_db_views(klass, namespace: str, is_glean: bool, channels: list[dict[str, str]], db_views: dict) -> Iterator[EventsStreamView]:
39    @classmethod
40    def from_db_views(
41        klass,
42        namespace: str,
43        is_glean: bool,
44        channels: list[dict[str, str]],
45        db_views: dict,
46    ) -> Iterator[EventsStreamView]:
47        """Get EventsStreamViews from db views."""
48        for view_id in db_views[namespace]:
49            if view_id.endswith("events_stream"):
50                yield EventsStreamView(
51                    namespace,
52                    view_id,
53                    [{"table": f"mozdata.{namespace}.{view_id}"}],
54                )

Get EventsStreamViews from db views.

@classmethod
def from_dict(klass, namespace: str, name: str, _dict: generator.views.view.ViewDict) -> EventsStreamView:
56    @classmethod
57    def from_dict(
58        klass, namespace: str, name: str, _dict: ViewDict
59    ) -> EventsStreamView:
60        """Get EventsStreamView from a name and dict definition."""
61        return EventsStreamView(namespace, name, _dict["tables"])

Get EventsStreamView from a name and dict definition.

def to_lookml(self, v1_name: Optional[str], dryrun) -> dict[str, typing.Any]:
 63    def to_lookml(self, v1_name: Optional[str], dryrun) -> dict[str, Any]:
 64        """Generate LookML for this view."""
 65        dimensions = lookml_utils._generate_dimensions(
 66            self.tables[0]["table"], dryrun=dryrun
 67        )
 68        for dimension in dimensions:
 69            if dimension["name"] == "event_id":
 70                # `event_id` columns were added in https://github.com/mozilla/bigquery-etl/pull/8596.
 71                dimension["sql"] = "COALESCE(${TABLE}.event_id, GENERATE_UUID())"
 72                dimension["primary_key"] = "yes"
 73            elif dimension["name"] == "experiments":
 74                dimension["sql"] = "JSON_KEYS(${TABLE}.experiments, 1)"
 75
 76        measures = self.get_measures(dimensions)
 77
 78        return {
 79            "views": [
 80                {
 81                    "name": self.name,
 82                    "sql_table_name": f"`{self.tables[0]['table']}`",
 83                    "dimensions": [
 84                        d for d in dimensions if not lookml_utils._is_dimension_group(d)
 85                    ],
 86                    "dimension_groups": [
 87                        d for d in dimensions if lookml_utils._is_dimension_group(d)
 88                    ],
 89                    "measures": measures,
 90                },
 91                {
 92                    "name": f"{self.name}__experiments",
 93                    "dimensions": [
 94                        {
 95                            "name": "id",
 96                            "type": "string",
 97                            "sql": "${TABLE}",
 98                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
 99                        },
100                        {
101                            "name": "branch",
102                            "type": "string",
103                            "sql": "JSON_VALUE(events_stream.experiments[${TABLE}].branch)",
104                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
105                        },
106                        {
107                            "name": "enrollment_id",
108                            "type": "string",
109                            "sql": "JSON_VALUE(events_stream.experiments[${TABLE}].extra.enrollment_id)",
110                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
111                        },
112                        {
113                            "name": "type",
114                            "type": "string",
115                            "sql": "JSON_VALUE(events_stream.experiments[${TABLE}].extra.type)",
116                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
117                        },
118                    ],
119                },
120            ],
121        }

Generate LookML for this view.

def get_measures(self, dimensions: list[dict[str, typing.Any]]) -> list[dict[str, str]]:
123    def get_measures(self, dimensions: list[dict[str, Any]]) -> list[dict[str, str]]:
124        """Get measures for this view."""
125        measures = deepcopy(EventsStreamView.default_measures)
126        if client_id_dimension := self.get_client_id(
127            dimensions, self.tables[0]["table"]
128        ):
129            measures.append(
130                {
131                    "name": "client_count",
132                    "type": "count_distinct",
133                    "sql": f"${{{client_id_dimension}}}",
134                    "description": "The number of clients that completed the event(s).",
135                }
136            )
137            # GleanPingViews were previously generated for some `events_stream` views, and those had
138            # `clients` measures, so we generate the same measures here to avoid breaking anything.
139            # TODO: Remove this once dashboards have been migrated to use the proper `client_count` measures.
140            measures.append(
141                {
142                    "name": "clients",
143                    "type": "count_distinct",
144                    "sql": f"${{{client_id_dimension}}}",
145                    "hidden": "yes",
146                }
147            )
148        return measures

Get measures for this view.