generator.views.events_stream_view
Class to describe an events_stream view.
1"""Class to describe an `events_stream` view.""" 2 3from __future__ import annotations 4 5from copy import deepcopy 6from typing import Any, Iterator, Optional 7 8from . import lookml_utils 9from .lookml_utils import DEFAULT_MAX_SUGGEST_PERSIST_FOR 10from .view import View, ViewDict 11 12 13class EventsStreamView(View): 14 """A view for querying `events_stream` data, with one row per event.""" 15 16 type: str = "events_stream_view" 17 18 default_measures: list[dict[str, str]] = [ 19 { 20 "name": "event_count", 21 "type": "count", 22 "description": "The number of times the event(s) occurred.", 23 }, 24 # GleanPingViews were previously generated for some `events_stream` views, and those had 25 # `ping_count` measures, so we generate the same measures here to avoid breaking anything. 26 # TODO: Remove this once dashboards have been migrated to use the proper `event_count` measures. 27 { 28 "name": "ping_count", 29 "type": "count", 30 "hidden": "yes", 31 }, 32 ] 33 34 def __init__(self, namespace: str, name: str, tables: list[dict[str, str]]): 35 """Get an instance of an EventsStreamView.""" 36 super().__init__(namespace, name, EventsStreamView.type, tables) 37 38 @classmethod 39 def from_db_views( 40 klass, 41 namespace: str, 42 is_glean: bool, 43 channels: list[dict[str, str]], 44 db_views: dict, 45 ) -> Iterator[EventsStreamView]: 46 """Get EventsStreamViews from db views.""" 47 for view_id in db_views[namespace]: 48 if view_id.endswith("events_stream"): 49 yield EventsStreamView( 50 namespace, 51 view_id, 52 [{"table": f"mozdata.{namespace}.{view_id}"}], 53 ) 54 55 @classmethod 56 def from_dict( 57 klass, namespace: str, name: str, _dict: ViewDict 58 ) -> EventsStreamView: 59 """Get EventsStreamView from a name and dict definition.""" 60 return EventsStreamView(namespace, name, _dict["tables"]) 61 62 def to_lookml(self, v1_name: Optional[str], dryrun) -> dict[str, Any]: 63 """Generate LookML for this view.""" 64 dimensions = lookml_utils._generate_dimensions( 65 
self.tables[0]["table"], dryrun=dryrun 66 ) 67 for dimension in dimensions: 68 if dimension["name"] == "event_id": 69 # `event_id` columns were added in https://github.com/mozilla/bigquery-etl/pull/8596. 70 dimension["sql"] = "COALESCE(${TABLE}.event_id, GENERATE_UUID())" 71 dimension["primary_key"] = "yes" 72 elif dimension["name"] == "experiments": 73 dimension["sql"] = "JSON_KEYS(${TABLE}.experiments, 1)" 74 75 measures = self.get_measures(dimensions) 76 77 return { 78 "views": [ 79 { 80 "name": self.name, 81 "sql_table_name": f"`{self.tables[0]['table']}`", 82 "dimensions": [ 83 d for d in dimensions if not lookml_utils._is_dimension_group(d) 84 ], 85 "dimension_groups": [ 86 d for d in dimensions if lookml_utils._is_dimension_group(d) 87 ], 88 "measures": measures, 89 }, 90 { 91 "name": f"{self.name}__experiments", 92 "dimensions": [ 93 { 94 "name": "id", 95 "type": "string", 96 "sql": "${TABLE}", 97 "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR, 98 }, 99 { 100 "name": "branch", 101 "type": "string", 102 "sql": "JSON_VALUE(events_stream.experiments[${TABLE}].branch)", 103 "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR, 104 }, 105 { 106 "name": "enrollment_id", 107 "type": "string", 108 "sql": "JSON_VALUE(events_stream.experiments[${TABLE}].extra.enrollment_id)", 109 "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR, 110 }, 111 { 112 "name": "type", 113 "type": "string", 114 "sql": "JSON_VALUE(events_stream.experiments[${TABLE}].extra.type)", 115 "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR, 116 }, 117 ], 118 }, 119 ], 120 } 121 122 def get_measures(self, dimensions: list[dict[str, Any]]) -> list[dict[str, str]]: 123 """Get measures for this view.""" 124 measures = deepcopy(EventsStreamView.default_measures) 125 if client_id_dimension := self.get_client_id( 126 dimensions, self.tables[0]["table"] 127 ): 128 measures.append( 129 { 130 "name": "client_count", 131 "type": "count_distinct", 132 "sql": f"${{{client_id_dimension}}}", 
133 "description": "The number of clients that completed the event(s).", 134 } 135 ) 136 # GleanPingViews were previously generated for some `events_stream` views, and those had 137 # `clients` measures, so we generate the same measures here to avoid breaking anything. 138 # TODO: Remove this once dashboards have been migrated to use the proper `client_count` measures. 139 measures.append( 140 { 141 "name": "clients", 142 "type": "count_distinct", 143 "sql": f"${{{client_id_dimension}}}", 144 "hidden": "yes", 145 } 146 ) 147 return measures
class EventsStreamView(View):
    """A view for querying `events_stream` data, with one row per event."""

    type: str = "events_stream_view"

    # Measures included in every generated view; `get_measures` works on a deep copy of this list.
    default_measures: list[dict[str, str]] = [
        {
            "name": "event_count",
            "type": "count",
            "description": "The number of times the event(s) occurred.",
        },
        # GleanPingViews were previously generated for some `events_stream` views, and those had
        # `ping_count` measures, so we generate the same measures here to avoid breaking anything.
        # TODO: Remove this once dashboards have been migrated to use the proper `event_count` measures.
        {
            "name": "ping_count",
            "type": "count",
            "hidden": "yes",
        },
    ]

    def __init__(self, namespace: str, name: str, tables: list[dict[str, str]]):
        """Get an instance of an EventsStreamView.

        The view type passed to the base class is always `EventsStreamView.type`.
        """
        super().__init__(namespace, name, EventsStreamView.type, tables)

    @classmethod
    def from_db_views(
        klass,
        namespace: str,
        is_glean: bool,
        channels: list[dict[str, str]],
        db_views: dict,
    ) -> Iterator[EventsStreamView]:
        """Get EventsStreamViews from db views.

        Yields one view for each entry of `db_views[namespace]` whose ID ends with
        `events_stream`. `is_glean` and `channels` are accepted but not used here.
        """
        for view_id in db_views[namespace]:
            if not view_id.endswith("events_stream"):
                continue
            table_ref = {"table": f"mozdata.{namespace}.{view_id}"}
            yield EventsStreamView(namespace, view_id, [table_ref])

    @classmethod
    def from_dict(
        klass, namespace: str, name: str, _dict: ViewDict
    ) -> EventsStreamView:
        """Get EventsStreamView from a name and dict definition.

        Only the `tables` entry of `_dict` is read.
        """
        tables = _dict["tables"]
        return EventsStreamView(namespace, name, tables)

    def to_lookml(self, v1_name: Optional[str], dryrun) -> dict[str, Any]:
        """Generate LookML for this view.

        The returned dict has a `views` list with two entries: the main view built from
        the first table (dimensions, dimension groups and measures), followed by a
        `<name>__experiments` view with `id`, `branch`, `enrollment_id` and `type`
        dimensions. `v1_name` is not used by this implementation.
        """
        source_table = self.tables[0]["table"]
        dimensions = lookml_utils._generate_dimensions(source_table, dryrun=dryrun)

        # Override the generated SQL for the two dimensions that need special handling.
        for dimension in dimensions:
            dimension_name = dimension["name"]
            if dimension_name == "event_id":
                # `event_id` columns were added in https://github.com/mozilla/bigquery-etl/pull/8596.
                dimension["sql"] = "COALESCE(${TABLE}.event_id, GENERATE_UUID())"
                dimension["primary_key"] = "yes"
            elif dimension_name == "experiments":
                dimension["sql"] = "JSON_KEYS(${TABLE}.experiments, 1)"

        measures = self.get_measures(dimensions)
        plain_dimensions = [
            d for d in dimensions if not lookml_utils._is_dimension_group(d)
        ]
        dimension_groups = [
            d for d in dimensions if lookml_utils._is_dimension_group(d)
        ]

        # NOTE(review): the expressions below reference `events_stream` literally rather than
        # `self.name` -- confirm this matches the alias the explore gives this view.
        experiment_fields = [
            ("id", "${TABLE}"),
            ("branch", "JSON_VALUE(events_stream.experiments[${TABLE}].branch)"),
            (
                "enrollment_id",
                "JSON_VALUE(events_stream.experiments[${TABLE}].extra.enrollment_id)",
            ),
            ("type", "JSON_VALUE(events_stream.experiments[${TABLE}].extra.type)"),
        ]
        experiment_dimensions = [
            {
                "name": field_name,
                "type": "string",
                "sql": field_sql,
                "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
            }
            for field_name, field_sql in experiment_fields
        ]

        main_view = {
            "name": self.name,
            "sql_table_name": f"`{self.tables[0]['table']}`",
            "dimensions": plain_dimensions,
            "dimension_groups": dimension_groups,
            "measures": measures,
        }
        experiments_view = {
            "name": f"{self.name}__experiments",
            "dimensions": experiment_dimensions,
        }
        return {"views": [main_view, experiments_view]}

    def get_measures(self, dimensions: list[dict[str, Any]]) -> list[dict[str, str]]:
        """Get measures for this view.

        Starts from a deep copy of `default_measures`. When `get_client_id` returns a
        truthy dimension name, `client_count` and a hidden `clients` measure are appended.
        """
        measures = deepcopy(EventsStreamView.default_measures)
        client_id_dimension = self.get_client_id(dimensions, self.tables[0]["table"])
        if not client_id_dimension:
            return measures

        # LookML reference to the client ID dimension, shared by both measures below.
        client_id_sql = f"${{{client_id_dimension}}}"
        measures.extend(
            [
                {
                    "name": "client_count",
                    "type": "count_distinct",
                    "sql": client_id_sql,
                    "description": "The number of clients that completed the event(s).",
                },
                # GleanPingViews were previously generated for some `events_stream` views, and those had
                # `clients` measures, so we generate the same measures here to avoid breaking anything.
                # TODO: Remove this once dashboards have been migrated to use the proper `client_count` measures.
                {
                    "name": "clients",
                    "type": "count_distinct",
                    "sql": client_id_sql,
                    "hidden": "yes",
                },
            ]
        )
        return measures
A view for querying events_stream data, with one row per event.
EventsStreamView(namespace: str, name: str, tables: list[dict[str, str]])
def __init__(self, namespace: str, name: str, tables: list[dict[str, str]]):
    """Get an instance of an EventsStreamView.

    The view type passed to the base class is always `EventsStreamView.type`.
    """
    super().__init__(namespace, name, EventsStreamView.type, tables)
Get an instance of an EventsStreamView.
default_measures: list[dict[str, str]] =
[{'name': 'event_count', 'type': 'count', 'description': 'The number of times the event(s) occurred.'}, {'name': 'ping_count', 'type': 'count', 'hidden': 'yes'}]
@classmethod
def
from_db_views( klass, namespace: str, is_glean: bool, channels: list[dict[str, str]], db_views: dict) -> Iterator[EventsStreamView]:
@classmethod
def from_db_views(
    klass,
    namespace: str,
    is_glean: bool,
    channels: list[dict[str, str]],
    db_views: dict,
) -> Iterator[EventsStreamView]:
    """Get EventsStreamViews from db views.

    Yields one view for each entry of `db_views[namespace]` whose ID ends with
    `events_stream`. `is_glean` and `channels` are accepted but not used here.
    """
    for view_id in db_views[namespace]:
        if not view_id.endswith("events_stream"):
            continue
        table_ref = {"table": f"mozdata.{namespace}.{view_id}"}
        yield EventsStreamView(namespace, view_id, [table_ref])
Get EventsStreamViews from db views.
@classmethod
def
from_dict( klass, namespace: str, name: str, _dict: generator.views.view.ViewDict) -> EventsStreamView:
@classmethod
def from_dict(
    klass, namespace: str, name: str, _dict: ViewDict
) -> EventsStreamView:
    """Get EventsStreamView from a name and dict definition.

    Only the `tables` entry of `_dict` is read.
    """
    tables = _dict["tables"]
    return EventsStreamView(namespace, name, tables)
Get EventsStreamView from a name and dict definition.
def
to_lookml(self, v1_name: Optional[str], dryrun) -> dict[str, typing.Any]:
def to_lookml(self, v1_name: Optional[str], dryrun) -> dict[str, Any]:
    """Generate LookML for this view.

    The returned dict has a `views` list with two entries: the main view built from
    the first table (dimensions, dimension groups and measures), followed by a
    `<name>__experiments` view with `id`, `branch`, `enrollment_id` and `type`
    dimensions. `v1_name` is not used by this implementation.
    """
    source_table = self.tables[0]["table"]
    dimensions = lookml_utils._generate_dimensions(source_table, dryrun=dryrun)

    # Override the generated SQL for the two dimensions that need special handling.
    for dimension in dimensions:
        dimension_name = dimension["name"]
        if dimension_name == "event_id":
            # `event_id` columns were added in https://github.com/mozilla/bigquery-etl/pull/8596.
            dimension["sql"] = "COALESCE(${TABLE}.event_id, GENERATE_UUID())"
            dimension["primary_key"] = "yes"
        elif dimension_name == "experiments":
            dimension["sql"] = "JSON_KEYS(${TABLE}.experiments, 1)"

    measures = self.get_measures(dimensions)
    plain_dimensions = [
        d for d in dimensions if not lookml_utils._is_dimension_group(d)
    ]
    dimension_groups = [
        d for d in dimensions if lookml_utils._is_dimension_group(d)
    ]

    # NOTE(review): the expressions below reference `events_stream` literally rather than
    # `self.name` -- confirm this matches the alias the explore gives this view.
    experiment_fields = [
        ("id", "${TABLE}"),
        ("branch", "JSON_VALUE(events_stream.experiments[${TABLE}].branch)"),
        (
            "enrollment_id",
            "JSON_VALUE(events_stream.experiments[${TABLE}].extra.enrollment_id)",
        ),
        ("type", "JSON_VALUE(events_stream.experiments[${TABLE}].extra.type)"),
    ]
    experiment_dimensions = [
        {
            "name": field_name,
            "type": "string",
            "sql": field_sql,
            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
        }
        for field_name, field_sql in experiment_fields
    ]

    main_view = {
        "name": self.name,
        "sql_table_name": f"`{self.tables[0]['table']}`",
        "dimensions": plain_dimensions,
        "dimension_groups": dimension_groups,
        "measures": measures,
    }
    experiments_view = {
        "name": f"{self.name}__experiments",
        "dimensions": experiment_dimensions,
    }
    return {"views": [main_view, experiments_view]}
Generate LookML for this view.
def
get_measures(self, dimensions: list[dict[str, typing.Any]]) -> list[dict[str, str]]:
def get_measures(self, dimensions: list[dict[str, Any]]) -> list[dict[str, str]]:
    """Get measures for this view.

    Starts from a deep copy of `default_measures`. When `get_client_id` returns a
    truthy dimension name, `client_count` and a hidden `clients` measure are appended.
    """
    measures = deepcopy(EventsStreamView.default_measures)
    client_id_dimension = self.get_client_id(dimensions, self.tables[0]["table"])
    if not client_id_dimension:
        return measures

    # LookML reference to the client ID dimension, shared by both measures below.
    client_id_sql = f"${{{client_id_dimension}}}"
    measures.extend(
        [
            {
                "name": "client_count",
                "type": "count_distinct",
                "sql": client_id_sql,
                "description": "The number of clients that completed the event(s).",
            },
            # GleanPingViews were previously generated for some `events_stream` views, and those had
            # `clients` measures, so we generate the same measures here to avoid breaking anything.
            # TODO: Remove this once dashboards have been migrated to use the proper `client_count` measures.
            {
                "name": "clients",
                "type": "count_distinct",
                "sql": client_id_sql,
                "hidden": "yes",
            },
        ]
    )
    return measures
Get measures for this view.