generator.views.ping_view
Class to describe a Ping View.
1"""Class to describe a Ping View.""" 2 3from __future__ import annotations 4 5from collections import defaultdict 6from typing import Any, Dict, Iterator, List, Optional, Union 7 8from . import lookml_utils 9from .view import OMIT_VIEWS, View, ViewDict 10 11 12class PingView(View): 13 """A view on a ping table.""" 14 15 type: str = "ping_view" 16 allow_glean: bool = False 17 18 def __init__(self, namespace: str, name: str, tables: List[Dict[str, Any]]): 19 """Create instance of a PingView.""" 20 super().__init__(namespace, name, self.__class__.type, tables) 21 22 @classmethod 23 def from_db_views( 24 klass, 25 namespace: str, 26 is_glean: bool, 27 channels: List[Dict[str, str]], 28 db_views: dict, 29 ) -> Iterator[PingView]: 30 """Get Looker views for a namespace.""" 31 if (klass.allow_glean and not is_glean) or (not klass.allow_glean and is_glean): 32 return 33 34 view_tables: Dict[str, Dict[str, Dict[str, str]]] = defaultdict(dict) 35 for channel in channels: 36 dataset = channel["dataset"] 37 38 for view_id, references in db_views[dataset].items(): 39 if view_id in OMIT_VIEWS: 40 continue 41 42 table_id = f"mozdata.{dataset}.{view_id}" 43 table: Dict[str, str] = {"table": table_id} 44 if channel.get("channel") is not None: 45 table["channel"] = channel["channel"] 46 47 # Only include those that select from a single ping source table 48 # or union together multiple ping source tables of the same name. 49 reference_table_names = set(r[-1] for r in references) 50 reference_dataset_names = set(r[-2] for r in references) 51 if len(reference_table_names) != 1 or ( 52 channel["source_dataset"] not in reference_dataset_names 53 # Temporary hack to keep generating "ping views" for apps' `events_stream` union views which now 54 # select from `events_stream_v1` derived tables after https://github.com/mozilla/bigquery-etl/pull/8361. 
55 # These `events_stream` "ping views" shouldn't have been generated in the first place, but they 56 # are currently being relied on (https://bugzilla.mozilla.org/show_bug.cgi?id=1997588). 57 # TODO: Remove this hack when implementing https://mozilla-hub.atlassian.net/browse/DENG-9548. 58 and not ( 59 view_id == "events_stream" 60 and (channel["source_dataset"] + "_derived") 61 in reference_dataset_names 62 ) 63 ): 64 continue 65 66 view_tables[view_id][table_id] = table 67 68 for view_id, tables_by_id in view_tables.items(): 69 yield klass(namespace, view_id, list(tables_by_id.values())) 70 71 @classmethod 72 def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> PingView: 73 """Get a view from a name and dict definition.""" 74 return klass(namespace, name, _dict["tables"]) 75 76 def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 77 """Generate LookML for this view.""" 78 view_defn: Dict[str, Any] = {"name": self.name} 79 80 # use schema for the table where channel=="release" or the first one 81 table = next( 82 (table for table in self.tables if table.get("channel") == "release"), 83 self.tables[0], 84 )["table"] 85 86 dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun) 87 88 # set document id field as a primary key for joins 89 view_defn["dimensions"] = [ 90 d if d["name"] != "document_id" else dict(**d, primary_key="yes") 91 for d in dimensions 92 if not lookml_utils._is_dimension_group(d) 93 ] 94 view_defn["dimension_groups"] = [ 95 d for d in dimensions if lookml_utils._is_dimension_group(d) 96 ] 97 98 # add measures 99 view_defn["measures"] = self.get_measures(dimensions, table, v1_name) 100 101 [project, dataset, table_id] = table.split(".") 102 table_schema = dryrun.create( 103 project=project, 104 dataset=dataset, 105 table=table_id, 106 ).get_table_schema() 107 nested_views = lookml_utils._generate_nested_dimension_views( 108 table_schema, self.name 109 ) 110 111 # Round-tripping through a dict to get an 
ordered deduped list. 112 suggestions = list( 113 dict.fromkeys( 114 _table["channel"] for _table in self.tables if "channel" in _table 115 ) 116 ) 117 118 if len(suggestions) > 1: 119 view_defn["filters"] = [ 120 { 121 "name": "channel", 122 "type": "string", 123 "description": "Filter by the app's channel", 124 "sql": "{% condition %} ${TABLE}.normalized_channel {% endcondition %}", 125 "default_value": suggestions[0], 126 "suggestions": suggestions, 127 } 128 ] 129 130 view_defn["sql_table_name"] = f"`{table}`" 131 132 return {"views": [view_defn] + nested_views} 133 134 def get_dimensions( 135 self, table, v1_name: Optional[str], dryrun 136 ) -> List[Dict[str, Any]]: 137 """Get the set of dimensions for this view.""" 138 # add dimensions and dimension groups 139 return lookml_utils._generate_dimensions(table, dryrun=dryrun) 140 141 def get_measures( 142 self, dimensions: List[dict], table: str, v1_name: Optional[str] 143 ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]: 144 """Generate measures from a list of dimensions. 145 146 When no dimension-specific measures are found, return a single "count" measure. 147 148 Raise ClickException if dimensions result in duplicate measures. 149 """ 150 # Iterate through each of the dimensions and accumulate any measures 151 # that we want to include in the view. We pull out the client id first 152 # since we'll use it to calculate per-measure client counts. 153 measures: List[Dict[str, Union[str, List[Dict[str, str]]]]] = [] 154 155 client_id_field = self.get_client_id(dimensions, table) 156 if client_id_field is not None: 157 measures.append( 158 { 159 "name": "clients", 160 "type": "count_distinct", 161 "sql": f"${{{client_id_field}}}", 162 } 163 ) 164 165 for dimension in dimensions: 166 dimension_name = dimension["name"] 167 if dimension_name == "document_id": 168 measures += [{"name": "ping_count", "type": "count"}] 169 170 return measures
class PingView(View):
    """View built on top of one or more ping tables."""

    # View type string handed to the parent ``View`` initializer.
    type: str = "ping_view"
    # ``from_db_views`` yields nothing unless this matches its ``is_glean`` argument.
    allow_glean: bool = False

    def __init__(self, namespace: str, name: str, tables: List[Dict[str, Any]]):
        """Initialise the view, tagging it with this class's ``type`` string.

        ``namespace``, ``name`` and ``tables`` are passed through unchanged to
        the parent initializer.
        """
        view_type = self.__class__.type
        super().__init__(namespace, name, view_type, tables)

    @classmethod
    def from_db_views(
        klass,
        namespace: str,
        is_glean: bool,
        channels: List[Dict[str, str]],
        db_views: dict,
    ) -> Iterator[PingView]:
        """Yield one view per qualifying database view of a namespace.

        Nothing is yielded unless the truthiness of ``is_glean`` matches
        ``klass.allow_glean``. Each entry of ``channels`` must provide a
        ``"dataset"`` key (used to index ``db_views``) and, for views that
        reference exactly one table name, a ``"source_dataset"`` key; an
        optional ``"channel"`` value is copied onto the table entry.
        ``db_views[dataset]`` maps a view id to its references, of which only
        the last two components (dataset, table) are inspected.

        Tables from different channels that share a view id are grouped into
        a single yielded view.
        """
        # A class serves either Glean or non-Glean apps, never both.
        if bool(klass.allow_glean) != bool(is_glean):
            return

        tables_by_view: Dict[str, Dict[str, Dict[str, str]]] = defaultdict(dict)
        for channel in channels:
            dataset = channel["dataset"]

            for view_id, references in db_views[dataset].items():
                if view_id in OMIT_VIEWS:
                    continue

                # Keep only views that select from a single ping source table,
                # or that union several ping source tables of the same name.
                source_tables = {ref[-1] for ref in references}
                source_datasets = {ref[-2] for ref in references}
                if len(source_tables) != 1:
                    continue

                source_dataset = channel["source_dataset"]
                if source_dataset not in source_datasets:
                    # Temporary hack to keep generating "ping views" for apps'
                    # `events_stream` union views, which now select from
                    # `events_stream_v1` derived tables after
                    # https://github.com/mozilla/bigquery-etl/pull/8361.
                    # These `events_stream` "ping views" shouldn't have been
                    # generated in the first place, but they are currently
                    # being relied on
                    # (https://bugzilla.mozilla.org/show_bug.cgi?id=1997588).
                    # TODO: Remove this hack when implementing
                    # https://mozilla-hub.atlassian.net/browse/DENG-9548.
                    derived_events_stream = (
                        view_id == "events_stream"
                        and (source_dataset + "_derived") in source_datasets
                    )
                    if not derived_events_stream:
                        continue

                qualified_name = f"mozdata.{dataset}.{view_id}"
                entry: Dict[str, str] = {"table": qualified_name}
                channel_name = channel.get("channel")
                if channel_name is not None:
                    entry["channel"] = channel_name

                tables_by_view[view_id][qualified_name] = entry

        for view_id, entries in tables_by_view.items():
            yield klass(namespace, view_id, list(entries.values()))

    @classmethod
    def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> PingView:
        """Build a view from its name and the ``"tables"`` entry of ``_dict``."""
        tables = _dict["tables"]
        return klass(namespace, name, tables)

    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
        """Render this view, followed by its nested dimension views, as LookML.

        Returns ``{"views": [...]}`` where the first element describes this
        view and the remainder is whatever
        ``lookml_utils._generate_nested_dimension_views`` returns for the
        table schema obtained through ``dryrun``.
        """
        # The schema comes from the "release" channel table when there is
        # one, otherwise from the first table.
        release_tables = (t for t in self.tables if t.get("channel") == "release")
        table = next(release_tables, self.tables[0])["table"]

        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
        is_group = lookml_utils._is_dimension_group

        def with_primary_key(dimension):
            # The document id doubles as the primary key used for joins.
            if dimension["name"] != "document_id":
                return dimension
            return dict(**dimension, primary_key="yes")

        view_defn: Dict[str, Any] = {
            "name": self.name,
            "dimensions": [
                with_primary_key(d) for d in dimensions if not is_group(d)
            ],
            "dimension_groups": [d for d in dimensions if is_group(d)],
            "measures": self.get_measures(dimensions, table, v1_name),
        }

        project, dataset, table_id = table.split(".")
        schema = dryrun.create(
            project=project,
            dataset=dataset,
            table=table_id,
        ).get_table_schema()
        nested_views = lookml_utils._generate_nested_dimension_views(
            schema, self.name
        )

        # dict.fromkeys drops duplicate channels while keeping first-seen order.
        channel_names = list(
            dict.fromkeys(t["channel"] for t in self.tables if "channel" in t)
        )

        # A channel filter is only added when there is more than one channel.
        if len(channel_names) > 1:
            channel_filter = {
                "name": "channel",
                "type": "string",
                "description": "Filter by the app's channel",
                "sql": "{% condition %} ${TABLE}.normalized_channel {% endcondition %}",
                "default_value": channel_names[0],
                "suggestions": channel_names,
            }
            view_defn["filters"] = [channel_filter]

        view_defn["sql_table_name"] = f"`{table}`"

        return {"views": [view_defn] + nested_views}

    def get_dimensions(
        self, table, v1_name: Optional[str], dryrun
    ) -> List[Dict[str, Any]]:
        """Return the dimensions and dimension groups generated for ``table``.

        ``v1_name`` is accepted but not used by this implementation.
        """
        dimensions = lookml_utils._generate_dimensions(table, dryrun=dryrun)
        return dimensions

    def get_measures(
        self, dimensions: List[dict], table: str, v1_name: Optional[str]
    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
        """Build the measures for this view from its dimensions.

        A ``clients`` ``count_distinct`` measure is emitted when
        ``self.get_client_id`` returns a field name, followed by one
        ``ping_count`` ``count`` measure for every dimension named
        ``document_id``. The result is empty when neither applies.

        ``v1_name`` is accepted but not used here. ``get_client_id`` is
        defined outside this block; anything it raises propagates.
        """
        measures: List[Dict[str, Union[str, List[Dict[str, str]]]]] = []

        # The client id, when present, backs the distinct-clients count.
        client_id_field = self.get_client_id(dimensions, table)
        if client_id_field is not None:
            clients_measure = {
                "name": "clients",
                "type": "count_distinct",
                "sql": f"${{{client_id_field}}}",
            }
            measures.append(clients_measure)

        measures.extend(
            {"name": "ping_count", "type": "count"}
            for dimension in dimensions
            if dimension["name"] == "document_id"
        )

        return measures
A view on a ping table.
PingView(namespace: str, name: str, tables: List[Dict[str, Any]])
def __init__(self, namespace: str, name: str, tables: List[Dict[str, Any]]):
    """Initialise the view, tagging it with this class's ``type`` string.

    ``namespace``, ``name`` and ``tables`` are passed through unchanged to
    the parent initializer.
    """
    view_type = self.__class__.type
    super().__init__(namespace, name, view_type, tables)
Create instance of a PingView.
@classmethod
def
from_db_views( klass, namespace: str, is_glean: bool, channels: List[Dict[str, str]], db_views: dict) -> Iterator[PingView]:
@classmethod
def from_db_views(
    klass,
    namespace: str,
    is_glean: bool,
    channels: List[Dict[str, str]],
    db_views: dict,
) -> Iterator[PingView]:
    """Yield one view per qualifying database view of a namespace.

    Nothing is yielded unless the truthiness of ``is_glean`` matches
    ``klass.allow_glean``. Each entry of ``channels`` must provide a
    ``"dataset"`` key (used to index ``db_views``) and, for views that
    reference exactly one table name, a ``"source_dataset"`` key; an
    optional ``"channel"`` value is copied onto the table entry.
    ``db_views[dataset]`` maps a view id to its references, of which only
    the last two components (dataset, table) are inspected.

    Tables from different channels that share a view id are grouped into
    a single yielded view.
    """
    # A class serves either Glean or non-Glean apps, never both.
    if bool(klass.allow_glean) != bool(is_glean):
        return

    tables_by_view: Dict[str, Dict[str, Dict[str, str]]] = defaultdict(dict)
    for channel in channels:
        dataset = channel["dataset"]

        for view_id, references in db_views[dataset].items():
            if view_id in OMIT_VIEWS:
                continue

            # Keep only views that select from a single ping source table,
            # or that union several ping source tables of the same name.
            source_tables = {ref[-1] for ref in references}
            source_datasets = {ref[-2] for ref in references}
            if len(source_tables) != 1:
                continue

            source_dataset = channel["source_dataset"]
            if source_dataset not in source_datasets:
                # Temporary hack to keep generating "ping views" for apps'
                # `events_stream` union views, which now select from
                # `events_stream_v1` derived tables after
                # https://github.com/mozilla/bigquery-etl/pull/8361.
                # These `events_stream` "ping views" shouldn't have been
                # generated in the first place, but they are currently
                # being relied on
                # (https://bugzilla.mozilla.org/show_bug.cgi?id=1997588).
                # TODO: Remove this hack when implementing
                # https://mozilla-hub.atlassian.net/browse/DENG-9548.
                derived_events_stream = (
                    view_id == "events_stream"
                    and (source_dataset + "_derived") in source_datasets
                )
                if not derived_events_stream:
                    continue

            qualified_name = f"mozdata.{dataset}.{view_id}"
            entry: Dict[str, str] = {"table": qualified_name}
            channel_name = channel.get("channel")
            if channel_name is not None:
                entry["channel"] = channel_name

            tables_by_view[view_id][qualified_name] = entry

    for view_id, entries in tables_by_view.items():
        yield klass(namespace, view_id, list(entries.values()))
Get Looker views for a namespace.
@classmethod
def
from_dict( klass, namespace: str, name: str, _dict: generator.views.view.ViewDict) -> PingView:
@classmethod
def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> PingView:
    """Build a view from its name and the ``"tables"`` entry of ``_dict``."""
    tables = _dict["tables"]
    return klass(namespace, name, tables)
Get a view from a name and dict definition.
def
to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
    """Render this view, followed by its nested dimension views, as LookML.

    Returns ``{"views": [...]}`` where the first element describes this
    view and the remainder is whatever
    ``lookml_utils._generate_nested_dimension_views`` returns for the
    table schema obtained through ``dryrun``.
    """
    # The schema comes from the "release" channel table when there is
    # one, otherwise from the first table.
    release_tables = (t for t in self.tables if t.get("channel") == "release")
    table = next(release_tables, self.tables[0])["table"]

    dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)
    is_group = lookml_utils._is_dimension_group

    def with_primary_key(dimension):
        # The document id doubles as the primary key used for joins.
        if dimension["name"] != "document_id":
            return dimension
        return dict(**dimension, primary_key="yes")

    view_defn: Dict[str, Any] = {
        "name": self.name,
        "dimensions": [
            with_primary_key(d) for d in dimensions if not is_group(d)
        ],
        "dimension_groups": [d for d in dimensions if is_group(d)],
        "measures": self.get_measures(dimensions, table, v1_name),
    }

    project, dataset, table_id = table.split(".")
    schema = dryrun.create(
        project=project,
        dataset=dataset,
        table=table_id,
    ).get_table_schema()
    nested_views = lookml_utils._generate_nested_dimension_views(
        schema, self.name
    )

    # dict.fromkeys drops duplicate channels while keeping first-seen order.
    channel_names = list(
        dict.fromkeys(t["channel"] for t in self.tables if "channel" in t)
    )

    # A channel filter is only added when there is more than one channel.
    if len(channel_names) > 1:
        channel_filter = {
            "name": "channel",
            "type": "string",
            "description": "Filter by the app's channel",
            "sql": "{% condition %} ${TABLE}.normalized_channel {% endcondition %}",
            "default_value": channel_names[0],
            "suggestions": channel_names,
        }
        view_defn["filters"] = [channel_filter]

    view_defn["sql_table_name"] = f"`{table}`"

    return {"views": [view_defn] + nested_views}
Generate LookML for this view.
def
get_dimensions(self, table, v1_name: Optional[str], dryrun) -> List[Dict[str, Any]]:
def get_dimensions(
    self, table, v1_name: Optional[str], dryrun
) -> List[Dict[str, Any]]:
    """Return the dimensions and dimension groups generated for ``table``.

    ``v1_name`` is accepted but not used by this implementation.
    """
    dimensions = lookml_utils._generate_dimensions(table, dryrun=dryrun)
    return dimensions
Get the set of dimensions for this view.
def
get_measures( self, dimensions: List[dict], table: str, v1_name: Optional[str]) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
def get_measures(
    self, dimensions: List[dict], table: str, v1_name: Optional[str]
) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
    """Build the measures for this view from its dimensions.

    A ``clients`` ``count_distinct`` measure is emitted when
    ``self.get_client_id`` returns a field name, followed by one
    ``ping_count`` ``count`` measure for every dimension named
    ``document_id``. The result is empty when neither applies.

    ``v1_name`` is accepted but not used here. ``get_client_id`` is
    defined outside this block; anything it raises propagates.
    """
    measures: List[Dict[str, Union[str, List[Dict[str, str]]]]] = []

    # The client id, when present, backs the distinct-clients count.
    client_id_field = self.get_client_id(dimensions, table)
    if client_id_field is not None:
        clients_measure = {
            "name": "clients",
            "type": "count_distinct",
            "sql": f"${{{client_id_field}}}",
        }
        measures.append(clients_measure)

    measures.extend(
        {"name": "ping_count", "type": "count"}
        for dimension in dimensions
        if dimension["name"] == "document_id"
    )

    return measures
Generate measures from a list of dimensions.
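Return a "clients" count_distinct measure when a client id field is found, plus a "ping_count" count measure for each dimension named document_id; the list is empty when neither applies.
This method does not raise ClickException itself; any exception raised by get_client_id propagates to the caller.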