generator.views.ping_view
Class to describe a Ping View.
1"""Class to describe a Ping View.""" 2 3from __future__ import annotations 4 5from collections import defaultdict 6from typing import Any, Dict, Iterator, List, Optional, Union 7 8from . import lookml_utils 9from .view import OMIT_VIEWS, View, ViewDict 10 11 12class PingView(View): 13 """A view on a ping table.""" 14 15 type: str = "ping_view" 16 allow_glean: bool = False 17 18 def __init__(self, namespace: str, name: str, tables: List[Dict[str, Any]]): 19 """Create instance of a PingView.""" 20 super().__init__(namespace, name, self.__class__.type, tables) 21 22 @classmethod 23 def from_db_views( 24 klass, 25 namespace: str, 26 is_glean: bool, 27 channels: List[Dict[str, str]], 28 db_views: dict, 29 ) -> Iterator[PingView]: 30 """Get Looker views for a namespace.""" 31 if (klass.allow_glean and not is_glean) or (not klass.allow_glean and is_glean): 32 return 33 34 view_tables: Dict[str, Dict[str, Dict[str, str]]] = defaultdict(dict) 35 for channel in channels: 36 dataset = channel["dataset"] 37 38 for view_id, references in db_views[dataset].items(): 39 if view_id in OMIT_VIEWS: 40 continue 41 42 table_id = f"mozdata.{dataset}.{view_id}" 43 table: Dict[str, str] = {"table": table_id} 44 if channel.get("channel") is not None: 45 table["channel"] = channel["channel"] 46 47 # Only include those that select from a single ping source table 48 # or union together multiple ping source tables of the same name. 
49 reference_table_names = set(r[-1] for r in references) 50 reference_dataset_names = set(r[-2] for r in references) 51 if ( 52 len(reference_table_names) != 1 53 or channel["source_dataset"] not in reference_dataset_names 54 ): 55 continue 56 57 view_tables[view_id][table_id] = table 58 59 for view_id, tables_by_id in view_tables.items(): 60 yield klass(namespace, view_id, list(tables_by_id.values())) 61 62 @classmethod 63 def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> PingView: 64 """Get a view from a name and dict definition.""" 65 return klass(namespace, name, _dict["tables"]) 66 67 def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 68 """Generate LookML for this view.""" 69 view_defn: Dict[str, Any] = {"name": self.name} 70 71 # use schema for the table where channel=="release" or the first one 72 table = next( 73 (table for table in self.tables if table.get("channel") == "release"), 74 self.tables[0], 75 )["table"] 76 77 dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun) 78 79 # set document id field as a primary key for joins 80 view_defn["dimensions"] = [ 81 d if d["name"] != "document_id" else dict(**d, primary_key="yes") 82 for d in dimensions 83 if not lookml_utils._is_dimension_group(d) 84 ] 85 view_defn["dimension_groups"] = [ 86 d for d in dimensions if lookml_utils._is_dimension_group(d) 87 ] 88 89 # add measures 90 view_defn["measures"] = self.get_measures(dimensions, table, v1_name) 91 92 [project, dataset, table_id] = table.split(".") 93 table_schema = dryrun.create( 94 project=project, 95 dataset=dataset, 96 table=table_id, 97 ).get_table_schema() 98 nested_views = lookml_utils._generate_nested_dimension_views( 99 table_schema, self.name 100 ) 101 102 # Round-tripping through a dict to get an ordered deduped list. 
103 suggestions = list( 104 dict.fromkeys( 105 _table["channel"] for _table in self.tables if "channel" in _table 106 ) 107 ) 108 109 if len(suggestions) > 1: 110 view_defn["filters"] = [ 111 { 112 "name": "channel", 113 "type": "string", 114 "description": "Filter by the app's channel", 115 "sql": "{% condition %} ${TABLE}.normalized_channel {% endcondition %}", 116 "default_value": suggestions[0], 117 "suggestions": suggestions, 118 } 119 ] 120 121 view_defn["sql_table_name"] = f"`{table}`" 122 123 return {"views": [view_defn] + nested_views} 124 125 def get_dimensions( 126 self, table, v1_name: Optional[str], dryrun 127 ) -> List[Dict[str, Any]]: 128 """Get the set of dimensions for this view.""" 129 # add dimensions and dimension groups 130 return lookml_utils._generate_dimensions(table, dryrun=dryrun) 131 132 def get_measures( 133 self, dimensions: List[dict], table: str, v1_name: Optional[str] 134 ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]: 135 """Generate measures from a list of dimensions. 136 137 When no dimension-specific measures are found, return a single "count" measure. 138 139 Raise ClickException if dimensions result in duplicate measures. 140 """ 141 # Iterate through each of the dimensions and accumulate any measures 142 # that we want to include in the view. We pull out the client id first 143 # since we'll use it to calculate per-measure client counts. 144 measures: List[Dict[str, Union[str, List[Dict[str, str]]]]] = [] 145 146 client_id_field = self.get_client_id(dimensions, table) 147 if client_id_field is not None: 148 measures.append( 149 { 150 "name": "clients", 151 "type": "count_distinct", 152 "sql": f"${{{client_id_field}}}", 153 } 154 ) 155 156 for dimension in dimensions: 157 dimension_name = dimension["name"] 158 if dimension_name == "document_id": 159 measures += [{"name": "ping_count", "type": "count"}] 160 161 return measures
class PingView(View):
    """A view on a ping table.

    Instances are created either from database view metadata
    (``from_db_views``) or from a stored dict definition (``from_dict``) and
    are rendered to LookML with ``to_lookml``.
    """

    # View type identifier handed to the base ``View`` constructor.
    type: str = "ping_view"
    # ``from_db_views`` only yields views when the namespace's ``is_glean``
    # flag has the same truthiness as this attribute.
    allow_glean: bool = False

    def __init__(self, namespace: str, name: str, tables: List[Dict[str, Any]]):
        """Create instance of a PingView.

        The view type passed to the base class is read from the concrete
        class, so subclasses that override ``type`` are honoured.
        """
        view_type = self.__class__.type
        super().__init__(namespace, name, view_type, tables)

    @classmethod
    def from_db_views(
        klass,
        namespace: str,
        is_glean: bool,
        channels: List[Dict[str, str]],
        db_views: dict,
    ) -> Iterator[PingView]:
        """Get Looker views for a namespace.

        Args:
            namespace: Namespace handed to every view that is created.
            is_glean: Whether the namespace is a Glean one; nothing is
                yielded unless this matches ``klass.allow_glean``.
            channels: One dict per channel. ``"dataset"`` and
                ``"source_dataset"`` are read from each; ``"channel"`` is
                optional.
            db_views: Mapping of dataset name to ``{view_id: references}``.
                The last two elements of each reference are read as the
                referenced dataset and table names.

        Yields:
            One instance per view id, carrying the tables collected for that
            view id across all channels.

        Raises:
            KeyError: If a channel lacks ``"dataset"``/``"source_dataset"``
                or its dataset is missing from ``db_views``.
        """
        # Each class handles exactly one flavour of namespace.
        if bool(klass.allow_glean) != bool(is_glean):
            return

        # view id -> table id -> table entry; keying by table id dedupes
        # tables that are reached through more than one channel entry.
        tables_per_view: Dict[str, Dict[str, Dict[str, str]]] = defaultdict(dict)
        for channel in channels:
            dataset = channel["dataset"]

            for view_id, references in db_views[dataset].items():
                if view_id in OMIT_VIEWS:
                    continue

                # Only include those that select from a single ping source table
                # or union together multiple ping source tables of the same name.
                source_tables = {ref[-1] for ref in references}
                source_datasets = {ref[-2] for ref in references}
                if len(source_tables) != 1:
                    continue
                if channel["source_dataset"] not in source_datasets:
                    continue

                table_id = f"mozdata.{dataset}.{view_id}"
                entry: Dict[str, str] = {"table": table_id}
                channel_name = channel.get("channel")
                if channel_name is not None:
                    entry["channel"] = channel_name

                tables_per_view[view_id][table_id] = entry

        for view_id, tables_by_id in tables_per_view.items():
            yield klass(namespace, view_id, list(tables_by_id.values()))

    @classmethod
    def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> PingView:
        """Get a view from a name and dict definition.

        Only the ``"tables"`` entry of ``_dict`` is read.
        """
        tables = _dict["tables"]
        return klass(namespace, name, tables)

    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
        """Generate LookML for this view.

        Args:
            v1_name: Forwarded unchanged to ``get_dimensions`` and
                ``get_measures``.
            dryrun: Object forwarded to ``get_dimensions`` and used via
                ``dryrun.create(...).get_table_schema()`` to fetch the schema
                of the selected table.

        Returns:
            ``{"views": [...]}`` holding this view's definition followed by
            the nested dimension views generated from the table schema.

        Raises:
            IndexError: If ``self.tables`` is empty.
            ValueError: If the selected table id is not made of exactly
                three dot-separated parts.
        """
        view_defn: Dict[str, Any] = {"name": self.name}

        # use schema for the table where channel=="release" or the first one
        table = next(
            (t for t in self.tables if t.get("channel") == "release"),
            self.tables[0],
        )["table"]

        dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)

        # Classify every dimension exactly once.
        plain_dimensions: List[Dict[str, Any]] = []
        dimension_groups: List[Dict[str, Any]] = []
        for dimension in dimensions:
            if lookml_utils._is_dimension_group(dimension):
                dimension_groups.append(dimension)
            elif dimension["name"] == "document_id":
                # set document id field as a primary key for joins; a dict
                # display (unlike ``dict(**d, primary_key=...)``) does not
                # raise when the key already exists or a key is not a string.
                plain_dimensions.append({**dimension, "primary_key": "yes"})
            else:
                plain_dimensions.append(dimension)
        view_defn["dimensions"] = plain_dimensions
        view_defn["dimension_groups"] = dimension_groups

        # add measures
        view_defn["measures"] = self.get_measures(dimensions, table, v1_name)

        project, dataset, table_id = table.split(".")
        table_schema = dryrun.create(
            project=project,
            dataset=dataset,
            table=table_id,
        ).get_table_schema()
        nested_views = lookml_utils._generate_nested_dimension_views(
            table_schema, self.name
        )

        # Round-tripping through a dict to get an ordered deduped list.
        suggestions = list(
            dict.fromkeys(t["channel"] for t in self.tables if "channel" in t)
        )

        # A channel filter is only useful when there is a choice to make.
        if len(suggestions) > 1:
            view_defn["filters"] = [
                {
                    "name": "channel",
                    "type": "string",
                    "description": "Filter by the app's channel",
                    "sql": "{% condition %} ${TABLE}.normalized_channel {% endcondition %}",
                    "default_value": suggestions[0],
                    "suggestions": suggestions,
                }
            ]

        view_defn["sql_table_name"] = f"`{table}`"

        return {"views": [view_defn] + nested_views}

    def get_dimensions(
        self, table, v1_name: Optional[str], dryrun
    ) -> List[Dict[str, Any]]:
        """Get the set of dimensions for this view.

        Delegates to ``lookml_utils._generate_dimensions``; the result holds
        both dimensions and dimension groups. ``v1_name`` is accepted but not
        used by this implementation.
        """
        generated = lookml_utils._generate_dimensions(table, dryrun=dryrun)
        return generated

    def get_measures(
        self, dimensions: List[dict], table: str, v1_name: Optional[str]
    ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
        """Generate measures from a list of dimensions.

        A ``clients`` count-distinct measure is added when
        ``self.get_client_id`` returns a field name, and a single
        ``ping_count`` count measure is added when a ``document_id``
        dimension is present. When neither applies the list is empty.

        Args:
            dimensions: Dimension dicts; each must have a ``"name"`` key.
            table: Table id, forwarded to ``get_client_id``.
            v1_name: Accepted but not used by this implementation.

        Returns:
            The list of measure definitions, ``clients`` first.

        Any exception raised by ``get_client_id`` propagates to the caller.
        """
        measures: List[Dict[str, Union[str, List[Dict[str, str]]]]] = []

        client_id_field = self.get_client_id(dimensions, table)
        if client_id_field is not None:
            measures.append(
                {
                    "name": "clients",
                    "type": "count_distinct",
                    "sql": f"${{{client_id_field}}}",
                }
            )

        # Emit ping_count at most once so repeated document_id dimensions
        # cannot produce duplicate measure names.
        if any(dimension["name"] == "document_id" for dimension in dimensions):
            measures.append({"name": "ping_count", "type": "count"})

        return measures
A view on a ping table.
PingView(namespace: str, name: str, tables: List[Dict[str, Any]])
def __init__(self, namespace: str, name: str, tables: List[Dict[str, Any]]):
    """Create instance of a PingView.

    The view type passed to the base class is read from the concrete class,
    so subclasses that override ``type`` are honoured.
    """
    view_type = self.__class__.type
    super().__init__(namespace, name, view_type, tables)
Create instance of a PingView.
@classmethod
def
from_db_views( klass, namespace: str, is_glean: bool, channels: List[Dict[str, str]], db_views: dict) -> Iterator[PingView]:
@classmethod
def from_db_views(
    klass,
    namespace: str,
    is_glean: bool,
    channels: List[Dict[str, str]],
    db_views: dict,
) -> Iterator[PingView]:
    """Get Looker views for a namespace.

    Args:
        namespace: Namespace handed to every view that is created.
        is_glean: Whether the namespace is a Glean one; nothing is yielded
            unless this matches ``klass.allow_glean``.
        channels: One dict per channel. ``"dataset"`` and
            ``"source_dataset"`` are read from each; ``"channel"`` is
            optional.
        db_views: Mapping of dataset name to ``{view_id: references}``. The
            last two elements of each reference are read as the referenced
            dataset and table names.

    Yields:
        One instance per view id, carrying the tables collected for that
        view id across all channels.

    Raises:
        KeyError: If a channel lacks ``"dataset"``/``"source_dataset"`` or
            its dataset is missing from ``db_views``.
    """
    # Each class handles exactly one flavour of namespace.
    if bool(klass.allow_glean) != bool(is_glean):
        return

    # view id -> table id -> table entry; keying by table id dedupes tables
    # that are reached through more than one channel entry.
    tables_per_view: Dict[str, Dict[str, Dict[str, str]]] = defaultdict(dict)
    for channel in channels:
        dataset = channel["dataset"]

        for view_id, references in db_views[dataset].items():
            if view_id in OMIT_VIEWS:
                continue

            # Only include those that select from a single ping source table
            # or union together multiple ping source tables of the same name.
            source_tables = {ref[-1] for ref in references}
            source_datasets = {ref[-2] for ref in references}
            if len(source_tables) != 1:
                continue
            if channel["source_dataset"] not in source_datasets:
                continue

            table_id = f"mozdata.{dataset}.{view_id}"
            entry: Dict[str, str] = {"table": table_id}
            channel_name = channel.get("channel")
            if channel_name is not None:
                entry["channel"] = channel_name

            tables_per_view[view_id][table_id] = entry

    for view_id, tables_by_id in tables_per_view.items():
        yield klass(namespace, view_id, list(tables_by_id.values()))
Get Looker views for a namespace.
@classmethod
def
from_dict( klass, namespace: str, name: str, _dict: generator.views.view.ViewDict) -> PingView:
@classmethod
def from_dict(klass, namespace: str, name: str, _dict: ViewDict) -> PingView:
    """Get a view from a name and dict definition.

    Only the ``"tables"`` entry of ``_dict`` is read.
    """
    tables = _dict["tables"]
    return klass(namespace, name, tables)
Get a view from a name and dict definition.
def
to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
    """Generate LookML for this view.

    Args:
        v1_name: Forwarded unchanged to ``get_dimensions`` and
            ``get_measures``.
        dryrun: Object forwarded to ``get_dimensions`` and used via
            ``dryrun.create(...).get_table_schema()`` to fetch the schema of
            the selected table.

    Returns:
        ``{"views": [...]}`` holding this view's definition followed by the
        nested dimension views generated from the table schema.

    Raises:
        IndexError: If ``self.tables`` is empty.
        ValueError: If the selected table id is not made of exactly three
            dot-separated parts.
    """
    view_defn: Dict[str, Any] = {"name": self.name}

    # use schema for the table where channel=="release" or the first one
    table = next(
        (t for t in self.tables if t.get("channel") == "release"),
        self.tables[0],
    )["table"]

    dimensions = self.get_dimensions(table, v1_name, dryrun=dryrun)

    # Classify every dimension exactly once.
    plain_dimensions: List[Dict[str, Any]] = []
    dimension_groups: List[Dict[str, Any]] = []
    for dimension in dimensions:
        if lookml_utils._is_dimension_group(dimension):
            dimension_groups.append(dimension)
        elif dimension["name"] == "document_id":
            # set document id field as a primary key for joins; a dict
            # display (unlike ``dict(**d, primary_key=...)``) does not raise
            # when the key already exists or a key is not a string.
            plain_dimensions.append({**dimension, "primary_key": "yes"})
        else:
            plain_dimensions.append(dimension)
    view_defn["dimensions"] = plain_dimensions
    view_defn["dimension_groups"] = dimension_groups

    # add measures
    view_defn["measures"] = self.get_measures(dimensions, table, v1_name)

    project, dataset, table_id = table.split(".")
    table_schema = dryrun.create(
        project=project,
        dataset=dataset,
        table=table_id,
    ).get_table_schema()
    nested_views = lookml_utils._generate_nested_dimension_views(
        table_schema, self.name
    )

    # Round-tripping through a dict to get an ordered deduped list.
    suggestions = list(
        dict.fromkeys(t["channel"] for t in self.tables if "channel" in t)
    )

    # A channel filter is only useful when there is a choice to make.
    if len(suggestions) > 1:
        view_defn["filters"] = [
            {
                "name": "channel",
                "type": "string",
                "description": "Filter by the app's channel",
                "sql": "{% condition %} ${TABLE}.normalized_channel {% endcondition %}",
                "default_value": suggestions[0],
                "suggestions": suggestions,
            }
        ]

    view_defn["sql_table_name"] = f"`{table}`"

    return {"views": [view_defn] + nested_views}
Generate LookML for this view.
def
get_dimensions(self, table, v1_name: Optional[str], dryrun) -> List[Dict[str, Any]]:
def get_dimensions(
    self, table, v1_name: Optional[str], dryrun
) -> List[Dict[str, Any]]:
    """Get the set of dimensions for this view.

    Delegates to ``lookml_utils._generate_dimensions``; the result holds both
    dimensions and dimension groups. ``v1_name`` is accepted but not used by
    this implementation.
    """
    generated = lookml_utils._generate_dimensions(table, dryrun=dryrun)
    return generated
Get the set of dimensions for this view.
def
get_measures( self, dimensions: List[dict], table: str, v1_name: Optional[str]) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
def get_measures(
    self, dimensions: List[dict], table: str, v1_name: Optional[str]
) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
    """Generate measures from a list of dimensions.

    A ``clients`` count-distinct measure is added when
    ``self.get_client_id`` returns a field name, and a single ``ping_count``
    count measure is added when a ``document_id`` dimension is present. When
    neither applies the list is empty.

    Args:
        dimensions: Dimension dicts; each must have a ``"name"`` key.
        table: Table id, forwarded to ``get_client_id``.
        v1_name: Accepted but not used by this implementation.

    Returns:
        The list of measure definitions, ``clients`` first.

    Any exception raised by ``get_client_id`` propagates to the caller.
    """
    measures: List[Dict[str, Union[str, List[Dict[str, str]]]]] = []

    client_id_field = self.get_client_id(dimensions, table)
    if client_id_field is not None:
        measures.append(
            {
                "name": "clients",
                "type": "count_distinct",
                "sql": f"${{{client_id_field}}}",
            }
        )

    # Emit ping_count at most once so repeated document_id dimensions cannot
    # produce duplicate measure names.
    if any(dimension["name"] == "document_id" for dimension in dimensions):
        measures.append({"name": "ping_count", "type": "count"})

    return measures
Generate measures from a list of dimensions.
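Adds a "clients" count_distinct measure when get_client_id finds a client id field, and a "ping_count" count measure when a document_id dimension is present. When neither is found, an empty list is returned.
This method does not check for duplicate measures itself; any exception raised by get_client_id propagates to the caller.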