generator.views.funnel_analysis_view
Class to describe a Funnel Analysis View.
We create a single View file and a single Explore file.
The View file has many Looker views defined within it:
funnel_analysis: Based on events_daily, has the events
string and user dimensions (e.g. country)
event_names: The names of events. Used for suggestions.
event_N: For each possible funnel step, a single view. This is used to define what that funnel step is.
The Explore's job is to take this generated file an link all those event_N's to the funnel_analysis.
We join them via cross join, because event_N views only have 1 row and 1 column - the match_string
to use for a regex_match on the events
string in funnel_analysis.
For example, say we filter event_1 on event
: WHERE event in ("session-start, "session-end")
Then we join that with funnel_analysis: FROM funnel_analysis CROSS JOIN event_1
That lets us find out whether the user completed those funnel steps:
SELECT REGEXP_CONTAINS(funnel_analysis.events, event_1.match_string) AS completed_step_1
The funnel_analysis
view has some nice dimensions to hide these details from the end user,
e.g. completed_funnel_step_N
. We can then count those users across dimensions.
1"""Class to describe a Funnel Analysis View. 2 3We create a single View file and a single Explore file. 4 5The View file has many Looker views defined within it: 6 funnel_analysis: Based on events_daily, has the `events` string and user dimensions (e.g. country) 7 event_names: The names of events. Used for suggestions. 8 event_N: For each possible funnel step, a single view. This is used to define what that funnel step is. 9 10The Explore's job is to take this generated file an link all those event_N's to the funnel_analysis. 11We join them via cross join, because event_N views only have 1 row and 1 column - the match_string 12to use for a regex_match on the `events` string in funnel_analysis. 13 14For example, say we filter event_1 on `event`: `WHERE event in ("session-start, "session-end")` 15Then we join that with funnel_analysis: `FROM funnel_analysis CROSS JOIN event_1` 16That lets us find out whether the user completed those funnel steps: 17 `SELECT REGEXP_CONTAINS(funnel_analysis.events, event_1.match_string) AS completed_step_1` 18 19The `funnel_analysis` view has some nice dimensions to hide these details from the end user, 20e.g. `completed_funnel_step_N`. We can then count those users across dimensions. 21""" 22 23from __future__ import annotations 24 25from textwrap import dedent 26from typing import Any, Dict, Iterator, List, Optional 27 28from .view import View, ViewDict 29 30DEFAULT_NUM_FUNNEL_STEPS: int = 4 31 32 33class FunnelAnalysisView(View): 34 """A view for doing Funnel Analysis.""" 35 36 type: str = "funnel_analysis_view" 37 38 def __init__(self, namespace: str, tables: List[Dict[str, str]]): 39 """Get an instance of a FunnelAnalysisView.""" 40 super().__init__(namespace, "funnel_analysis", FunnelAnalysisView.type, tables) 41 42 @classmethod 43 def from_db_views( 44 klass, 45 namespace: str, 46 is_glean: bool, 47 channels: List[Dict[str, str]], 48 db_views: dict, 49 num_funnel_steps: int = DEFAULT_NUM_FUNNEL_STEPS, 50 ) -> Iterator[FunnelAnalysisView]: 51 """Get Client Count Views from db views and app variants. 52 53 We only create a FunnelAnalysisView if we have the two necessary db tables: 54 - events_daily 55 - event_types 56 """ 57 # We can guarantee there will always be at least one channel, 58 # because this comes from the associated _get_glean_repos in 59 # namespaces.py 60 dataset = next( 61 (channel for channel in channels if channel.get("channel") == "release"), 62 channels[0], 63 )["dataset"] 64 65 necessary_views = {"events_daily", "event_types"} 66 actual_views = {} 67 for view_id, references in db_views[dataset].items(): 68 if view_id in necessary_views: 69 actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`" 70 71 if len(actual_views) == 2: 72 # Only create an instance if we have the two necessary tables 73 tables = { 74 "funnel_analysis": "events_daily_table", 75 "event_types": actual_views["event_types"], 76 } 77 tables.update( 78 {f"step_{i}": "event_types" for i in range(1, num_funnel_steps + 1)} 79 ) 80 yield FunnelAnalysisView( 81 namespace, 82 [tables], 83 ) 84 85 @classmethod 86 def from_dict( 87 klass, namespace: str, name: str, _dict: ViewDict 88 ) -> FunnelAnalysisView: 89 """Get a FunnalAnalysisView from a dict representation.""" 90 return FunnelAnalysisView(namespace, _dict["tables"]) 91 92 def to_lookml(self, bq_client, v1_name: Optional[str]) -> Dict[str, Any]: 93 """Get this view as LookML.""" 94 return { 95 "includes": [f"{self.tables[0]['funnel_analysis']}.view.lkml"], 96 "views": self._funnel_analysis_lookml() + self._event_types_lookml(), 97 } 98 99 def n_events(self) -> int: 100 """Get the number of events allowed in this funnel.""" 101 return len([k for k in self.tables[0] if k.startswith("step_")]) 102 103 def _funnel_analysis_lookml(self) -> List[Dict[str, Any]]: 104 dimensions = [ 105 { 106 "name": f"completed_step_{n}", 107 "type": "yesno", 108 "description": f"Whether the user completed step {n} on the associated day.", 109 "sql": dedent( 110 f""" 111 REGEXP_CONTAINS( 112 ${{TABLE}}.events, mozfun.event_analysis.create_funnel_regex( 113 [{", ".join([ 114 f'${{step_{ni}.match_string}}' for ni in range(1, n + 1) 115 ])}], 116 True 117 ) 118 ) 119 """ 120 ), 121 } 122 for n in range(1, self.n_events() + 1) 123 ] 124 125 count_measures: List[Dict[str, Any]] = [ 126 { 127 "name": f"count_completed_step_{n}", 128 "description": ( 129 f"The number of times that step {n} was completed. " 130 "Grouping by day makes this a count of users who completed " 131 f"step {n} on each day." 132 ), 133 "type": "count", 134 "filters": [{f"completed_step_{ni}": "yes"} for ni in range(1, n + 1)], 135 } 136 for n in range(1, self.n_events() + 1) 137 ] 138 139 fractional_measures: List[Dict[str, Any]] = [ 140 { 141 "name": f"fraction_completed_step_{n}", 142 "description": f"Of the user-days that completed Step 1, the fraction that completed step {n}.", 143 "type": "number", 144 "value_format": "0.00%", 145 "sql": f"SAFE_DIVIDE(${{count_completed_step_{n}}}, ${{count_completed_step_1}})", 146 } 147 for n in range(1, self.n_events() + 1) 148 ] 149 return [ 150 { 151 "name": "funnel_analysis", 152 "extends": ["events_daily_table"], 153 "dimensions": dimensions, 154 "measures": count_measures + fractional_measures, 155 } 156 ] 157 158 def _event_types_lookml(self) -> List[Dict[str, Any]]: 159 events = ( 160 [ 161 { 162 "name": "event_types", 163 "derived_table": { 164 "sql": dedent( 165 f""" 166 SELECT 167 mozfun.event_analysis.aggregate_match_strings( 168 ARRAY_AGG( 169 DISTINCT CONCAT( 170 {{% if _filters['property_name'] or _filters['property_value'] -%}} 171 COALESCE(mozfun.event_analysis.escape_metachars(property_value.value), ''), 172 {{% endif -%}} 173 mozfun.event_analysis.event_index_to_match_string(et.index) 174 ) 175 ) 176 ) AS match_string 177 FROM 178 {self.tables[0]['event_types']} AS et 179 LEFT JOIN 180 UNNEST(COALESCE(event_properties, [])) AS properties 181 LEFT JOIN 182 UNNEST(properties.value) AS property_value 183 WHERE 184 {{% condition category %}} category {{% endcondition %}} 185 AND {{% condition event %}} event {{% endcondition %}} 186 AND {{% condition property_name %}} properties.key {{% endcondition %}} 187 AND {{% condition property_value %}} property_value.key {{% endcondition %}} 188 """ 189 ), 190 }, 191 "filters": [ 192 { 193 "name": "category", 194 "description": "The event category, as defined in metrics.yaml.", 195 "type": "string", 196 "suggest_explore": "event_names", 197 "suggest_dimension": "event_names.category", 198 }, 199 { 200 "name": "event", 201 "description": "The event name.", 202 "type": "string", 203 "suggest_explore": "event_names", 204 "suggest_dimension": "event_names.event", 205 }, 206 { 207 "name": "property_name", 208 "description": "The event property name.", 209 "type": "string", 210 "suggest_explore": "event_names", 211 "suggest_dimension": "event_names.property_name", 212 }, 213 { 214 "name": "property_value", 215 "description": "The event property value.", 216 "type": "string", 217 "suggest_explore": "event_names", 218 "suggest_dimension": "event_names.property_value", 219 }, 220 ], 221 "dimensions": [ 222 { 223 "name": "match_string", 224 "hidden": "yes", 225 "sql": "${TABLE}.match_string", 226 } 227 ], 228 } 229 ] 230 + [ 231 { 232 "name": f"step_{n}", 233 "extends": ["event_types"], 234 } 235 for n in range(1, self.n_events() + 1) 236 ] 237 + [ 238 { 239 "name": "event_names", 240 "derived_table": { 241 "sql": ( 242 "SELECT category, " 243 " event, " 244 " property.key AS property_name, " 245 " property_value.key AS property_value, " 246 " property_value.index as property_index " 247 f"FROM {self.tables[0]['event_types']} " 248 "LEFT JOIN UNNEST(event_properties) AS property " 249 "LEFT JOIN UNNEST(property.value) AS property_value " 250 ) 251 }, 252 "dimensions": [ 253 { 254 "name": "category", 255 "type": "string", 256 "sql": "${TABLE}.category", 257 }, 258 { 259 "name": "event", 260 "type": "string", 261 "sql": "${TABLE}.event", 262 }, 263 { 264 "name": "property_name", 265 "type": "string", 266 "sql": "${TABLE}.property_name", 267 }, 268 { 269 "name": "property_value", 270 "type": "string", 271 "sql": "${TABLE}.property_value", 272 }, 273 ], 274 } 275 ] 276 ) 277 278 return events
34class FunnelAnalysisView(View): 35 """A view for doing Funnel Analysis.""" 36 37 type: str = "funnel_analysis_view" 38 39 def __init__(self, namespace: str, tables: List[Dict[str, str]]): 40 """Get an instance of a FunnelAnalysisView.""" 41 super().__init__(namespace, "funnel_analysis", FunnelAnalysisView.type, tables) 42 43 @classmethod 44 def from_db_views( 45 klass, 46 namespace: str, 47 is_glean: bool, 48 channels: List[Dict[str, str]], 49 db_views: dict, 50 num_funnel_steps: int = DEFAULT_NUM_FUNNEL_STEPS, 51 ) -> Iterator[FunnelAnalysisView]: 52 """Get Client Count Views from db views and app variants. 53 54 We only create a FunnelAnalysisView if we have the two necessary db tables: 55 - events_daily 56 - event_types 57 """ 58 # We can guarantee there will always be at least one channel, 59 # because this comes from the associated _get_glean_repos in 60 # namespaces.py 61 dataset = next( 62 (channel for channel in channels if channel.get("channel") == "release"), 63 channels[0], 64 )["dataset"] 65 66 necessary_views = {"events_daily", "event_types"} 67 actual_views = {} 68 for view_id, references in db_views[dataset].items(): 69 if view_id in necessary_views: 70 actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`" 71 72 if len(actual_views) == 2: 73 # Only create an instance if we have the two necessary tables 74 tables = { 75 "funnel_analysis": "events_daily_table", 76 "event_types": actual_views["event_types"], 77 } 78 tables.update( 79 {f"step_{i}": "event_types" for i in range(1, num_funnel_steps + 1)} 80 ) 81 yield FunnelAnalysisView( 82 namespace, 83 [tables], 84 ) 85 86 @classmethod 87 def from_dict( 88 klass, namespace: str, name: str, _dict: ViewDict 89 ) -> FunnelAnalysisView: 90 """Get a FunnalAnalysisView from a dict representation.""" 91 return FunnelAnalysisView(namespace, _dict["tables"]) 92 93 def to_lookml(self, bq_client, v1_name: Optional[str]) -> Dict[str, Any]: 94 """Get this view as LookML.""" 95 return { 96 "includes": [f"{self.tables[0]['funnel_analysis']}.view.lkml"], 97 "views": self._funnel_analysis_lookml() + self._event_types_lookml(), 98 } 99 100 def n_events(self) -> int: 101 """Get the number of events allowed in this funnel.""" 102 return len([k for k in self.tables[0] if k.startswith("step_")]) 103 104 def _funnel_analysis_lookml(self) -> List[Dict[str, Any]]: 105 dimensions = [ 106 { 107 "name": f"completed_step_{n}", 108 "type": "yesno", 109 "description": f"Whether the user completed step {n} on the associated day.", 110 "sql": dedent( 111 f""" 112 REGEXP_CONTAINS( 113 ${{TABLE}}.events, mozfun.event_analysis.create_funnel_regex( 114 [{", ".join([ 115 f'${{step_{ni}.match_string}}' for ni in range(1, n + 1) 116 ])}], 117 True 118 ) 119 ) 120 """ 121 ), 122 } 123 for n in range(1, self.n_events() + 1) 124 ] 125 126 count_measures: List[Dict[str, Any]] = [ 127 { 128 "name": f"count_completed_step_{n}", 129 "description": ( 130 f"The number of times that step {n} was completed. " 131 "Grouping by day makes this a count of users who completed " 132 f"step {n} on each day." 133 ), 134 "type": "count", 135 "filters": [{f"completed_step_{ni}": "yes"} for ni in range(1, n + 1)], 136 } 137 for n in range(1, self.n_events() + 1) 138 ] 139 140 fractional_measures: List[Dict[str, Any]] = [ 141 { 142 "name": f"fraction_completed_step_{n}", 143 "description": f"Of the user-days that completed Step 1, the fraction that completed step {n}.", 144 "type": "number", 145 "value_format": "0.00%", 146 "sql": f"SAFE_DIVIDE(${{count_completed_step_{n}}}, ${{count_completed_step_1}})", 147 } 148 for n in range(1, self.n_events() + 1) 149 ] 150 return [ 151 { 152 "name": "funnel_analysis", 153 "extends": ["events_daily_table"], 154 "dimensions": dimensions, 155 "measures": count_measures + fractional_measures, 156 } 157 ] 158 159 def _event_types_lookml(self) -> List[Dict[str, Any]]: 160 events = ( 161 [ 162 { 163 "name": "event_types", 164 "derived_table": { 165 "sql": dedent( 166 f""" 167 SELECT 168 mozfun.event_analysis.aggregate_match_strings( 169 ARRAY_AGG( 170 DISTINCT CONCAT( 171 {{% if _filters['property_name'] or _filters['property_value'] -%}} 172 COALESCE(mozfun.event_analysis.escape_metachars(property_value.value), ''), 173 {{% endif -%}} 174 mozfun.event_analysis.event_index_to_match_string(et.index) 175 ) 176 ) 177 ) AS match_string 178 FROM 179 {self.tables[0]['event_types']} AS et 180 LEFT JOIN 181 UNNEST(COALESCE(event_properties, [])) AS properties 182 LEFT JOIN 183 UNNEST(properties.value) AS property_value 184 WHERE 185 {{% condition category %}} category {{% endcondition %}} 186 AND {{% condition event %}} event {{% endcondition %}} 187 AND {{% condition property_name %}} properties.key {{% endcondition %}} 188 AND {{% condition property_value %}} property_value.key {{% endcondition %}} 189 """ 190 ), 191 }, 192 "filters": [ 193 { 194 "name": "category", 195 "description": "The event category, as defined in metrics.yaml.", 196 "type": "string", 197 "suggest_explore": "event_names", 198 "suggest_dimension": "event_names.category", 199 }, 200 { 201 "name": "event", 202 "description": "The event name.", 203 "type": "string", 204 "suggest_explore": "event_names", 205 "suggest_dimension": "event_names.event", 206 }, 207 { 208 "name": "property_name", 209 "description": "The event property name.", 210 "type": "string", 211 "suggest_explore": "event_names", 212 "suggest_dimension": "event_names.property_name", 213 }, 214 { 215 "name": "property_value", 216 "description": "The event property value.", 217 "type": "string", 218 "suggest_explore": "event_names", 219 "suggest_dimension": "event_names.property_value", 220 }, 221 ], 222 "dimensions": [ 223 { 224 "name": "match_string", 225 "hidden": "yes", 226 "sql": "${TABLE}.match_string", 227 } 228 ], 229 } 230 ] 231 + [ 232 { 233 "name": f"step_{n}", 234 "extends": ["event_types"], 235 } 236 for n in range(1, self.n_events() + 1) 237 ] 238 + [ 239 { 240 "name": "event_names", 241 "derived_table": { 242 "sql": ( 243 "SELECT category, " 244 " event, " 245 " property.key AS property_name, " 246 " property_value.key AS property_value, " 247 " property_value.index as property_index " 248 f"FROM {self.tables[0]['event_types']} " 249 "LEFT JOIN UNNEST(event_properties) AS property " 250 "LEFT JOIN UNNEST(property.value) AS property_value " 251 ) 252 }, 253 "dimensions": [ 254 { 255 "name": "category", 256 "type": "string", 257 "sql": "${TABLE}.category", 258 }, 259 { 260 "name": "event", 261 "type": "string", 262 "sql": "${TABLE}.event", 263 }, 264 { 265 "name": "property_name", 266 "type": "string", 267 "sql": "${TABLE}.property_name", 268 }, 269 { 270 "name": "property_value", 271 "type": "string", 272 "sql": "${TABLE}.property_value", 273 }, 274 ], 275 } 276 ] 277 ) 278 279 return events
A view for doing Funnel Analysis.
39 def __init__(self, namespace: str, tables: List[Dict[str, str]]): 40 """Get an instance of a FunnelAnalysisView.""" 41 super().__init__(namespace, "funnel_analysis", FunnelAnalysisView.type, tables)
Get an instance of a FunnelAnalysisView.
43 @classmethod 44 def from_db_views( 45 klass, 46 namespace: str, 47 is_glean: bool, 48 channels: List[Dict[str, str]], 49 db_views: dict, 50 num_funnel_steps: int = DEFAULT_NUM_FUNNEL_STEPS, 51 ) -> Iterator[FunnelAnalysisView]: 52 """Get Client Count Views from db views and app variants. 53 54 We only create a FunnelAnalysisView if we have the two necessary db tables: 55 - events_daily 56 - event_types 57 """ 58 # We can guarantee there will always be at least one channel, 59 # because this comes from the associated _get_glean_repos in 60 # namespaces.py 61 dataset = next( 62 (channel for channel in channels if channel.get("channel") == "release"), 63 channels[0], 64 )["dataset"] 65 66 necessary_views = {"events_daily", "event_types"} 67 actual_views = {} 68 for view_id, references in db_views[dataset].items(): 69 if view_id in necessary_views: 70 actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`" 71 72 if len(actual_views) == 2: 73 # Only create an instance if we have the two necessary tables 74 tables = { 75 "funnel_analysis": "events_daily_table", 76 "event_types": actual_views["event_types"], 77 } 78 tables.update( 79 {f"step_{i}": "event_types" for i in range(1, num_funnel_steps + 1)} 80 ) 81 yield FunnelAnalysisView( 82 namespace, 83 [tables], 84 )
Get Client Count Views from db views and app variants.
We only create a FunnelAnalysisView if we have the two necessary db tables: - events_daily - event_types
86 @classmethod 87 def from_dict( 88 klass, namespace: str, name: str, _dict: ViewDict 89 ) -> FunnelAnalysisView: 90 """Get a FunnalAnalysisView from a dict representation.""" 91 return FunnelAnalysisView(namespace, _dict["tables"])
Get a FunnalAnalysisView from a dict representation.
93 def to_lookml(self, bq_client, v1_name: Optional[str]) -> Dict[str, Any]: 94 """Get this view as LookML.""" 95 return { 96 "includes": [f"{self.tables[0]['funnel_analysis']}.view.lkml"], 97 "views": self._funnel_analysis_lookml() + self._event_types_lookml(), 98 }
Get this view as LookML.