generator.views.funnel_analysis_view
Class to describe a Funnel Analysis View.
We create a single View file and a single Explore file.
The View file has many Looker views defined within it:
funnel_analysis: Based on events_daily, has the events
string and user dimensions (e.g. country)
event_names: The names of events. Used for suggestions.
event_N: For each possible funnel step, a single view. This is used to define what that funnel step is.
The Explore's job is to take this generated file an link all those event_N's to the funnel_analysis.
We join them via cross join, because event_N views only have 1 row and 1 column - the match_string
to use for a regex_match on the events
string in funnel_analysis.
For example, say we filter event_1 on event
: WHERE event in ("session-start, "session-end")
Then we join that with funnel_analysis: FROM funnel_analysis CROSS JOIN event_1
That lets us find out whether the user completed those funnel steps:
SELECT REGEXP_CONTAINS(funnel_analysis.events, event_1.match_string) AS completed_step_1
The funnel_analysis
view has some nice dimensions to hide these details from the end user,
e.g. completed_funnel_step_N
. We can then count those users across dimensions.
1"""Class to describe a Funnel Analysis View. 2 3We create a single View file and a single Explore file. 4 5The View file has many Looker views defined within it: 6 funnel_analysis: Based on events_daily, has the `events` string and user dimensions (e.g. country) 7 event_names: The names of events. Used for suggestions. 8 event_N: For each possible funnel step, a single view. This is used to define what that funnel step is. 9 10The Explore's job is to take this generated file an link all those event_N's to the funnel_analysis. 11We join them via cross join, because event_N views only have 1 row and 1 column - the match_string 12to use for a regex_match on the `events` string in funnel_analysis. 13 14For example, say we filter event_1 on `event`: `WHERE event in ("session-start, "session-end")` 15Then we join that with funnel_analysis: `FROM funnel_analysis CROSS JOIN event_1` 16That lets us find out whether the user completed those funnel steps: 17 `SELECT REGEXP_CONTAINS(funnel_analysis.events, event_1.match_string) AS completed_step_1` 18 19The `funnel_analysis` view has some nice dimensions to hide these details from the end user, 20e.g. `completed_funnel_step_N`. We can then count those users across dimensions. 21""" 22 23from __future__ import annotations 24 25from textwrap import dedent 26from typing import Any, Dict, Iterator, List, Optional 27 28from .view import View, ViewDict 29 30DEFAULT_NUM_FUNNEL_STEPS: int = 4 31 32 33class FunnelAnalysisView(View): 34 """A view for doing Funnel Analysis.""" 35 36 type: str = "funnel_analysis_view" 37 38 def __init__(self, namespace: str, tables: List[Dict[str, str]]): 39 """Get an instance of a FunnelAnalysisView.""" 40 super().__init__(namespace, "funnel_analysis", FunnelAnalysisView.type, tables) 41 42 @classmethod 43 def from_db_views( 44 klass, 45 namespace: str, 46 is_glean: bool, 47 channels: List[Dict[str, str]], 48 db_views: dict, 49 num_funnel_steps: int = DEFAULT_NUM_FUNNEL_STEPS, 50 ) -> Iterator[FunnelAnalysisView]: 51 """Get Client Count Views from db views and app variants. 52 53 We only create a FunnelAnalysisView if we have the two necessary db tables: 54 - events_daily 55 - event_types 56 """ 57 # We can guarantee there will always be at least one channel, 58 # because this comes from the associated _get_glean_repos in 59 # namespaces.py 60 dataset = next( 61 (channel for channel in channels if channel.get("channel") == "release"), 62 channels[0], 63 )["dataset"] 64 65 necessary_views = {"events_daily", "event_types"} 66 actual_views = {} 67 for view_id, references in db_views[dataset].items(): 68 if view_id in necessary_views: 69 actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`" 70 71 if len(actual_views) == 2: 72 # Only create an instance if we have the two necessary tables 73 tables = { 74 "funnel_analysis": "events_daily_table", 75 "event_types": actual_views["event_types"], 76 } 77 tables.update( 78 {f"step_{i}": "event_types" for i in range(1, num_funnel_steps + 1)} 79 ) 80 yield FunnelAnalysisView( 81 namespace, 82 [tables], 83 ) 84 85 @classmethod 86 def from_dict( 87 klass, namespace: str, name: str, _dict: ViewDict 88 ) -> FunnelAnalysisView: 89 """Get a FunnalAnalysisView from a dict representation.""" 90 return FunnelAnalysisView(namespace, _dict["tables"]) 91 92 def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 93 """Get this view as LookML.""" 94 return { 95 "includes": [f"{self.tables[0]['funnel_analysis']}.view.lkml"], 96 "views": self._funnel_analysis_lookml() + self._event_types_lookml(), 97 } 98 99 def n_events(self) -> int: 100 """Get the number of events allowed in this funnel.""" 101 return len([k for k in self.tables[0] if k.startswith("step_")]) 102 103 def _funnel_analysis_lookml(self) -> List[Dict[str, Any]]: 104 dimensions = [ 105 { 106 "name": f"completed_step_{n}", 107 "type": "yesno", 108 "description": f"Whether the user completed step {n} on the associated day.", 109 "sql": dedent( 110 f""" 111 REGEXP_CONTAINS( 112 ${{TABLE}}.events, mozfun.event_analysis.create_funnel_regex( 113 [{", ".join([ 114 f'${{step_{ni}.match_string}}' for ni in range(1, n + 1) 115 ])}], 116 True 117 ) 118 ) 119 """ 120 ), 121 } 122 for n in range(1, self.n_events() + 1) 123 ] 124 125 count_measures: List[Dict[str, Any]] = [ 126 { 127 "name": f"count_completed_step_{n}", 128 "description": ( 129 f"The number of times that step {n} was completed. " 130 "Grouping by day makes this a count of users who completed " 131 f"step {n} on each day." 132 ), 133 "type": "count", 134 "filters": [{f"completed_step_{ni}": "yes"} for ni in range(1, n + 1)], 135 } 136 for n in range(1, self.n_events() + 1) 137 ] 138 139 fractional_measures: List[Dict[str, Any]] = [ 140 { 141 "name": f"fraction_completed_step_{n}", 142 "description": f"Of the user-days that completed Step 1, the fraction that completed step {n}.", 143 "type": "number", 144 "value_format": "0.00%", 145 "sql": f"SAFE_DIVIDE(${{count_completed_step_{n}}}, ${{count_completed_step_1}})", 146 } 147 for n in range(1, self.n_events() + 1) 148 ] 149 return [ 150 { 151 "name": "funnel_analysis", 152 "extends": ["events_daily_table"], 153 "dimensions": dimensions, 154 "measures": count_measures + fractional_measures, 155 } 156 ] 157 158 def _event_types_lookml(self) -> List[Dict[str, Any]]: 159 events = ( 160 [ 161 { 162 "name": "event_types", 163 "derived_table": { 164 "sql": dedent( 165 f""" 166 SELECT 167 mozfun.event_analysis.aggregate_match_strings( 168 ARRAY_AGG( 169 DISTINCT 170 {{% if _filters['property_value'] -%}} 171 mozfun.event_analysis.event_property_value_to_match_string( 172 et.index, 173 properties.index, 174 property_value.value 175 ) 176 {{% elsif _filters['property_name'] -%}} 177 mozfun.event_analysis.event_property_index_to_match_string( 178 et.index, 179 properties.index 180 ) 181 {{% else -%}} 182 mozfun.event_analysis.event_index_to_match_string(et.index) 183 {{% endif -%}} 184 IGNORE NULLS 185 ) 186 ) AS match_string 187 FROM 188 {self.tables[0]['event_types']} AS et 189 LEFT JOIN 190 UNNEST(COALESCE(event_properties, [])) AS properties 191 LEFT JOIN 192 UNNEST(properties.value) AS property_value 193 WHERE 194 {{% condition category %}} category {{% endcondition %}} 195 AND {{% condition event %}} event {{% endcondition %}} 196 AND {{% condition property_name %}} properties.key {{% endcondition %}} 197 AND {{% condition property_value %}} property_value.key {{% endcondition %}} 198 """ 199 ), 200 }, 201 "filters": [ 202 { 203 "name": "category", 204 "description": "The event category, as defined in metrics.yaml.", 205 "type": "string", 206 "suggest_explore": "event_names", 207 "suggest_dimension": "event_names.category", 208 }, 209 { 210 "name": "event", 211 "description": "The event name.", 212 "type": "string", 213 "suggest_explore": "event_names", 214 "suggest_dimension": "event_names.event", 215 }, 216 { 217 "name": "property_name", 218 "description": "The event property name.", 219 "type": "string", 220 "suggest_explore": "event_names", 221 "suggest_dimension": "event_names.property_name", 222 }, 223 { 224 "name": "property_value", 225 "description": "The event property value.", 226 "type": "string", 227 "suggest_explore": "event_names", 228 "suggest_dimension": "event_names.property_value", 229 }, 230 ], 231 "dimensions": [ 232 { 233 "name": "match_string", 234 "hidden": "yes", 235 "sql": "${TABLE}.match_string", 236 } 237 ], 238 } 239 ] 240 + [ 241 { 242 "name": f"step_{n}", 243 "extends": ["event_types"], 244 } 245 for n in range(1, self.n_events() + 1) 246 ] 247 + [ 248 { 249 "name": "event_names", 250 "derived_table": { 251 "sql": ( 252 "SELECT category, " 253 " event, " 254 " property.key AS property_name, " 255 " property_value.key AS property_value, " 256 " property_value.index as property_index " 257 f"FROM {self.tables[0]['event_types']} " 258 "LEFT JOIN UNNEST(event_properties) AS property " 259 "LEFT JOIN UNNEST(property.value) AS property_value " 260 ) 261 }, 262 "dimensions": [ 263 { 264 "name": "category", 265 "type": "string", 266 "sql": "${TABLE}.category", 267 }, 268 { 269 "name": "event", 270 "type": "string", 271 "sql": "${TABLE}.event", 272 }, 273 { 274 "name": "property_name", 275 "type": "string", 276 "sql": "${TABLE}.property_name", 277 }, 278 { 279 "name": "property_value", 280 "type": "string", 281 "sql": "${TABLE}.property_value", 282 }, 283 ], 284 } 285 ] 286 ) 287 288 return events
34class FunnelAnalysisView(View): 35 """A view for doing Funnel Analysis.""" 36 37 type: str = "funnel_analysis_view" 38 39 def __init__(self, namespace: str, tables: List[Dict[str, str]]): 40 """Get an instance of a FunnelAnalysisView.""" 41 super().__init__(namespace, "funnel_analysis", FunnelAnalysisView.type, tables) 42 43 @classmethod 44 def from_db_views( 45 klass, 46 namespace: str, 47 is_glean: bool, 48 channels: List[Dict[str, str]], 49 db_views: dict, 50 num_funnel_steps: int = DEFAULT_NUM_FUNNEL_STEPS, 51 ) -> Iterator[FunnelAnalysisView]: 52 """Get Client Count Views from db views and app variants. 53 54 We only create a FunnelAnalysisView if we have the two necessary db tables: 55 - events_daily 56 - event_types 57 """ 58 # We can guarantee there will always be at least one channel, 59 # because this comes from the associated _get_glean_repos in 60 # namespaces.py 61 dataset = next( 62 (channel for channel in channels if channel.get("channel") == "release"), 63 channels[0], 64 )["dataset"] 65 66 necessary_views = {"events_daily", "event_types"} 67 actual_views = {} 68 for view_id, references in db_views[dataset].items(): 69 if view_id in necessary_views: 70 actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`" 71 72 if len(actual_views) == 2: 73 # Only create an instance if we have the two necessary tables 74 tables = { 75 "funnel_analysis": "events_daily_table", 76 "event_types": actual_views["event_types"], 77 } 78 tables.update( 79 {f"step_{i}": "event_types" for i in range(1, num_funnel_steps + 1)} 80 ) 81 yield FunnelAnalysisView( 82 namespace, 83 [tables], 84 ) 85 86 @classmethod 87 def from_dict( 88 klass, namespace: str, name: str, _dict: ViewDict 89 ) -> FunnelAnalysisView: 90 """Get a FunnalAnalysisView from a dict representation.""" 91 return FunnelAnalysisView(namespace, _dict["tables"]) 92 93 def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 94 """Get this view as LookML.""" 95 return { 96 "includes": [f"{self.tables[0]['funnel_analysis']}.view.lkml"], 97 "views": self._funnel_analysis_lookml() + self._event_types_lookml(), 98 } 99 100 def n_events(self) -> int: 101 """Get the number of events allowed in this funnel.""" 102 return len([k for k in self.tables[0] if k.startswith("step_")]) 103 104 def _funnel_analysis_lookml(self) -> List[Dict[str, Any]]: 105 dimensions = [ 106 { 107 "name": f"completed_step_{n}", 108 "type": "yesno", 109 "description": f"Whether the user completed step {n} on the associated day.", 110 "sql": dedent( 111 f""" 112 REGEXP_CONTAINS( 113 ${{TABLE}}.events, mozfun.event_analysis.create_funnel_regex( 114 [{", ".join([ 115 f'${{step_{ni}.match_string}}' for ni in range(1, n + 1) 116 ])}], 117 True 118 ) 119 ) 120 """ 121 ), 122 } 123 for n in range(1, self.n_events() + 1) 124 ] 125 126 count_measures: List[Dict[str, Any]] = [ 127 { 128 "name": f"count_completed_step_{n}", 129 "description": ( 130 f"The number of times that step {n} was completed. " 131 "Grouping by day makes this a count of users who completed " 132 f"step {n} on each day." 133 ), 134 "type": "count", 135 "filters": [{f"completed_step_{ni}": "yes"} for ni in range(1, n + 1)], 136 } 137 for n in range(1, self.n_events() + 1) 138 ] 139 140 fractional_measures: List[Dict[str, Any]] = [ 141 { 142 "name": f"fraction_completed_step_{n}", 143 "description": f"Of the user-days that completed Step 1, the fraction that completed step {n}.", 144 "type": "number", 145 "value_format": "0.00%", 146 "sql": f"SAFE_DIVIDE(${{count_completed_step_{n}}}, ${{count_completed_step_1}})", 147 } 148 for n in range(1, self.n_events() + 1) 149 ] 150 return [ 151 { 152 "name": "funnel_analysis", 153 "extends": ["events_daily_table"], 154 "dimensions": dimensions, 155 "measures": count_measures + fractional_measures, 156 } 157 ] 158 159 def _event_types_lookml(self) -> List[Dict[str, Any]]: 160 events = ( 161 [ 162 { 163 "name": "event_types", 164 "derived_table": { 165 "sql": dedent( 166 f""" 167 SELECT 168 mozfun.event_analysis.aggregate_match_strings( 169 ARRAY_AGG( 170 DISTINCT 171 {{% if _filters['property_value'] -%}} 172 mozfun.event_analysis.event_property_value_to_match_string( 173 et.index, 174 properties.index, 175 property_value.value 176 ) 177 {{% elsif _filters['property_name'] -%}} 178 mozfun.event_analysis.event_property_index_to_match_string( 179 et.index, 180 properties.index 181 ) 182 {{% else -%}} 183 mozfun.event_analysis.event_index_to_match_string(et.index) 184 {{% endif -%}} 185 IGNORE NULLS 186 ) 187 ) AS match_string 188 FROM 189 {self.tables[0]['event_types']} AS et 190 LEFT JOIN 191 UNNEST(COALESCE(event_properties, [])) AS properties 192 LEFT JOIN 193 UNNEST(properties.value) AS property_value 194 WHERE 195 {{% condition category %}} category {{% endcondition %}} 196 AND {{% condition event %}} event {{% endcondition %}} 197 AND {{% condition property_name %}} properties.key {{% endcondition %}} 198 AND {{% condition property_value %}} property_value.key {{% endcondition %}} 199 """ 200 ), 201 }, 202 "filters": [ 203 { 204 "name": "category", 205 "description": "The event category, as defined in metrics.yaml.", 206 "type": "string", 207 "suggest_explore": "event_names", 208 "suggest_dimension": "event_names.category", 209 }, 210 { 211 "name": "event", 212 "description": "The event name.", 213 "type": "string", 214 "suggest_explore": "event_names", 215 "suggest_dimension": "event_names.event", 216 }, 217 { 218 "name": "property_name", 219 "description": "The event property name.", 220 "type": "string", 221 "suggest_explore": "event_names", 222 "suggest_dimension": "event_names.property_name", 223 }, 224 { 225 "name": "property_value", 226 "description": "The event property value.", 227 "type": "string", 228 "suggest_explore": "event_names", 229 "suggest_dimension": "event_names.property_value", 230 }, 231 ], 232 "dimensions": [ 233 { 234 "name": "match_string", 235 "hidden": "yes", 236 "sql": "${TABLE}.match_string", 237 } 238 ], 239 } 240 ] 241 + [ 242 { 243 "name": f"step_{n}", 244 "extends": ["event_types"], 245 } 246 for n in range(1, self.n_events() + 1) 247 ] 248 + [ 249 { 250 "name": "event_names", 251 "derived_table": { 252 "sql": ( 253 "SELECT category, " 254 " event, " 255 " property.key AS property_name, " 256 " property_value.key AS property_value, " 257 " property_value.index as property_index " 258 f"FROM {self.tables[0]['event_types']} " 259 "LEFT JOIN UNNEST(event_properties) AS property " 260 "LEFT JOIN UNNEST(property.value) AS property_value " 261 ) 262 }, 263 "dimensions": [ 264 { 265 "name": "category", 266 "type": "string", 267 "sql": "${TABLE}.category", 268 }, 269 { 270 "name": "event", 271 "type": "string", 272 "sql": "${TABLE}.event", 273 }, 274 { 275 "name": "property_name", 276 "type": "string", 277 "sql": "${TABLE}.property_name", 278 }, 279 { 280 "name": "property_value", 281 "type": "string", 282 "sql": "${TABLE}.property_value", 283 }, 284 ], 285 } 286 ] 287 ) 288 289 return events
A view for doing Funnel Analysis.
39 def __init__(self, namespace: str, tables: List[Dict[str, str]]): 40 """Get an instance of a FunnelAnalysisView.""" 41 super().__init__(namespace, "funnel_analysis", FunnelAnalysisView.type, tables)
Get an instance of a FunnelAnalysisView.
43 @classmethod 44 def from_db_views( 45 klass, 46 namespace: str, 47 is_glean: bool, 48 channels: List[Dict[str, str]], 49 db_views: dict, 50 num_funnel_steps: int = DEFAULT_NUM_FUNNEL_STEPS, 51 ) -> Iterator[FunnelAnalysisView]: 52 """Get Client Count Views from db views and app variants. 53 54 We only create a FunnelAnalysisView if we have the two necessary db tables: 55 - events_daily 56 - event_types 57 """ 58 # We can guarantee there will always be at least one channel, 59 # because this comes from the associated _get_glean_repos in 60 # namespaces.py 61 dataset = next( 62 (channel for channel in channels if channel.get("channel") == "release"), 63 channels[0], 64 )["dataset"] 65 66 necessary_views = {"events_daily", "event_types"} 67 actual_views = {} 68 for view_id, references in db_views[dataset].items(): 69 if view_id in necessary_views: 70 actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`" 71 72 if len(actual_views) == 2: 73 # Only create an instance if we have the two necessary tables 74 tables = { 75 "funnel_analysis": "events_daily_table", 76 "event_types": actual_views["event_types"], 77 } 78 tables.update( 79 {f"step_{i}": "event_types" for i in range(1, num_funnel_steps + 1)} 80 ) 81 yield FunnelAnalysisView( 82 namespace, 83 [tables], 84 )
Get Client Count Views from db views and app variants.
We only create a FunnelAnalysisView if we have the two necessary db tables: - events_daily - event_types
86 @classmethod 87 def from_dict( 88 klass, namespace: str, name: str, _dict: ViewDict 89 ) -> FunnelAnalysisView: 90 """Get a FunnalAnalysisView from a dict representation.""" 91 return FunnelAnalysisView(namespace, _dict["tables"])
Get a FunnalAnalysisView from a dict representation.
93 def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 94 """Get this view as LookML.""" 95 return { 96 "includes": [f"{self.tables[0]['funnel_analysis']}.view.lkml"], 97 "views": self._funnel_analysis_lookml() + self._event_types_lookml(), 98 }
Get this view as LookML.