generator.views.funnel_analysis_view
Class to describe a Funnel Analysis View.
We create a single View file and a single Explore file.
The View file has many Looker views defined within it:
funnel_analysis: Based on events_daily, has the events
string and user dimensions (e.g. country)
event_names: The names of events. Used for suggestions.
event_N: For each possible funnel step, a single view. This is used to define what that funnel step is.
The Explore's job is to take this generated file an link all those event_N's to the funnel_analysis.
We join them via cross join, because event_N views only have 1 row and 1 column - the match_string
to use for a regex_match on the events
string in funnel_analysis.
For example, say we filter event_1 on event
: WHERE event in ("session-start, "session-end")
Then we join that with funnel_analysis: FROM funnel_analysis CROSS JOIN event_1
That lets us find out whether the user completed those funnel steps:
SELECT REGEXP_CONTAINS(funnel_analysis.events, event_1.match_string) AS completed_step_1
The funnel_analysis
view has some nice dimensions to hide these details from the end user,
e.g. completed_funnel_step_N
. We can then count those users across dimensions.
1"""Class to describe a Funnel Analysis View. 2 3We create a single View file and a single Explore file. 4 5The View file has many Looker views defined within it: 6 funnel_analysis: Based on events_daily, has the `events` string and user dimensions (e.g. country) 7 event_names: The names of events. Used for suggestions. 8 event_N: For each possible funnel step, a single view. This is used to define what that funnel step is. 9 10The Explore's job is to take this generated file an link all those event_N's to the funnel_analysis. 11We join them via cross join, because event_N views only have 1 row and 1 column - the match_string 12to use for a regex_match on the `events` string in funnel_analysis. 13 14For example, say we filter event_1 on `event`: `WHERE event in ("session-start, "session-end")` 15Then we join that with funnel_analysis: `FROM funnel_analysis CROSS JOIN event_1` 16That lets us find out whether the user completed those funnel steps: 17 `SELECT REGEXP_CONTAINS(funnel_analysis.events, event_1.match_string) AS completed_step_1` 18 19The `funnel_analysis` view has some nice dimensions to hide these details from the end user, 20e.g. `completed_funnel_step_N`. We can then count those users across dimensions. 21""" 22 23from __future__ import annotations 24 25from textwrap import dedent 26from typing import Any, Dict, Iterator, List, Optional 27 28from .lookml_utils import DEFAULT_MAX_SUGGEST_PERSIST_FOR 29from .view import View, ViewDict 30 31DEFAULT_NUM_FUNNEL_STEPS: int = 4 32 33 34class FunnelAnalysisView(View): 35 """A view for doing Funnel Analysis.""" 36 37 type: str = "funnel_analysis_view" 38 39 def __init__(self, namespace: str, tables: List[Dict[str, str]]): 40 """Get an instance of a FunnelAnalysisView.""" 41 super().__init__(namespace, "funnel_analysis", FunnelAnalysisView.type, tables) 42 43 @classmethod 44 def from_db_views( 45 klass, 46 namespace: str, 47 is_glean: bool, 48 channels: List[Dict[str, str]], 49 db_views: dict, 50 num_funnel_steps: int = DEFAULT_NUM_FUNNEL_STEPS, 51 ) -> Iterator[FunnelAnalysisView]: 52 """Get Client Count Views from db views and app variants. 53 54 We only create a FunnelAnalysisView if we have the two necessary db tables: 55 - events_daily 56 - event_types 57 """ 58 # We can guarantee there will always be at least one channel, 59 # because this comes from the associated _get_glean_repos in 60 # namespaces.py 61 dataset = next( 62 (channel for channel in channels if channel.get("channel") == "release"), 63 channels[0], 64 )["dataset"] 65 66 necessary_views = {"events_daily", "event_types"} 67 actual_views = {} 68 for view_id, references in db_views[dataset].items(): 69 if view_id in necessary_views: 70 actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`" 71 72 if len(actual_views) == 2: 73 # Only create an instance if we have the two necessary tables 74 tables = { 75 "funnel_analysis": "events_daily_table", 76 "event_types": actual_views["event_types"], 77 } 78 tables.update( 79 {f"step_{i}": "event_types" for i in range(1, num_funnel_steps + 1)} 80 ) 81 yield FunnelAnalysisView( 82 namespace, 83 [tables], 84 ) 85 86 @classmethod 87 def from_dict( 88 klass, namespace: str, name: str, _dict: ViewDict 89 ) -> FunnelAnalysisView: 90 """Get a FunnalAnalysisView from a dict representation.""" 91 return FunnelAnalysisView(namespace, _dict["tables"]) 92 93 def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 94 """Get this view as LookML.""" 95 return { 96 "includes": [f"{self.tables[0]['funnel_analysis']}.view.lkml"], 97 "views": self._funnel_analysis_lookml() + self._event_types_lookml(), 98 } 99 100 def n_events(self) -> int: 101 """Get the number of events allowed in this funnel.""" 102 return len([k for k in self.tables[0] if k.startswith("step_")]) 103 104 def _funnel_analysis_lookml(self) -> List[Dict[str, Any]]: 105 dimensions = [ 106 { 107 "name": f"completed_step_{n}", 108 "type": "yesno", 109 "description": f"Whether the user completed step {n} on the associated day.", 110 "sql": dedent( 111 f""" 112 REGEXP_CONTAINS( 113 ${{TABLE}}.events, mozfun.event_analysis.create_funnel_regex( 114 [{", ".join([ 115 f'${{step_{ni}.match_string}}' for ni in range(1, n + 1) 116 ])}], 117 True 118 ) 119 ) 120 """ 121 ), 122 } 123 for n in range(1, self.n_events() + 1) 124 ] 125 126 count_measures: List[Dict[str, Any]] = [ 127 { 128 "name": f"count_completed_step_{n}", 129 "description": ( 130 f"The number of times that step {n} was completed. " 131 "Grouping by day makes this a count of users who completed " 132 f"step {n} on each day." 133 ), 134 "type": "count", 135 "filters": [{f"completed_step_{ni}": "yes"} for ni in range(1, n + 1)], 136 } 137 for n in range(1, self.n_events() + 1) 138 ] 139 140 fractional_measures: List[Dict[str, Any]] = [ 141 { 142 "name": f"fraction_completed_step_{n}", 143 "description": f"Of the user-days that completed Step 1, the fraction that completed step {n}.", 144 "type": "number", 145 "value_format": "0.00%", 146 "sql": f"SAFE_DIVIDE(${{count_completed_step_{n}}}, ${{count_completed_step_1}})", 147 } 148 for n in range(1, self.n_events() + 1) 149 ] 150 return [ 151 { 152 "name": "funnel_analysis", 153 "extends": ["events_daily_table"], 154 "dimensions": dimensions, 155 "measures": count_measures + fractional_measures, 156 } 157 ] 158 159 def _event_types_lookml(self) -> List[Dict[str, Any]]: 160 events = ( 161 [ 162 { 163 "name": "event_types", 164 "derived_table": { 165 "sql": dedent( 166 f""" 167 SELECT 168 mozfun.event_analysis.aggregate_match_strings( 169 ARRAY_AGG( 170 DISTINCT 171 {{% if _filters['property_value'] -%}} 172 mozfun.event_analysis.event_property_value_to_match_string( 173 et.index, 174 properties.index, 175 property_value.value 176 ) 177 {{% elsif _filters['property_name'] -%}} 178 mozfun.event_analysis.event_property_index_to_match_string( 179 et.index, 180 properties.index 181 ) 182 {{% else -%}} 183 mozfun.event_analysis.event_index_to_match_string(et.index) 184 {{% endif -%}} 185 IGNORE NULLS 186 ) 187 ) AS match_string 188 FROM 189 {self.tables[0]['event_types']} AS et 190 LEFT JOIN 191 UNNEST(COALESCE(event_properties, [])) AS properties 192 LEFT JOIN 193 UNNEST(properties.value) AS property_value 194 WHERE 195 {{% condition category %}} category {{% endcondition %}} 196 AND {{% condition event %}} event {{% endcondition %}} 197 AND {{% condition property_name %}} properties.key {{% endcondition %}} 198 AND {{% condition property_value %}} property_value.key {{% endcondition %}} 199 """ 200 ), 201 }, 202 "filters": [ 203 { 204 "name": "category", 205 "description": "The event category, as defined in metrics.yaml.", 206 "type": "string", 207 "suggest_explore": "event_names", 208 "suggest_dimension": "event_names.category", 209 "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR, 210 }, 211 { 212 "name": "event", 213 "description": "The event name.", 214 "type": "string", 215 "suggest_explore": "event_names", 216 "suggest_dimension": "event_names.event", 217 "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR, 218 }, 219 { 220 "name": "property_name", 221 "description": "The event property name.", 222 "type": "string", 223 "suggest_explore": "event_names", 224 "suggest_dimension": "event_names.property_name", 225 "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR, 226 }, 227 { 228 "name": "property_value", 229 "description": "The event property value.", 230 "type": "string", 231 "suggest_explore": "event_names", 232 "suggest_dimension": "event_names.property_value", 233 "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR, 234 }, 235 ], 236 "dimensions": [ 237 { 238 "name": "match_string", 239 "hidden": "yes", 240 "sql": "${TABLE}.match_string", 241 } 242 ], 243 } 244 ] 245 + [ 246 { 247 "name": f"step_{n}", 248 "extends": ["event_types"], 249 } 250 for n in range(1, self.n_events() + 1) 251 ] 252 + [ 253 { 254 "name": "event_names", 255 "derived_table": { 256 "sql": ( 257 "SELECT category, " 258 " event, " 259 " property.key AS property_name, " 260 " property_value.key AS property_value, " 261 " property_value.index as property_index " 262 f"FROM {self.tables[0]['event_types']} " 263 "LEFT JOIN UNNEST(event_properties) AS property " 264 "LEFT JOIN UNNEST(property.value) AS property_value " 265 ) 266 }, 267 "dimensions": [ 268 { 269 "name": "category", 270 "type": "string", 271 "sql": "${TABLE}.category", 272 }, 273 { 274 "name": "event", 275 "type": "string", 276 "sql": "${TABLE}.event", 277 }, 278 { 279 "name": "property_name", 280 "type": "string", 281 "sql": "${TABLE}.property_name", 282 }, 283 { 284 "name": "property_value", 285 "type": "string", 286 "sql": "${TABLE}.property_value", 287 }, 288 ], 289 } 290 ] 291 ) 292 293 return events
35class FunnelAnalysisView(View): 36 """A view for doing Funnel Analysis.""" 37 38 type: str = "funnel_analysis_view" 39 40 def __init__(self, namespace: str, tables: List[Dict[str, str]]): 41 """Get an instance of a FunnelAnalysisView.""" 42 super().__init__(namespace, "funnel_analysis", FunnelAnalysisView.type, tables) 43 44 @classmethod 45 def from_db_views( 46 klass, 47 namespace: str, 48 is_glean: bool, 49 channels: List[Dict[str, str]], 50 db_views: dict, 51 num_funnel_steps: int = DEFAULT_NUM_FUNNEL_STEPS, 52 ) -> Iterator[FunnelAnalysisView]: 53 """Get Client Count Views from db views and app variants. 54 55 We only create a FunnelAnalysisView if we have the two necessary db tables: 56 - events_daily 57 - event_types 58 """ 59 # We can guarantee there will always be at least one channel, 60 # because this comes from the associated _get_glean_repos in 61 # namespaces.py 62 dataset = next( 63 (channel for channel in channels if channel.get("channel") == "release"), 64 channels[0], 65 )["dataset"] 66 67 necessary_views = {"events_daily", "event_types"} 68 actual_views = {} 69 for view_id, references in db_views[dataset].items(): 70 if view_id in necessary_views: 71 actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`" 72 73 if len(actual_views) == 2: 74 # Only create an instance if we have the two necessary tables 75 tables = { 76 "funnel_analysis": "events_daily_table", 77 "event_types": actual_views["event_types"], 78 } 79 tables.update( 80 {f"step_{i}": "event_types" for i in range(1, num_funnel_steps + 1)} 81 ) 82 yield FunnelAnalysisView( 83 namespace, 84 [tables], 85 ) 86 87 @classmethod 88 def from_dict( 89 klass, namespace: str, name: str, _dict: ViewDict 90 ) -> FunnelAnalysisView: 91 """Get a FunnalAnalysisView from a dict representation.""" 92 return FunnelAnalysisView(namespace, _dict["tables"]) 93 94 def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 95 """Get this view as LookML.""" 96 return { 97 "includes": [f"{self.tables[0]['funnel_analysis']}.view.lkml"], 98 "views": self._funnel_analysis_lookml() + self._event_types_lookml(), 99 } 100 101 def n_events(self) -> int: 102 """Get the number of events allowed in this funnel.""" 103 return len([k for k in self.tables[0] if k.startswith("step_")]) 104 105 def _funnel_analysis_lookml(self) -> List[Dict[str, Any]]: 106 dimensions = [ 107 { 108 "name": f"completed_step_{n}", 109 "type": "yesno", 110 "description": f"Whether the user completed step {n} on the associated day.", 111 "sql": dedent( 112 f""" 113 REGEXP_CONTAINS( 114 ${{TABLE}}.events, mozfun.event_analysis.create_funnel_regex( 115 [{", ".join([ 116 f'${{step_{ni}.match_string}}' for ni in range(1, n + 1) 117 ])}], 118 True 119 ) 120 ) 121 """ 122 ), 123 } 124 for n in range(1, self.n_events() + 1) 125 ] 126 127 count_measures: List[Dict[str, Any]] = [ 128 { 129 "name": f"count_completed_step_{n}", 130 "description": ( 131 f"The number of times that step {n} was completed. " 132 "Grouping by day makes this a count of users who completed " 133 f"step {n} on each day." 134 ), 135 "type": "count", 136 "filters": [{f"completed_step_{ni}": "yes"} for ni in range(1, n + 1)], 137 } 138 for n in range(1, self.n_events() + 1) 139 ] 140 141 fractional_measures: List[Dict[str, Any]] = [ 142 { 143 "name": f"fraction_completed_step_{n}", 144 "description": f"Of the user-days that completed Step 1, the fraction that completed step {n}.", 145 "type": "number", 146 "value_format": "0.00%", 147 "sql": f"SAFE_DIVIDE(${{count_completed_step_{n}}}, ${{count_completed_step_1}})", 148 } 149 for n in range(1, self.n_events() + 1) 150 ] 151 return [ 152 { 153 "name": "funnel_analysis", 154 "extends": ["events_daily_table"], 155 "dimensions": dimensions, 156 "measures": count_measures + fractional_measures, 157 } 158 ] 159 160 def _event_types_lookml(self) -> List[Dict[str, Any]]: 161 events = ( 162 [ 163 { 164 "name": "event_types", 165 "derived_table": { 166 "sql": dedent( 167 f""" 168 SELECT 169 mozfun.event_analysis.aggregate_match_strings( 170 ARRAY_AGG( 171 DISTINCT 172 {{% if _filters['property_value'] -%}} 173 mozfun.event_analysis.event_property_value_to_match_string( 174 et.index, 175 properties.index, 176 property_value.value 177 ) 178 {{% elsif _filters['property_name'] -%}} 179 mozfun.event_analysis.event_property_index_to_match_string( 180 et.index, 181 properties.index 182 ) 183 {{% else -%}} 184 mozfun.event_analysis.event_index_to_match_string(et.index) 185 {{% endif -%}} 186 IGNORE NULLS 187 ) 188 ) AS match_string 189 FROM 190 {self.tables[0]['event_types']} AS et 191 LEFT JOIN 192 UNNEST(COALESCE(event_properties, [])) AS properties 193 LEFT JOIN 194 UNNEST(properties.value) AS property_value 195 WHERE 196 {{% condition category %}} category {{% endcondition %}} 197 AND {{% condition event %}} event {{% endcondition %}} 198 AND {{% condition property_name %}} properties.key {{% endcondition %}} 199 AND {{% condition property_value %}} property_value.key {{% endcondition %}} 200 """ 201 ), 202 }, 203 "filters": [ 204 { 205 "name": "category", 206 "description": "The event category, as defined in metrics.yaml.", 207 "type": "string", 208 "suggest_explore": "event_names", 209 "suggest_dimension": "event_names.category", 210 "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR, 211 }, 212 { 213 "name": "event", 214 "description": "The event name.", 215 "type": "string", 216 "suggest_explore": "event_names", 217 "suggest_dimension": "event_names.event", 218 "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR, 219 }, 220 { 221 "name": "property_name", 222 "description": "The event property name.", 223 "type": "string", 224 "suggest_explore": "event_names", 225 "suggest_dimension": "event_names.property_name", 226 "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR, 227 }, 228 { 229 "name": "property_value", 230 "description": "The event property value.", 231 "type": "string", 232 "suggest_explore": "event_names", 233 "suggest_dimension": "event_names.property_value", 234 "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR, 235 }, 236 ], 237 "dimensions": [ 238 { 239 "name": "match_string", 240 "hidden": "yes", 241 "sql": "${TABLE}.match_string", 242 } 243 ], 244 } 245 ] 246 + [ 247 { 248 "name": f"step_{n}", 249 "extends": ["event_types"], 250 } 251 for n in range(1, self.n_events() + 1) 252 ] 253 + [ 254 { 255 "name": "event_names", 256 "derived_table": { 257 "sql": ( 258 "SELECT category, " 259 " event, " 260 " property.key AS property_name, " 261 " property_value.key AS property_value, " 262 " property_value.index as property_index " 263 f"FROM {self.tables[0]['event_types']} " 264 "LEFT JOIN UNNEST(event_properties) AS property " 265 "LEFT JOIN UNNEST(property.value) AS property_value " 266 ) 267 }, 268 "dimensions": [ 269 { 270 "name": "category", 271 "type": "string", 272 "sql": "${TABLE}.category", 273 }, 274 { 275 "name": "event", 276 "type": "string", 277 "sql": "${TABLE}.event", 278 }, 279 { 280 "name": "property_name", 281 "type": "string", 282 "sql": "${TABLE}.property_name", 283 }, 284 { 285 "name": "property_value", 286 "type": "string", 287 "sql": "${TABLE}.property_value", 288 }, 289 ], 290 } 291 ] 292 ) 293 294 return events
A view for doing Funnel Analysis.
40 def __init__(self, namespace: str, tables: List[Dict[str, str]]): 41 """Get an instance of a FunnelAnalysisView.""" 42 super().__init__(namespace, "funnel_analysis", FunnelAnalysisView.type, tables)
Get an instance of a FunnelAnalysisView.
44 @classmethod 45 def from_db_views( 46 klass, 47 namespace: str, 48 is_glean: bool, 49 channels: List[Dict[str, str]], 50 db_views: dict, 51 num_funnel_steps: int = DEFAULT_NUM_FUNNEL_STEPS, 52 ) -> Iterator[FunnelAnalysisView]: 53 """Get Client Count Views from db views and app variants. 54 55 We only create a FunnelAnalysisView if we have the two necessary db tables: 56 - events_daily 57 - event_types 58 """ 59 # We can guarantee there will always be at least one channel, 60 # because this comes from the associated _get_glean_repos in 61 # namespaces.py 62 dataset = next( 63 (channel for channel in channels if channel.get("channel") == "release"), 64 channels[0], 65 )["dataset"] 66 67 necessary_views = {"events_daily", "event_types"} 68 actual_views = {} 69 for view_id, references in db_views[dataset].items(): 70 if view_id in necessary_views: 71 actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`" 72 73 if len(actual_views) == 2: 74 # Only create an instance if we have the two necessary tables 75 tables = { 76 "funnel_analysis": "events_daily_table", 77 "event_types": actual_views["event_types"], 78 } 79 tables.update( 80 {f"step_{i}": "event_types" for i in range(1, num_funnel_steps + 1)} 81 ) 82 yield FunnelAnalysisView( 83 namespace, 84 [tables], 85 )
Get Client Count Views from db views and app variants.
We only create a FunnelAnalysisView if we have the two necessary db tables: - events_daily - event_types
87 @classmethod 88 def from_dict( 89 klass, namespace: str, name: str, _dict: ViewDict 90 ) -> FunnelAnalysisView: 91 """Get a FunnalAnalysisView from a dict representation.""" 92 return FunnelAnalysisView(namespace, _dict["tables"])
Get a FunnalAnalysisView from a dict representation.
94 def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]: 95 """Get this view as LookML.""" 96 return { 97 "includes": [f"{self.tables[0]['funnel_analysis']}.view.lkml"], 98 "views": self._funnel_analysis_lookml() + self._event_types_lookml(), 99 }
Get this view as LookML.