generator.views.funnel_analysis_view

Class to describe a Funnel Analysis View.

We create a single View file and a single Explore file.

The View file has many Looker views defined within it: funnel_analysis: Based on events_daily, has the events string and user dimensions (e.g. country) event_names: The names of events. Used for suggestions. event_N: For each possible funnel step, a single view. This is used to define what that funnel step is.

The Explore's job is to take this generated file an link all those event_N's to the funnel_analysis. We join them via cross join, because event_N views only have 1 row and 1 column - the match_string to use for a regex_match on the events string in funnel_analysis.

For example, say we filter event_1 on event: WHERE event in ("session-start, "session-end") Then we join that with funnel_analysis: FROM funnel_analysis CROSS JOIN event_1 That lets us find out whether the user completed those funnel steps: SELECT REGEXP_CONTAINS(funnel_analysis.events, event_1.match_string) AS completed_step_1

The funnel_analysis view has some nice dimensions to hide these details from the end user, e.g. completed_funnel_step_N. We can then count those users across dimensions.

  1"""Class to describe a Funnel Analysis View.
  2
  3We create a single View file and a single Explore file.
  4
  5The View file has many Looker views defined within it:
  6    funnel_analysis: Based on events_daily, has the `events` string and user dimensions (e.g. country)
  7    event_names: The names of events. Used for suggestions.
  8    event_N: For each possible funnel step, a single view. This is used to define what that funnel step is.
  9
 10The Explore's job is to take this generated file an link all those event_N's to the funnel_analysis.
 11We join them via cross join, because event_N views only have 1 row and 1 column - the match_string
 12to use for a regex_match on the `events` string in funnel_analysis.
 13
 14For example, say we filter event_1 on `event`: `WHERE event in ("session-start, "session-end")`
 15Then we join that with funnel_analysis: `FROM funnel_analysis CROSS JOIN event_1`
 16That lets us find out whether the user completed those funnel steps:
 17    `SELECT REGEXP_CONTAINS(funnel_analysis.events, event_1.match_string) AS completed_step_1`
 18
 19The `funnel_analysis` view has some nice dimensions to hide these details from the end user,
 20e.g. `completed_funnel_step_N`. We can then count those users across dimensions.
 21"""
 22
 23from __future__ import annotations
 24
 25from textwrap import dedent
 26from typing import Any, Dict, Iterator, List, Optional
 27
 28from .lookml_utils import DEFAULT_MAX_SUGGEST_PERSIST_FOR
 29from .view import View, ViewDict
 30
 31DEFAULT_NUM_FUNNEL_STEPS: int = 4
 32
 33
 34class FunnelAnalysisView(View):
 35    """A view for doing Funnel Analysis."""
 36
 37    type: str = "funnel_analysis_view"
 38
 39    def __init__(self, namespace: str, tables: List[Dict[str, str]]):
 40        """Get an instance of a FunnelAnalysisView."""
 41        super().__init__(namespace, "funnel_analysis", FunnelAnalysisView.type, tables)
 42
 43    @classmethod
 44    def from_db_views(
 45        klass,
 46        namespace: str,
 47        is_glean: bool,
 48        channels: List[Dict[str, str]],
 49        db_views: dict,
 50        num_funnel_steps: int = DEFAULT_NUM_FUNNEL_STEPS,
 51    ) -> Iterator[FunnelAnalysisView]:
 52        """Get Client Count Views from db views and app variants.
 53
 54        We only create a FunnelAnalysisView if we have the two necessary db tables:
 55            - events_daily
 56            - event_types
 57        """
 58        # We can guarantee there will always be at least one channel,
 59        # because this comes from the associated _get_glean_repos in
 60        # namespaces.py
 61        dataset = next(
 62            (channel for channel in channels if channel.get("channel") == "release"),
 63            channels[0],
 64        )["dataset"]
 65
 66        necessary_views = {"events_daily", "event_types"}
 67        actual_views = {}
 68        for view_id, references in db_views[dataset].items():
 69            if view_id in necessary_views:
 70                actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`"
 71
 72        if len(actual_views) == 2:
 73            # Only create an instance if we have the two necessary tables
 74            tables = {
 75                "funnel_analysis": "events_daily_table",
 76                "event_types": actual_views["event_types"],
 77            }
 78            tables.update(
 79                {f"step_{i}": "event_types" for i in range(1, num_funnel_steps + 1)}
 80            )
 81            yield FunnelAnalysisView(
 82                namespace,
 83                [tables],
 84            )
 85
 86    @classmethod
 87    def from_dict(
 88        klass, namespace: str, name: str, _dict: ViewDict
 89    ) -> FunnelAnalysisView:
 90        """Get a FunnalAnalysisView from a dict representation."""
 91        return FunnelAnalysisView(namespace, _dict["tables"])
 92
 93    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 94        """Get this view as LookML."""
 95        return {
 96            "includes": [f"{self.tables[0]['funnel_analysis']}.view.lkml"],
 97            "views": self._funnel_analysis_lookml() + self._event_types_lookml(),
 98        }
 99
100    def n_events(self) -> int:
101        """Get the number of events allowed in this funnel."""
102        return len([k for k in self.tables[0] if k.startswith("step_")])
103
104    def _funnel_analysis_lookml(self) -> List[Dict[str, Any]]:
105        dimensions = [
106            {
107                "name": f"completed_step_{n}",
108                "type": "yesno",
109                "description": f"Whether the user completed step {n} on the associated day.",
110                "sql": dedent(
111                    f"""
112                    REGEXP_CONTAINS(
113                        ${{TABLE}}.events, mozfun.event_analysis.create_funnel_regex(
114                            [{", ".join([
115                                f'${{step_{ni}.match_string}}' for ni in range(1, n + 1)
116                            ])}],
117                            True
118                        )
119                    )
120                    """
121                ),
122            }
123            for n in range(1, self.n_events() + 1)
124        ]
125
126        count_measures: List[Dict[str, Any]] = [
127            {
128                "name": f"count_completed_step_{n}",
129                "description": (
130                    f"The number of times that step {n} was completed. "
131                    "Grouping by day makes this a count of users who completed "
132                    f"step {n} on each day."
133                ),
134                "type": "count",
135                "filters": [{f"completed_step_{ni}": "yes"} for ni in range(1, n + 1)],
136            }
137            for n in range(1, self.n_events() + 1)
138        ]
139
140        fractional_measures: List[Dict[str, Any]] = [
141            {
142                "name": f"fraction_completed_step_{n}",
143                "description": f"Of the user-days that completed Step 1, the fraction that completed step {n}.",
144                "type": "number",
145                "value_format": "0.00%",
146                "sql": f"SAFE_DIVIDE(${{count_completed_step_{n}}}, ${{count_completed_step_1}})",
147            }
148            for n in range(1, self.n_events() + 1)
149        ]
150        return [
151            {
152                "name": "funnel_analysis",
153                "extends": ["events_daily_table"],
154                "dimensions": dimensions,
155                "measures": count_measures + fractional_measures,
156            }
157        ]
158
159    def _event_types_lookml(self) -> List[Dict[str, Any]]:
160        events = (
161            [
162                {
163                    "name": "event_types",
164                    "derived_table": {
165                        "sql": dedent(
166                            f"""
167                            SELECT
168                              mozfun.event_analysis.aggregate_match_strings(
169                                ARRAY_AGG(
170                                  DISTINCT
171                                  {{% if _filters['property_value'] -%}}
172                                    mozfun.event_analysis.event_property_value_to_match_string(
173                                      et.index,
174                                      properties.index,
175                                      property_value.value
176                                    )
177                                  {{% elsif _filters['property_name'] -%}}
178                                    mozfun.event_analysis.event_property_index_to_match_string(
179                                      et.index,
180                                      properties.index
181                                    )
182                                  {{% else -%}}
183                                    mozfun.event_analysis.event_index_to_match_string(et.index)
184                                  {{% endif -%}}
185                                  IGNORE NULLS
186                                )
187                              ) AS match_string
188                            FROM
189                              {self.tables[0]['event_types']} AS et
190                            LEFT JOIN
191                              UNNEST(COALESCE(event_properties, [])) AS properties
192                            LEFT JOIN
193                              UNNEST(properties.value) AS property_value
194                            WHERE
195                              {{% condition category %}} category {{% endcondition %}}
196                              AND {{% condition event %}} event {{% endcondition %}}
197                              AND {{% condition property_name %}} properties.key {{% endcondition %}}
198                              AND {{% condition property_value %}} property_value.key {{% endcondition %}}
199                            """
200                        ),
201                    },
202                    "filters": [
203                        {
204                            "name": "category",
205                            "description": "The event category, as defined in metrics.yaml.",
206                            "type": "string",
207                            "suggest_explore": "event_names",
208                            "suggest_dimension": "event_names.category",
209                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
210                        },
211                        {
212                            "name": "event",
213                            "description": "The event name.",
214                            "type": "string",
215                            "suggest_explore": "event_names",
216                            "suggest_dimension": "event_names.event",
217                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
218                        },
219                        {
220                            "name": "property_name",
221                            "description": "The event property name.",
222                            "type": "string",
223                            "suggest_explore": "event_names",
224                            "suggest_dimension": "event_names.property_name",
225                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
226                        },
227                        {
228                            "name": "property_value",
229                            "description": "The event property value.",
230                            "type": "string",
231                            "suggest_explore": "event_names",
232                            "suggest_dimension": "event_names.property_value",
233                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
234                        },
235                    ],
236                    "dimensions": [
237                        {
238                            "name": "match_string",
239                            "hidden": "yes",
240                            "sql": "${TABLE}.match_string",
241                        }
242                    ],
243                }
244            ]
245            + [
246                {
247                    "name": f"step_{n}",
248                    "extends": ["event_types"],
249                }
250                for n in range(1, self.n_events() + 1)
251            ]
252            + [
253                {
254                    "name": "event_names",
255                    "derived_table": {
256                        "sql": (
257                            "SELECT category, "
258                            "  event, "
259                            "  property.key AS property_name, "
260                            "  property_value.key AS property_value, "
261                            "  property_value.index as property_index "
262                            f"FROM {self.tables[0]['event_types']} "
263                            "LEFT JOIN UNNEST(event_properties) AS property "
264                            "LEFT JOIN UNNEST(property.value) AS property_value "
265                        )
266                    },
267                    "dimensions": [
268                        {
269                            "name": "category",
270                            "type": "string",
271                            "sql": "${TABLE}.category",
272                        },
273                        {
274                            "name": "event",
275                            "type": "string",
276                            "sql": "${TABLE}.event",
277                        },
278                        {
279                            "name": "property_name",
280                            "type": "string",
281                            "sql": "${TABLE}.property_name",
282                        },
283                        {
284                            "name": "property_value",
285                            "type": "string",
286                            "sql": "${TABLE}.property_value",
287                        },
288                    ],
289                }
290            ]
291        )
292
293        return events
DEFAULT_NUM_FUNNEL_STEPS: int = 4
class FunnelAnalysisView(generator.views.view.View):
 35class FunnelAnalysisView(View):
 36    """A view for doing Funnel Analysis."""
 37
 38    type: str = "funnel_analysis_view"
 39
 40    def __init__(self, namespace: str, tables: List[Dict[str, str]]):
 41        """Get an instance of a FunnelAnalysisView."""
 42        super().__init__(namespace, "funnel_analysis", FunnelAnalysisView.type, tables)
 43
 44    @classmethod
 45    def from_db_views(
 46        klass,
 47        namespace: str,
 48        is_glean: bool,
 49        channels: List[Dict[str, str]],
 50        db_views: dict,
 51        num_funnel_steps: int = DEFAULT_NUM_FUNNEL_STEPS,
 52    ) -> Iterator[FunnelAnalysisView]:
 53        """Get Client Count Views from db views and app variants.
 54
 55        We only create a FunnelAnalysisView if we have the two necessary db tables:
 56            - events_daily
 57            - event_types
 58        """
 59        # We can guarantee there will always be at least one channel,
 60        # because this comes from the associated _get_glean_repos in
 61        # namespaces.py
 62        dataset = next(
 63            (channel for channel in channels if channel.get("channel") == "release"),
 64            channels[0],
 65        )["dataset"]
 66
 67        necessary_views = {"events_daily", "event_types"}
 68        actual_views = {}
 69        for view_id, references in db_views[dataset].items():
 70            if view_id in necessary_views:
 71                actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`"
 72
 73        if len(actual_views) == 2:
 74            # Only create an instance if we have the two necessary tables
 75            tables = {
 76                "funnel_analysis": "events_daily_table",
 77                "event_types": actual_views["event_types"],
 78            }
 79            tables.update(
 80                {f"step_{i}": "event_types" for i in range(1, num_funnel_steps + 1)}
 81            )
 82            yield FunnelAnalysisView(
 83                namespace,
 84                [tables],
 85            )
 86
 87    @classmethod
 88    def from_dict(
 89        klass, namespace: str, name: str, _dict: ViewDict
 90    ) -> FunnelAnalysisView:
 91        """Get a FunnalAnalysisView from a dict representation."""
 92        return FunnelAnalysisView(namespace, _dict["tables"])
 93
 94    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 95        """Get this view as LookML."""
 96        return {
 97            "includes": [f"{self.tables[0]['funnel_analysis']}.view.lkml"],
 98            "views": self._funnel_analysis_lookml() + self._event_types_lookml(),
 99        }
100
101    def n_events(self) -> int:
102        """Get the number of events allowed in this funnel."""
103        return len([k for k in self.tables[0] if k.startswith("step_")])
104
105    def _funnel_analysis_lookml(self) -> List[Dict[str, Any]]:
106        dimensions = [
107            {
108                "name": f"completed_step_{n}",
109                "type": "yesno",
110                "description": f"Whether the user completed step {n} on the associated day.",
111                "sql": dedent(
112                    f"""
113                    REGEXP_CONTAINS(
114                        ${{TABLE}}.events, mozfun.event_analysis.create_funnel_regex(
115                            [{", ".join([
116                                f'${{step_{ni}.match_string}}' for ni in range(1, n + 1)
117                            ])}],
118                            True
119                        )
120                    )
121                    """
122                ),
123            }
124            for n in range(1, self.n_events() + 1)
125        ]
126
127        count_measures: List[Dict[str, Any]] = [
128            {
129                "name": f"count_completed_step_{n}",
130                "description": (
131                    f"The number of times that step {n} was completed. "
132                    "Grouping by day makes this a count of users who completed "
133                    f"step {n} on each day."
134                ),
135                "type": "count",
136                "filters": [{f"completed_step_{ni}": "yes"} for ni in range(1, n + 1)],
137            }
138            for n in range(1, self.n_events() + 1)
139        ]
140
141        fractional_measures: List[Dict[str, Any]] = [
142            {
143                "name": f"fraction_completed_step_{n}",
144                "description": f"Of the user-days that completed Step 1, the fraction that completed step {n}.",
145                "type": "number",
146                "value_format": "0.00%",
147                "sql": f"SAFE_DIVIDE(${{count_completed_step_{n}}}, ${{count_completed_step_1}})",
148            }
149            for n in range(1, self.n_events() + 1)
150        ]
151        return [
152            {
153                "name": "funnel_analysis",
154                "extends": ["events_daily_table"],
155                "dimensions": dimensions,
156                "measures": count_measures + fractional_measures,
157            }
158        ]
159
160    def _event_types_lookml(self) -> List[Dict[str, Any]]:
161        events = (
162            [
163                {
164                    "name": "event_types",
165                    "derived_table": {
166                        "sql": dedent(
167                            f"""
168                            SELECT
169                              mozfun.event_analysis.aggregate_match_strings(
170                                ARRAY_AGG(
171                                  DISTINCT
172                                  {{% if _filters['property_value'] -%}}
173                                    mozfun.event_analysis.event_property_value_to_match_string(
174                                      et.index,
175                                      properties.index,
176                                      property_value.value
177                                    )
178                                  {{% elsif _filters['property_name'] -%}}
179                                    mozfun.event_analysis.event_property_index_to_match_string(
180                                      et.index,
181                                      properties.index
182                                    )
183                                  {{% else -%}}
184                                    mozfun.event_analysis.event_index_to_match_string(et.index)
185                                  {{% endif -%}}
186                                  IGNORE NULLS
187                                )
188                              ) AS match_string
189                            FROM
190                              {self.tables[0]['event_types']} AS et
191                            LEFT JOIN
192                              UNNEST(COALESCE(event_properties, [])) AS properties
193                            LEFT JOIN
194                              UNNEST(properties.value) AS property_value
195                            WHERE
196                              {{% condition category %}} category {{% endcondition %}}
197                              AND {{% condition event %}} event {{% endcondition %}}
198                              AND {{% condition property_name %}} properties.key {{% endcondition %}}
199                              AND {{% condition property_value %}} property_value.key {{% endcondition %}}
200                            """
201                        ),
202                    },
203                    "filters": [
204                        {
205                            "name": "category",
206                            "description": "The event category, as defined in metrics.yaml.",
207                            "type": "string",
208                            "suggest_explore": "event_names",
209                            "suggest_dimension": "event_names.category",
210                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
211                        },
212                        {
213                            "name": "event",
214                            "description": "The event name.",
215                            "type": "string",
216                            "suggest_explore": "event_names",
217                            "suggest_dimension": "event_names.event",
218                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
219                        },
220                        {
221                            "name": "property_name",
222                            "description": "The event property name.",
223                            "type": "string",
224                            "suggest_explore": "event_names",
225                            "suggest_dimension": "event_names.property_name",
226                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
227                        },
228                        {
229                            "name": "property_value",
230                            "description": "The event property value.",
231                            "type": "string",
232                            "suggest_explore": "event_names",
233                            "suggest_dimension": "event_names.property_value",
234                            "suggest_persist_for": DEFAULT_MAX_SUGGEST_PERSIST_FOR,
235                        },
236                    ],
237                    "dimensions": [
238                        {
239                            "name": "match_string",
240                            "hidden": "yes",
241                            "sql": "${TABLE}.match_string",
242                        }
243                    ],
244                }
245            ]
246            + [
247                {
248                    "name": f"step_{n}",
249                    "extends": ["event_types"],
250                }
251                for n in range(1, self.n_events() + 1)
252            ]
253            + [
254                {
255                    "name": "event_names",
256                    "derived_table": {
257                        "sql": (
258                            "SELECT category, "
259                            "  event, "
260                            "  property.key AS property_name, "
261                            "  property_value.key AS property_value, "
262                            "  property_value.index as property_index "
263                            f"FROM {self.tables[0]['event_types']} "
264                            "LEFT JOIN UNNEST(event_properties) AS property "
265                            "LEFT JOIN UNNEST(property.value) AS property_value "
266                        )
267                    },
268                    "dimensions": [
269                        {
270                            "name": "category",
271                            "type": "string",
272                            "sql": "${TABLE}.category",
273                        },
274                        {
275                            "name": "event",
276                            "type": "string",
277                            "sql": "${TABLE}.event",
278                        },
279                        {
280                            "name": "property_name",
281                            "type": "string",
282                            "sql": "${TABLE}.property_name",
283                        },
284                        {
285                            "name": "property_value",
286                            "type": "string",
287                            "sql": "${TABLE}.property_value",
288                        },
289                    ],
290                }
291            ]
292        )
293
294        return events

A view for doing Funnel Analysis.

FunnelAnalysisView(namespace: str, tables: List[Dict[str, str]])
40    def __init__(self, namespace: str, tables: List[Dict[str, str]]):
41        """Get an instance of a FunnelAnalysisView."""
42        super().__init__(namespace, "funnel_analysis", FunnelAnalysisView.type, tables)

Get an instance of a FunnelAnalysisView.

type: str = 'funnel_analysis_view'
@classmethod
def from_db_views( klass, namespace: str, is_glean: bool, channels: List[Dict[str, str]], db_views: dict, num_funnel_steps: int = 4) -> Iterator[FunnelAnalysisView]:
44    @classmethod
45    def from_db_views(
46        klass,
47        namespace: str,
48        is_glean: bool,
49        channels: List[Dict[str, str]],
50        db_views: dict,
51        num_funnel_steps: int = DEFAULT_NUM_FUNNEL_STEPS,
52    ) -> Iterator[FunnelAnalysisView]:
53        """Get Client Count Views from db views and app variants.
54
55        We only create a FunnelAnalysisView if we have the two necessary db tables:
56            - events_daily
57            - event_types
58        """
59        # We can guarantee there will always be at least one channel,
60        # because this comes from the associated _get_glean_repos in
61        # namespaces.py
62        dataset = next(
63            (channel for channel in channels if channel.get("channel") == "release"),
64            channels[0],
65        )["dataset"]
66
67        necessary_views = {"events_daily", "event_types"}
68        actual_views = {}
69        for view_id, references in db_views[dataset].items():
70            if view_id in necessary_views:
71                actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`"
72
73        if len(actual_views) == 2:
74            # Only create an instance if we have the two necessary tables
75            tables = {
76                "funnel_analysis": "events_daily_table",
77                "event_types": actual_views["event_types"],
78            }
79            tables.update(
80                {f"step_{i}": "event_types" for i in range(1, num_funnel_steps + 1)}
81            )
82            yield FunnelAnalysisView(
83                namespace,
84                [tables],
85            )

Get Client Count Views from db views and app variants.

We only create a FunnelAnalysisView if we have the two necessary db tables: - events_daily - event_types

@classmethod
def from_dict( klass, namespace: str, name: str, _dict: generator.views.view.ViewDict) -> FunnelAnalysisView:
87    @classmethod
88    def from_dict(
89        klass, namespace: str, name: str, _dict: ViewDict
90    ) -> FunnelAnalysisView:
91        """Get a FunnalAnalysisView from a dict representation."""
92        return FunnelAnalysisView(namespace, _dict["tables"])

Get a FunnalAnalysisView from a dict representation.

def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
94    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
95        """Get this view as LookML."""
96        return {
97            "includes": [f"{self.tables[0]['funnel_analysis']}.view.lkml"],
98            "views": self._funnel_analysis_lookml() + self._event_types_lookml(),
99        }

Get this view as LookML.

def n_events(self) -> int:
101    def n_events(self) -> int:
102        """Get the number of events allowed in this funnel."""
103        return len([k for k in self.tables[0] if k.startswith("step_")])

Get the number of events allowed in this funnel.