generator.views.funnel_analysis_view

Class to describe a Funnel Analysis View.

We create a single View file and a single Explore file.

The View file has many Looker views defined within it: funnel_analysis: Based on events_daily, has the events string and user dimensions (e.g. country) event_names: The names of events. Used for suggestions. event_N: For each possible funnel step, a single view. This is used to define what that funnel step is.

The Explore's job is to take this generated file an link all those event_N's to the funnel_analysis. We join them via cross join, because event_N views only have 1 row and 1 column - the match_string to use for a regex_match on the events string in funnel_analysis.

For example, say we filter event_1 on event: WHERE event in ("session-start, "session-end") Then we join that with funnel_analysis: FROM funnel_analysis CROSS JOIN event_1 That lets us find out whether the user completed those funnel steps: SELECT REGEXP_CONTAINS(funnel_analysis.events, event_1.match_string) AS completed_step_1

The funnel_analysis view has some nice dimensions to hide these details from the end user, e.g. completed_funnel_step_N. We can then count those users across dimensions.

  1"""Class to describe a Funnel Analysis View.
  2
  3We create a single View file and a single Explore file.
  4
  5The View file has many Looker views defined within it:
  6    funnel_analysis: Based on events_daily, has the `events` string and user dimensions (e.g. country)
  7    event_names: The names of events. Used for suggestions.
  8    event_N: For each possible funnel step, a single view. This is used to define what that funnel step is.
  9
 10The Explore's job is to take this generated file an link all those event_N's to the funnel_analysis.
 11We join them via cross join, because event_N views only have 1 row and 1 column - the match_string
 12to use for a regex_match on the `events` string in funnel_analysis.
 13
 14For example, say we filter event_1 on `event`: `WHERE event in ("session-start, "session-end")`
 15Then we join that with funnel_analysis: `FROM funnel_analysis CROSS JOIN event_1`
 16That lets us find out whether the user completed those funnel steps:
 17    `SELECT REGEXP_CONTAINS(funnel_analysis.events, event_1.match_string) AS completed_step_1`
 18
 19The `funnel_analysis` view has some nice dimensions to hide these details from the end user,
 20e.g. `completed_funnel_step_N`. We can then count those users across dimensions.
 21"""
 22
 23from __future__ import annotations
 24
 25from textwrap import dedent
 26from typing import Any, Dict, Iterator, List, Optional
 27
 28from .view import View, ViewDict
 29
 30DEFAULT_NUM_FUNNEL_STEPS: int = 4
 31
 32
 33class FunnelAnalysisView(View):
 34    """A view for doing Funnel Analysis."""
 35
 36    type: str = "funnel_analysis_view"
 37
 38    def __init__(self, namespace: str, tables: List[Dict[str, str]]):
 39        """Get an instance of a FunnelAnalysisView."""
 40        super().__init__(namespace, "funnel_analysis", FunnelAnalysisView.type, tables)
 41
 42    @classmethod
 43    def from_db_views(
 44        klass,
 45        namespace: str,
 46        is_glean: bool,
 47        channels: List[Dict[str, str]],
 48        db_views: dict,
 49        num_funnel_steps: int = DEFAULT_NUM_FUNNEL_STEPS,
 50    ) -> Iterator[FunnelAnalysisView]:
 51        """Get Client Count Views from db views and app variants.
 52
 53        We only create a FunnelAnalysisView if we have the two necessary db tables:
 54            - events_daily
 55            - event_types
 56        """
 57        # We can guarantee there will always be at least one channel,
 58        # because this comes from the associated _get_glean_repos in
 59        # namespaces.py
 60        dataset = next(
 61            (channel for channel in channels if channel.get("channel") == "release"),
 62            channels[0],
 63        )["dataset"]
 64
 65        necessary_views = {"events_daily", "event_types"}
 66        actual_views = {}
 67        for view_id, references in db_views[dataset].items():
 68            if view_id in necessary_views:
 69                actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`"
 70
 71        if len(actual_views) == 2:
 72            # Only create an instance if we have the two necessary tables
 73            tables = {
 74                "funnel_analysis": "events_daily_table",
 75                "event_types": actual_views["event_types"],
 76            }
 77            tables.update(
 78                {f"step_{i}": "event_types" for i in range(1, num_funnel_steps + 1)}
 79            )
 80            yield FunnelAnalysisView(
 81                namespace,
 82                [tables],
 83            )
 84
 85    @classmethod
 86    def from_dict(
 87        klass, namespace: str, name: str, _dict: ViewDict
 88    ) -> FunnelAnalysisView:
 89        """Get a FunnalAnalysisView from a dict representation."""
 90        return FunnelAnalysisView(namespace, _dict["tables"])
 91
 92    def to_lookml(self, bq_client, v1_name: Optional[str]) -> Dict[str, Any]:
 93        """Get this view as LookML."""
 94        return {
 95            "includes": [f"{self.tables[0]['funnel_analysis']}.view.lkml"],
 96            "views": self._funnel_analysis_lookml() + self._event_types_lookml(),
 97        }
 98
 99    def n_events(self) -> int:
100        """Get the number of events allowed in this funnel."""
101        return len([k for k in self.tables[0] if k.startswith("step_")])
102
103    def _funnel_analysis_lookml(self) -> List[Dict[str, Any]]:
104        dimensions = [
105            {
106                "name": f"completed_step_{n}",
107                "type": "yesno",
108                "description": f"Whether the user completed step {n} on the associated day.",
109                "sql": dedent(
110                    f"""
111                    REGEXP_CONTAINS(
112                        ${{TABLE}}.events, mozfun.event_analysis.create_funnel_regex(
113                            [{", ".join([
114                                f'${{step_{ni}.match_string}}' for ni in range(1, n + 1)
115                            ])}],
116                            True
117                        )
118                    )
119                    """
120                ),
121            }
122            for n in range(1, self.n_events() + 1)
123        ]
124
125        count_measures: List[Dict[str, Any]] = [
126            {
127                "name": f"count_completed_step_{n}",
128                "description": (
129                    f"The number of times that step {n} was completed. "
130                    "Grouping by day makes this a count of users who completed "
131                    f"step {n} on each day."
132                ),
133                "type": "count",
134                "filters": [{f"completed_step_{ni}": "yes"} for ni in range(1, n + 1)],
135            }
136            for n in range(1, self.n_events() + 1)
137        ]
138
139        fractional_measures: List[Dict[str, Any]] = [
140            {
141                "name": f"fraction_completed_step_{n}",
142                "description": f"Of the user-days that completed Step 1, the fraction that completed step {n}.",
143                "type": "number",
144                "value_format": "0.00%",
145                "sql": f"SAFE_DIVIDE(${{count_completed_step_{n}}}, ${{count_completed_step_1}})",
146            }
147            for n in range(1, self.n_events() + 1)
148        ]
149        return [
150            {
151                "name": "funnel_analysis",
152                "extends": ["events_daily_table"],
153                "dimensions": dimensions,
154                "measures": count_measures + fractional_measures,
155            }
156        ]
157
158    def _event_types_lookml(self) -> List[Dict[str, Any]]:
159        events = (
160            [
161                {
162                    "name": "event_types",
163                    "derived_table": {
164                        "sql": dedent(
165                            f"""
166                            SELECT
167                              mozfun.event_analysis.aggregate_match_strings(
168                                ARRAY_AGG(
169                                  DISTINCT CONCAT(
170                                    {{% if _filters['property_name'] or _filters['property_value'] -%}}
171                                    COALESCE(mozfun.event_analysis.escape_metachars(property_value.value), ''),
172                                    {{% endif -%}}
173                                    mozfun.event_analysis.event_index_to_match_string(et.index)
174                                  )
175                                )
176                              ) AS match_string
177                            FROM
178                              {self.tables[0]['event_types']} AS et
179                            LEFT JOIN
180                              UNNEST(COALESCE(event_properties, [])) AS properties
181                            LEFT JOIN
182                              UNNEST(properties.value) AS property_value
183                            WHERE
184                              {{% condition category %}} category {{% endcondition %}}
185                              AND {{% condition event %}} event {{% endcondition %}}
186                              AND {{% condition property_name %}} properties.key {{% endcondition %}}
187                              AND {{% condition property_value %}} property_value.key {{% endcondition %}}
188                            """
189                        ),
190                    },
191                    "filters": [
192                        {
193                            "name": "category",
194                            "description": "The event category, as defined in metrics.yaml.",
195                            "type": "string",
196                            "suggest_explore": "event_names",
197                            "suggest_dimension": "event_names.category",
198                        },
199                        {
200                            "name": "event",
201                            "description": "The event name.",
202                            "type": "string",
203                            "suggest_explore": "event_names",
204                            "suggest_dimension": "event_names.event",
205                        },
206                        {
207                            "name": "property_name",
208                            "description": "The event property name.",
209                            "type": "string",
210                            "suggest_explore": "event_names",
211                            "suggest_dimension": "event_names.property_name",
212                        },
213                        {
214                            "name": "property_value",
215                            "description": "The event property value.",
216                            "type": "string",
217                            "suggest_explore": "event_names",
218                            "suggest_dimension": "event_names.property_value",
219                        },
220                    ],
221                    "dimensions": [
222                        {
223                            "name": "match_string",
224                            "hidden": "yes",
225                            "sql": "${TABLE}.match_string",
226                        }
227                    ],
228                }
229            ]
230            + [
231                {
232                    "name": f"step_{n}",
233                    "extends": ["event_types"],
234                }
235                for n in range(1, self.n_events() + 1)
236            ]
237            + [
238                {
239                    "name": "event_names",
240                    "derived_table": {
241                        "sql": (
242                            "SELECT category, "
243                            "  event, "
244                            "  property.key AS property_name, "
245                            "  property_value.key AS property_value, "
246                            "  property_value.index as property_index "
247                            f"FROM {self.tables[0]['event_types']} "
248                            "LEFT JOIN UNNEST(event_properties) AS property "
249                            "LEFT JOIN UNNEST(property.value) AS property_value "
250                        )
251                    },
252                    "dimensions": [
253                        {
254                            "name": "category",
255                            "type": "string",
256                            "sql": "${TABLE}.category",
257                        },
258                        {
259                            "name": "event",
260                            "type": "string",
261                            "sql": "${TABLE}.event",
262                        },
263                        {
264                            "name": "property_name",
265                            "type": "string",
266                            "sql": "${TABLE}.property_name",
267                        },
268                        {
269                            "name": "property_value",
270                            "type": "string",
271                            "sql": "${TABLE}.property_value",
272                        },
273                    ],
274                }
275            ]
276        )
277
278        return events
DEFAULT_NUM_FUNNEL_STEPS: int = 4
class FunnelAnalysisView(generator.views.view.View):
 34class FunnelAnalysisView(View):
 35    """A view for doing Funnel Analysis."""
 36
 37    type: str = "funnel_analysis_view"
 38
 39    def __init__(self, namespace: str, tables: List[Dict[str, str]]):
 40        """Get an instance of a FunnelAnalysisView."""
 41        super().__init__(namespace, "funnel_analysis", FunnelAnalysisView.type, tables)
 42
 43    @classmethod
 44    def from_db_views(
 45        klass,
 46        namespace: str,
 47        is_glean: bool,
 48        channels: List[Dict[str, str]],
 49        db_views: dict,
 50        num_funnel_steps: int = DEFAULT_NUM_FUNNEL_STEPS,
 51    ) -> Iterator[FunnelAnalysisView]:
 52        """Get Client Count Views from db views and app variants.
 53
 54        We only create a FunnelAnalysisView if we have the two necessary db tables:
 55            - events_daily
 56            - event_types
 57        """
 58        # We can guarantee there will always be at least one channel,
 59        # because this comes from the associated _get_glean_repos in
 60        # namespaces.py
 61        dataset = next(
 62            (channel for channel in channels if channel.get("channel") == "release"),
 63            channels[0],
 64        )["dataset"]
 65
 66        necessary_views = {"events_daily", "event_types"}
 67        actual_views = {}
 68        for view_id, references in db_views[dataset].items():
 69            if view_id in necessary_views:
 70                actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`"
 71
 72        if len(actual_views) == 2:
 73            # Only create an instance if we have the two necessary tables
 74            tables = {
 75                "funnel_analysis": "events_daily_table",
 76                "event_types": actual_views["event_types"],
 77            }
 78            tables.update(
 79                {f"step_{i}": "event_types" for i in range(1, num_funnel_steps + 1)}
 80            )
 81            yield FunnelAnalysisView(
 82                namespace,
 83                [tables],
 84            )
 85
 86    @classmethod
 87    def from_dict(
 88        klass, namespace: str, name: str, _dict: ViewDict
 89    ) -> FunnelAnalysisView:
 90        """Get a FunnalAnalysisView from a dict representation."""
 91        return FunnelAnalysisView(namespace, _dict["tables"])
 92
 93    def to_lookml(self, bq_client, v1_name: Optional[str]) -> Dict[str, Any]:
 94        """Get this view as LookML."""
 95        return {
 96            "includes": [f"{self.tables[0]['funnel_analysis']}.view.lkml"],
 97            "views": self._funnel_analysis_lookml() + self._event_types_lookml(),
 98        }
 99
100    def n_events(self) -> int:
101        """Get the number of events allowed in this funnel."""
102        return len([k for k in self.tables[0] if k.startswith("step_")])
103
104    def _funnel_analysis_lookml(self) -> List[Dict[str, Any]]:
105        dimensions = [
106            {
107                "name": f"completed_step_{n}",
108                "type": "yesno",
109                "description": f"Whether the user completed step {n} on the associated day.",
110                "sql": dedent(
111                    f"""
112                    REGEXP_CONTAINS(
113                        ${{TABLE}}.events, mozfun.event_analysis.create_funnel_regex(
114                            [{", ".join([
115                                f'${{step_{ni}.match_string}}' for ni in range(1, n + 1)
116                            ])}],
117                            True
118                        )
119                    )
120                    """
121                ),
122            }
123            for n in range(1, self.n_events() + 1)
124        ]
125
126        count_measures: List[Dict[str, Any]] = [
127            {
128                "name": f"count_completed_step_{n}",
129                "description": (
130                    f"The number of times that step {n} was completed. "
131                    "Grouping by day makes this a count of users who completed "
132                    f"step {n} on each day."
133                ),
134                "type": "count",
135                "filters": [{f"completed_step_{ni}": "yes"} for ni in range(1, n + 1)],
136            }
137            for n in range(1, self.n_events() + 1)
138        ]
139
140        fractional_measures: List[Dict[str, Any]] = [
141            {
142                "name": f"fraction_completed_step_{n}",
143                "description": f"Of the user-days that completed Step 1, the fraction that completed step {n}.",
144                "type": "number",
145                "value_format": "0.00%",
146                "sql": f"SAFE_DIVIDE(${{count_completed_step_{n}}}, ${{count_completed_step_1}})",
147            }
148            for n in range(1, self.n_events() + 1)
149        ]
150        return [
151            {
152                "name": "funnel_analysis",
153                "extends": ["events_daily_table"],
154                "dimensions": dimensions,
155                "measures": count_measures + fractional_measures,
156            }
157        ]
158
159    def _event_types_lookml(self) -> List[Dict[str, Any]]:
160        events = (
161            [
162                {
163                    "name": "event_types",
164                    "derived_table": {
165                        "sql": dedent(
166                            f"""
167                            SELECT
168                              mozfun.event_analysis.aggregate_match_strings(
169                                ARRAY_AGG(
170                                  DISTINCT CONCAT(
171                                    {{% if _filters['property_name'] or _filters['property_value'] -%}}
172                                    COALESCE(mozfun.event_analysis.escape_metachars(property_value.value), ''),
173                                    {{% endif -%}}
174                                    mozfun.event_analysis.event_index_to_match_string(et.index)
175                                  )
176                                )
177                              ) AS match_string
178                            FROM
179                              {self.tables[0]['event_types']} AS et
180                            LEFT JOIN
181                              UNNEST(COALESCE(event_properties, [])) AS properties
182                            LEFT JOIN
183                              UNNEST(properties.value) AS property_value
184                            WHERE
185                              {{% condition category %}} category {{% endcondition %}}
186                              AND {{% condition event %}} event {{% endcondition %}}
187                              AND {{% condition property_name %}} properties.key {{% endcondition %}}
188                              AND {{% condition property_value %}} property_value.key {{% endcondition %}}
189                            """
190                        ),
191                    },
192                    "filters": [
193                        {
194                            "name": "category",
195                            "description": "The event category, as defined in metrics.yaml.",
196                            "type": "string",
197                            "suggest_explore": "event_names",
198                            "suggest_dimension": "event_names.category",
199                        },
200                        {
201                            "name": "event",
202                            "description": "The event name.",
203                            "type": "string",
204                            "suggest_explore": "event_names",
205                            "suggest_dimension": "event_names.event",
206                        },
207                        {
208                            "name": "property_name",
209                            "description": "The event property name.",
210                            "type": "string",
211                            "suggest_explore": "event_names",
212                            "suggest_dimension": "event_names.property_name",
213                        },
214                        {
215                            "name": "property_value",
216                            "description": "The event property value.",
217                            "type": "string",
218                            "suggest_explore": "event_names",
219                            "suggest_dimension": "event_names.property_value",
220                        },
221                    ],
222                    "dimensions": [
223                        {
224                            "name": "match_string",
225                            "hidden": "yes",
226                            "sql": "${TABLE}.match_string",
227                        }
228                    ],
229                }
230            ]
231            + [
232                {
233                    "name": f"step_{n}",
234                    "extends": ["event_types"],
235                }
236                for n in range(1, self.n_events() + 1)
237            ]
238            + [
239                {
240                    "name": "event_names",
241                    "derived_table": {
242                        "sql": (
243                            "SELECT category, "
244                            "  event, "
245                            "  property.key AS property_name, "
246                            "  property_value.key AS property_value, "
247                            "  property_value.index as property_index "
248                            f"FROM {self.tables[0]['event_types']} "
249                            "LEFT JOIN UNNEST(event_properties) AS property "
250                            "LEFT JOIN UNNEST(property.value) AS property_value "
251                        )
252                    },
253                    "dimensions": [
254                        {
255                            "name": "category",
256                            "type": "string",
257                            "sql": "${TABLE}.category",
258                        },
259                        {
260                            "name": "event",
261                            "type": "string",
262                            "sql": "${TABLE}.event",
263                        },
264                        {
265                            "name": "property_name",
266                            "type": "string",
267                            "sql": "${TABLE}.property_name",
268                        },
269                        {
270                            "name": "property_value",
271                            "type": "string",
272                            "sql": "${TABLE}.property_value",
273                        },
274                    ],
275                }
276            ]
277        )
278
279        return events

A view for doing Funnel Analysis.

FunnelAnalysisView(namespace: str, tables: List[Dict[str, str]])
39    def __init__(self, namespace: str, tables: List[Dict[str, str]]):
40        """Get an instance of a FunnelAnalysisView."""
41        super().__init__(namespace, "funnel_analysis", FunnelAnalysisView.type, tables)

Get an instance of a FunnelAnalysisView.

type: str = 'funnel_analysis_view'
@classmethod
def from_db_views( klass, namespace: str, is_glean: bool, channels: List[Dict[str, str]], db_views: dict, num_funnel_steps: int = 4) -> Iterator[FunnelAnalysisView]:
43    @classmethod
44    def from_db_views(
45        klass,
46        namespace: str,
47        is_glean: bool,
48        channels: List[Dict[str, str]],
49        db_views: dict,
50        num_funnel_steps: int = DEFAULT_NUM_FUNNEL_STEPS,
51    ) -> Iterator[FunnelAnalysisView]:
52        """Get Client Count Views from db views and app variants.
53
54        We only create a FunnelAnalysisView if we have the two necessary db tables:
55            - events_daily
56            - event_types
57        """
58        # We can guarantee there will always be at least one channel,
59        # because this comes from the associated _get_glean_repos in
60        # namespaces.py
61        dataset = next(
62            (channel for channel in channels if channel.get("channel") == "release"),
63            channels[0],
64        )["dataset"]
65
66        necessary_views = {"events_daily", "event_types"}
67        actual_views = {}
68        for view_id, references in db_views[dataset].items():
69            if view_id in necessary_views:
70                actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`"
71
72        if len(actual_views) == 2:
73            # Only create an instance if we have the two necessary tables
74            tables = {
75                "funnel_analysis": "events_daily_table",
76                "event_types": actual_views["event_types"],
77            }
78            tables.update(
79                {f"step_{i}": "event_types" for i in range(1, num_funnel_steps + 1)}
80            )
81            yield FunnelAnalysisView(
82                namespace,
83                [tables],
84            )

Get Client Count Views from db views and app variants.

We only create a FunnelAnalysisView if we have the two necessary db tables: - events_daily - event_types

@classmethod
def from_dict( klass, namespace: str, name: str, _dict: generator.views.view.ViewDict) -> FunnelAnalysisView:
86    @classmethod
87    def from_dict(
88        klass, namespace: str, name: str, _dict: ViewDict
89    ) -> FunnelAnalysisView:
90        """Get a FunnalAnalysisView from a dict representation."""
91        return FunnelAnalysisView(namespace, _dict["tables"])

Get a FunnalAnalysisView from a dict representation.

def to_lookml(self, bq_client, v1_name: Optional[str]) -> Dict[str, Any]:
93    def to_lookml(self, bq_client, v1_name: Optional[str]) -> Dict[str, Any]:
94        """Get this view as LookML."""
95        return {
96            "includes": [f"{self.tables[0]['funnel_analysis']}.view.lkml"],
97            "views": self._funnel_analysis_lookml() + self._event_types_lookml(),
98        }

Get this view as LookML.

def n_events(self) -> int:
100    def n_events(self) -> int:
101        """Get the number of events allowed in this funnel."""
102        return len([k for k in self.tables[0] if k.startswith("step_")])

Get the number of events allowed in this funnel.