generator.views.funnel_analysis_view

Class to describe a Funnel Analysis View.

We create a single View file and a single Explore file.

The View file has many Looker views defined within it: funnel_analysis: Based on events_daily, has the events string and user dimensions (e.g. country) event_names: The names of events. Used for suggestions. event_N: For each possible funnel step, a single view. This is used to define what that funnel step is.

The Explore's job is to take this generated file an link all those event_N's to the funnel_analysis. We join them via cross join, because event_N views only have 1 row and 1 column - the match_string to use for a regex_match on the events string in funnel_analysis.

For example, say we filter event_1 on event: WHERE event in ("session-start, "session-end") Then we join that with funnel_analysis: FROM funnel_analysis CROSS JOIN event_1 That lets us find out whether the user completed those funnel steps: SELECT REGEXP_CONTAINS(funnel_analysis.events, event_1.match_string) AS completed_step_1

The funnel_analysis view has some nice dimensions to hide these details from the end user, e.g. completed_funnel_step_N. We can then count those users across dimensions.

  1"""Class to describe a Funnel Analysis View.
  2
  3We create a single View file and a single Explore file.
  4
  5The View file has many Looker views defined within it:
  6    funnel_analysis: Based on events_daily, has the `events` string and user dimensions (e.g. country)
  7    event_names: The names of events. Used for suggestions.
  8    event_N: For each possible funnel step, a single view. This is used to define what that funnel step is.
  9
 10The Explore's job is to take this generated file an link all those event_N's to the funnel_analysis.
 11We join them via cross join, because event_N views only have 1 row and 1 column - the match_string
 12to use for a regex_match on the `events` string in funnel_analysis.
 13
 14For example, say we filter event_1 on `event`: `WHERE event in ("session-start, "session-end")`
 15Then we join that with funnel_analysis: `FROM funnel_analysis CROSS JOIN event_1`
 16That lets us find out whether the user completed those funnel steps:
 17    `SELECT REGEXP_CONTAINS(funnel_analysis.events, event_1.match_string) AS completed_step_1`
 18
 19The `funnel_analysis` view has some nice dimensions to hide these details from the end user,
 20e.g. `completed_funnel_step_N`. We can then count those users across dimensions.
 21"""
 22
 23from __future__ import annotations
 24
 25from textwrap import dedent
 26from typing import Any, Dict, Iterator, List, Optional
 27
 28from .view import View, ViewDict
 29
 30DEFAULT_NUM_FUNNEL_STEPS: int = 4
 31
 32
 33class FunnelAnalysisView(View):
 34    """A view for doing Funnel Analysis."""
 35
 36    type: str = "funnel_analysis_view"
 37
 38    def __init__(self, namespace: str, tables: List[Dict[str, str]]):
 39        """Get an instance of a FunnelAnalysisView."""
 40        super().__init__(namespace, "funnel_analysis", FunnelAnalysisView.type, tables)
 41
 42    @classmethod
 43    def from_db_views(
 44        klass,
 45        namespace: str,
 46        is_glean: bool,
 47        channels: List[Dict[str, str]],
 48        db_views: dict,
 49        num_funnel_steps: int = DEFAULT_NUM_FUNNEL_STEPS,
 50    ) -> Iterator[FunnelAnalysisView]:
 51        """Get Client Count Views from db views and app variants.
 52
 53        We only create a FunnelAnalysisView if we have the two necessary db tables:
 54            - events_daily
 55            - event_types
 56        """
 57        # We can guarantee there will always be at least one channel,
 58        # because this comes from the associated _get_glean_repos in
 59        # namespaces.py
 60        dataset = next(
 61            (channel for channel in channels if channel.get("channel") == "release"),
 62            channels[0],
 63        )["dataset"]
 64
 65        necessary_views = {"events_daily", "event_types"}
 66        actual_views = {}
 67        for view_id, references in db_views[dataset].items():
 68            if view_id in necessary_views:
 69                actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`"
 70
 71        if len(actual_views) == 2:
 72            # Only create an instance if we have the two necessary tables
 73            tables = {
 74                "funnel_analysis": "events_daily_table",
 75                "event_types": actual_views["event_types"],
 76            }
 77            tables.update(
 78                {f"step_{i}": "event_types" for i in range(1, num_funnel_steps + 1)}
 79            )
 80            yield FunnelAnalysisView(
 81                namespace,
 82                [tables],
 83            )
 84
 85    @classmethod
 86    def from_dict(
 87        klass, namespace: str, name: str, _dict: ViewDict
 88    ) -> FunnelAnalysisView:
 89        """Get a FunnalAnalysisView from a dict representation."""
 90        return FunnelAnalysisView(namespace, _dict["tables"])
 91
 92    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 93        """Get this view as LookML."""
 94        return {
 95            "includes": [f"{self.tables[0]['funnel_analysis']}.view.lkml"],
 96            "views": self._funnel_analysis_lookml() + self._event_types_lookml(),
 97        }
 98
 99    def n_events(self) -> int:
100        """Get the number of events allowed in this funnel."""
101        return len([k for k in self.tables[0] if k.startswith("step_")])
102
103    def _funnel_analysis_lookml(self) -> List[Dict[str, Any]]:
104        dimensions = [
105            {
106                "name": f"completed_step_{n}",
107                "type": "yesno",
108                "description": f"Whether the user completed step {n} on the associated day.",
109                "sql": dedent(
110                    f"""
111                    REGEXP_CONTAINS(
112                        ${{TABLE}}.events, mozfun.event_analysis.create_funnel_regex(
113                            [{", ".join([
114                                f'${{step_{ni}.match_string}}' for ni in range(1, n + 1)
115                            ])}],
116                            True
117                        )
118                    )
119                    """
120                ),
121            }
122            for n in range(1, self.n_events() + 1)
123        ]
124
125        count_measures: List[Dict[str, Any]] = [
126            {
127                "name": f"count_completed_step_{n}",
128                "description": (
129                    f"The number of times that step {n} was completed. "
130                    "Grouping by day makes this a count of users who completed "
131                    f"step {n} on each day."
132                ),
133                "type": "count",
134                "filters": [{f"completed_step_{ni}": "yes"} for ni in range(1, n + 1)],
135            }
136            for n in range(1, self.n_events() + 1)
137        ]
138
139        fractional_measures: List[Dict[str, Any]] = [
140            {
141                "name": f"fraction_completed_step_{n}",
142                "description": f"Of the user-days that completed Step 1, the fraction that completed step {n}.",
143                "type": "number",
144                "value_format": "0.00%",
145                "sql": f"SAFE_DIVIDE(${{count_completed_step_{n}}}, ${{count_completed_step_1}})",
146            }
147            for n in range(1, self.n_events() + 1)
148        ]
149        return [
150            {
151                "name": "funnel_analysis",
152                "extends": ["events_daily_table"],
153                "dimensions": dimensions,
154                "measures": count_measures + fractional_measures,
155            }
156        ]
157
158    def _event_types_lookml(self) -> List[Dict[str, Any]]:
159        events = (
160            [
161                {
162                    "name": "event_types",
163                    "derived_table": {
164                        "sql": dedent(
165                            f"""
166                            SELECT
167                              mozfun.event_analysis.aggregate_match_strings(
168                                ARRAY_AGG(
169                                  DISTINCT
170                                  {{% if _filters['property_value'] -%}}
171                                    mozfun.event_analysis.event_property_value_to_match_string(
172                                      et.index,
173                                      properties.index,
174                                      property_value.value
175                                    )
176                                  {{% elsif _filters['property_name'] -%}}
177                                    mozfun.event_analysis.event_property_index_to_match_string(
178                                      et.index,
179                                      properties.index
180                                    )
181                                  {{% else -%}}
182                                    mozfun.event_analysis.event_index_to_match_string(et.index)
183                                  {{% endif -%}}
184                                  IGNORE NULLS
185                                )
186                              ) AS match_string
187                            FROM
188                              {self.tables[0]['event_types']} AS et
189                            LEFT JOIN
190                              UNNEST(COALESCE(event_properties, [])) AS properties
191                            LEFT JOIN
192                              UNNEST(properties.value) AS property_value
193                            WHERE
194                              {{% condition category %}} category {{% endcondition %}}
195                              AND {{% condition event %}} event {{% endcondition %}}
196                              AND {{% condition property_name %}} properties.key {{% endcondition %}}
197                              AND {{% condition property_value %}} property_value.key {{% endcondition %}}
198                            """
199                        ),
200                    },
201                    "filters": [
202                        {
203                            "name": "category",
204                            "description": "The event category, as defined in metrics.yaml.",
205                            "type": "string",
206                            "suggest_explore": "event_names",
207                            "suggest_dimension": "event_names.category",
208                        },
209                        {
210                            "name": "event",
211                            "description": "The event name.",
212                            "type": "string",
213                            "suggest_explore": "event_names",
214                            "suggest_dimension": "event_names.event",
215                        },
216                        {
217                            "name": "property_name",
218                            "description": "The event property name.",
219                            "type": "string",
220                            "suggest_explore": "event_names",
221                            "suggest_dimension": "event_names.property_name",
222                        },
223                        {
224                            "name": "property_value",
225                            "description": "The event property value.",
226                            "type": "string",
227                            "suggest_explore": "event_names",
228                            "suggest_dimension": "event_names.property_value",
229                        },
230                    ],
231                    "dimensions": [
232                        {
233                            "name": "match_string",
234                            "hidden": "yes",
235                            "sql": "${TABLE}.match_string",
236                        }
237                    ],
238                }
239            ]
240            + [
241                {
242                    "name": f"step_{n}",
243                    "extends": ["event_types"],
244                }
245                for n in range(1, self.n_events() + 1)
246            ]
247            + [
248                {
249                    "name": "event_names",
250                    "derived_table": {
251                        "sql": (
252                            "SELECT category, "
253                            "  event, "
254                            "  property.key AS property_name, "
255                            "  property_value.key AS property_value, "
256                            "  property_value.index as property_index "
257                            f"FROM {self.tables[0]['event_types']} "
258                            "LEFT JOIN UNNEST(event_properties) AS property "
259                            "LEFT JOIN UNNEST(property.value) AS property_value "
260                        )
261                    },
262                    "dimensions": [
263                        {
264                            "name": "category",
265                            "type": "string",
266                            "sql": "${TABLE}.category",
267                        },
268                        {
269                            "name": "event",
270                            "type": "string",
271                            "sql": "${TABLE}.event",
272                        },
273                        {
274                            "name": "property_name",
275                            "type": "string",
276                            "sql": "${TABLE}.property_name",
277                        },
278                        {
279                            "name": "property_value",
280                            "type": "string",
281                            "sql": "${TABLE}.property_value",
282                        },
283                    ],
284                }
285            ]
286        )
287
288        return events
DEFAULT_NUM_FUNNEL_STEPS: int = 4
class FunnelAnalysisView(generator.views.view.View):
 34class FunnelAnalysisView(View):
 35    """A view for doing Funnel Analysis."""
 36
 37    type: str = "funnel_analysis_view"
 38
 39    def __init__(self, namespace: str, tables: List[Dict[str, str]]):
 40        """Get an instance of a FunnelAnalysisView."""
 41        super().__init__(namespace, "funnel_analysis", FunnelAnalysisView.type, tables)
 42
 43    @classmethod
 44    def from_db_views(
 45        klass,
 46        namespace: str,
 47        is_glean: bool,
 48        channels: List[Dict[str, str]],
 49        db_views: dict,
 50        num_funnel_steps: int = DEFAULT_NUM_FUNNEL_STEPS,
 51    ) -> Iterator[FunnelAnalysisView]:
 52        """Get Client Count Views from db views and app variants.
 53
 54        We only create a FunnelAnalysisView if we have the two necessary db tables:
 55            - events_daily
 56            - event_types
 57        """
 58        # We can guarantee there will always be at least one channel,
 59        # because this comes from the associated _get_glean_repos in
 60        # namespaces.py
 61        dataset = next(
 62            (channel for channel in channels if channel.get("channel") == "release"),
 63            channels[0],
 64        )["dataset"]
 65
 66        necessary_views = {"events_daily", "event_types"}
 67        actual_views = {}
 68        for view_id, references in db_views[dataset].items():
 69            if view_id in necessary_views:
 70                actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`"
 71
 72        if len(actual_views) == 2:
 73            # Only create an instance if we have the two necessary tables
 74            tables = {
 75                "funnel_analysis": "events_daily_table",
 76                "event_types": actual_views["event_types"],
 77            }
 78            tables.update(
 79                {f"step_{i}": "event_types" for i in range(1, num_funnel_steps + 1)}
 80            )
 81            yield FunnelAnalysisView(
 82                namespace,
 83                [tables],
 84            )
 85
 86    @classmethod
 87    def from_dict(
 88        klass, namespace: str, name: str, _dict: ViewDict
 89    ) -> FunnelAnalysisView:
 90        """Get a FunnalAnalysisView from a dict representation."""
 91        return FunnelAnalysisView(namespace, _dict["tables"])
 92
 93    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
 94        """Get this view as LookML."""
 95        return {
 96            "includes": [f"{self.tables[0]['funnel_analysis']}.view.lkml"],
 97            "views": self._funnel_analysis_lookml() + self._event_types_lookml(),
 98        }
 99
100    def n_events(self) -> int:
101        """Get the number of events allowed in this funnel."""
102        return len([k for k in self.tables[0] if k.startswith("step_")])
103
104    def _funnel_analysis_lookml(self) -> List[Dict[str, Any]]:
105        dimensions = [
106            {
107                "name": f"completed_step_{n}",
108                "type": "yesno",
109                "description": f"Whether the user completed step {n} on the associated day.",
110                "sql": dedent(
111                    f"""
112                    REGEXP_CONTAINS(
113                        ${{TABLE}}.events, mozfun.event_analysis.create_funnel_regex(
114                            [{", ".join([
115                                f'${{step_{ni}.match_string}}' for ni in range(1, n + 1)
116                            ])}],
117                            True
118                        )
119                    )
120                    """
121                ),
122            }
123            for n in range(1, self.n_events() + 1)
124        ]
125
126        count_measures: List[Dict[str, Any]] = [
127            {
128                "name": f"count_completed_step_{n}",
129                "description": (
130                    f"The number of times that step {n} was completed. "
131                    "Grouping by day makes this a count of users who completed "
132                    f"step {n} on each day."
133                ),
134                "type": "count",
135                "filters": [{f"completed_step_{ni}": "yes"} for ni in range(1, n + 1)],
136            }
137            for n in range(1, self.n_events() + 1)
138        ]
139
140        fractional_measures: List[Dict[str, Any]] = [
141            {
142                "name": f"fraction_completed_step_{n}",
143                "description": f"Of the user-days that completed Step 1, the fraction that completed step {n}.",
144                "type": "number",
145                "value_format": "0.00%",
146                "sql": f"SAFE_DIVIDE(${{count_completed_step_{n}}}, ${{count_completed_step_1}})",
147            }
148            for n in range(1, self.n_events() + 1)
149        ]
150        return [
151            {
152                "name": "funnel_analysis",
153                "extends": ["events_daily_table"],
154                "dimensions": dimensions,
155                "measures": count_measures + fractional_measures,
156            }
157        ]
158
159    def _event_types_lookml(self) -> List[Dict[str, Any]]:
160        events = (
161            [
162                {
163                    "name": "event_types",
164                    "derived_table": {
165                        "sql": dedent(
166                            f"""
167                            SELECT
168                              mozfun.event_analysis.aggregate_match_strings(
169                                ARRAY_AGG(
170                                  DISTINCT
171                                  {{% if _filters['property_value'] -%}}
172                                    mozfun.event_analysis.event_property_value_to_match_string(
173                                      et.index,
174                                      properties.index,
175                                      property_value.value
176                                    )
177                                  {{% elsif _filters['property_name'] -%}}
178                                    mozfun.event_analysis.event_property_index_to_match_string(
179                                      et.index,
180                                      properties.index
181                                    )
182                                  {{% else -%}}
183                                    mozfun.event_analysis.event_index_to_match_string(et.index)
184                                  {{% endif -%}}
185                                  IGNORE NULLS
186                                )
187                              ) AS match_string
188                            FROM
189                              {self.tables[0]['event_types']} AS et
190                            LEFT JOIN
191                              UNNEST(COALESCE(event_properties, [])) AS properties
192                            LEFT JOIN
193                              UNNEST(properties.value) AS property_value
194                            WHERE
195                              {{% condition category %}} category {{% endcondition %}}
196                              AND {{% condition event %}} event {{% endcondition %}}
197                              AND {{% condition property_name %}} properties.key {{% endcondition %}}
198                              AND {{% condition property_value %}} property_value.key {{% endcondition %}}
199                            """
200                        ),
201                    },
202                    "filters": [
203                        {
204                            "name": "category",
205                            "description": "The event category, as defined in metrics.yaml.",
206                            "type": "string",
207                            "suggest_explore": "event_names",
208                            "suggest_dimension": "event_names.category",
209                        },
210                        {
211                            "name": "event",
212                            "description": "The event name.",
213                            "type": "string",
214                            "suggest_explore": "event_names",
215                            "suggest_dimension": "event_names.event",
216                        },
217                        {
218                            "name": "property_name",
219                            "description": "The event property name.",
220                            "type": "string",
221                            "suggest_explore": "event_names",
222                            "suggest_dimension": "event_names.property_name",
223                        },
224                        {
225                            "name": "property_value",
226                            "description": "The event property value.",
227                            "type": "string",
228                            "suggest_explore": "event_names",
229                            "suggest_dimension": "event_names.property_value",
230                        },
231                    ],
232                    "dimensions": [
233                        {
234                            "name": "match_string",
235                            "hidden": "yes",
236                            "sql": "${TABLE}.match_string",
237                        }
238                    ],
239                }
240            ]
241            + [
242                {
243                    "name": f"step_{n}",
244                    "extends": ["event_types"],
245                }
246                for n in range(1, self.n_events() + 1)
247            ]
248            + [
249                {
250                    "name": "event_names",
251                    "derived_table": {
252                        "sql": (
253                            "SELECT category, "
254                            "  event, "
255                            "  property.key AS property_name, "
256                            "  property_value.key AS property_value, "
257                            "  property_value.index as property_index "
258                            f"FROM {self.tables[0]['event_types']} "
259                            "LEFT JOIN UNNEST(event_properties) AS property "
260                            "LEFT JOIN UNNEST(property.value) AS property_value "
261                        )
262                    },
263                    "dimensions": [
264                        {
265                            "name": "category",
266                            "type": "string",
267                            "sql": "${TABLE}.category",
268                        },
269                        {
270                            "name": "event",
271                            "type": "string",
272                            "sql": "${TABLE}.event",
273                        },
274                        {
275                            "name": "property_name",
276                            "type": "string",
277                            "sql": "${TABLE}.property_name",
278                        },
279                        {
280                            "name": "property_value",
281                            "type": "string",
282                            "sql": "${TABLE}.property_value",
283                        },
284                    ],
285                }
286            ]
287        )
288
289        return events

A view for doing Funnel Analysis.

FunnelAnalysisView(namespace: str, tables: List[Dict[str, str]])
39    def __init__(self, namespace: str, tables: List[Dict[str, str]]):
40        """Get an instance of a FunnelAnalysisView."""
41        super().__init__(namespace, "funnel_analysis", FunnelAnalysisView.type, tables)

Get an instance of a FunnelAnalysisView.

type: str = 'funnel_analysis_view'
@classmethod
def from_db_views( klass, namespace: str, is_glean: bool, channels: List[Dict[str, str]], db_views: dict, num_funnel_steps: int = 4) -> Iterator[FunnelAnalysisView]:
43    @classmethod
44    def from_db_views(
45        klass,
46        namespace: str,
47        is_glean: bool,
48        channels: List[Dict[str, str]],
49        db_views: dict,
50        num_funnel_steps: int = DEFAULT_NUM_FUNNEL_STEPS,
51    ) -> Iterator[FunnelAnalysisView]:
52        """Get Client Count Views from db views and app variants.
53
54        We only create a FunnelAnalysisView if we have the two necessary db tables:
55            - events_daily
56            - event_types
57        """
58        # We can guarantee there will always be at least one channel,
59        # because this comes from the associated _get_glean_repos in
60        # namespaces.py
61        dataset = next(
62            (channel for channel in channels if channel.get("channel") == "release"),
63            channels[0],
64        )["dataset"]
65
66        necessary_views = {"events_daily", "event_types"}
67        actual_views = {}
68        for view_id, references in db_views[dataset].items():
69            if view_id in necessary_views:
70                actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`"
71
72        if len(actual_views) == 2:
73            # Only create an instance if we have the two necessary tables
74            tables = {
75                "funnel_analysis": "events_daily_table",
76                "event_types": actual_views["event_types"],
77            }
78            tables.update(
79                {f"step_{i}": "event_types" for i in range(1, num_funnel_steps + 1)}
80            )
81            yield FunnelAnalysisView(
82                namespace,
83                [tables],
84            )

Get Client Count Views from db views and app variants.

We only create a FunnelAnalysisView if we have the two necessary db tables: - events_daily - event_types

@classmethod
def from_dict( klass, namespace: str, name: str, _dict: generator.views.view.ViewDict) -> FunnelAnalysisView:
86    @classmethod
87    def from_dict(
88        klass, namespace: str, name: str, _dict: ViewDict
89    ) -> FunnelAnalysisView:
90        """Get a FunnalAnalysisView from a dict representation."""
91        return FunnelAnalysisView(namespace, _dict["tables"])

Get a FunnalAnalysisView from a dict representation.

def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
93    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
94        """Get this view as LookML."""
95        return {
96            "includes": [f"{self.tables[0]['funnel_analysis']}.view.lkml"],
97            "views": self._funnel_analysis_lookml() + self._event_types_lookml(),
98        }

Get this view as LookML.

def n_events(self) -> int:
100    def n_events(self) -> int:
101        """Get the number of events allowed in this funnel."""
102        return len([k for k in self.tables[0] if k.startswith("step_")])

Get the number of events allowed in this funnel.