mozilla_schema_generator.probes

  1# -*- coding: utf-8 -*-
  2
  3# This Source Code Form is subject to the terms of the Mozilla Public
  4# License, v. 2.0. If a copy of the MPL was not distributed with this
  5# file, You can obtain one at http://mozilla.org/MPL/2.0/.
  6
  7
  8from __future__ import annotations
  9
 10import json
 11from datetime import datetime
 12from typing import Any, List
 13
 14from .schema import SchemaException
 15from .utils import _get
 16
 17
 18class Probe(object):
 19    type_key = "type"
 20    name_key = "name"
 21    history_key = "history"
 22    in_source_key = "in-source"
 23
 24    def __init__(self, identifier: str, definition: dict):
 25        self.id = identifier
 26        self.type = definition[self.type_key]
 27        self.name = definition[self.name_key]
 28
 29    def __repr__(self):
 30        return json.dumps(
 31            {
 32                "id": self.id,
 33                "type": self.type,
 34                "name": self.name,
 35                "description": self.description,
 36            }
 37        )
 38
 39    def get_type(self) -> str:
 40        return self.type
 41
 42    def get_name(self) -> str:
 43        return self.name
 44
 45    def get_description(self) -> str:
 46        return self.description
 47
 48    def get_last_change(self) -> datetime:
 49        raise NotImplementedError("Last Change is not available on generic probe")
 50
 51    def get_first_added(self) -> datetime:
 52        raise NotImplementedError("First added is not available on generic probe")
 53
 54    def get_schema(self, addtlProps: Any) -> Any:
 55        raise NotImplementedError("Get Schema is not available on generic probe")
 56
 57    def get(self, *k) -> Any:
 58        return _get(self.definition, k)
 59
 60    def __lt__(self, other: Probe) -> bool:
 61        if self.get_first_added() == other.get_first_added():
 62            return self.get_name() < other.get_name()
 63
 64        return self.get_first_added() < other.get_first_added()
 65
 66
 67class MainProbe(Probe):
 68    first_added_key = "first_added"
 69
 70    histogram_schema = {"type": "string"}
 71
 72    parent_processes = {"main"}
 73
 74    child_processes = {"content", "gpu", "extension", "dynamic", "socket"}
 75
 76    processes_map = {
 77        "all_childs": child_processes,
 78        "all_children": child_processes,
 79        "all": child_processes | parent_processes,
 80    }
 81
 82    def __init__(self, identifier: str, definition: dict):
 83        self._set_dates(definition[self.first_added_key])
 84        self._set_definition(definition)
 85        self._set_description(self.definition)
 86        super().__init__(identifier, definition)
 87
 88    def _set_definition(self, full_defn: dict):
 89        history = [d for arr in full_defn[self.history_key].values() for d in arr]
 90        self.definition = max(history, key=lambda x: int(x["versions"]["first"]))
 91        self.definition["name"] = full_defn[self.name_key]
 92        self._set_processes(history)
 93
 94    def _set_processes(self, history):
 95        # Include all historical processes
 96        processes = {
 97            p for d in history for p in d["details"].get("record_in_processes", [])
 98        }
 99        processes = {
100            sub_p for p in processes for sub_p in self.processes_map.get(p, [p])
101        }
102        self.definition["details"]["record_in_processes"] = processes
103
104    def _set_dates(self, first_added_value: dict):
105        vals = [datetime.fromisoformat(v) for v in first_added_value.values()]
106
107        self.first_added = min(vals)
108        self.last_change = max(vals)
109
110    def _set_description(self, definition):
111        self.description = None
112        if "description" in definition:
113            self.description = definition["description"]
114            # BigQuery limits descriptions to a maximum of 1024 characters,
115            # so we truncate anything longer than 1000.
116            if len(self.description) >= 1000:
117                self.description = self.description[:1000] + "…"
118
119    def get_first_added(self) -> datetime:
120        return self.first_added
121
122    def get_last_change(self) -> datetime:
123        return self.last_change
124
125    def get_schema(self, addtlProps: Any) -> Any:
126        # Get the schema based on the probe type
127        if self.get_type() == "scalar":
128            ptype = self.get("details", "kind")
129            if ptype == "boolean":
130                pschema = {"type": "boolean"}
131            elif ptype == "string":
132                pschema = {"type": "string"}
133            elif ptype == "uint":
134                pschema = {"type": "integer"}
135            else:
136                raise Exception("Unknown scalar type " + ptype)
137        elif self.get_type() == "histogram":
138            pschema = self.histogram_schema
139
140        if self.description is not None:
141            pschema["description"] = self.description
142
143        # Add nested level if keyed
144        if self.get("details", "keyed"):
145            final_schema = {"type": "object", "additionalProperties": pschema}
146        else:
147            final_schema = pschema
148
149        return final_schema
150
151
class GleanProbe(Probe):
    """Probe whose definition carries a list of dated history entries.

    The entry with the latest ``dates.last`` becomes the canonical
    ``self.definition``; the pings named across the whole history are
    merged into its ``send_in_pings``.
    """

    # Ping names that stand for "every ping".
    all_pings_keywords = ("all-pings", "all_pings")
    first_added_key = "first_added"

    def __init__(self, identifier: str, definition: dict, *, pings: List[str] = None):
        """Derive dates, canonical definition, description and pings.

        Args:
            identifier: probe identifier stored as ``self.id``.
            definition: full probe definition including its history list.
            pings: when given, replaces ``send_in_pings`` if the probe uses
                one of ``all_pings_keywords``.
        """
        self._set_dates(definition)
        self._set_definition(definition)
        self._set_description(self.definition)
        self._in_source = definition.get(self.in_source_key, False)
        super().__init__(identifier, definition)

        # Union of the pings of every history entry; entries without
        # "send_in_pings" count as ["metrics"].
        self.definition["send_in_pings"] = {
            ping
            for entry in definition[self.history_key]
            for ping in entry.get("send_in_pings", ["metrics"])
        }

        if pings is not None:
            self._update_all_pings(pings)

    def _update_all_pings(self, pings: List[str]):
        """Replace ``send_in_pings`` with ``pings`` if an all-pings keyword is set."""
        current = self.definition["send_in_pings"]
        if any(kw in current for kw in GleanProbe.all_pings_keywords):
            self.definition["send_in_pings"] = set(pings)

    def _set_definition(self, full_defn: dict):
        """Sort the history newest-first and take its head as the definition.

        The chosen entry is modified in place: it receives the probe name.
        """
        # Expose the entire history, for special casing of the probe.
        self.definition_history = sorted(
            full_defn[self.history_key],
            key=lambda entry: datetime.fromisoformat(entry["dates"]["last"]),
            reverse=True,
        )

        # The canonical definition for up-to-date schemas
        self.definition = self.definition_history[0]
        self.definition["name"] = full_defn[self.name_key]

    def _set_dates(self, definition: dict):
        """Set first_added / last_change from the entries' ``dates.first``."""
        firsts = [
            datetime.fromisoformat(entry["dates"]["first"])
            for entry in definition[self.history_key]
        ]

        self.first_added = min(firsts)
        self.last_change = max(firsts)

    def _set_description(self, definition):
        """Set ``self.description`` from ``definition`` (``None`` if absent)."""
        self.description = definition.get("description")

    def is_in_source(self) -> bool:
        """Return the definition's ``in-source`` flag (``False`` if absent)."""
        return self._in_source

    def get_first_added(self) -> datetime:
        """Return the earliest ``dates.first`` in the history."""
        return self.first_added

    def get_last_change(self) -> datetime:
        """Return the latest ``dates.first`` in the history."""
        return self.last_change

    def get_schema(self, addtlProps: Any) -> Any:
        """Return ``addtlProps`` with this probe's description added.

        The result is a shallow copy, so a dict the caller passes in for
        several probes never picks up one probe's description.

        Raises:
            SchemaException: if ``addtlProps`` is ``None``.
        """
        if addtlProps is None:
            raise SchemaException(
                "Additional Properties cannot be missing for Glean probes"
            )

        schema = dict(addtlProps)
        if self.description:
            schema["description"] = self.description

        return schema
class Probe:
class Probe(object):
    """Base class for a single probe.

    Stores the identifier, type and name taken from a probe definition and
    defines the ordering used to sort probes. Subclasses provide the date
    accessors and ``get_schema``, and assign ``self.definition`` (read by
    ``get``) and ``self.description``.
    """

    # Keys looked up in the raw probe definition.
    type_key = "type"
    name_key = "name"
    history_key = "history"
    in_source_key = "in-source"

    # Fallback so ``__repr__`` and ``get_description`` work on a probe whose
    # subclass never assigned a description; instances override it.
    description = None

    def __init__(self, identifier: str, definition: dict):
        """Record the identifier and the definition's type and name.

        Raises:
            KeyError: if ``definition`` has no type or name entry.
        """
        self.id = identifier
        self.type = definition[self.type_key]
        self.name = definition[self.name_key]

    def __repr__(self):
        """Return a JSON object string with id, type, name and description."""
        summary = {
            "id": self.id,
            "type": self.type,
            "name": self.name,
            "description": self.description,
        }
        return json.dumps(summary)

    def get_type(self) -> str:
        """Return the probe type read from the definition."""
        return self.type

    def get_name(self) -> str:
        """Return the probe name read from the definition."""
        return self.name

    def get_description(self) -> str:
        """Return the description, or ``None`` when none was set."""
        return self.description

    def get_last_change(self) -> datetime:
        """Not available on the generic probe; subclasses override this."""
        raise NotImplementedError("Last Change is not available on generic probe")

    def get_first_added(self) -> datetime:
        """Not available on the generic probe; subclasses override this."""
        raise NotImplementedError("First added is not available on generic probe")

    def get_schema(self, addtlProps: Any) -> Any:
        """Not available on the generic probe; subclasses override this."""
        raise NotImplementedError("Get Schema is not available on generic probe")

    def get(self, *k) -> Any:
        """Look up the key path ``k`` in ``self.definition`` via ``_get``."""
        return _get(self.definition, k)

    def __lt__(self, other: Probe) -> bool:
        """Order probes by first-added date, breaking ties by name."""
        mine = (self.get_first_added(), self.get_name())
        theirs = (other.get_first_added(), other.get_name())
        return mine < theirs
Probe(identifier: str, definition: dict)
    def __init__(self, identifier: str, definition: dict):
        """Record the identifier and the definition's type and name.

        Raises KeyError if ``definition`` has no ``type_key`` or
        ``name_key`` entry.
        """
        self.id = identifier
        self.type = definition[self.type_key]
        self.name = definition[self.name_key]
type_key = 'type'
name_key = 'name'
history_key = 'history'
in_source_key = 'in-source'
id
type
name
def get_type(self) -> str:
    def get_type(self) -> str:
        """Return the probe type stored by ``__init__``."""
        return self.type
def get_name(self) -> str:
    def get_name(self) -> str:
        """Return the probe name stored by ``__init__``."""
        return self.name
def get_description(self) -> str:
    def get_description(self) -> str:
        """Return ``self.description``.

        ``Probe.__init__`` does not assign this attribute; the subclasses
        set it in their ``_set_description`` helpers.
        """
        return self.description
def get_last_change(self) -> datetime.datetime:
    def get_last_change(self) -> datetime:
        """Always raises NotImplementedError; subclasses override this."""
        raise NotImplementedError("Last Change is not available on generic probe")
def get_first_added(self) -> datetime.datetime:
    def get_first_added(self) -> datetime:
        """Always raises NotImplementedError; subclasses override this."""
        raise NotImplementedError("First added is not available on generic probe")
def get_schema(self, addtlProps: Any) -> Any:
    def get_schema(self, addtlProps: Any) -> Any:
        """Always raises NotImplementedError; subclasses override this."""
        raise NotImplementedError("Get Schema is not available on generic probe")
def get(self, *k) -> Any:
    def get(self, *k) -> Any:
        """Pass ``self.definition`` and the key tuple ``k`` to ``_get``.

        ``self.definition`` is assigned by subclasses, not by
        ``Probe.__init__``.
        """
        return _get(self.definition, k)
class MainProbe(Probe):
 68class MainProbe(Probe):
 69    first_added_key = "first_added"
 70
 71    histogram_schema = {"type": "string"}
 72
 73    parent_processes = {"main"}
 74
 75    child_processes = {"content", "gpu", "extension", "dynamic", "socket"}
 76
 77    processes_map = {
 78        "all_childs": child_processes,
 79        "all_children": child_processes,
 80        "all": child_processes | parent_processes,
 81    }
 82
 83    def __init__(self, identifier: str, definition: dict):
 84        self._set_dates(definition[self.first_added_key])
 85        self._set_definition(definition)
 86        self._set_description(self.definition)
 87        super().__init__(identifier, definition)
 88
 89    def _set_definition(self, full_defn: dict):
 90        history = [d for arr in full_defn[self.history_key].values() for d in arr]
 91        self.definition = max(history, key=lambda x: int(x["versions"]["first"]))
 92        self.definition["name"] = full_defn[self.name_key]
 93        self._set_processes(history)
 94
 95    def _set_processes(self, history):
 96        # Include all historical processes
 97        processes = {
 98            p for d in history for p in d["details"].get("record_in_processes", [])
 99        }
100        processes = {
101            sub_p for p in processes for sub_p in self.processes_map.get(p, [p])
102        }
103        self.definition["details"]["record_in_processes"] = processes
104
105    def _set_dates(self, first_added_value: dict):
106        vals = [datetime.fromisoformat(v) for v in first_added_value.values()]
107
108        self.first_added = min(vals)
109        self.last_change = max(vals)
110
111    def _set_description(self, definition):
112        self.description = None
113        if "description" in definition:
114            self.description = definition["description"]
115            # BigQuery limits descriptions to a maximum of 1024 characters,
116            # so we truncate anything longer than 1000.
117            if len(self.description) >= 1000:
118                self.description = self.description[:1000] + "…"
119
120    def get_first_added(self) -> datetime:
121        return self.first_added
122
123    def get_last_change(self) -> datetime:
124        return self.last_change
125
126    def get_schema(self, addtlProps: Any) -> Any:
127        # Get the schema based on the probe type
128        if self.get_type() == "scalar":
129            ptype = self.get("details", "kind")
130            if ptype == "boolean":
131                pschema = {"type": "boolean"}
132            elif ptype == "string":
133                pschema = {"type": "string"}
134            elif ptype == "uint":
135                pschema = {"type": "integer"}
136            else:
137                raise Exception("Unknown scalar type " + ptype)
138        elif self.get_type() == "histogram":
139            pschema = self.histogram_schema
140
141        if self.description is not None:
142            pschema["description"] = self.description
143
144        # Add nested level if keyed
145        if self.get("details", "keyed"):
146            final_schema = {"type": "object", "additionalProperties": pschema}
147        else:
148            final_schema = pschema
149
150        return final_schema
MainProbe(identifier: str, definition: dict)
    def __init__(self, identifier: str, definition: dict):
        """Derive dates, canonical definition and description, then init the base.

        The order matters: ``_set_description`` reads ``self.definition``,
        which ``_set_definition`` assigns.
        """
        # Dates come from the values of the definition's first_added mapping.
        self._set_dates(definition[self.first_added_key])
        self._set_definition(definition)
        # The description is taken from the canonical history entry.
        self._set_description(self.definition)
        super().__init__(identifier, definition)
first_added_key = 'first_added'
histogram_schema = {'type': 'string'}
parent_processes = {'main'}
child_processes = {'dynamic', 'gpu', 'content', 'socket', 'extension'}
processes_map = {'all_childs': {'dynamic', 'gpu', 'content', 'socket', 'extension'}, 'all_children': {'dynamic', 'gpu', 'content', 'socket', 'extension'}, 'all': {'dynamic', 'content', 'socket', 'main', 'gpu', 'extension'}}
def get_first_added(self) -> datetime.datetime:
    def get_first_added(self) -> datetime:
        """Return the earliest date computed by ``_set_dates``."""
        return self.first_added
def get_last_change(self) -> datetime.datetime:
    def get_last_change(self) -> datetime:
        """Return the latest date computed by ``_set_dates``."""
        return self.last_change
def get_schema(self, addtlProps: Any) -> Any:
126    def get_schema(self, addtlProps: Any) -> Any:
127        # Get the schema based on the probe type
128        if self.get_type() == "scalar":
129            ptype = self.get("details", "kind")
130            if ptype == "boolean":
131                pschema = {"type": "boolean"}
132            elif ptype == "string":
133                pschema = {"type": "string"}
134            elif ptype == "uint":
135                pschema = {"type": "integer"}
136            else:
137                raise Exception("Unknown scalar type " + ptype)
138        elif self.get_type() == "histogram":
139            pschema = self.histogram_schema
140
141        if self.description is not None:
142            pschema["description"] = self.description
143
144        # Add nested level if keyed
145        if self.get("details", "keyed"):
146            final_schema = {"type": "object", "additionalProperties": pschema}
147        else:
148            final_schema = pschema
149
150        return final_schema
class GleanProbe(Probe):
class GleanProbe(Probe):
    """Probe whose definition carries a list of dated history entries.

    The entry with the latest ``dates.last`` becomes the canonical
    ``self.definition``; the pings named across the whole history are
    merged into its ``send_in_pings``.
    """

    # Ping names that stand for "every ping".
    all_pings_keywords = ("all-pings", "all_pings")
    first_added_key = "first_added"

    def __init__(self, identifier: str, definition: dict, *, pings: List[str] = None):
        """Derive dates, canonical definition, description and pings.

        Args:
            identifier: probe identifier stored as ``self.id``.
            definition: full probe definition including its history list.
            pings: when given, replaces ``send_in_pings`` if the probe uses
                one of ``all_pings_keywords``.
        """
        self._set_dates(definition)
        self._set_definition(definition)
        self._set_description(self.definition)
        self._in_source = definition.get(self.in_source_key, False)
        super().__init__(identifier, definition)

        # Union of the pings of every history entry; entries without
        # "send_in_pings" count as ["metrics"].
        self.definition["send_in_pings"] = {
            ping
            for entry in definition[self.history_key]
            for ping in entry.get("send_in_pings", ["metrics"])
        }

        if pings is not None:
            self._update_all_pings(pings)

    def _update_all_pings(self, pings: List[str]):
        """Replace ``send_in_pings`` with ``pings`` if an all-pings keyword is set."""
        current = self.definition["send_in_pings"]
        if any(kw in current for kw in GleanProbe.all_pings_keywords):
            self.definition["send_in_pings"] = set(pings)

    def _set_definition(self, full_defn: dict):
        """Sort the history newest-first and take its head as the definition.

        The chosen entry is modified in place: it receives the probe name.
        """
        # Expose the entire history, for special casing of the probe.
        self.definition_history = sorted(
            full_defn[self.history_key],
            key=lambda entry: datetime.fromisoformat(entry["dates"]["last"]),
            reverse=True,
        )

        # The canonical definition for up-to-date schemas
        self.definition = self.definition_history[0]
        self.definition["name"] = full_defn[self.name_key]

    def _set_dates(self, definition: dict):
        """Set first_added / last_change from the entries' ``dates.first``."""
        firsts = [
            datetime.fromisoformat(entry["dates"]["first"])
            for entry in definition[self.history_key]
        ]

        self.first_added = min(firsts)
        self.last_change = max(firsts)

    def _set_description(self, definition):
        """Set ``self.description`` from ``definition`` (``None`` if absent)."""
        self.description = definition.get("description")

    def is_in_source(self) -> bool:
        """Return the definition's ``in-source`` flag (``False`` if absent)."""
        return self._in_source

    def get_first_added(self) -> datetime:
        """Return the earliest ``dates.first`` in the history."""
        return self.first_added

    def get_last_change(self) -> datetime:
        """Return the latest ``dates.first`` in the history."""
        return self.last_change

    def get_schema(self, addtlProps: Any) -> Any:
        """Return ``addtlProps`` with this probe's description added.

        The result is a shallow copy, so a dict the caller passes in for
        several probes never picks up one probe's description.

        Raises:
            SchemaException: if ``addtlProps`` is ``None``.
        """
        if addtlProps is None:
            raise SchemaException(
                "Additional Properties cannot be missing for Glean probes"
            )

        schema = dict(addtlProps)
        if self.description:
            schema["description"] = self.description

        return schema
GleanProbe(identifier: str, definition: dict, *, pings: List[str] = None)
    def __init__(self, identifier: str, definition: dict, *, pings: List[str] = None):
        """Derive dates, canonical definition, description and pings.

        ``pings`` is keyword-only; when it is not ``None`` it is handed to
        ``_update_all_pings`` after ``send_in_pings`` has been computed.
        """
        self._set_dates(definition)
        self._set_definition(definition)
        # The description is read from the canonical history entry.
        self._set_description(self.definition)
        self._in_source = definition.get(self.in_source_key, False)
        super().__init__(identifier, definition)

        # Union of the pings named by every history entry; an entry without
        # "send_in_pings" contributes "metrics".
        defn_pings = set(
            [
                p
                for d in definition[self.history_key]
                for p in d.get("send_in_pings", ["metrics"])
            ]
        )
        self.definition["send_in_pings"] = defn_pings

        if pings is not None:
            self._update_all_pings(pings)
all_pings_keywords = ('all-pings', 'all_pings')
first_added_key = 'first_added'
def is_in_source(self) -> bool:
    def is_in_source(self) -> bool:
        """Return the ``in-source`` value read in ``__init__`` (default False)."""
        return self._in_source
def get_first_added(self) -> datetime.datetime:
    def get_first_added(self) -> datetime:
        """Return the earliest date computed by ``_set_dates``."""
        return self.first_added
def get_last_change(self) -> datetime.datetime:
    def get_last_change(self) -> datetime:
        """Return the latest date computed by ``_set_dates``."""
        return self.last_change
def get_schema(self, addtlProps: Any) -> Any:
223    def get_schema(self, addtlProps: Any) -> Any:
224        if addtlProps is None:
225            raise SchemaException(
226                "Additional Properties cannot be missing for Glean probes"
227            )
228
229        if self.description:
230            addtlProps["description"] = self.description
231
232        return addtlProps