mozilla_schema_generator.probes

  1# -*- coding: utf-8 -*-
  2
  3# This Source Code Form is subject to the terms of the Mozilla Public
  4# License, v. 2.0. If a copy of the MPL was not distributed with this
  5# file, You can obtain one at http://mozilla.org/MPL/2.0/.
  6
  7
  8from __future__ import annotations
  9
 10import json
 11from datetime import datetime
 12from typing import Any, List
 13
 14from .schema import SchemaException
 15from .utils import _get
 16
 17
 18class Probe(object):
 19
 20    type_key = "type"
 21    name_key = "name"
 22    history_key = "history"
 23    in_source_key = "in-source"
 24
 25    def __init__(self, identifier: str, definition: dict):
 26        self.id = identifier
 27        self.type = definition[self.type_key]
 28        self.name = definition[self.name_key]
 29
 30    def __repr__(self):
 31        return json.dumps(
 32            {
 33                "id": self.id,
 34                "type": self.type,
 35                "name": self.name,
 36                "description": self.description,
 37            }
 38        )
 39
 40    def get_type(self) -> str:
 41        return self.type
 42
 43    def get_name(self) -> str:
 44        return self.name
 45
 46    def get_description(self) -> str:
 47        return self.description
 48
 49    def get_last_change(self) -> datetime:
 50        raise NotImplementedError("Last Change is not available on generic probe")
 51
 52    def get_first_added(self) -> datetime:
 53        raise NotImplementedError("First added is not available on generic probe")
 54
 55    def get_schema(self, addtlProps: Any) -> Any:
 56        raise NotImplementedError("Get Schema is not available on generic probe")
 57
 58    def get(self, *k) -> Any:
 59        return _get(self.definition, k)
 60
 61    def __lt__(self, other: Probe) -> bool:
 62        if self.get_first_added() == other.get_first_added():
 63            return self.get_name() < other.get_name()
 64
 65        return self.get_first_added() < other.get_first_added()
 66
 67
 68class MainProbe(Probe):
 69
 70    first_added_key = "first_added"
 71
 72    histogram_schema = {"type": "string"}
 73
 74    parent_processes = {"main"}
 75
 76    child_processes = {"content", "gpu", "extension", "dynamic", "socket"}
 77
 78    processes_map = {
 79        "all_childs": child_processes,
 80        "all_children": child_processes,
 81        "all": child_processes | parent_processes,
 82    }
 83
 84    def __init__(self, identifier: str, definition: dict):
 85        self._set_dates(definition[self.first_added_key])
 86        self._set_definition(definition)
 87        self._set_description(self.definition)
 88        super().__init__(identifier, definition)
 89
 90    def _set_definition(self, full_defn: dict):
 91        history = [d for arr in full_defn[self.history_key].values() for d in arr]
 92        self.definition = max(history, key=lambda x: int(x["versions"]["first"]))
 93        self.definition["name"] = full_defn[self.name_key]
 94        self._set_processes(history)
 95
 96    def _set_processes(self, history):
 97        # Include all historical processes
 98        processes = {
 99            p for d in history for p in d["details"].get("record_in_processes", [])
100        }
101        processes = {
102            sub_p for p in processes for sub_p in self.processes_map.get(p, [p])
103        }
104        self.definition["details"]["record_in_processes"] = processes
105
106    def _set_dates(self, first_added_value: dict):
107        vals = [datetime.fromisoformat(v) for v in first_added_value.values()]
108
109        self.first_added = min(vals)
110        self.last_change = max(vals)
111
112    def _set_description(self, definition):
113        self.description = None
114        if "description" in definition:
115            self.description = definition["description"]
116            # BigQuery limits descriptions to a maximum of 1024 characters,
117            # so we truncate anything longer than 1000.
118            if len(self.description) >= 1000:
119                self.description = self.description[:1000] + "…"
120
121    def get_first_added(self) -> datetime:
122        return self.first_added
123
124    def get_last_change(self) -> datetime:
125        return self.last_change
126
127    def get_schema(self, addtlProps: Any) -> Any:
128        # Get the schema based on the probe type
129        if self.get_type() == "scalar":
130            ptype = self.get("details", "kind")
131            if ptype == "boolean":
132                pschema = {"type": "boolean"}
133            elif ptype == "string":
134                pschema = {"type": "string"}
135            elif ptype == "uint":
136                pschema = {"type": "integer"}
137            else:
138                raise Exception("Unknown scalar type " + ptype)
139        elif self.get_type() == "histogram":
140            pschema = self.histogram_schema
141
142        if self.description is not None:
143            pschema["description"] = self.description
144
145        # Add nested level if keyed
146        if self.get("details", "keyed"):
147            final_schema = {"type": "object", "additionalProperties": pschema}
148        else:
149            final_schema = pschema
150
151        return final_schema
152
153
class GleanProbe(Probe):
    """Probe built from a definition whose ``history`` is a list of entries.

    Each history entry carries ``dates.first`` / ``dates.last`` ISO-format
    strings and may list the pings it is sent in.
    """

    all_pings_keywords = ("all-pings", "all_pings")
    first_added_key = "first_added"

    def __init__(self, identifier: str, definition: dict, *, pings: List[str] = None):
        """Derive dates, canonical definition, description and ping set.

        Args:
            identifier: Probe identifier, passed on to ``Probe.__init__``.
            definition: Full probe definition including its ``history``.
            pings: When given, replaces the ping set of a probe that is
                marked with one of ``all_pings_keywords``.
        """
        self._set_dates(definition)
        self._set_definition(definition)
        self._set_description(self.definition)
        self._in_source = definition.get(self.in_source_key, False)
        super().__init__(identifier, definition)

        # Union of the pings named by every history entry; an entry without
        # "send_in_pings" counts as being sent in "metrics".
        self.definition["send_in_pings"] = {
            ping
            for entry in definition[self.history_key]
            for ping in entry.get("send_in_pings", ["metrics"])
        }

        if pings is not None:
            self._update_all_pings(pings)

    def _update_all_pings(self, pings: List[str]):
        """Swap in ``pings`` when the probe uses an all-pings keyword."""
        current = self.definition["send_in_pings"]
        if any(keyword in current for keyword in GleanProbe.all_pings_keywords):
            self.definition["send_in_pings"] = set(pings)

    def _set_definition(self, full_defn: dict):
        """Order the history newest-first and pick its head as canonical."""

        def last_seen(entry):
            return datetime.fromisoformat(entry["dates"]["last"])

        # Expose the entire history, for special casing of the probe.
        self.definition_history = sorted(
            full_defn[self.history_key], key=last_seen, reverse=True
        )

        # The canonical definition for up-to-date schemas
        newest = self.definition_history[0]
        newest["name"] = full_defn[self.name_key]
        self.definition = newest

    def _set_dates(self, definition: dict):
        """Set first_added / last_change from the entries' ``dates.first``."""
        starts = [
            datetime.fromisoformat(entry["dates"]["first"])
            for entry in definition[self.history_key]
        ]

        self.first_added = min(starts)
        self.last_change = max(starts)

    def _set_description(self, definition):
        """Store the definition's description, or ``None`` when absent."""
        self.description = (
            definition["description"] if "description" in definition else None
        )

    def is_in_source(self) -> bool:
        """Return the ``in-source`` flag of the definition (default False)."""
        return self._in_source

    def get_first_added(self) -> datetime:
        """Return the earliest ``dates.first`` across the history."""
        return self.first_added

    def get_last_change(self) -> datetime:
        """Return the latest ``dates.first`` across the history."""
        return self.last_change

    def get_schema(self, addtlProps: Any) -> Any:
        """Return ``addtlProps`` with this probe's description added.

        The passed object is modified in place and returned.

        Raises:
            SchemaException: If ``addtlProps`` is ``None``.
        """
        if addtlProps is None:
            raise SchemaException(
                "Additional Properties cannot be missing for Glean probes"
            )

        if not self.description:
            return addtlProps

        addtlProps["description"] = self.description
        return addtlProps
class Probe:
class Probe(object):
    """Base representation of a single probe.

    Holds the identifier, type and name read from a probe definition and
    declares the accessors that concrete probe classes override: the date
    accessors and ``get_schema`` raise ``NotImplementedError`` here.
    """

    # Keys looked up in the raw probe definition dict.
    type_key = "type"
    name_key = "name"
    history_key = "history"
    in_source_key = "in-source"

    def __init__(self, identifier: str, definition: dict):
        """Record the probe's identifier, type and name.

        Args:
            identifier: Stored unchanged as ``self.id``.
            definition: Mapping that must contain ``type_key`` and
                ``name_key``.

        Raises:
            KeyError: If ``type_key`` or ``name_key`` is missing.
        """
        self.id = identifier
        self.type = definition[self.type_key]
        self.name = definition[self.name_key]

        # MainProbe and GleanProbe assign ``description`` and ``definition``
        # *before* delegating here, so only fill in defaults when nothing
        # has been set. Without them __repr__, get_description() and get()
        # raise AttributeError on a directly constructed Probe.
        if not hasattr(self, "description"):
            self.description = None
        if not hasattr(self, "definition"):
            self.definition = definition

    def __repr__(self):
        """Return a JSON object string with id, type, name and description."""
        return json.dumps(
            {
                "id": self.id,
                "type": self.type,
                "name": self.name,
                "description": self.description,
            }
        )

    def get_type(self) -> str:
        """Return the probe type read from the definition."""
        return self.type

    def get_name(self) -> str:
        """Return the probe name read from the definition."""
        return self.name

    def get_description(self) -> str:
        """Return the probe description, or ``None`` when there is none."""
        return self.description

    def get_last_change(self) -> datetime:
        """Not available on the base class; always raises NotImplementedError."""
        raise NotImplementedError("Last Change is not available on generic probe")

    def get_first_added(self) -> datetime:
        """Not available on the base class; always raises NotImplementedError."""
        raise NotImplementedError("First added is not available on generic probe")

    def get_schema(self, addtlProps: Any) -> Any:
        """Not available on the base class; always raises NotImplementedError."""
        raise NotImplementedError("Get Schema is not available on generic probe")

    def get(self, *k) -> Any:
        """Look up the key path ``k`` in ``self.definition`` via ``_get``."""
        return _get(self.definition, k)

    def __lt__(self, other: "Probe") -> bool:
        """Order probes by first-added date, breaking ties by name."""
        mine = self.get_first_added()
        theirs = other.get_first_added()
        if mine == theirs:
            return self.get_name() < other.get_name()

        return mine < theirs
Probe(identifier: str, definition: dict)
26    def __init__(self, identifier: str, definition: dict):
27        self.id = identifier
28        self.type = definition[self.type_key]
29        self.name = definition[self.name_key]
def get_type(self) -> str:
41    def get_type(self) -> str:
42        return self.type
def get_name(self) -> str:
44    def get_name(self) -> str:
45        return self.name
def get_description(self) -> str:
47    def get_description(self) -> str:
48        return self.description
def get_last_change(self) -> datetime.datetime:
50    def get_last_change(self) -> datetime:
51        raise NotImplementedError("Last Change is not available on generic probe")
def get_first_added(self) -> datetime.datetime:
53    def get_first_added(self) -> datetime:
54        raise NotImplementedError("First added is not available on generic probe")
def get_schema(self, addtlProps: Any) -> Any:
56    def get_schema(self, addtlProps: Any) -> Any:
57        raise NotImplementedError("Get Schema is not available on generic probe")
def get(self, *k) -> Any:
59    def get(self, *k) -> Any:
60        return _get(self.definition, k)
class MainProbe(Probe):
 69class MainProbe(Probe):
 70
 71    first_added_key = "first_added"
 72
 73    histogram_schema = {"type": "string"}
 74
 75    parent_processes = {"main"}
 76
 77    child_processes = {"content", "gpu", "extension", "dynamic", "socket"}
 78
 79    processes_map = {
 80        "all_childs": child_processes,
 81        "all_children": child_processes,
 82        "all": child_processes | parent_processes,
 83    }
 84
 85    def __init__(self, identifier: str, definition: dict):
 86        self._set_dates(definition[self.first_added_key])
 87        self._set_definition(definition)
 88        self._set_description(self.definition)
 89        super().__init__(identifier, definition)
 90
 91    def _set_definition(self, full_defn: dict):
 92        history = [d for arr in full_defn[self.history_key].values() for d in arr]
 93        self.definition = max(history, key=lambda x: int(x["versions"]["first"]))
 94        self.definition["name"] = full_defn[self.name_key]
 95        self._set_processes(history)
 96
 97    def _set_processes(self, history):
 98        # Include all historical processes
 99        processes = {
100            p for d in history for p in d["details"].get("record_in_processes", [])
101        }
102        processes = {
103            sub_p for p in processes for sub_p in self.processes_map.get(p, [p])
104        }
105        self.definition["details"]["record_in_processes"] = processes
106
107    def _set_dates(self, first_added_value: dict):
108        vals = [datetime.fromisoformat(v) for v in first_added_value.values()]
109
110        self.first_added = min(vals)
111        self.last_change = max(vals)
112
113    def _set_description(self, definition):
114        self.description = None
115        if "description" in definition:
116            self.description = definition["description"]
117            # BigQuery limits descriptions to a maximum of 1024 characters,
118            # so we truncate anything longer than 1000.
119            if len(self.description) >= 1000:
120                self.description = self.description[:1000] + "…"
121
122    def get_first_added(self) -> datetime:
123        return self.first_added
124
125    def get_last_change(self) -> datetime:
126        return self.last_change
127
128    def get_schema(self, addtlProps: Any) -> Any:
129        # Get the schema based on the probe type
130        if self.get_type() == "scalar":
131            ptype = self.get("details", "kind")
132            if ptype == "boolean":
133                pschema = {"type": "boolean"}
134            elif ptype == "string":
135                pschema = {"type": "string"}
136            elif ptype == "uint":
137                pschema = {"type": "integer"}
138            else:
139                raise Exception("Unknown scalar type " + ptype)
140        elif self.get_type() == "histogram":
141            pschema = self.histogram_schema
142
143        if self.description is not None:
144            pschema["description"] = self.description
145
146        # Add nested level if keyed
147        if self.get("details", "keyed"):
148            final_schema = {"type": "object", "additionalProperties": pschema}
149        else:
150            final_schema = pschema
151
152        return final_schema
MainProbe(identifier: str, definition: dict)
85    def __init__(self, identifier: str, definition: dict):
86        self._set_dates(definition[self.first_added_key])
87        self._set_definition(definition)
88        self._set_description(self.definition)
89        super().__init__(identifier, definition)
def get_first_added(self) -> datetime.datetime:
122    def get_first_added(self) -> datetime:
123        return self.first_added
def get_last_change(self) -> datetime.datetime:
125    def get_last_change(self) -> datetime:
126        return self.last_change
def get_schema(self, addtlProps: Any) -> Any:
128    def get_schema(self, addtlProps: Any) -> Any:
129        # Get the schema based on the probe type
130        if self.get_type() == "scalar":
131            ptype = self.get("details", "kind")
132            if ptype == "boolean":
133                pschema = {"type": "boolean"}
134            elif ptype == "string":
135                pschema = {"type": "string"}
136            elif ptype == "uint":
137                pschema = {"type": "integer"}
138            else:
139                raise Exception("Unknown scalar type " + ptype)
140        elif self.get_type() == "histogram":
141            pschema = self.histogram_schema
142
143        if self.description is not None:
144            pschema["description"] = self.description
145
146        # Add nested level if keyed
147        if self.get("details", "keyed"):
148            final_schema = {"type": "object", "additionalProperties": pschema}
149        else:
150            final_schema = pschema
151
152        return final_schema
class GleanProbe(Probe):
class GleanProbe(Probe):
    """Probe built from a definition whose ``history`` is a list of entries.

    Each history entry carries ``dates.first`` / ``dates.last`` ISO-format
    strings and may list the pings it is sent in.
    """

    all_pings_keywords = ("all-pings", "all_pings")
    first_added_key = "first_added"

    def __init__(self, identifier: str, definition: dict, *, pings: List[str] = None):
        """Derive dates, canonical definition, description and ping set.

        Args:
            identifier: Probe identifier, passed on to ``Probe.__init__``.
            definition: Full probe definition including its ``history``.
            pings: When given, replaces the ping set of a probe that is
                marked with one of ``all_pings_keywords``.
        """
        self._set_dates(definition)
        self._set_definition(definition)
        self._set_description(self.definition)
        self._in_source = definition.get(self.in_source_key, False)
        super().__init__(identifier, definition)

        # Union of the pings named by every history entry; an entry without
        # "send_in_pings" counts as being sent in "metrics".
        self.definition["send_in_pings"] = {
            ping
            for entry in definition[self.history_key]
            for ping in entry.get("send_in_pings", ["metrics"])
        }

        if pings is not None:
            self._update_all_pings(pings)

    def _update_all_pings(self, pings: List[str]):
        """Swap in ``pings`` when the probe uses an all-pings keyword."""
        current = self.definition["send_in_pings"]
        if any(keyword in current for keyword in GleanProbe.all_pings_keywords):
            self.definition["send_in_pings"] = set(pings)

    def _set_definition(self, full_defn: dict):
        """Order the history newest-first and pick its head as canonical."""

        def last_seen(entry):
            return datetime.fromisoformat(entry["dates"]["last"])

        # Expose the entire history, for special casing of the probe.
        self.definition_history = sorted(
            full_defn[self.history_key], key=last_seen, reverse=True
        )

        # The canonical definition for up-to-date schemas
        newest = self.definition_history[0]
        newest["name"] = full_defn[self.name_key]
        self.definition = newest

    def _set_dates(self, definition: dict):
        """Set first_added / last_change from the entries' ``dates.first``."""
        starts = [
            datetime.fromisoformat(entry["dates"]["first"])
            for entry in definition[self.history_key]
        ]

        self.first_added = min(starts)
        self.last_change = max(starts)

    def _set_description(self, definition):
        """Store the definition's description, or ``None`` when absent."""
        self.description = (
            definition["description"] if "description" in definition else None
        )

    def is_in_source(self) -> bool:
        """Return the ``in-source`` flag of the definition (default False)."""
        return self._in_source

    def get_first_added(self) -> datetime:
        """Return the earliest ``dates.first`` across the history."""
        return self.first_added

    def get_last_change(self) -> datetime:
        """Return the latest ``dates.first`` across the history."""
        return self.last_change

    def get_schema(self, addtlProps: Any) -> Any:
        """Return ``addtlProps`` with this probe's description added.

        The passed object is modified in place and returned.

        Raises:
            SchemaException: If ``addtlProps`` is ``None``.
        """
        if addtlProps is None:
            raise SchemaException(
                "Additional Properties cannot be missing for Glean probes"
            )

        if not self.description:
            return addtlProps

        addtlProps["description"] = self.description
        return addtlProps
GleanProbe(identifier: str, definition: dict, *, pings: List[str] = None)
160    def __init__(self, identifier: str, definition: dict, *, pings: List[str] = None):
161        self._set_dates(definition)
162        self._set_definition(definition)
163        self._set_description(self.definition)
164        self._in_source = definition.get(self.in_source_key, False)
165        super().__init__(identifier, definition)
166
167        defn_pings = set(
168            [
169                p
170                for d in definition[self.history_key]
171                for p in d.get("send_in_pings", ["metrics"])
172            ]
173        )
174        self.definition["send_in_pings"] = defn_pings
175
176        if pings is not None:
177            self._update_all_pings(pings)
def is_in_source(self) -> bool:
217    def is_in_source(self) -> bool:
218        return self._in_source
def get_first_added(self) -> datetime.datetime:
220    def get_first_added(self) -> datetime:
221        return self.first_added
def get_last_change(self) -> datetime.datetime:
223    def get_last_change(self) -> datetime:
224        return self.last_change
def get_schema(self, addtlProps: Any) -> Any:
226    def get_schema(self, addtlProps: Any) -> Any:
227        if addtlProps is None:
228            raise SchemaException(
229                "Additional Properties cannot be missing for Glean probes"
230            )
231
232        if self.description:
233            addtlProps["description"] = self.description
234
235        return addtlProps