mozilla_schema_generator.probes
1# -*- coding: utf-8 -*- 2 3# This Source Code Form is subject to the terms of the Mozilla Public 4# License, v. 2.0. If a copy of the MPL was not distributed with this 5# file, You can obtain one at http://mozilla.org/MPL/2.0/. 6 7 8from __future__ import annotations 9 10import json 11from datetime import datetime 12from typing import Any, List 13 14from .schema import SchemaException 15from .utils import _get 16 17 18class Probe(object): 19 type_key = "type" 20 name_key = "name" 21 history_key = "history" 22 in_source_key = "in-source" 23 24 def __init__(self, identifier: str, definition: dict): 25 self.id = identifier 26 self.type = definition[self.type_key] 27 self.name = definition[self.name_key] 28 29 def __repr__(self): 30 return json.dumps( 31 { 32 "id": self.id, 33 "type": self.type, 34 "name": self.name, 35 "description": self.description, 36 } 37 ) 38 39 def get_type(self) -> str: 40 return self.type 41 42 def get_name(self) -> str: 43 return self.name 44 45 def get_description(self) -> str: 46 return self.description 47 48 def get_last_change(self) -> datetime: 49 raise NotImplementedError("Last Change is not available on generic probe") 50 51 def get_first_added(self) -> datetime: 52 raise NotImplementedError("First added is not available on generic probe") 53 54 def get_schema(self, addtlProps: Any) -> Any: 55 raise NotImplementedError("Get Schema is not available on generic probe") 56 57 def get(self, *k) -> Any: 58 return _get(self.definition, k) 59 60 def __lt__(self, other: Probe) -> bool: 61 if self.get_first_added() == other.get_first_added(): 62 return self.get_name() < other.get_name() 63 64 return self.get_first_added() < other.get_first_added() 65 66 67class MainProbe(Probe): 68 first_added_key = "first_added" 69 70 histogram_schema = {"type": "string"} 71 72 parent_processes = {"main"} 73 74 child_processes = {"content", "gpu", "extension", "dynamic", "socket"} 75 76 processes_map = { 77 "all_childs": child_processes, 78 "all_children": 
child_processes, 79 "all": child_processes | parent_processes, 80 } 81 82 def __init__(self, identifier: str, definition: dict): 83 self._set_dates(definition[self.first_added_key]) 84 self._set_definition(definition) 85 self._set_description(self.definition) 86 super().__init__(identifier, definition) 87 88 def _set_definition(self, full_defn: dict): 89 history = [d for arr in full_defn[self.history_key].values() for d in arr] 90 self.definition = max(history, key=lambda x: int(x["versions"]["first"])) 91 self.definition["name"] = full_defn[self.name_key] 92 self._set_processes(history) 93 94 def _set_processes(self, history): 95 # Include all historical processes 96 processes = { 97 p for d in history for p in d["details"].get("record_in_processes", []) 98 } 99 processes = { 100 sub_p for p in processes for sub_p in self.processes_map.get(p, [p]) 101 } 102 self.definition["details"]["record_in_processes"] = processes 103 104 def _set_dates(self, first_added_value: dict): 105 vals = [datetime.fromisoformat(v) for v in first_added_value.values()] 106 107 self.first_added = min(vals) 108 self.last_change = max(vals) 109 110 def _set_description(self, definition): 111 self.description = None 112 if "description" in definition: 113 self.description = definition["description"] 114 # BigQuery limits descriptions to a maximum of 1024 characters, 115 # so we truncate anything longer than 1000. 
116 if len(self.description) >= 1000: 117 self.description = self.description[:1000] + "…" 118 119 def get_first_added(self) -> datetime: 120 return self.first_added 121 122 def get_last_change(self) -> datetime: 123 return self.last_change 124 125 def get_schema(self, addtlProps: Any) -> Any: 126 # Get the schema based on the probe type 127 if self.get_type() == "scalar": 128 ptype = self.get("details", "kind") 129 if ptype == "boolean": 130 pschema = {"type": "boolean"} 131 elif ptype == "string": 132 pschema = {"type": "string"} 133 elif ptype == "uint": 134 pschema = {"type": "integer"} 135 else: 136 raise Exception("Unknown scalar type " + ptype) 137 elif self.get_type() == "histogram": 138 pschema = self.histogram_schema 139 140 if self.description is not None: 141 pschema["description"] = self.description 142 143 # Add nested level if keyed 144 if self.get("details", "keyed"): 145 final_schema = {"type": "object", "additionalProperties": pschema} 146 else: 147 final_schema = pschema 148 149 return final_schema 150 151 152class GleanProbe(Probe): 153 all_pings_keywords = ("all-pings", "all_pings") 154 first_added_key = "first_added" 155 156 def __init__(self, identifier: str, definition: dict, *, pings: List[str] = None): 157 self._set_dates(definition) 158 self._set_definition(definition) 159 self._set_description(self.definition) 160 self._in_source = definition.get(self.in_source_key, False) 161 super().__init__(identifier, definition) 162 163 defn_pings = set( 164 [ 165 p 166 for d in definition[self.history_key] 167 for p in d.get("send_in_pings", ["metrics"]) 168 ] 169 ) 170 self.definition["send_in_pings"] = defn_pings 171 172 if pings is not None: 173 self._update_all_pings(pings) 174 175 def _update_all_pings(self, pings: List[str]): 176 if any( 177 [ 178 kw in self.definition["send_in_pings"] 179 for kw in GleanProbe.all_pings_keywords 180 ] 181 ): 182 self.definition["send_in_pings"] = set(pings) 183 184 def _set_definition(self, full_defn: dict): 
185 # Expose the entire history, for special casing of the probe. 186 self.definition_history = list( 187 sorted( 188 full_defn[self.history_key], 189 key=lambda x: datetime.fromisoformat(x["dates"]["last"]), 190 reverse=True, 191 ) 192 ) 193 194 # The canonical definition for up-to-date schemas 195 self.definition = self.definition_history[0] 196 self.definition["name"] = full_defn[self.name_key] 197 198 def _set_dates(self, definition: dict): 199 vals = [ 200 datetime.fromisoformat(d["dates"]["first"]) 201 for d in definition[self.history_key] 202 ] 203 204 self.first_added = min(vals) 205 self.last_change = max(vals) 206 207 def _set_description(self, definition): 208 if "description" in definition: 209 self.description = definition["description"] 210 else: 211 self.description = None 212 213 def is_in_source(self) -> bool: 214 return self._in_source 215 216 def get_first_added(self) -> datetime: 217 return self.first_added 218 219 def get_last_change(self) -> datetime: 220 return self.last_change 221 222 def get_schema(self, addtlProps: Any) -> Any: 223 if addtlProps is None: 224 raise SchemaException( 225 "Additional Properties cannot be missing for Glean probes" 226 ) 227 228 if self.description: 229 addtlProps["description"] = self.description 230 231 return addtlProps
class
Probe:
class Probe(object):
    """Generic probe: an identifier plus the type and name of its definition.

    The date and schema accessors raise ``NotImplementedError`` here and are
    meant to be overridden by subclasses.
    """

    type_key = "type"
    name_key = "name"
    history_key = "history"
    in_source_key = "in-source"

    def __init__(self, identifier: str, definition: dict):
        """Store the identifier and read type and name from ``definition``.

        Raises:
            KeyError: if ``definition`` lacks the type or name key.
        """
        self.id = identifier
        self.type = definition[self.type_key]
        self.name = definition[self.name_key]
        # A subclass may assign ``definition`` and ``description`` before
        # calling this initializer, so only fill them in when they are still
        # missing. Without this, ``get``, ``get_description`` and ``__repr__``
        # raise AttributeError on a plain ``Probe``.
        if not hasattr(self, "definition"):
            self.definition = definition
        if not hasattr(self, "description"):
            self.description = definition.get("description")

    def __repr__(self):
        """Return a JSON string with the id, type, name and description."""
        return json.dumps(
            {
                "id": self.id,
                "type": self.type,
                "name": self.name,
                "description": self.description,
            }
        )

    def get_type(self) -> str:
        """Return the probe type read from the definition."""
        return self.type

    def get_name(self) -> str:
        """Return the probe name read from the definition."""
        return self.name

    def get_description(self) -> str:
        """Return the probe description, or ``None`` when there is none."""
        return self.description

    def get_last_change(self) -> datetime:
        """Not available on the generic probe; subclasses override this."""
        raise NotImplementedError("Last Change is not available on generic probe")

    def get_first_added(self) -> datetime:
        """Not available on the generic probe; subclasses override this."""
        raise NotImplementedError("First added is not available on generic probe")

    def get_schema(self, addtlProps: Any) -> Any:
        """Not available on the generic probe; subclasses override this."""
        raise NotImplementedError("Get Schema is not available on generic probe")

    def get(self, *k) -> Any:
        """Look up the key path ``k`` in the definition via the ``_get`` helper."""
        return _get(self.definition, k)

    def __lt__(self, other: Probe) -> bool:
        """Order probes by first-added date, breaking ties by name."""
        mine = self.get_first_added()
        theirs = other.get_first_added()
        if mine == theirs:
            return self.get_name() < other.get_name()

        return mine < theirs
class MainProbe(Probe):
    """Probe whose definition carries ``first_added``, ``history`` and ``name``.

    ``first_added`` is read as a mapping whose values are ISO date strings and
    ``history`` as a mapping whose values are lists of revision dicts.
    """

    first_added_key = "first_added"

    # Template only: ``get_schema`` hands out copies, never this dict itself.
    histogram_schema = {"type": "string"}

    parent_processes = {"main"}

    child_processes = {"content", "gpu", "extension", "dynamic", "socket"}

    # Expands the aggregate process names into concrete process names.
    processes_map = {
        "all_childs": child_processes,
        "all_children": child_processes,
        "all": child_processes | parent_processes,
    }

    def __init__(self, identifier: str, definition: dict):
        """Derive dates, canonical definition and description, then init the base."""
        self._set_dates(definition[self.first_added_key])
        self._set_definition(definition)
        self._set_description(self.definition)
        super().__init__(identifier, definition)

    def _set_definition(self, full_defn: dict):
        """Pick the revision with the highest ``versions.first`` as canonical.

        The chosen revision dict is used as-is (not copied) and gets the probe
        name and the merged process set written into it.
        """
        history = [d for arr in full_defn[self.history_key].values() for d in arr]
        self.definition = max(history, key=lambda x: int(x["versions"]["first"]))
        self.definition["name"] = full_defn[self.name_key]
        self._set_processes(history)

    def _set_processes(self, history):
        """Store the union of all historical processes on the canonical definition."""
        # Include all historical processes
        processes = {
            p for d in history for p in d["details"].get("record_in_processes", [])
        }
        # Replace aggregate names ("all", "all_children", ...) by their members.
        processes = {
            sub_p for p in processes for sub_p in self.processes_map.get(p, [p])
        }
        self.definition["details"]["record_in_processes"] = processes

    def _set_dates(self, first_added_value: dict):
        """Set ``first_added``/``last_change`` to the earliest/latest date given."""
        vals = [datetime.fromisoformat(v) for v in first_added_value.values()]

        self.first_added = min(vals)
        self.last_change = max(vals)

    def _set_description(self, definition):
        """Store the description from ``definition``, truncated when too long."""
        self.description = None
        if "description" in definition:
            self.description = definition["description"]
            # BigQuery limits descriptions to a maximum of 1024 characters,
            # so we truncate anything longer than 1000. A description of
            # exactly 1000 characters is kept untouched: nothing is cut, so
            # no ellipsis is appended.
            if len(self.description) > 1000:
                self.description = self.description[:1000] + "…"

    def get_first_added(self) -> datetime:
        """Return the earliest date found in ``first_added``."""
        return self.first_added

    def get_last_change(self) -> datetime:
        """Return the latest date found in ``first_added``."""
        return self.last_change

    def get_schema(self, addtlProps: Any) -> Any:
        """Build the schema fragment for this probe.

        ``addtlProps`` is accepted for interface compatibility and not used.

        Returns:
            A new dict for the probe; keyed probes are wrapped in an object
            schema whose ``additionalProperties`` is that dict.

        Raises:
            Exception: for an unknown scalar kind or an unknown probe type.
        """
        # Get the schema based on the probe type
        probe_type = self.get_type()
        if probe_type == "scalar":
            kind = self.get("details", "kind")
            json_type = {
                "boolean": "boolean",
                "string": "string",
                "uint": "integer",
            }.get(kind)
            if json_type is None:
                # f-string so a missing / non-string kind still gives this
                # message instead of a TypeError from string concatenation.
                raise Exception(f"Unknown scalar type {kind}")
            pschema = {"type": json_type}
        elif probe_type == "histogram":
            # Copy the class-level template: writing the description into the
            # shared dict would leak it into every other histogram probe.
            pschema = dict(self.histogram_schema)
        else:
            raise Exception(f"Unknown probe type {probe_type}")

        if self.description is not None:
            pschema["description"] = self.description

        # Add nested level if keyed
        if self.get("details", "keyed"):
            return {"type": "object", "additionalProperties": pschema}

        return pschema
processes_map =
{'all_childs': {'dynamic', 'gpu', 'content', 'socket', 'extension'}, 'all_children': {'dynamic', 'gpu', 'content', 'socket', 'extension'}, 'all': {'dynamic', 'content', 'socket', 'main', 'gpu', 'extension'}}
def
get_schema(self, addtlProps: Any) -> Any:
def get_schema(self, addtlProps: Any) -> Any:
    """Build the schema fragment for this probe.

    ``addtlProps`` is accepted for interface compatibility and not used.

    Returns:
        A new dict for the probe; keyed probes are wrapped in an object
        schema whose ``additionalProperties`` is that dict.

    Raises:
        Exception: for an unknown scalar kind or an unknown probe type.
    """
    # Get the schema based on the probe type
    probe_type = self.get_type()
    if probe_type == "scalar":
        kind = self.get("details", "kind")
        json_type = {
            "boolean": "boolean",
            "string": "string",
            "uint": "integer",
        }.get(kind)
        if json_type is None:
            # f-string so a missing / non-string kind still gives this
            # message instead of a TypeError from string concatenation.
            raise Exception(f"Unknown scalar type {kind}")
        pschema = {"type": json_type}
    elif probe_type == "histogram":
        # Copy the class-level template: writing the description into the
        # shared dict would leak it into every other histogram probe.
        pschema = dict(self.histogram_schema)
    else:
        # Previously this fell through and failed on an unbound ``pschema``.
        raise Exception(f"Unknown probe type {probe_type}")

    if self.description is not None:
        pschema["description"] = self.description

    # Add nested level if keyed
    if self.get("details", "keyed"):
        return {"type": "object", "additionalProperties": pschema}

    return pschema
Inherited Members
class GleanProbe(Probe):
    """Probe whose ``history`` is a list of revisions carrying ``dates``."""

    all_pings_keywords = ("all-pings", "all_pings")
    first_added_key = "first_added"

    def __init__(self, identifier: str, definition: dict, *, pings: List[str] = None):
        """Derive dates, canonical definition, description and ping set.

        Args:
            identifier: probe identifier.
            definition: full probe definition including its history.
            pings: when given, replaces the ping set of a probe whose
                ``send_in_pings`` contains one of ``all_pings_keywords``.
        """
        self._set_dates(definition)
        self._set_definition(definition)
        self._set_description(self.definition)
        self._in_source = definition.get(self.in_source_key, False)
        super().__init__(identifier, definition)

        # Union of the pings of every revision; a revision without
        # "send_in_pings" counts as being sent in "metrics".
        self.definition["send_in_pings"] = {
            p
            for d in definition[self.history_key]
            for p in d.get("send_in_pings", ["metrics"])
        }

        if pings is not None:
            self._update_all_pings(pings)

    def _update_all_pings(self, pings: List[str]):
        """Replace the ping set by ``pings`` if it contains an all-pings keyword."""
        if any(
            kw in self.definition["send_in_pings"]
            for kw in GleanProbe.all_pings_keywords
        ):
            self.definition["send_in_pings"] = set(pings)

    def _set_definition(self, full_defn: dict):
        """Sort the history newest-first by ``dates.last`` and pick the first."""
        # Expose the entire history, for special casing of the probe.
        self.definition_history = sorted(
            full_defn[self.history_key],
            key=lambda x: datetime.fromisoformat(x["dates"]["last"]),
            reverse=True,
        )

        # The canonical definition for up-to-date schemas
        # (the same dict object as ``definition_history[0]``, not a copy).
        self.definition = self.definition_history[0]
        self.definition["name"] = full_defn[self.name_key]

    def _set_dates(self, definition: dict):
        """Set ``first_added``/``last_change`` from the ``dates.first`` values."""
        vals = [
            datetime.fromisoformat(d["dates"]["first"])
            for d in definition[self.history_key]
        ]

        self.first_added = min(vals)
        self.last_change = max(vals)

    def _set_description(self, definition):
        """Store the description from ``definition``, or ``None`` if absent."""
        if "description" in definition:
            self.description = definition["description"]
        else:
            self.description = None

    def is_in_source(self) -> bool:
        """Return the ``in-source`` flag of the definition (default ``False``)."""
        return self._in_source

    def get_first_added(self) -> datetime:
        """Return the earliest ``dates.first`` in the history."""
        return self.first_added

    def get_last_change(self) -> datetime:
        """Return the latest ``dates.first`` in the history."""
        return self.last_change

    def get_schema(self, addtlProps: Any) -> Any:
        """Return ``addtlProps`` with this probe's description added.

        ``addtlProps`` is modified in place and returned; a falsy description
        leaves it unchanged.

        Raises:
            SchemaException: if ``addtlProps`` is ``None``.
        """
        if addtlProps is None:
            raise SchemaException(
                "Additional Properties cannot be missing for Glean probes"
            )

        if self.description:
            addtlProps["description"] = self.description

        return addtlProps
GleanProbe(identifier: str, definition: dict, *, pings: List[str] = None)
def __init__(self, identifier: str, definition: dict, *, pings: List[str] = None):
    """Derive dates, canonical definition, description and ping set.

    Args:
        identifier: probe identifier.
        definition: full probe definition including its history.
        pings: when given, it is handed to ``_update_all_pings`` after the
            ping set has been computed; ``None`` skips that step.
    """
    self._set_dates(definition)
    self._set_definition(definition)
    self._set_description(self.definition)
    self._in_source = definition.get(self.in_source_key, False)
    super().__init__(identifier, definition)

    # Union of the pings of every revision; a revision without
    # "send_in_pings" counts as being sent in "metrics". Built directly as a
    # set instead of materialising a throwaway list first.
    self.definition["send_in_pings"] = {
        p
        for d in definition[self.history_key]
        for p in d.get("send_in_pings", ["metrics"])
    }

    if pings is not None:
        self._update_all_pings(pings)