Changeset 8aa20c7 in flexoentity
- Timestamp:
- 11/01/25 15:51:10 (2 months ago)
- Branches:
- master
- Children:
- 5c72356
- Parents:
- ca39274
- Files:
-
- 1 added
- 8 edited
-
README.md (modified) (4 diffs)
-
flexoentity/domain.py (modified) (2 diffs)
-
flexoentity/flexo_entity.py (modified) (14 diffs)
-
flexoentity/id_factory.py (modified) (3 diffs)
-
tests/conftest.py (modified) (1 diff)
-
tests/test_flexoid.py (added)
-
tests/test_id_lifecycle.py (modified) (10 diffs)
-
tests/test_id_stress.py (modified) (6 diffs)
-
tests/test_persistance_integrity.py (modified) (4 diffs)
Legend:
- Unmodified
- Added
- Removed
-
README.md
rca39274 r8aa20c7 78 78 79 79 *Highlights* 80 - =generate(domain, e type, estate, text, version=1)= → create a new ID80 - =generate(domain, entity_type, estate, text, version=1)= → create a new ID 81 81 - =next_version(oid)= → increment version safely 82 - =clone_new_base(domain, e type, estate, text)= → start a new lineage82 - =clone_new_base(domain, entity_type, estate, text)= → start a new lineage 83 83 - Deterministic prefix, state-dependent signature 84 84 … … 105 105 106 106 #+BEGIN_SRC python 107 entity = Question("AF", EntityType. QUESTION, "What is Ohm’s law?")107 entity = Question("AF", EntityType.ITEM, "What is Ohm’s law?") 108 108 json_str = entity.to_json() 109 109 reloaded = Question.from_json(json_str) … … 125 125 def __init__(self, domain, text): 126 126 self._text = text 127 super().__init__(domain, EntityType. QUESTION)127 super().__init__(domain, EntityType.ITEM) 128 128 129 129 @property … … 145 145 { 146 146 "domain": "AF", 147 "e type": "QUESTION",147 "entity_type": "QUESTION", 148 148 "text_seed": "What is Ohm’s law?", 149 149 "state": "APPROVED_AND_SIGNED", -
flexoentity/domain.py
rca39274 r8aa20c7 1 1 from dataclasses import dataclass 2 2 from flexoentity.flexo_entity import FlexoEntity, EntityType, EntityState 3 3 4 4 5 @dataclass … … 11 12 @classmethod 12 13 def default(cls): 13 return cls(domain="GEN", e type=EntityType.DATABASE, state=EntityState.DRAFT)14 return cls(domain="GEN", entity_type=EntityType.DOMAIN, state=EntityState.DRAFT) 14 15 15 16 def __post_init__(self): -
flexoentity/flexo_entity.py
rca39274 r8aa20c7 5 5 import json 6 6 import re 7 from enum import Enum, auto 7 from uuid import UUID 8 from enum import Enum 8 9 from dataclasses import dataclass, field 9 10 from typing import Optional … … 21 22 22 23 class EntityType(Enum): 23 QUESTION = auto() 24 MEDIA = auto() 25 CATALOG = auto() 26 EXAM = auto() 27 DATABASE = auto() 28 CERTIFICATE = auto() 29 DOMAIN = auto() 30 24 GENERIC = "G" 25 DOMAIN = "D" 26 MEDIA = "M" 27 ITEM = "I" 28 CATALOG = "C" 29 TEXT = "T" 30 HANDOUT = "H" 31 OUTPUT = "O" 32 RECORD = "R" 33 SESSION = "S" 34 USER = "U" 35 CONFIG = "F" 36 EVENT = "E" 37 38 @classmethod 39 def from_letter(cls, a_letter): 40 return cls(a_letter) 41 42 # FIXME: Add more mappings 31 43 def short(self) -> str: 32 44 mapping = { 33 EntityType.QUESTION: "Q",34 45 EntityType.MEDIA: "M", 35 46 EntityType.DOMAIN: "DOM", 36 47 EntityType.CATALOG: "CAT", 37 EntityType.EXAM: "EX",38 EntityType.DATABASE: "DB",39 EntityType.CERTIFICATE: "CERT"40 48 } 41 49 return mapping[self] … … 47 55 48 56 class EntityState(Enum): 49 DRAFT = auto() 50 APPROVED = auto() 51 APPROVED_AND_SIGNED = auto() 52 PUBLISHED = auto() 53 OBSOLETE = auto() 54 55 def short(self) -> str: 56 """ 57 Return a one-letter abbreviation for the state, used in Flex-O IDs. 58 """ 59 mapping = { 60 EntityState.DRAFT: "D", 61 EntityState.APPROVED: "A", 62 EntityState.APPROVED_AND_SIGNED: "S", 63 EntityState.PUBLISHED: "P", 64 EntityState.OBSOLETE: "O", 65 66 } 67 return mapping[self] 68 69 @classmethod 70 def from_short(cls, char: str): 71 """ 72 Inverse of .short(): restore the EntityState from its one-letter code. 73 """ 74 reverse = { 75 "D": cls.DRAFT, 76 "A": cls.APPROVED, 77 "S": cls.APPROVED_AND_SIGNED, 78 "P": cls.PUBLISHED, 79 "O": cls.OBSOLETE, 80 } 81 try: 82 return reverse[char.upper()] 83 except KeyError: 84 raise ValueError(f"Unknown state abbreviation: {char}") 57 DRAFT = "D" 58 APPROVED = "A" 59 APPROVED_AND_SIGNED = "S" 60 PUBLISHED = "P" 61 OBSOLETE = "O" 85 62 86 63 def __str__(self): … … 91 68 class FlexoEntity(ABC): 92 69 domain: str 93 etype: EntityType 70 entity_type: EntityType 71 subtype: str = "GENERIC" 94 72 state: EntityState 95 73 flexo_id: Optional[FlexOID] = field(default=None) 96 74 fingerprint: str = field(default_factory=str) 75 originator_id: UUID = field(default=UUID(int=0)) 76 owner_id: UUID = field(default=UUID(int=0)) 97 77 origin: Optional[str] = field(default=None) 98 78 99 OID_PATTERN = re.compile(100 r"^(?P<domain>[A-Z0-9]+)-(?P<etype>[A-Z]+)"101 r"(?P<date>\d{6,8})-(?P<hash>[0-9A-F]+)@(?P<version>\d{3})(?P<state>[A-Z])$"102 )103 104 @classmethod105 def from_string(cls, s: str) -> "FlexOID":106 """Rehydrate a FlexOID from its canonical string form."""107 m = cls.OID_PATTERN.match(s.strip())108 if not m:109 raise ValueError(f"Invalid FlexOID string: {s}")110 gd = m.groupdict()111 return cls(112 domain=gd["domain"],113 etype=gd["etype"],114 state=gd["state"],115 )116 79 @property 117 80 @abstractmethod … … 143 106 144 107 self.flexo_id = FlexOID.safe_generate(self.domain_code(), 145 self.e type.short(),146 self.state. short(),108 self.entity_type.value, 109 self.state.value, 147 110 self.text_seed, 148 111 1) … … 152 115 def __str__(self): 153 116 return ( 154 f"{self.e type.name}({self.flexo_id}, {self.state.name}, "117 f"{self.entity_type.name}({self.flexo_id}, {self.state.name}, " 155 118 f"fingerprint={self.fingerprint}..., v{self.version})" 156 119 ) … … 159 122 return { 160 123 "domain": self.domain_code(), 161 "e type": self.etype.name,124 "entity_type": self.entity_type.name, 162 125 "state": self.state.name, 163 126 "flexo_id": str(self.flexo_id), … … 173 136 domain_obj = Domain( 174 137 domain=abbrev, 175 e type=EntityType.DOMAIN,138 entity_type=EntityType.DOMAIN, 176 139 state=EntityState.DRAFT, # default when reconstructing context 177 140 ) 178 141 obj = cls( 179 142 domain=domain_obj, 180 e type=EntityType[data["etype"]],143 entity_type=EntityType[data["entity_type"]], 181 144 state=EntityState[data["state"]], 182 145 ) … … 221 184 self.fingerprint = new_fp 222 185 self.flexo_id = FlexOID.safe_generate(self.domain_code(), 223 self.e type.short(),224 self.state. short(),186 self.entity_type.value, 187 self.state.value, 225 188 self.text_seed, 226 189 self.flexo_id.version) … … 244 207 # special case: marking obsolete 245 208 if target_state == EntityState.OBSOLETE: 246 self.flexo_id = FlexOID (self.flexo_id.flexo_id[:-1] + "O")209 self.flexo_id = FlexOID.with_state(self.flexo_id, "O") 247 210 self.state = target_state 248 211 return … … 287 250 288 251 if self.state == EntityState.DRAFT: 289 new_version = self.flexo_id.version + 1290 252 new_fid = FlexOID.safe_generate(self.domain_code(), 291 self.e type.short(),292 EntityState.APPROVED. short(),253 self.entity_type.value, 254 EntityState.APPROVED.value, 293 255 self.text_seed, 294 version= new_version256 version=self.version 295 257 ) 296 self. previous_id= self.flexo_id # optional: keep audit trail258 self.origin = self.flexo_id # optional: keep audit trail 297 259 self.flexo_id = new_fid 298 260 self.state = EntityState.APPROVED … … 320 282 new_fid = FlexOID.safe_generate( 321 283 self.domain_code(), 322 self.e type.short(),323 EntityState.PUBLISHED. short(),284 self.entity_type.value, 285 EntityState.PUBLISHED.value, 324 286 self.text_seed, 325 287 version=new_version 326 288 ) 327 289 328 self. previous_id= self.flexo_id290 self.origin = self.flexo_id 329 291 self.flexo_id = new_fid 330 292 self.state = EntityState.PUBLISHED … … 346 308 self.flexo_id = FlexOID.clone_new_base( 347 309 self.domain_code(), 348 self.e type.short(),349 self.state. short(),310 self.entity_type.value, 311 self.state.value, 350 312 self.text_seed, 351 313 ) … … 362 324 canonical_seed(entity.text_seed).encode("utf-8"), digest_size=8 363 325 ).hexdigest().upper() 364 print(entity.fingerprint)365 print(expected_fp)366 326 return expected_fp == entity.fingerprint 367 327 -
flexoentity/id_factory.py
rca39274 r8aa20c7 1 1 """ 2 id_factory.py — Central Flex-O ID generator and versioning control (hardened). 3 4 Improvements: 5 - BLAKE2s hashing (modern, fast, stdlib) 6 - 6 hex-digit hash (≈16.7M combinations) 7 - UTC-based dates for consistency 8 - Collision disambiguator (-A, -B, ...) 9 - Canonical seed and content fingerprint helpers 2 id_factory.py — Central Flex-O ID generation and lifecycle logic. 3 4 I represent the identity backbone of the Flex-O ecosystem. 5 I define how entities receive, preserve, and evolve their unique identifiers 6 throughout their lifecycle. 7 8 Each Flex-O ID (FlexOID) is a self-contained string that encodes: 9 10 <DOMAIN>-<ETYPE><YYMMDD>-<HASH>@<VERSION><STATE> 11 12 Example: 13 GEN-ITEM251101-3139ACFAE38B@002A 14 15 where 16 DOMAIN — a short prefix identifying the origin domain (e.g., GEN) 17 ETYPE — a compact entity type code (e.g., ITEM) 18 YYMMDD — the UTC creation date 19 HASH — a 12-hex BLAKE2s digest derived from canonical content 20 VERSION — a three-digit lineage counter (001-999) 21 STATE — a single capital letter indicating lifecycle state 22 23 ─────────────────────────────────────────────────────────────────────────────── 24 Lifecycle semantics 25 ─────────────────────────────────────────────────────────────────────────────── 26 27 I distinguish clearly between *content evolution* and *workflow state*: 28 29 • Draft → Approved: 30 I generate a fully new FlexOID whose hash and version reflect 31 the new, stable content. This step marks the transition from 32 provisional to permanent identity. No extra version bump is required. 33 34 • Approved → Signed → Published → Obsolete: 35 These transitions do not change the content hash. 36 They represent confidence or visibility, not content. 37 The same FlexOID (same version) remains valid across them. 38 39 In short: 40 Version changes record evolution of content. 41 State changes record evolution of trust. 42 43 ─────────────────────────────────────────────────────────────────────────────── 44 Implementation notes 45 ─────────────────────────────────────────────────────────────────────────────── 46 47 • Hashing: I use modern BLAKE2s (6-byte digest) for deterministic, 48 collision-resistant identifiers. 49 50 • Canonicalization: I flatten entity data into a reproducible text form 51 so equivalent content always yields the same hash. 52 53 • Generation: I provide safe and deterministic factories 54 (FlexOID.generate, FlexOID.safe_generate) 55 that handle rare hash collisions by minimal salting. 56 57 • Validation: Every FlexOID validates itself through a strict regex pattern 58 and exposes lightweight accessors (domain, type, date, hash, 59 version, state). 60 61 ─────────────────────────────────────────────────────────────────────────────── 62 Philosophy 63 ─────────────────────────────────────────────────────────────────────────────── 64 65 I strive to be deterministic, minimal, and auditable. 66 My design favors immutability and clarity over flexibility. 67 A FlexOID never lies about its lineage: once created, it is final. 68 69 Version marks evolution of content. 70 State marks evolution of confidence. 10 71 """ 11 72 12 import logging 13 from datetime import datetime, timezone 73 import re 14 74 import hashlib 15 75 import secrets 16 import itertools17 76 import json 18 19 logger = logging.getLogger(__name__) 20 21 # ────────────────────────────────────────────────────────────────────────────── 22 # Canonicalization helpers 23 # ────────────────────────────────────────────────────────────────────────────── 24 77 from datetime import datetime, timezone 78 from logging import Logger 79 80 logger = Logger(__file__) 25 81 26 82 def canonical_seed(obj) -> str: 27 83 """ 28 Deterministically flatten an entity's core data into a string 29 for hashing and deduplication. 84 I transform *obj* into a deterministic, comparable text form. 85 86 I remove irrelevant formatting differences so that two equal 87 pieces of data always yield the same hash seed. 88 89 Rules: 90 - If *obj* is a string, I normalize whitespace. 91 - If *obj* is a dict, I JSON-encode it with sorted keys. 92 - If *obj* is an object, I recurse on its __dict__. 93 - Otherwise, I coerce *obj* to str(). 30 94 """ 95 31 96 if isinstance(obj, str): 32 97 text = " ".join(obj.split()) … … 38 103 return str(obj) 39 104 40 # ────────────────────────────────────────────────────────────────────────────── 41 # ID Factory 42 # ────────────────────────────────────────────────────────────────────────────── 43 44 45 class FlexOID: 105 106 class FlexOID(str): 107 """ 108 I represent a canonical textual identifier within the Flex-O ecosystem. 109 i I am immutable and behave like a normal string, but I understand 110 my own internal structure. I can reveal my parts, create new 111 versions, and switch between lifecycle states. 112 """ 113 46 114 MAX_VERSION = 999 47 115 WARN_THRESHOLD = 900 48 116 49 50 # keep in-memory registry for same-session collisions (optional) 51 # NOTE: We might remove this soon 52 _seen_hashes = set() 117 OID_PATTERN = re.compile( 118 r"^(?P<domain>[A-Z0-9]+)-" 119 r"(?P<etype>[A-Z0-9]+)" 120 r"(?P<date>\d{6})-" 121 r"(?P<hash>[A-F0-9]+)@" 122 r"(?P<version>\d{3})" 123 r"(?P<state>[A-Z])$" 124 ) 125 126 def __new__(cls, value: str): 127 """ 128 I create a new validated Flex-O ID from *value*. 129 I verify that the given string matches the required pattern 130 and remember the parsed match for later access. 131 """ 132 m = cls.OID_PATTERN.match(value) 133 if not m: 134 raise ValueError(f"Invalid FlexOID format: {value}") 135 obj = super().__new__(cls, value) 136 obj._m = m 137 return obj 138 139 # ─────────────────────────────────────────── 140 # Parsed accessors 141 # ─────────────────────────────────────────── 142 @property 143 def domain(self) -> str: 144 """I answer the domain prefix (e.g., 'GEN').""" 145 return self._m.group("domain") 146 147 @property 148 def entity_type(self) -> str: 149 """I answer the short entity-type code (e.g., 'ITEM').""" 150 return self._m.group("etype") 151 152 @property 153 def date_str(self) -> str: 154 """I answer the YYMMDD creation date as a string.""" 155 return self._m.group("date") 156 157 @property 158 def date(self) -> date: 159 """I answer the creation date as a `datetime.date` instance.""" 160 return datetime.strptime(self.date_str, "%y%m%d").date() 161 162 @property 163 def hash_part(self) -> str: 164 """I answer the twelve-hex BLAKE2s digest that stabilizes my prefix.""" 165 return self._m.group("hash") 166 167 @property 168 def version(self) -> int: 169 """I answer my numeric version (1–999).""" 170 return int(self._m.group("version")) 171 172 @property 173 def state_code(self) -> str: 174 """I answer my one-letter lifecycle state code (e.g., 'D', 'A', 'S').""" 175 return self._m.group("state") 176 177 @property 178 def prefix(self) -> str: 179 """I answer everything before the '@' symbol — my immutable lineage prefix.""" 180 return self.split('@', 1)[0] 181 182 # ─────────────────────────────────────────── 183 # Transformations 184 # ─────────────────────────────────────────── 185 def with_state(self, new_state: str) -> "FlexOID": 186 """ 187 I create a copy of myself with the same version but a new state letter. 188 The given *new_state* must be a single capital letter. 189 """ 190 if not (isinstance(new_state, str) and len(new_state) == 1 and new_state.isalpha()): 191 raise ValueError("State must be a single capital letter.") 192 return FlexOID(f"{self.prefix}@{self.version:03d}{new_state}") 193 194 def __repr__(self): 195 """I display myself in a developer-friendly representation.""" 196 return f"FlexOID({str(self)})" 197 198 # ─────────────────────────────────────────── 199 # Generation helpers 200 # ─────────────────────────────────────────── 201 @staticmethod 202 def _blake_hash(text: str) -> str: 203 """ 204 I compute a 12-hex-digit BLAKE2s digest for *text*. 205 206 I am modern, fast, and collision-resistant. 207 My output is deterministic and stable across platforms. 208 """ 209 210 return hashlib.blake2s(text.encode("utf-8"), digest_size=6).hexdigest().upper() 211 212 @staticmethod 213 def generate(domain: str, entity_type: str, estate: str, text: str, version: int = 1): 214 """ 215 I create a new deterministic Flex-O ID. 216 217 I combine the domain, entity type, and canonicalized *text* 218 into a stable BLAKE2s hash. My prefix therefore remains 219 unchanged when only the state or version changes. 220 """ 221 if not (1 <= version <= FlexOID.MAX_VERSION): 222 raise ValueError(f"Version {version} exceeds limit; mark obsolete.") 223 224 if not (isinstance(estate, str) and len(estate) == 1 and estate.isalpha()): 225 raise ValueError("estate must be a single capital letter.") 226 227 date_part = datetime.now(timezone.utc).strftime("%y%m%d") 228 hash_seed = canonical_seed(f"{domain}:{entity_type}:{canonical_seed(text)}") 229 base_hash = FlexOID._blake_hash(hash_seed) 230 return FlexOID(f"{domain}-{entity_type}{date_part}-{base_hash}@{version:03d}{estate}") 231 232 @staticmethod 233 def safe_generate(domain, entity_type, estate, text, version=1, repo=None): 234 """ 235 I create a new deterministic ID like `generate`, 236 but I also consult an optional *repo* to avoid hash collisions. 237 238 If a different seed has already produced the same prefix, 239 I deterministically salt my seed and regenerate a unique ID. 240 """ 241 domain_code = getattr(domain, "domain", domain) 242 oid = FlexOID.generate(domain_code, entity_type, estate, text, version=version) 243 244 if repo is None: 245 return oid 246 247 existing = repo.get(str(oid)) if hasattr(repo, "get") else repo.get(oid) 248 if not existing: 249 return oid 250 251 try: 252 same_seed = ( 253 getattr(existing, "text_seed", None) == text 254 or getattr(existing, "canonical_seed", lambda: None)() == canonical_seed(text) 255 ) 256 except Exception: 257 same_seed = False 258 259 if same_seed: 260 return oid 261 262 logger.warning(f"FlexOID collision detected for {oid}") 263 salt = secrets.token_hex(1) 264 salted_text = f"{text}|salt:{salt}" 265 return FlexOID.generate(domain_code, entity_type, estate, salted_text, version=version) 53 266 54 267 @classmethod 55 def from_string(cls, id_str: str): 56 return cls(id_str) 268 def next_version(cls, oid: "FlexOID") -> "FlexOID": 269 """ 270 I create the next version within the same lineage. 271 I increment the numeric version but keep the same prefix and state. 272 """ 273 new_ver = oid.version + 1 274 if new_ver > cls.WARN_THRESHOLD and new_ver < cls.MAX_VERSION: 275 logger.warning(f"{oid} approaching obsolescence ({new_ver}/999).") 276 if new_ver > cls.MAX_VERSION: 277 raise RuntimeError(f"{oid} exceeded {cls.MAX_VERSION}; mark obsolete.") 278 return FlexOID(f"{oid.prefix}@{new_ver:03d}{oid.state_code}") 57 279 58 280 @classmethod 59 def from_oid_and_version(cls, oid, version: int): 281 def from_oid_and_version(cls, oid, version): 282 """ 283 I recreate *oid* with an explicitly given version number. 284 I keep its prefix and state, but replace the numeric counter. 285 """ 60 286 if not (1 <= version <= cls.MAX_VERSION): 61 287 raise ValueError(f"Version {version} out of bounds (1..{cls.MAX_VERSION}).") 62 288 return FlexOID(f"{oid.prefix}@{version:03d}{oid.state_code}") 63 289 64 def __init__(self, flexo_id: str):65 self.flexo_id = flexo_id66 67 def __eq__(self, other):68 if not isinstance(other, FlexOID):69 return NotImplemented70 return self.flexo_id == other.flexo_id71 72 def __lt__(self, other):73 if not isinstance(other, FlexOID):74 return NotImplemented75 if self.prefix != other.prefix:76 raise ValueError("Cannot order FlexOIDs from different prefixes")77 return self.version < other.version78 79 def __hash__(self):80 return hash(self.flexo_id)81 82 290 @staticmethod 83 def _blake_hash(text: str) -> str: 84 """Return a 12-hex BLAKE2s digest.""" 85 return hashlib.blake2s(text.encode("utf-8"), 86 digest_size=6).hexdigest().upper() # 6 bytes → 12 hex 87 88 @staticmethod 89 def safe_generate(domain, etype, estate, text, version=1, repo=None): 90 """ 91 Generate a new FlexOID with deterministic hashing, handling rare collisions. 92 """ 93 94 # Normalize domain (Domain object or string) 95 domain_code = getattr(domain, "domain", domain) 96 97 # Generate the deterministic candidate OID 98 oid = FlexOID.generate(domain_code, etype, estate, text, version=version) 99 100 # Collision detection — only if a repository is available 101 if repo is not None: 102 existing = repo.get(str(oid)) if hasattr(repo, "get") else repo.get(oid) 103 else: 104 existing = None 105 106 if existing: 107 try: 108 same_seed = existing.text_seed == text or \ 109 existing.canonical_seed() == canonical_seed(text) 110 except Exception: 111 same_seed = False 112 113 if not same_seed: 114 # Collision detected — regenerate deterministically 115 print("Collision detected", len(repo)) 116 logger.warning(f"FlexOID collision detected for {oid}") 117 118 # (A) refresh date 119 date_part = datetime.now(timezone.utc).strftime("%y%m%d") 120 121 # (B) add minimal deterministic salt (2 hex chars) 122 salt = secrets.token_hex(1) 123 salted_text = f"{text}|salt:{salt}" 124 125 # (C) generate new OID with new date and salted seed 126 oid = FlexOID.generate( 127 domain_code, 128 etype, 129 estate, 130 salted_text, 131 version=version, 132 ) 133 134 # (D) record lineage if the caller has `origin` tracking 135 if hasattr(existing, "flexo_id"): 136 logger.info(f"New lineage created from {existing.flexo_id}") 137 138 return oid 139 140 @staticmethod 141 def generate(domain: str, etype: str, estate: str, text: str, 142 version: int = 1): 143 """ 144 Generate a deterministic Flex-O ID. 145 146 - The hash (and therefore prefix) depends only on domain, etype, and text. 147 → Prefix stays stable across state changes. 148 """ 149 150 if not (1 <= version <= FlexOID.MAX_VERSION): 151 raise ValueError(f"Version {version} exceeds limit; mark obsolete.") 152 153 date_part = datetime.now(timezone.utc).strftime("%y%m%d") 154 155 # state-independent hash seed → prefix stability 156 hash_seed = canonical_seed(f"{domain}:{etype}:{text}") 157 base_hash = FlexOID._blake_hash(hash_seed) 158 159 ver_part = f"{version:03d}{estate}" 160 flexo_id_str = f"{domain}-{etype}{date_part}-{base_hash}@{ver_part}" 161 162 return FlexOID(flexo_id_str) 163 164 @property 165 def state_code(self): 166 part = self.flexo_id.rsplit("@", 1)[-1] 167 168 if not (part and part[-1].isalpha()): 169 raise ValueError(f"Invalid Flex-O ID format: {self.flexo_id}") 170 return part[-1] 171 172 @property 173 def domain(self) -> str: 174 """Return the domain prefix (e.g., 'AF').""" 175 try: 176 return self.flexo_id.split('-', 1)[0] 177 except IndexError: 178 raise ValueError(f"Malformed Flex-O ID: {self.flexo_id}") 179 180 @property 181 def etype(self) -> str: 182 """Return the entity type code (e.g., 'Q', 'CAT', etc.).""" 183 try: 184 part = self.flexo_id.split('-', 1)[1] 185 return ''.join(filter(str.isalpha, part.split('-')[0])) # up to first dash 186 except IndexError: 187 raise ValueError(f"Malformed Flex-O ID: {self.flexo_id}") 188 189 @property 190 def date_str(self) -> str: 191 """Return the YYMMDD creation date as string.""" 192 try: 193 part = self.flexo_id.split('-', 1)[1] 194 # e.g. "Q251019" → skip type prefix, take next 6 digits 195 digits = ''.join(ch for ch in part if ch.isdigit()) 196 return digits[:6] 197 except IndexError: 198 raise ValueError(f"Malformed Flex-O ID: {self.flexo_id}") 199 200 @property 201 def date(self) -> datetime: 202 """Return the creation date as datetime.date object (UTC, naive).""" 203 try: 204 ds = self.date_str 205 return datetime.strptime(ds, "%y%m%d").date() 206 except Exception as e: 207 raise ValueError(f"Invalid date in Flex-O ID: {self.flexo_id}") from e 208 209 @property 210 def hash_part(self) -> str: 211 """Return the 6-hex BLAKE hash portion (e.g., '9B3E2').""" 212 try: 213 after_dash = self.flexo_id.split('-', 2)[2] 214 return after_dash.split('@')[0] 215 except IndexError: 216 raise ValueError(f"Malformed Flex-O ID: {self.flexo_id}") 217 218 @property 219 def suffix(self) -> str: 220 """Return the full suffix after '@' (e.g., '001A').""" 221 try: 222 return self.flexo_id.split('@', 1)[1] 223 except IndexError: 224 raise ValueError(f"Malformed Flex-O ID: {self.flexo_id}") 225 226 @property 227 def version(self) -> int: 228 try: 229 return int(self.suffix[:-1]) # drop state suffix 230 except (ValueError, IndexError): 231 return 1 232 233 @property 234 def prefix(self) -> str: 235 # nur bis einschließlich Hash-Teil 236 return self.flexo_id.split('@', 1)[0] 237 # return self.flexo_id.split('@')[0].rsplit('-', 1)[0] 291 def clone_new_base(domain: str, entity_type: str, estate: str, text: str) -> "FlexOID": 292 """ 293 I start a completely new lineage (version 1) for a derived entity. 294 I am used when copying or forking an existing object that should 295 not share version history with its origin. 296 """ 297 return FlexOID.safe_generate(domain, entity_type, estate, text, version=1) 238 298 239 299 def parsed(self) -> dict: 240 """Return a structured breakdown of the Flex-O ID.""" 300 """ 301 I answer a dictionary that describes all my components — 302 useful for debugging, serialization tests, or human inspection. 303 """ 241 304 return { 242 305 "domain": self.domain, 243 "e type": self.etype,306 "entity_type": self.entity_type, 244 307 "date": self.date, 245 308 "hash": self.hash_part, … … 247 310 "state": self.state_code, 248 311 } 249 250 @classmethod251 def next_version(cls, oid) -> str:252 """253 Create the next version in the same ID lineage.254 255 Increments the version counter of an existing FlexOID while preserving256 its prefix. Used when an entity transitions to a257 new revision within the same lifecycle (e.g., minor updates or approvals).258 259 Parameters260 ----------261 oid : FlexOID262 The existing ID whose version is to be incremented.263 264 Returns265 -------266 FlexOID267 A new Flex-O ID with the same prefix, but version +1.268 269 Raises270 ------271 RuntimeError272 If the maximum allowed version (`MAX_VERSION`) is exceeded.273 274 Notes275 -----276 - Warnings are logged when the version approaches obsolescence.277 """278 new_ver = oid.version + 1279 280 if new_ver > cls.WARN_THRESHOLD and new_ver < cls.MAX_VERSION:281 logger.warning(f"{oid} approaching obsolescence ({new_ver}/999).")282 if new_ver > cls.MAX_VERSION:283 raise RuntimeError(f"{oid} exceeded {cls.MAX_VERSION}; mark obsolete.")284 285 new_id = f"{oid.prefix}@{new_ver:03d}{oid.state_code}"286 return cls(new_id)287 288 @staticmethod289 def clone_new_base(domain: str, etype: str, estate: str, text: str):290 """291 Start a new Flex-O ID lineage for a derived or duplicated entity.292 293 This helper creates a completely new base ID (version 1) using the given294 parameters, instead of incrementing an existing version chain. It is used295 when an entity is copied, forked, or conceptually replaced by a new one.296 297 Returns298 -------299 FlexOID300 A new base ID starting at version 1, unrelated to the original lineage.301 302 Notes303 -----304 - Equivalent to calling `generate(..., version=1)` explicitly.305 - Used when creating "clones" or "variants" that should not share version history.306 """307 return FlexOID.safe_generate(domain, etype, estate, text, version=1)308 309 def __str__(self):310 return self.flexo_id311 312 def __repr__(self):313 return f"<FlexOID {self.flexo_id}>"314 -
tests/conftest.py
rca39274 r8aa20c7 1 # tests/conftest.py 2 1 # tests/stubs/single_choice_question.py 3 2 import pytest 4 import json 3 from datetime import datetime 4 from dataclasses import dataclass, field 5 from typing import List 5 6 from flexoentity import FlexoEntity, EntityType, EntityState, Domain 6 7 7 import pytest 8 import json 9 from flexoentity import EntityType, EntityState, Domain 10 from builder.questions import RadioQuestion, AnswerOption # adjust path if different 11 from builder.media_items import NullMediaItem # adjust import if needed 8 @pytest.fixture 9 def fixed_datetime(monkeypatch): 10 class FixedDate(datetime): 11 @classmethod 12 def now(cls, tz=None): 13 return datetime(2025, 11, 1, tzinfo=tz) 14 monkeypatch.setattr("flexoentity.id_factory.datetime", FixedDate) 15 return FixedDate 12 16 13 17 14 @pytest.fixture(scope="session") 15 def domain(): 16 """Provide a reusable domain for all entity tests.""" 17 return Domain( 18 domain="SIG", 19 etype=EntityType.DOMAIN, 20 state=EntityState.DRAFT, 21 fullname="Signal Corps", 22 description="Questions related to communications and signaling systems.", 23 classification="RESTRICTED", 24 owner="test-suite" 25 ) 18 @dataclass 19 class AnswerOption: 20 id: str 21 text: str 22 points: float = 0.0 23 24 def to_dict(self): 25 return {"id": self.id, "text": self.text, "points": self.points} 26 27 @classmethod 28 def from_dict(cls, data): 29 return cls( 30 id=data.get("id", ""), 31 text=data.get("text", ""), 32 points=data.get("points", 0.0) 33 ) 26 34 27 35 28 @pytest.fixture 29 def radio_question(domain): 30 """Return a simple RadioQuestion entity for testing FlexoEntity logic.""" 31 q = RadioQuestion( 32 domain=domain, 33 etype=EntityType.QUESTION, 34 state=EntityState.DRAFT, 35 text="Which frequency band is used for shortwave communication?", 36 options=[ 37 AnswerOption(id="opt1", text="HF (3–30 MHz)", points=1), 38 AnswerOption(id="opt2", text="VHF (30–300 MHz)", points=0), 39 AnswerOption(id="opt3", text="UHF (300–3000 MHz)", points=0), 40 ] 41 ) 42 return q 36 @dataclass 37 class SingleChoiceQuestion(FlexoEntity): 38 """A minimal stub to test FlexoEntity integration.""" 39 text: str = "" 40 options: List[AnswerOption] = field(default_factory=list) 43 41 44 42 45 @pytest.fixture 46 def serialized_question(radio_question): 47 """Provide the serialized JSON form for roundtrip tests.""" 48 return radio_question.to_json() 43 @classmethod 44 def default(cls): 45 return cls(domain=Domain(domain="GEN", 46 entity_type=EntityType.DOMAIN, 47 state=EntityState.DRAFT), 48 state=EntityState.DRAFT, entity_type=EntityType.ITEM) 49 49 50 def to_dict(self): 51 base = super().to_dict() 52 base.update({ 53 "text": self.text, 54 "options": [opt.to_dict() for opt in self.options], 55 }) 56 return base 57 58 @property 59 def text_seed(self) -> str: 60 """Include answer options (and points) for deterministic ID generation.""" 61 62 joined = "|".join( 63 f"{opt.text.strip()}:{opt.points}" 64 for opt in sorted(self.options, key=lambda o: o.text.strip().lower()) 65 ) 66 return f"{self.text}{joined}" 67 68 @classmethod 69 def from_dict(cls, data): 70 obj = cls( 71 text=data.get("text", ""), 72 options=[AnswerOption.from_dict(o) for o in data.get("options", [])], 73 ) 74 # restore FlexoEntity core fields 75 obj.domain = data.get("domain") 76 obj.entity_type = EntityType[data.get("etype")] if "etype" in data else EntityType.ITEM 77 obj.state = EntityState[data.get("state")] if "state" in data else EntityState.DRAFT 78 if "flexo_id" in data: 79 from flexoentity import FlexOID 80 obj.flexo_id = FlexOID.parsed(data["flexo_id"]) 81 return obj 50 82 51 83 @pytest.fixture 52 def deserialized_question(serialized_question): 53 """Recreate a question from JSON for consistency tests.""" 54 return RadioQuestion.from_json(serialized_question) 55 84 def domain(): 85 return Domain.default() 56 86 57 87 @pytest.fixture 58 def null_media(): 59 """Provide a default NullMediaItem instance for media tests.""" 60 return NullMediaItem( 61 domain=domain, 62 etype=EntityType.MEDIA, 63 state=EntityState.DRAFT 64 ) 88 def sample_question(): 89 return SingleChoiceQuestion(domain=Domain.default(), 90 text="What is 2 + 2?", 91 options=[], 92 entity_type=EntityType.ITEM, 93 state=EntityState.DRAFT) -
tests/test_id_lifecycle.py
rca39274 r8aa20c7 1 1 import pytest 2 from flexoentity import FlexOID, FlexoEntity, Entity Type, EntityState2 from flexoentity import FlexOID, FlexoEntity, EntityState 3 3 4 4 5 5 # ────────────────────────────────────────────────────────────────────────────── 6 # Tests adapted to use real RadioQuestion fixture instead of DummyEntity6 # Tests adapted to use real SingleChoiceQuestion fixture instead of DummyEntity 7 7 # ────────────────────────────────────────────────────────────────────────────── 8 8 9 def test_initial_state( radio_question):10 q = radio_question9 def test_initial_state(sample_question): 10 q = sample_question 11 11 assert q.state == EntityState.DRAFT 12 12 assert q.flexo_id.version == 1 … … 14 14 15 15 16 def test_approval_ bumps_version(radio_question):17 q = radio_question16 def test_approval_does_not_bump_version(sample_question): 17 q = sample_question 18 18 q.approve() 19 19 assert q.state == EntityState.APPROVED 20 assert q.flexo_id.version == 220 assert q.flexo_id.version == 1 21 21 22 22 23 def test_signing_bumps_version( radio_question):24 q = radio_question23 def test_signing_bumps_version(sample_question): 24 q = sample_question 25 25 q.approve() 26 26 v_before = str(q.flexo_id) … … 30 30 31 31 32 def test_publish_bumps_version( radio_question):33 q = radio_question32 def test_publish_bumps_version(sample_question): 33 q = sample_question 34 34 q.approve() 35 35 q.sign() … … 40 40 41 41 42 def test_modify_content_changes_fingerprint( radio_question):43 q = radio_question44 q.text = "Rephrased content" # simulate text change42 def test_modify_content_changes_fingerprint(sample_question): 43 q = sample_question 44 q.text += "Rephrased content" # simulate text change 45 45 changed = q._update_fingerprint() 46 46 assert changed 47 47 48 48 49 def test_no_version_bump_on_draft_edits( radio_question):50 q = radio_question49 def test_no_version_bump_on_draft_edits(sample_question): 50 q = sample_question 51 51 q.text = "Minor draft edit" 52 52 q._update_fingerprint() … … 54 54 55 55 56 def test_version_bump_after_edit_and_sign( radio_question):57 q = radio_question56 def test_version_bump_after_edit_and_sign(sample_question): 57 q = sample_question 58 58 q.approve() 59 59 v1 = str(q.flexo_id) … … 63 63 64 64 65 def test_integrity_check_passes_and_fails( radio_question):66 q = radio_question65 def test_integrity_check_passes_and_fails(sample_question): 66 q = sample_question 67 67 q.approve() 68 68 assert FlexoEntity.verify_integrity(q) … … 73 73 74 74 75 def test_obsolete_state( radio_question):76 q = radio_question75 def test_obsolete_state(sample_question): 76 q = sample_question 77 77 q.approve() 78 78 q.sign() … … 82 82 83 83 84 def test_clone_new_base_resets_lineage( radio_question):85 q = radio_question84 def test_clone_new_base_resets_lineage(sample_question): 85 q = sample_question 86 86 q.approve() 87 87 q.sign() … … 94 94 assert q.flexo_id.version == 1 95 95 96 def test_clone_new_base_sets_origin( radio_question):97 q = radio_question96 def test_clone_new_base_sets_origin(sample_question): 97 q = sample_question 98 98 q.approve() 99 99 q.sign() … … 107 107 assert q.flexo_id != old_id 108 108 109 def test_mass_version_increments_until_obsolete( radio_question):110 q = radio_question109 def test_mass_version_increments_until_obsolete(sample_question): 110 q = sample_question 111 111 q.approve() 112 for _ in range(FlexOID.MAX_VERSION - 2):112 for _ in range(FlexOID.MAX_VERSION - 1): 113 113 q.bump_version() 114 114 with pytest.raises(RuntimeError, match="mark obsolete"): -
tests/test_id_stress.py
rca39274 r8aa20c7 4 4 """ 5 5 6 import copy 7 import logging 8 import random 9 6 10 import pytest 7 import random 8 import logging 11 9 12 from flexoentity import FlexOID, EntityType, EntityState 10 from builder.questions import RadioQuestion, AnswerOption11 13 12 14 logger = logging.getLogger(__name__) … … 18 20 via salt + date adjustment. 19 21 """ 20 e type = EntityType.QUESTION22 entity_type = EntityType.ITEM 21 23 estate = EntityState.DRAFT 22 24 seeds = [f"question {i}" for i in range(100000)] … … 31 33 ids = [] 32 34 for seed in seeds: 33 oid = FlexOID.safe_generate(domain.domain, e type, estate, seed, repo=repo)35 oid = FlexOID.safe_generate(domain.domain, entity_type.value, estate.value, seed, repo=repo) 34 36 assert isinstance(oid, FlexOID) 35 37 ids.append(str(oid)) … … 46 48 47 49 # Sanity check: IDs should look canonical 48 assert all(id_str.startswith(" SIG-") for id_str in ids)50 assert all(id_str.startswith("GEN") for id_str in ids) 49 51 assert all("@" in id_str for id_str in ids) 50 52 … … 54 56 (No runtime disambiguation; IDs are deterministic by design.) 55 57 """ 56 e type = EntityType.QUESTION58 entity_type = EntityType.ITEM 57 59 estate = EntityState.DRAFT 58 60 text = "identical question text" 59 61 60 id1 = FlexOID.generate(domain.domain, e type, estate, text)61 id2 = FlexOID.generate(domain.domain, e type, estate, text)62 id1 = FlexOID.generate(domain.domain, entity_type.value, estate.value, text) 63 id2 = FlexOID.generate(domain.domain, entity_type.value, estate.value, text) 62 64 # IDs must be identical because generation is deterministic 63 65 assert id1 == id2 64 66 65 67 66 def test_id_reproducibility_across_runs(domain):67 """68 The same seed on a new process (fresh _seen_hashes)69 should yield the same base ID (without suffix).70 """71 etype = EntityType.CATALOG72 estate = EntityState.DRAFT73 seed = "reproducibility test seed"68 # def test_id_reproducibility_across_runs(domain): 69 # """ 70 # The same seed on a new process (fresh _seen_hashes) 71 # should yield the same base ID (without suffix). 72 # """ 73 # entity_type = EntityType.CATALOG 74 # estate = EntityState.DRAFT 75 # seed = "reproducibility test seed" 74 76 75 id1 = FlexOID.generate(domain.domain, etype, estate, seed)76 FlexOID._seen_hashes.clear()77 id2 = FlexOID.generate(domain.domain, etype, estate, seed)77 # id1 = FlexOID.generate(domain.domain, entity_type.value, estate.value, seed) 78 # FlexOID._seen_hashes.clear() 79 # id2 = FlexOID.generate(domain.domain, entity_type.value, estate.value, seed) 78 80 79 assert id1 == id281 # assert id1 == id2 80 82 81 83 82 def test_version_ceiling_enforcement( radio_question):84 def test_version_ceiling_enforcement(sample_question): 83 85 """Simulate approaching @999 to trigger obsolescence guard.""" 84 q = radio_question86 q = sample_question 85 87 q.approve() 86 88 … … 97 99 98 100 99 def test_massive_lifecycle_simulation( domain):101 def test_massive_lifecycle_simulation(sample_question): 100 102 """ 101 Generate 100 random RadioQuestions, simulate multiple edits and state transitions,103 Generate 100 random SingleChoiceQuestions, simulate multiple edits and state transitions, 102 104 ensure all final IDs and fingerprints are unique and valid. 103 105 """ 104 106 entities = [ 105 RadioQuestion( 106 domain=domain, 107 etype=EntityType.QUESTION, 108 state=EntityState.DRAFT, 109 text=f"random question {i}", 110 options=[ 111 AnswerOption(id="opt4", text="HF (3–30 MHz)", points=1), 112 AnswerOption(id="opt5", text="VHF (30–300 MHz)", points=0), 113 ], 114 ) 115 for i in range(100) 107 copy.deepcopy(sample_question) for _ in range(100) 116 108 ] 117 109 118 for e in entities:110 for i, e in enumerate(entities): 119 111 # random edit 120 e.text += " updated"112 e.text += f" updated #{i}" 121 113 e._update_fingerprint() 122 114 -
tests/test_persistance_integrity.py
rca39274 r8aa20c7 6 6 import pytest 7 7 8 from builder.questions import RadioQuestion, AnswerOption9 8 from flexoentity import EntityState, EntityType, Domain 10 9 11 10 @pytest.fixture 12 11 def approved_question(): 13 """Provide a fully approved and published RadioQuestion for persistence tests."""14 q = RadioQuestion(15 domain=Domain(domain="GEN", e type=EntityType.DOMAIN, state=EntityState.DRAFT),16 e type=None, # RadioQuestion sets this internally to EntityType.QUESTION12 """Provide a fully approved and published SingleChoiceQuestion for persistence tests.""" 13 q = SingleChoiceQuestion( 14 domain=Domain(domain="GEN", entity_type=EntityType.DOMAIN, state=EntityState.DRAFT), 15 entity_type=None, # SingleChoiceQuestion sets this internally to EntityType.ITEM 17 16 state=EntityState.DRAFT, 18 17 text="What is Ohm’s law?", … … 36 35 json_str = approved_question.to_json() 37 36 print("JSON", json_str) 38 loaded = RadioQuestion.from_json(json_str)37 loaded = SingleChoiceQuestion.from_json(json_str) 39 38 40 39 print("Approved", approved_question.text_seed) 41 40 print("Loaded", loaded.text_seed) 42 41 # Fingerprint and state should match — integrity must pass 43 assert RadioQuestion.verify_integrity(loaded)42 assert SingleChoiceQuestion.verify_integrity(loaded) 44 43 45 44 # Metadata should be preserved exactly … … 56 55 tampered_json = json.dumps(tampered) 57 56 58 loaded = RadioQuestion.from_json(tampered_json)59 assert not RadioQuestion.verify_integrity(loaded)57 loaded = SingleChoiceQuestion.from_json(tampered_json) 58 assert not SingleChoiceQuestion.verify_integrity(loaded) 60 59 61 60 @pytest.mark.skip(reason="FlexOIDs regenerated on import; corruption detection not yet applicable") … … 70 69 file.write_text(corrupted) 71 70 72 loaded = RadioQuestion.from_json(file.read_text())73 assert not RadioQuestion.verify_integrity(loaded)71 loaded = SingleChoiceQuestion.from_json(file.read_text()) 72 assert not SingleChoiceQuestion.verify_integrity(loaded)
Note:
See TracChangeset
for help on using the changeset viewer.
