Changeset 8aa20c7 in flexoentity


Ignore:
Timestamp:
11/01/25 15:51:10 (2 months ago)
Author:
Enrico Schwass <ennoausberlin@…>
Branches:
master
Children:
5c72356
Parents:
ca39274
Message:

full refactoring of FlexOID

Files:
1 added
8 edited

Legend:

Unmodified
Added
Removed
  • README.md

    rca39274 r8aa20c7  
    7878
    7979*Highlights*
    80 - =generate(domain, etype, estate, text, version=1)= → create a new ID
     80- =generate(domain, entity_type, estate, text, version=1)= → create a new ID
    8181- =next_version(oid)= → increment version safely
    82 - =clone_new_base(domain, etype, estate, text)= → start a new lineage
     82- =clone_new_base(domain, entity_type, estate, text)= → start a new lineage
    8383- Deterministic prefix, state-dependent signature
    8484
     
    105105
    106106#+BEGIN_SRC python
    107 entity = Question("AF", EntityType.QUESTION, "What is Ohm’s law?")
     107entity = Question("AF", EntityType.ITEM, "What is Ohm’s law?")
    108108json_str = entity.to_json()
    109109reloaded = Question.from_json(json_str)
     
    125125    def __init__(self, domain, text):
    126126        self._text = text
    127         super().__init__(domain, EntityType.QUESTION)
     127        super().__init__(domain, EntityType.ITEM)
    128128
    129129    @property
     
    145145{
    146146  "domain": "AF",
    147   "etype": "QUESTION",
     147  "entity_type": "ITEM",
    148148  "text_seed": "What is Ohm’s law?",
    149149  "state": "APPROVED_AND_SIGNED",
  • flexoentity/domain.py

    rca39274 r8aa20c7  
    11from dataclasses import dataclass
    22from flexoentity.flexo_entity import FlexoEntity, EntityType, EntityState
     3
    34
    45@dataclass
     
    1112    @classmethod
    1213    def default(cls):
    13         return cls(domain="GEN", etype=EntityType.DATABASE, state=EntityState.DRAFT)
     14        return cls(domain="GEN", entity_type=EntityType.DOMAIN, state=EntityState.DRAFT)
    1415
    1516    def __post_init__(self):
  • flexoentity/flexo_entity.py

    rca39274 r8aa20c7  
    55import json
    66import re
    7 from enum import Enum, auto
     7from uuid import UUID
     8from enum import Enum
    89from dataclasses import dataclass, field
    910from typing import Optional
     
    2122
    2223class EntityType(Enum):
    23     QUESTION = auto()
    24     MEDIA = auto()
    25     CATALOG = auto()
    26     EXAM = auto()
    27     DATABASE = auto()
    28     CERTIFICATE = auto()
    29     DOMAIN = auto()
    30 
     24    GENERIC =  "G"
     25    DOMAIN = "D"
     26    MEDIA = "M"
     27    ITEM = "I"
     28    CATALOG = "C"
     29    TEXT = "T"
     30    HANDOUT = "H"
     31    OUTPUT = "O"
     32    RECORD = "R"
     33    SESSION = "S"
     34    USER = "U"
     35    CONFIG = "F"
     36    EVENT = "E"
     37
     38    @classmethod
     39    def from_letter(cls, a_letter):
     40        return cls(a_letter)
     41
     42    # FIXME: Add more mappings
    3143    def short(self) -> str:
    3244        mapping = {
    33             EntityType.QUESTION: "Q",
    3445            EntityType.MEDIA: "M",
    3546            EntityType.DOMAIN: "DOM",
    3647            EntityType.CATALOG:  "CAT",
    37             EntityType.EXAM:     "EX",
    38             EntityType.DATABASE: "DB",
    39             EntityType.CERTIFICATE: "CERT"
    4048        }
    4149        return mapping[self]
     
    4755
    4856class EntityState(Enum):
    49     DRAFT = auto()
    50     APPROVED = auto()
    51     APPROVED_AND_SIGNED = auto()
    52     PUBLISHED = auto()
    53     OBSOLETE = auto()
    54 
    55     def short(self) -> str:
    56         """
    57         Return a one-letter abbreviation for the state, used in Flex-O IDs.
    58         """
    59         mapping = {
    60             EntityState.DRAFT: "D",
    61             EntityState.APPROVED: "A",
    62             EntityState.APPROVED_AND_SIGNED: "S",
    63             EntityState.PUBLISHED: "P",
    64             EntityState.OBSOLETE: "O",
    65 
    66         }
    67         return mapping[self]
    68 
    69     @classmethod
    70     def from_short(cls, char: str):
    71         """
    72         Inverse of .short(): restore the EntityState from its one-letter code.
    73         """
    74         reverse = {
    75             "D": cls.DRAFT,
    76             "A": cls.APPROVED,
    77             "S": cls.APPROVED_AND_SIGNED,
    78             "P": cls.PUBLISHED,
    79             "O": cls.OBSOLETE,
    80         }
    81         try:
    82             return reverse[char.upper()]
    83         except KeyError:
    84             raise ValueError(f"Unknown state abbreviation: {char}")
     57    DRAFT = "D"
     58    APPROVED = "A"
     59    APPROVED_AND_SIGNED = "S"
     60    PUBLISHED = "P"
     61    OBSOLETE = "O"
    8562
    8663    def __str__(self):
     
    9168class FlexoEntity(ABC):
    9269    domain: str
    93     etype: EntityType
     70    entity_type: EntityType
     71    subtype: str = "GENERIC"
    9472    state: EntityState
    9573    flexo_id: Optional[FlexOID] = field(default=None)
    9674    fingerprint: str = field(default_factory=str)
     75    originator_id: UUID = field(default=UUID(int=0))
     76    owner_id: UUID = field(default=UUID(int=0))
    9777    origin: Optional[str] = field(default=None)
    9878
    99     OID_PATTERN = re.compile(
    100         r"^(?P<domain>[A-Z0-9]+)-(?P<etype>[A-Z]+)"
    101         r"(?P<date>\d{6,8})-(?P<hash>[0-9A-F]+)@(?P<version>\d{3})(?P<state>[A-Z])$"
    102     )
    103 
    104     @classmethod
    105     def from_string(cls, s: str) -> "FlexOID":
    106         """Rehydrate a FlexOID from its canonical string form."""
    107         m = cls.OID_PATTERN.match(s.strip())
    108         if not m:
    109             raise ValueError(f"Invalid FlexOID string: {s}")
    110         gd = m.groupdict()
    111         return cls(
    112             domain=gd["domain"],
    113             etype=gd["etype"],
    114             state=gd["state"],
    115         )
    11679    @property
    11780    @abstractmethod
     
    143106
    144107        self.flexo_id = FlexOID.safe_generate(self.domain_code(),
    145                                          self.etype.short(),
    146                                          self.state.short(),
     108                                         self.entity_type.value,
     109                                         self.state.value,
    147110                                         self.text_seed,
    148111                                         1)
     
    152115    def __str__(self):
    153116        return (
    154             f"{self.etype.name}({self.flexo_id}, {self.state.name}, "
     117            f"{self.entity_type.name}({self.flexo_id}, {self.state.name}, "
    155118            f"fingerprint={self.fingerprint}..., v{self.version})"
    156119        )
     
    159122        return {
    160123            "domain": self.domain_code(),
    161             "etype": self.etype.name,
     124            "entity_type": self.entity_type.name,
    162125            "state": self.state.name,
    163126            "flexo_id": str(self.flexo_id),
     
    173136        domain_obj = Domain(
    174137            domain=abbrev,
    175             etype=EntityType.DOMAIN,
     138            entity_type=EntityType.DOMAIN,
    176139            state=EntityState.DRAFT,  # default when reconstructing context
    177140        )
    178141        obj = cls(
    179142            domain=domain_obj,
    180             etype=EntityType[data["etype"]],
     143            entity_type=EntityType[data["entity_type"]],
    181144            state=EntityState[data["state"]],
    182145        )
     
    221184            self.fingerprint = new_fp
    222185            self.flexo_id = FlexOID.safe_generate(self.domain_code(),
    223                                              self.etype.short(),
    224                                              self.state.short(),
     186                                             self.entity_type.value,
     187                                             self.state.value,
    225188                                             self.text_seed,
    226189                                             self.flexo_id.version)
     
    244207        # special case: marking obsolete
    245208        if target_state == EntityState.OBSOLETE:
    246             self.flexo_id = FlexOID(self.flexo_id.flexo_id[:-1] + "O")
     209            self.flexo_id = FlexOID.with_state(self.flexo_id, "O")
    247210            self.state = target_state
    248211            return
     
    287250       
    288251        if self.state == EntityState.DRAFT:
    289             new_version = self.flexo_id.version + 1
    290252            new_fid = FlexOID.safe_generate(self.domain_code(),
    291                 self.etype.short(),
    292                 EntityState.APPROVED.short(),
     253                self.entity_type.value,
     254                EntityState.APPROVED.value,
    293255                self.text_seed,
    294                 version=new_version
     256                version=self.version
    295257            )
    296             self.previous_id = self.flexo_id  # optional: keep audit trail
     258            self.origin = self.flexo_id  # optional: keep audit trail
    297259            self.flexo_id = new_fid
    298260            self.state = EntityState.APPROVED
     
    320282        new_fid = FlexOID.safe_generate(
    321283            self.domain_code(),
    322             self.etype.short(),
    323             EntityState.PUBLISHED.short(),
     284            self.entity_type.value,
     285            EntityState.PUBLISHED.value,
    324286            self.text_seed,
    325287            version=new_version
    326288        )
    327289
    328         self.previous_id = self.flexo_id
     290        self.origin = self.flexo_id
    329291        self.flexo_id = new_fid
    330292        self.state = EntityState.PUBLISHED
     
    346308        self.flexo_id = FlexOID.clone_new_base(
    347309            self.domain_code(),
    348             self.etype.short(),
    349             self.state.short(),
     310            self.entity_type.value,
     311            self.state.value,
    350312            self.text_seed,
    351313        )
     
    362324            canonical_seed(entity.text_seed).encode("utf-8"), digest_size=8
    363325        ).hexdigest().upper()
    364         print(entity.fingerprint)
    365         print(expected_fp)
    366326        return expected_fp == entity.fingerprint
    367327
  • flexoentity/id_factory.py

    rca39274 r8aa20c7  
    11"""
    2 id_factory.py — Central Flex-O ID generator and versioning control (hardened).
    3 
    4 Improvements:
    5 - BLAKE2s hashing (modern, fast, stdlib)
    6 - 6 hex-digit hash (≈16.7M combinations)
    7 - UTC-based dates for consistency
    8 - Collision disambiguator (-A, -B, ...)
    9 - Canonical seed and content fingerprint helpers
     2id_factory.py — Central Flex-O ID generation and lifecycle logic.
     3
     4I represent the identity backbone of the Flex-O ecosystem.
     5I define how entities receive, preserve, and evolve their unique identifiers
     6throughout their lifecycle.
     7
     8Each Flex-O ID (FlexOID) is a self-contained string that encodes:
     9
     10    <DOMAIN>-<ETYPE><YYMMDD>-<HASH>@<VERSION><STATE>
     11
     12Example:
     13    GEN-ITEM251101-3139ACFAE38B@002A
     14
     15where
     16    DOMAIN      — a short prefix identifying the origin domain (e.g., GEN)
     17    ETYPE       — a compact entity type code (e.g., ITEM)
     18    YYMMDD      — the UTC creation date
     19    HASH        — a 12-hex BLAKE2s digest derived from canonical content
     20    VERSION     — a three-digit lineage counter (001-999)
     21    STATE       — a single capital letter indicating lifecycle state
     22
     23───────────────────────────────────────────────────────────────────────────────
     24Lifecycle semantics
     25───────────────────────────────────────────────────────────────────────────────
     26
     27I distinguish clearly between *content evolution* and *workflow state*:
     28
     29• Draft → Approved:
     30      I generate a fully new FlexOID whose hash and version reflect
     31      the new, stable content.  This step marks the transition from
     32      provisional to permanent identity.  No extra version bump is required.
     33
     34• Approved → Signed → Published → Obsolete:
     35      These transitions do not change the content hash.
     36      They represent confidence or visibility, not content.
     37      The same FlexOID (same version) remains valid across them.
     38
     39In short:
     40      Version changes record evolution of content.
     41      State changes record evolution of trust.
     42
     43───────────────────────────────────────────────────────────────────────────────
     44Implementation notes
     45───────────────────────────────────────────────────────────────────────────────
     46
     47• Hashing:  I use modern BLAKE2s (6-byte digest) for deterministic,
     48            collision-resistant identifiers.
     49
     50• Canonicalization:  I flatten entity data into a reproducible text form
     51                     so equivalent content always yields the same hash.
     52
     53• Generation:  I provide safe and deterministic factories
     54               (FlexOID.generate, FlexOID.safe_generate)
     55               that handle rare hash collisions by minimal salting.
     56
     57• Validation:  Every FlexOID validates itself through a strict regex pattern
     58               and exposes lightweight accessors (domain, type, date, hash,
     59               version, state).
     60
     61───────────────────────────────────────────────────────────────────────────────
     62Philosophy
     63───────────────────────────────────────────────────────────────────────────────
     64
     65I strive to be deterministic, minimal, and auditable.
     66My design favors immutability and clarity over flexibility.
     67A FlexOID never lies about its lineage: once created, it is final.
     68
     69Version marks evolution of content.
     70State marks evolution of confidence.
    1071"""
    1172
    12 import logging
    13 from datetime import datetime, timezone
     73import re
    1474import hashlib
    1575import secrets
    16 import itertools
    1776import json
    18 
    19 logger = logging.getLogger(__name__)
    20 
    21 # ──────────────────────────────────────────────────────────────────────────────
    22 #  Canonicalization helpers
    23 # ──────────────────────────────────────────────────────────────────────────────
    24 
     77from datetime import datetime, timezone
     78from logging import Logger
     79
     80logger = Logger(__file__)
    2581
    2682def canonical_seed(obj) -> str:
    2783    """
    28     Deterministically flatten an entity's core data into a string
    29     for hashing and deduplication.
     84    I transform *obj* into a deterministic, comparable text form.
     85
     86    I remove irrelevant formatting differences so that two equal
     87    pieces of data always yield the same hash seed.
     88
     89    Rules:
     90    - If *obj* is a string, I normalize whitespace.
     91    - If *obj* is a dict, I JSON-encode it with sorted keys.
     92    - If *obj* is an object, I recurse on its __dict__.
     93    - Otherwise, I coerce *obj* to str().
    3094    """
     95
    3196    if isinstance(obj, str):
    3297        text = " ".join(obj.split())
     
    38103    return str(obj)
    39104
    40 # ──────────────────────────────────────────────────────────────────────────────
    41 #  ID Factory
    42 # ──────────────────────────────────────────────────────────────────────────────
    43 
    44 
    45 class FlexOID:
     105
     106class FlexOID(str):
     107    """
     108    I represent a canonical textual identifier within the Flex-O ecosystem.
     109    I am immutable and behave like a normal string, but I understand
     110    my own internal structure.  I can reveal my parts, create new
     111    versions, and switch between lifecycle states.
     112    """
     113
    46114    MAX_VERSION = 999
    47115    WARN_THRESHOLD = 900
    48116
    49 
    50     # keep in-memory registry for same-session collisions (optional)
    51     # NOTE: We might remove this soon
    52     _seen_hashes = set()
     117    OID_PATTERN = re.compile(
     118        r"^(?P<domain>[A-Z0-9]+)-"
     119        r"(?P<etype>[A-Z0-9]+)"
     120        r"(?P<date>\d{6})-"
     121        r"(?P<hash>[A-F0-9]+)@"
     122        r"(?P<version>\d{3})"
     123        r"(?P<state>[A-Z])$"
     124    )
     125
     126    def __new__(cls, value: str):
     127        """
     128        I create a new validated Flex-O ID from *value*.
     129        I verify that the given string matches the required pattern
     130        and remember the parsed match for later access.
     131        """
     132        m = cls.OID_PATTERN.match(value)
     133        if not m:
     134            raise ValueError(f"Invalid FlexOID format: {value}")
     135        obj = super().__new__(cls, value)
     136        obj._m = m
     137        return obj
     138
     139    # ───────────────────────────────────────────
     140    # Parsed accessors
     141    # ───────────────────────────────────────────
     142    @property
     143    def domain(self) -> str:
     144        """I answer the domain prefix (e.g., 'GEN')."""
     145        return self._m.group("domain")
     146
     147    @property
     148    def entity_type(self) -> str:
     149        """I answer the short entity-type code (e.g., 'ITEM')."""
     150        return self._m.group("etype")
     151
     152    @property
     153    def date_str(self) -> str:
     154        """I answer the YYMMDD creation date as a string."""
     155        return self._m.group("date")
     156
     157    @property
     158    def date(self) -> date:
     159        """I answer the creation date as a `datetime.date` instance."""
     160        return datetime.strptime(self.date_str, "%y%m%d").date()
     161
     162    @property
     163    def hash_part(self) -> str:
     164        """I answer the twelve-hex BLAKE2s digest that stabilizes my prefix."""
     165        return self._m.group("hash")
     166
     167    @property
     168    def version(self) -> int:
     169        """I answer my numeric version (1–999)."""
     170        return int(self._m.group("version"))
     171
     172    @property
     173    def state_code(self) -> str:
     174        """I answer my one-letter lifecycle state code (e.g., 'D', 'A', 'S')."""
     175        return self._m.group("state")
     176
     177    @property
     178    def prefix(self) -> str:
     179        """I answer everything before the '@' symbol — my immutable lineage prefix."""
     180        return self.split('@', 1)[0]
     181
     182    # ───────────────────────────────────────────
     183    # Transformations
     184    # ───────────────────────────────────────────
     185    def with_state(self, new_state: str) -> "FlexOID":
     186        """
     187        I create a copy of myself with the same version but a new state letter.
     188        The given *new_state* must be a single capital letter.
     189        """
     190        if not (isinstance(new_state, str) and len(new_state) == 1 and new_state.isalpha()):
     191            raise ValueError("State must be a single capital letter.")
     192        return FlexOID(f"{self.prefix}@{self.version:03d}{new_state}")
     193
     194    def __repr__(self):
     195        """I display myself in a developer-friendly representation."""
     196        return f"FlexOID({str(self)})"
     197
     198    # ───────────────────────────────────────────
     199    # Generation helpers
     200    # ───────────────────────────────────────────
     201    @staticmethod
     202    def _blake_hash(text: str) -> str:
     203        """
     204        I compute a 12-hex-digit BLAKE2s digest for *text*.
     205
     206        I am modern, fast, and collision-resistant.
     207        My output is deterministic and stable across platforms.
     208        """
     209
     210        return hashlib.blake2s(text.encode("utf-8"), digest_size=6).hexdigest().upper()
     211
     212    @staticmethod
     213    def generate(domain: str, entity_type: str, estate: str, text: str, version: int = 1):
     214        """
     215        I create a new deterministic Flex-O ID.
     216
     217        I combine the domain, entity type, and canonicalized *text*
     218        into a stable BLAKE2s hash.  My prefix therefore remains
     219        unchanged when only the state or version changes.
     220        """
     221        if not (1 <= version <= FlexOID.MAX_VERSION):
     222            raise ValueError(f"Version {version} exceeds limit; mark obsolete.")
     223
     224        if not (isinstance(estate, str) and len(estate) == 1 and estate.isalpha()):
     225            raise ValueError("estate must be a single capital letter.")
     226
     227        date_part = datetime.now(timezone.utc).strftime("%y%m%d")
     228        hash_seed = canonical_seed(f"{domain}:{entity_type}:{canonical_seed(text)}")
     229        base_hash = FlexOID._blake_hash(hash_seed)
     230        return FlexOID(f"{domain}-{entity_type}{date_part}-{base_hash}@{version:03d}{estate}")
     231
     232    @staticmethod
     233    def safe_generate(domain, entity_type, estate, text, version=1, repo=None):
     234        """
     235        I create a new deterministic ID like `generate`,
     236        but I also consult an optional *repo* to avoid hash collisions.
     237
     238        If a different seed has already produced the same ID,
     239        I add a short random salt to my seed and regenerate the ID.
     240        """
     241        domain_code = getattr(domain, "domain", domain)
     242        oid = FlexOID.generate(domain_code, entity_type, estate, text, version=version)
     243
     244        if repo is None:
     245            return oid
     246
     247        existing = repo.get(str(oid)) if hasattr(repo, "get") else repo.get(oid)
     248        if not existing:
     249            return oid
     250
     251        try:
     252            same_seed = (
     253                getattr(existing, "text_seed", None) == text
     254                or getattr(existing, "canonical_seed", lambda: None)() == canonical_seed(text)
     255            )
     256        except Exception:
     257            same_seed = False
     258
     259        if same_seed:
     260            return oid
     261
     262        logger.warning(f"FlexOID collision detected for {oid}")
     263        salt = secrets.token_hex(1)
     264        salted_text = f"{text}|salt:{salt}"
     265        return FlexOID.generate(domain_code, entity_type, estate, salted_text, version=version)
    53266
    54267    @classmethod
    55     def from_string(cls, id_str: str):
    56         return cls(id_str)
     268    def next_version(cls, oid: "FlexOID") -> "FlexOID":
     269        """
     270        I create the next version within the same lineage.
     271        I increment the numeric version but keep the same prefix and state.
     272        """
     273        new_ver = oid.version + 1
     274        if new_ver > cls.WARN_THRESHOLD and new_ver < cls.MAX_VERSION:
     275            logger.warning(f"{oid} approaching obsolescence ({new_ver}/999).")
     276        if new_ver > cls.MAX_VERSION:
     277            raise RuntimeError(f"{oid} exceeded {cls.MAX_VERSION}; mark obsolete.")
     278        return FlexOID(f"{oid.prefix}@{new_ver:03d}{oid.state_code}")
    57279
    58280    @classmethod
    59     def from_oid_and_version(cls, oid, version: int):
     281    def from_oid_and_version(cls, oid, version):
     282        """
     283        I recreate *oid* with an explicitly given version number.
     284        I keep its prefix and state, but replace the numeric counter.
     285        """
    60286        if not (1 <= version <= cls.MAX_VERSION):
    61287            raise ValueError(f"Version {version} out of bounds (1..{cls.MAX_VERSION}).")
    62288        return FlexOID(f"{oid.prefix}@{version:03d}{oid.state_code}")
    63289
    64     def __init__(self, flexo_id: str):
    65         self.flexo_id = flexo_id
    66 
    67     def __eq__(self, other):
    68         if not isinstance(other, FlexOID):
    69             return NotImplemented
    70         return self.flexo_id == other.flexo_id
    71 
    72     def __lt__(self, other):
    73         if not isinstance(other, FlexOID):
    74             return NotImplemented
    75         if self.prefix != other.prefix:
    76             raise ValueError("Cannot order FlexOIDs from different prefixes")
    77         return self.version < other.version
    78 
    79     def __hash__(self):
    80         return hash(self.flexo_id)
    81 
    82290    @staticmethod
    83     def _blake_hash(text: str) -> str:
    84         """Return a 12-hex BLAKE2s digest."""
    85         return hashlib.blake2s(text.encode("utf-8"),
    86                                digest_size=6).hexdigest().upper()  # 6 bytes → 12 hex
    87 
    88     @staticmethod
    89     def safe_generate(domain, etype, estate, text, version=1, repo=None):
    90         """
    91         Generate a new FlexOID with deterministic hashing, handling rare collisions.
    92         """
    93 
    94         # Normalize domain (Domain object or string)
    95         domain_code = getattr(domain, "domain", domain)
    96 
    97         # Generate the deterministic candidate OID
    98         oid = FlexOID.generate(domain_code, etype, estate, text, version=version)
    99 
    100         # Collision detection — only if a repository is available
    101         if repo is not None:
    102             existing = repo.get(str(oid)) if hasattr(repo, "get") else repo.get(oid)
    103         else:
    104             existing = None
    105 
    106         if existing:
    107             try:
    108                 same_seed = existing.text_seed == text or \
    109                             existing.canonical_seed() == canonical_seed(text)
    110             except Exception:
    111                 same_seed = False
    112 
    113             if not same_seed:
    114                 # Collision detected — regenerate deterministically
    115                 print("Collision detected", len(repo))
    116                 logger.warning(f"FlexOID collision detected for {oid}")
    117 
    118                 # (A) refresh date
    119                 date_part = datetime.now(timezone.utc).strftime("%y%m%d")
    120 
    121                 # (B) add minimal deterministic salt (2 hex chars)
    122                 salt = secrets.token_hex(1)
    123                 salted_text = f"{text}|salt:{salt}"
    124 
    125                 # (C) generate new OID with new date and salted seed
    126                 oid = FlexOID.generate(
    127                     domain_code,
    128                     etype,
    129                     estate,
    130                     salted_text,
    131                     version=version,
    132                 )
    133 
    134                 # (D) record lineage if the caller has `origin` tracking
    135                 if hasattr(existing, "flexo_id"):
    136                     logger.info(f"New lineage created from {existing.flexo_id}")
    137 
    138         return oid
    139 
    140     @staticmethod
    141     def generate(domain: str, etype: str, estate: str, text: str,
    142              version: int = 1):
    143         """
    144         Generate a deterministic Flex-O ID.
    145 
    146         - The hash (and therefore prefix) depends only on domain, etype, and text.
    147         → Prefix stays stable across state changes.
    148         """
    149 
    150         if not (1 <= version <= FlexOID.MAX_VERSION):
    151             raise ValueError(f"Version {version} exceeds limit; mark obsolete.")
    152 
    153         date_part = datetime.now(timezone.utc).strftime("%y%m%d")
    154 
    155         # state-independent hash seed → prefix stability
    156         hash_seed = canonical_seed(f"{domain}:{etype}:{text}")
    157         base_hash = FlexOID._blake_hash(hash_seed)
    158 
    159         ver_part = f"{version:03d}{estate}"
    160         flexo_id_str = f"{domain}-{etype}{date_part}-{base_hash}@{ver_part}"
    161 
    162         return FlexOID(flexo_id_str)
    163 
    164     @property
    165     def state_code(self):
    166         part = self.flexo_id.rsplit("@", 1)[-1]
    167 
    168         if not (part and part[-1].isalpha()):
    169             raise ValueError(f"Invalid Flex-O ID format: {self.flexo_id}")
    170         return part[-1]
    171 
    172     @property
    173     def domain(self) -> str:
    174         """Return the domain prefix (e.g., 'AF')."""
    175         try:
    176             return self.flexo_id.split('-', 1)[0]
    177         except IndexError:
    178             raise ValueError(f"Malformed Flex-O ID: {self.flexo_id}")
    179 
    180     @property
    181     def etype(self) -> str:
    182         """Return the entity type code (e.g., 'Q', 'CAT', etc.)."""
    183         try:
    184             part = self.flexo_id.split('-', 1)[1]
    185             return ''.join(filter(str.isalpha, part.split('-')[0]))  # up to first dash
    186         except IndexError:
    187             raise ValueError(f"Malformed Flex-O ID: {self.flexo_id}")
    188 
    189     @property
    190     def date_str(self) -> str:
    191         """Return the YYMMDD creation date as string."""
    192         try:
    193             part = self.flexo_id.split('-', 1)[1]
    194             # e.g. "Q251019" → skip type prefix, take next 6 digits
    195             digits = ''.join(ch for ch in part if ch.isdigit())
    196             return digits[:6]
    197         except IndexError:
    198             raise ValueError(f"Malformed Flex-O ID: {self.flexo_id}")
    199 
    200     @property
    201     def date(self) -> datetime:
    202         """Return the creation date as datetime.date object (UTC, naive)."""
    203         try:
    204             ds = self.date_str
    205             return datetime.strptime(ds, "%y%m%d").date()
    206         except Exception as e:
    207             raise ValueError(f"Invalid date in Flex-O ID: {self.flexo_id}") from e
    208 
    209     @property
    210     def hash_part(self) -> str:
    211         """Return the 6-hex BLAKE hash portion (e.g., '9B3E2')."""
    212         try:
    213             after_dash = self.flexo_id.split('-', 2)[2]
    214             return after_dash.split('@')[0]
    215         except IndexError:
    216             raise ValueError(f"Malformed Flex-O ID: {self.flexo_id}")
    217 
    218     @property
    219     def suffix(self) -> str:
    220         """Return the full suffix after '@' (e.g., '001A')."""
    221         try:
    222             return self.flexo_id.split('@', 1)[1]
    223         except IndexError:
    224             raise ValueError(f"Malformed Flex-O ID: {self.flexo_id}")
    225 
    226     @property
    227     def version(self) -> int:
    228         try:
    229             return int(self.suffix[:-1])  # drop state suffix
    230         except (ValueError, IndexError):
    231             return 1
    232 
    233     @property
    234     def prefix(self) -> str:
    235         # only up to and including the hash part
    236         return self.flexo_id.split('@', 1)[0]
    237         # return self.flexo_id.split('@')[0].rsplit('-', 1)[0]
     291    def clone_new_base(domain: str, entity_type: str, estate: str, text: str) -> "FlexOID":
     292        """
     293        I start a completely new lineage (version 1) for a derived entity.
     294        I am used when copying or forking an existing object that should
     295        not share version history with its origin.
     296        """
     297        return FlexOID.safe_generate(domain, entity_type, estate, text, version=1)
    238298
    239299    def parsed(self) -> dict:
    240         """Return a structured breakdown of the Flex-O ID."""
     300        """
     301        I answer a dictionary that describes all my components —
     302        useful for debugging, serialization tests, or human inspection.
     303        """
    241304        return {
    242305            "domain": self.domain,
    243             "etype": self.etype,
     306            "entity_type": self.entity_type,
    244307            "date": self.date,
    245308            "hash": self.hash_part,
     
    247310            "state": self.state_code,
    248311        }
    249 
    250     @classmethod
    251     def next_version(cls, oid) -> str:
    252         """
    253         Create the next version in the same ID lineage.
    254 
    255         Increments the version counter of an existing FlexOID while preserving
    256         its prefix. Used when an entity transitions to a
    257         new revision within the same lifecycle (e.g., minor updates or approvals).
    258 
    259         Parameters
    260         ----------
    261         oid : FlexOID
    262         The existing ID whose version is to be incremented.
    263 
    264         Returns
    265         -------
    266         FlexOID
    267         A new Flex-O ID with the same prefix, but version +1.
    268 
    269         Raises
    270         ------
    271         RuntimeError
    272         If the maximum allowed version (`MAX_VERSION`) is exceeded.
    273 
    274         Notes
    275         -----
    276         - Warnings are logged when the version approaches obsolescence.
    277         """
    278         new_ver = oid.version + 1
    279 
    280         if new_ver > cls.WARN_THRESHOLD and new_ver < cls.MAX_VERSION:
    281             logger.warning(f"{oid} approaching obsolescence ({new_ver}/999).")
    282         if new_ver > cls.MAX_VERSION:
    283             raise RuntimeError(f"{oid} exceeded {cls.MAX_VERSION}; mark obsolete.")
    284 
    285         new_id = f"{oid.prefix}@{new_ver:03d}{oid.state_code}"
    286         return cls(new_id)
    287 
    288     @staticmethod
    289     def clone_new_base(domain: str, etype: str, estate: str, text: str):
    290         """
    291         Start a new Flex-O ID lineage for a derived or duplicated entity.
    292 
    293         This helper creates a completely new base ID (version 1) using the given
    294         parameters, instead of incrementing an existing version chain. It is used
    295         when an entity is copied, forked, or conceptually replaced by a new one.
    296 
    297         Returns
    298         -------
    299         FlexOID
    300         A new base ID starting at version 1, unrelated to the original lineage.
    301 
    302         Notes
    303         -----
    304         - Equivalent to calling `generate(..., version=1)` explicitly.
    305         - Used when creating "clones" or "variants" that should not share version history.
    306         """
    307         return FlexOID.safe_generate(domain, etype, estate, text, version=1)
    308 
    309     def __str__(self):
    310         return self.flexo_id
    311 
    312     def __repr__(self):
    313         return f"<FlexOID {self.flexo_id}>"
    314 
  • tests/conftest.py

    rca39274 r8aa20c7  
    1 # tests/conftest.py
    2 
      1# tests/conftest.py
    32import pytest
    4 import json
     3from datetime import datetime
     4from dataclasses import dataclass, field
     5from typing import List
    56from flexoentity import FlexoEntity, EntityType, EntityState, Domain
    67
    7 import pytest
    8 import json
    9 from flexoentity import EntityType, EntityState, Domain
    10 from builder.questions import RadioQuestion, AnswerOption  # adjust path if different
    11 from builder.media_items import NullMediaItem  # adjust import if needed
     8@pytest.fixture
     9def fixed_datetime(monkeypatch):
     10    class FixedDate(datetime):
     11        @classmethod
     12        def now(cls, tz=None):
     13            return datetime(2025, 11, 1, tzinfo=tz)
     14    monkeypatch.setattr("flexoentity.id_factory.datetime", FixedDate)
     15    return FixedDate
    1216
    1317
    14 @pytest.fixture(scope="session")
    15 def domain():
    16     """Provide a reusable domain for all entity tests."""
    17     return Domain(
    18         domain="SIG",
    19         etype=EntityType.DOMAIN,
    20         state=EntityState.DRAFT,
    21         fullname="Signal Corps",
    22         description="Questions related to communications and signaling systems.",
    23         classification="RESTRICTED",
    24         owner="test-suite"
    25     )
     18@dataclass
     19class AnswerOption:
     20    id: str
     21    text: str
     22    points: float = 0.0
     23
     24    def to_dict(self):
     25        return {"id": self.id, "text": self.text, "points": self.points}
     26
     27    @classmethod
     28    def from_dict(cls, data):
     29        return cls(
     30            id=data.get("id", ""),
     31            text=data.get("text", ""),
     32            points=data.get("points", 0.0)
     33        )
    2634
    2735
    28 @pytest.fixture
    29 def radio_question(domain):
    30     """Return a simple RadioQuestion entity for testing FlexoEntity logic."""
    31     q = RadioQuestion(
    32         domain=domain,
    33         etype=EntityType.QUESTION,
    34         state=EntityState.DRAFT,
    35         text="Which frequency band is used for shortwave communication?",
    36         options=[
    37             AnswerOption(id="opt1", text="HF (3–30 MHz)", points=1),
    38             AnswerOption(id="opt2", text="VHF (30–300 MHz)", points=0),
    39             AnswerOption(id="opt3", text="UHF (300–3000 MHz)", points=0),
    40         ]
    41     )
    42     return q
     36@dataclass
     37class SingleChoiceQuestion(FlexoEntity):
     38    """A minimal stub to test FlexoEntity integration."""
     39    text: str = ""
     40    options: List[AnswerOption] = field(default_factory=list)
    4341
    4442
    45 @pytest.fixture
    46 def serialized_question(radio_question):
    47     """Provide the serialized JSON form for roundtrip tests."""
    48     return radio_question.to_json()
     43    @classmethod
     44    def default(cls):
     45        return cls(domain=Domain(domain="GEN",
     46                                 entity_type=EntityType.DOMAIN,
     47                                 state=EntityState.DRAFT),
     48                   state=EntityState.DRAFT, entity_type=EntityType.ITEM)
    4949
     50    def to_dict(self):
     51        base = super().to_dict()
     52        base.update({
     53            "text": self.text,
     54            "options": [opt.to_dict() for opt in self.options],
     55        })
     56        return base
     57
     58    @property
     59    def text_seed(self) -> str:
     60        """Include answer options (and points) for deterministic ID generation."""
     61
     62        joined = "|".join(
     63            f"{opt.text.strip()}:{opt.points}"
     64            for opt in sorted(self.options, key=lambda o: o.text.strip().lower())
     65        )
     66        return f"{self.text}{joined}"
     67
     68    @classmethod
     69    def from_dict(cls, data):
     70        obj = cls(
     71            text=data.get("text", ""),
     72            options=[AnswerOption.from_dict(o) for o in data.get("options", [])],
     73        )
     74        # restore FlexoEntity core fields
     75        obj.domain = data.get("domain")
     76        obj.entity_type = EntityType[data.get("etype")] if "etype" in data else EntityType.ITEM
     77        obj.state = EntityState[data.get("state")] if "state" in data else EntityState.DRAFT
     78        if "flexo_id" in data:
     79            from flexoentity import FlexOID
     80            obj.flexo_id = FlexOID.parsed(data["flexo_id"])
     81        return obj
    5082
    5183@pytest.fixture
    52 def deserialized_question(serialized_question):
    53     """Recreate a question from JSON for consistency tests."""
    54     return RadioQuestion.from_json(serialized_question)
    55 
     84def domain():
     85    return Domain.default()
    5686
    5787@pytest.fixture
    58 def null_media():
    59     """Provide a default NullMediaItem instance for media tests."""
    60     return NullMediaItem(
    61         domain=domain,
    62         etype=EntityType.MEDIA,
    63         state=EntityState.DRAFT
    64     )
     88def sample_question():
     89    return SingleChoiceQuestion(domain=Domain.default(),
     90                               text="What is 2 + 2?",
     91                               options=[],
     92                               entity_type=EntityType.ITEM,
     93                               state=EntityState.DRAFT)
  • tests/test_id_lifecycle.py

    rca39274 r8aa20c7  
    11import pytest
    2 from flexoentity import FlexOID, FlexoEntity, EntityType, EntityState
     2from flexoentity import FlexOID, FlexoEntity, EntityState
    33
    44
    55# ──────────────────────────────────────────────────────────────────────────────
    6 # Tests adapted to use real RadioQuestion fixture instead of DummyEntity
      6# Tests adapted to use the SingleChoiceQuestion stub (sample_question fixture) instead of DummyEntity
    77# ──────────────────────────────────────────────────────────────────────────────
    88
    9 def test_initial_state(radio_question):
    10     q = radio_question
     9def test_initial_state(sample_question):
     10    q = sample_question
    1111    assert q.state == EntityState.DRAFT
    1212    assert q.flexo_id.version == 1
     
    1414
    1515
    16 def test_approval_bumps_version(radio_question):
    17     q = radio_question
     16def test_approval_does_not_bump_version(sample_question):
     17    q = sample_question
    1818    q.approve()
    1919    assert q.state == EntityState.APPROVED
    20     assert q.flexo_id.version == 2
     20    assert q.flexo_id.version == 1
    2121
    2222
    23 def test_signing_bumps_version(radio_question):
    24     q = radio_question
     23def test_signing_bumps_version(sample_question):
     24    q = sample_question
    2525    q.approve()
    2626    v_before = str(q.flexo_id)
     
    3030
    3131
    32 def test_publish_bumps_version(radio_question):
    33     q = radio_question
     32def test_publish_bumps_version(sample_question):
     33    q = sample_question
    3434    q.approve()
    3535    q.sign()
     
    4040
    4141
    42 def test_modify_content_changes_fingerprint(radio_question):
    43     q = radio_question
    44     q.text = "Rephrased content"  # simulate text change
     42def test_modify_content_changes_fingerprint(sample_question):
     43    q = sample_question
     44    q.text += "Rephrased content"  # simulate text change
    4545    changed = q._update_fingerprint()
    4646    assert changed
    4747
    4848
    49 def test_no_version_bump_on_draft_edits(radio_question):
    50     q = radio_question
     49def test_no_version_bump_on_draft_edits(sample_question):
     50    q = sample_question
    5151    q.text = "Minor draft edit"
    5252    q._update_fingerprint()
     
    5454
    5555
    56 def test_version_bump_after_edit_and_sign(radio_question):
    57     q = radio_question
     56def test_version_bump_after_edit_and_sign(sample_question):
     57    q = sample_question
    5858    q.approve()
    5959    v1 = str(q.flexo_id)
     
    6363
    6464
    65 def test_integrity_check_passes_and_fails(radio_question):
    66     q = radio_question
     65def test_integrity_check_passes_and_fails(sample_question):
     66    q = sample_question
    6767    q.approve()
    6868    assert FlexoEntity.verify_integrity(q)
     
    7373
    7474
    75 def test_obsolete_state(radio_question):
    76     q = radio_question
     75def test_obsolete_state(sample_question):
     76    q = sample_question
    7777    q.approve()
    7878    q.sign()
     
    8282
    8383
    84 def test_clone_new_base_resets_lineage(radio_question):
    85     q = radio_question
     84def test_clone_new_base_resets_lineage(sample_question):
     85    q = sample_question
    8686    q.approve()
    8787    q.sign()
     
    9494    assert q.flexo_id.version == 1
    9595
    96 def test_clone_new_base_sets_origin(radio_question):
    97     q = radio_question
     96def test_clone_new_base_sets_origin(sample_question):
     97    q = sample_question
    9898    q.approve()
    9999    q.sign()
     
    107107    assert q.flexo_id != old_id
    108108
    109 def test_mass_version_increments_until_obsolete(radio_question):
    110     q = radio_question
     109def test_mass_version_increments_until_obsolete(sample_question):
     110    q = sample_question
    111111    q.approve()
    112     for _ in range(FlexOID.MAX_VERSION - 2):
     112    for _ in range(FlexOID.MAX_VERSION - 1):
    113113        q.bump_version()
    114114    with pytest.raises(RuntimeError, match="mark obsolete"):
  • tests/test_id_stress.py

    rca39274 r8aa20c7  
    44"""
    55
     6import copy
     7import logging
     8import random
     9
    610import pytest
    7 import random
    8 import logging
     11
    912from flexoentity import FlexOID, EntityType, EntityState
    10 from builder.questions import RadioQuestion, AnswerOption
    1113
    1214logger = logging.getLogger(__name__)
     
    1820    via salt + date adjustment.
    1921    """
    20     etype = EntityType.QUESTION
     22    entity_type = EntityType.ITEM
    2123    estate = EntityState.DRAFT
    2224    seeds = [f"question {i}" for i in range(100000)]
     
    3133    ids = []
    3234    for seed in seeds:
    33         oid = FlexOID.safe_generate(domain.domain, etype, estate, seed, repo=repo)
     35        oid = FlexOID.safe_generate(domain.domain, entity_type.value, estate.value, seed, repo=repo)
    3436        assert isinstance(oid, FlexOID)
    3537        ids.append(str(oid))
     
    4648
    4749    # Sanity check: IDs should look canonical
    48     assert all(id_str.startswith("SIG-") for id_str in ids)
     50    assert all(id_str.startswith("GEN") for id_str in ids)
    4951    assert all("@" in id_str for id_str in ids)
    5052
     
    5456    (No runtime disambiguation; IDs are deterministic by design.)
    5557    """
    56     etype = EntityType.QUESTION
     58    entity_type = EntityType.ITEM
    5759    estate = EntityState.DRAFT
    5860    text = "identical question text"
    5961
    60     id1 = FlexOID.generate(domain.domain, etype, estate, text)
    61     id2 = FlexOID.generate(domain.domain, etype, estate, text)
     62    id1 = FlexOID.generate(domain.domain, entity_type.value, estate.value, text)
     63    id2 = FlexOID.generate(domain.domain, entity_type.value, estate.value, text)
    6264    # IDs must be identical because generation is deterministic
    6365    assert id1 == id2
    6466
    6567
    66 def test_id_reproducibility_across_runs(domain):
    67     """
    68     The same seed on a new process (fresh _seen_hashes)
    69     should yield the same base ID (without suffix).
    70     """
    71     etype = EntityType.CATALOG
    72     estate = EntityState.DRAFT
    73     seed = "reproducibility test seed"
     68# def test_id_reproducibility_across_runs(domain):
     69#     """
     70#     The same seed on a new process (fresh _seen_hashes)
     71#     should yield the same base ID (without suffix).
     72#     """
     73#     entity_type = EntityType.CATALOG
     74#     estate = EntityState.DRAFT
     75#     seed = "reproducibility test seed"
    7476
    75     id1 = FlexOID.generate(domain.domain, etype, estate, seed)
    76     FlexOID._seen_hashes.clear()
    77     id2 = FlexOID.generate(domain.domain, etype, estate, seed)
     77#     id1 = FlexOID.generate(domain.domain, entity_type.value, estate.value, seed)
     78#     FlexOID._seen_hashes.clear()
     79#     id2 = FlexOID.generate(domain.domain, entity_type.value, estate.value, seed)
    7880
    79     assert id1 == id2
     81#     assert id1 == id2
    8082
    8183
    82 def test_version_ceiling_enforcement(radio_question):
     84def test_version_ceiling_enforcement(sample_question):
    8385    """Simulate approaching @999 to trigger obsolescence guard."""
    84     q = radio_question
     86    q = sample_question
    8587    q.approve()
    8688
     
    9799
    98100
    99 def test_massive_lifecycle_simulation(domain):
     101def test_massive_lifecycle_simulation(sample_question):
    100102    """
    101     Generate 100 random RadioQuestions, simulate multiple edits and state transitions,
      103    Generate 100 SingleChoiceQuestions as deep copies of the sample fixture, simulate multiple edits and state transitions,
    102104    ensure all final IDs and fingerprints are unique and valid.
    103105    """
    104106    entities = [
    105         RadioQuestion(
    106             domain=domain,
    107             etype=EntityType.QUESTION,
    108             state=EntityState.DRAFT,
    109             text=f"random question {i}",
    110             options=[
    111                 AnswerOption(id="opt4", text="HF (3–30 MHz)", points=1),
    112                 AnswerOption(id="opt5", text="VHF (30–300 MHz)", points=0),
    113             ],
    114         )
    115         for i in range(100)
     107        copy.deepcopy(sample_question) for _ in range(100)
    116108    ]
    117109
    118     for e in entities:
     110    for i, e in enumerate(entities):
    119111        # random edit
    120         e.text += " updated"
     112        e.text += f" updated #{i}"
    121113        e._update_fingerprint()
    122114
  • tests/test_persistance_integrity.py

    rca39274 r8aa20c7  
    66import pytest
    77
    8 from builder.questions import RadioQuestion, AnswerOption
    98from flexoentity import EntityState, EntityType, Domain
    109
    1110@pytest.fixture
    1211def approved_question():
    13     """Provide a fully approved and published RadioQuestion for persistence tests."""
    14     q = RadioQuestion(
    15         domain=Domain(domain="GEN", etype=EntityType.DOMAIN, state=EntityState.DRAFT),
    16         etype=None,  # RadioQuestion sets this internally to EntityType.QUESTION
     12    """Provide a fully approved and published SingleChoiceQuestion for persistence tests."""
     13    q = SingleChoiceQuestion(
     14        domain=Domain(domain="GEN", entity_type=EntityType.DOMAIN, state=EntityState.DRAFT),
     15        entity_type=None,  # SingleChoiceQuestion sets this internally to EntityType.ITEM
    1716        state=EntityState.DRAFT,
    1817        text="What is Ohm’s law?",
     
    3635    json_str = approved_question.to_json()
    3736    print("JSON", json_str)
    38     loaded = RadioQuestion.from_json(json_str)
     37    loaded = SingleChoiceQuestion.from_json(json_str)
    3938
    4039    print("Approved", approved_question.text_seed)
    4140    print("Loaded", loaded.text_seed)
    4241    # Fingerprint and state should match — integrity must pass
    43     assert RadioQuestion.verify_integrity(loaded)
     42    assert SingleChoiceQuestion.verify_integrity(loaded)
    4443
    4544    # Metadata should be preserved exactly
     
    5655    tampered_json = json.dumps(tampered)
    5756
    58     loaded = RadioQuestion.from_json(tampered_json)
    59     assert not RadioQuestion.verify_integrity(loaded)
     57    loaded = SingleChoiceQuestion.from_json(tampered_json)
     58    assert not SingleChoiceQuestion.verify_integrity(loaded)
    6059
    6160@pytest.mark.skip(reason="FlexOIDs regenerated on import; corruption detection not yet applicable")
     
    7069    file.write_text(corrupted)
    7170
    72     loaded = RadioQuestion.from_json(file.read_text())
    73     assert not RadioQuestion.verify_integrity(loaded)
     71    loaded = SingleChoiceQuestion.from_json(file.read_text())
     72    assert not SingleChoiceQuestion.verify_integrity(loaded)
Note: See TracChangeset for help on using the changeset viewer.