"""
id_factory.py — Central Flex-O ID generation and lifecycle logic.

I represent the identity backbone of the Flex-O ecosystem.
I define how entities receive, preserve, and evolve their unique identifiers
throughout their lifecycle.

Each Flex-O ID (FlexOID) is a self-contained string that encodes:

    <DOMAIN>-<ETYPE><YYMMDD>-<HASH>@<VERSION><STATE>

Example:
    GEN-ITEM251101-3139ACFAE38B@002A

where
    DOMAIN      — a short prefix identifying the origin domain (e.g., GEN)
    ETYPE       — a compact entity type code (e.g., ITEM)
    YYMMDD      — the UTC creation date
    HASH        — a 12-hex BLAKE2s digest derived from canonical content
    VERSION     — a three-digit lineage counter (001-999)
    STATE       — a single capital letter indicating lifecycle state

───────────────────────────────────────────────────────────────────────────────
Lifecycle semantics
───────────────────────────────────────────────────────────────────────────────

I distinguish clearly between *content evolution* and *workflow state*:

• Draft → Approved:
      I generate a fully new FlexOID whose hash and version reflect
      the new, stable content.  This step marks the transition from
      provisional to permanent identity.  No extra version bump is required.

• Approved → Signed → Published → Obsolete:
      These transitions do not change the content hash.
      They represent confidence or visibility, not content.
      The same FlexOID (same version) remains valid across them.

In short:
      Version changes record evolution of content.
      State changes record evolution of trust.

───────────────────────────────────────────────────────────────────────────────
Implementation notes
───────────────────────────────────────────────────────────────────────────────

• Hashing:  I use modern BLAKE2s (6-byte digest) for deterministic,
            collision-resistant identifiers.

• Canonicalization:  I flatten entity data into a reproducible text form
                     so equivalent content always yields the same hash.

• Generation:  I provide safe and deterministic factories
               (FlexOID.generate, FlexOID.safe_generate)
               that handle rare hash collisions by minimal salting.

• Validation:  Every FlexOID validates itself through a strict regex pattern
               and exposes lightweight accessors (domain, type, date, hash,
               version, state).

───────────────────────────────────────────────────────────────────────────────
Philosophy
───────────────────────────────────────────────────────────────────────────────

I strive to be deterministic, minimal, and auditable.
My design favors immutability and clarity over flexibility.
A FlexOID never lies about its lineage: once created, it is final.

Version marks evolution of content.
State marks evolution of confidence.
"""

import hashlib
import json
import re
import secrets
from datetime import date, datetime, timezone

from flexoentity import logger

def canonical_seed(obj) -> str:
    """
    I reduce *obj* to a reproducible text form that can be fed to a hash.

    Two pieces of data that differ only in irrelevant formatting
    should leave me as the same string.

    How I decide, in order:
    - A string has every run of whitespace collapsed to one space,
      and leading/trailing whitespace removed.
    - A dict is JSON-encoded with sorted keys and compact separators.
      (String values inside the dict are encoded as they are.)
    - Anything that exposes a ``__dict__`` is treated like that dict.
    - Everything else is passed through ``str()``.
    """
    # Text: split() without arguments drops all whitespace runs.
    if isinstance(obj, str):
        words = obj.split()
        return " ".join(words)

    # Mapping: sorted keys make the encoding independent of insertion order.
    if isinstance(obj, dict):
        return json.dumps(obj, separators=(",", ":"), sort_keys=True)

    # Plain object: fall back to its attribute dictionary when it has one.
    try:
        attributes = obj.__dict__
    except AttributeError:
        return str(obj)
    return canonical_seed(attributes)


class FlexOID(str):
    """
    I represent a canonical textual identifier within the Flex-O ecosystem.

    I am immutable and behave like a normal string, but I understand
    my own internal structure.  I can reveal my parts, create new
    versions, and switch between lifecycle states.

    My textual layout is::

        <DOMAIN>-<ETYPE><YYMMDD>-<HASH>@<VERSION><STATE>
    """

    # Highest version number that fits my three-digit version field.
    MAX_VERSION = 999
    # Versions above this value make `next_version` log a warning.
    WARN_THRESHOLD = 900
    # Upper bound on salted retries in `safe_generate`, so a misbehaving
    # repository can never trap me in an endless loop.
    _MAX_SALT_ATTEMPTS = 256

    # NOTE: this pattern is more lenient than the documented layout: the hash
    # group accepts any non-empty run of uppercase hex digits, and the version
    # group accepts "000".  It is kept unchanged so that every ID accepted
    # before is still accepted.
    OID_PATTERN = re.compile(
        r"^(?P<domain>[A-Z0-9]+)-"
        r"(?P<etype>[A-Z0-9]+)"
        r"(?P<date>\d{6})-"
        r"(?P<hash>[A-F0-9]+)@"
        r"(?P<version>\d{3})"
        r"(?P<state>[A-Z])$"
    )

    def __new__(cls, value: str) -> "FlexOID":
        """
        I create a new validated Flex-O ID from *value*.
        I verify that the given string matches the required pattern
        and remember the parsed match for later access.

        Raises:
            ValueError: if *value* does not match `OID_PATTERN` completely.
        """
        # fullmatch() instead of match(): with match() the trailing "$" also
        # succeeds just before a final newline, which let "…@002A\n" through
        # and stored the newline as part of the identifier.
        m = cls.OID_PATTERN.fullmatch(value)
        if m is None:
            raise ValueError(f"Invalid FlexOID format: {value}")
        obj = super().__new__(cls, value)
        obj._m = m
        return obj

    # ───────────────────────────────────────────
    # Parsed accessors
    # ───────────────────────────────────────────
    @property
    def domain(self) -> str:
        """I answer the domain prefix (e.g., 'GEN')."""
        return self._m.group("domain")

    @property
    def entity_type(self) -> str:
        """I answer the short entity-type code (e.g., 'ITEM')."""
        return self._m.group("etype")

    @property
    def date_str(self) -> str:
        """I answer the YYMMDD creation date as a string."""
        return self._m.group("date")

    @property
    def date(self) -> date:
        """
        I answer the creation date as a `datetime.date` instance.

        Raises:
            ValueError: if my six date digits are not a valid calendar date
                (the pattern only checks that they are digits).
        """
        return datetime.strptime(self.date_str, "%y%m%d").date()

    @property
    def hash_part(self) -> str:
        """I answer the hex digest between the date and the '@' (normally twelve digits)."""
        return self._m.group("hash")

    @property
    def version(self) -> int:
        """I answer my numeric version (1–999)."""
        return int(self._m.group("version"))

    @property
    def state_code(self) -> str:
        """I answer my one-letter lifecycle state code (e.g., 'D', 'A', 'S')."""
        return self._m.group("state")

    @property
    def prefix(self) -> str:
        """I answer everything before the '@' symbol — my immutable lineage prefix."""
        return self.split('@', 1)[0]

    # ───────────────────────────────────────────
    # Transformations
    # ───────────────────────────────────────────
    @staticmethod
    def _is_state_code(code) -> bool:
        """I tell whether *code* is exactly one ASCII capital letter (A-Z)."""
        # str.isalpha() alone would also admit lowercase and non-ASCII
        # letters, which OID_PATTERN rejects later with a less helpful message.
        return isinstance(code, str) and len(code) == 1 and "A" <= code <= "Z"

    def with_state(self, new_state: str) -> "FlexOID":
        """
        I create a copy of myself with the same version but a new state letter.
        The given *new_state* must be a single capital letter.

        Raises:
            ValueError: if *new_state* is not a single capital letter A-Z.
        """
        if not FlexOID._is_state_code(new_state):
            raise ValueError("State must be a single capital letter.")
        return FlexOID(f"{self.prefix}@{self.version:03d}{new_state}")

    def __repr__(self):
        """I display myself in a developer-friendly representation."""
        return f"FlexOID({str(self)})"

    # ───────────────────────────────────────────
    # Generation helpers
    # ───────────────────────────────────────────
    @staticmethod
    def _blake_hash(text: str) -> str:
        """
        I compute a 12-hex-digit BLAKE2s digest for *text*.

        I encode *text* as UTF-8, ask for a 6-byte digest and answer it
        as uppercase hexadecimal, so equal text always gives equal output.
        """
        return hashlib.blake2s(text.encode("utf-8"), digest_size=6).hexdigest().upper()

    @staticmethod
    def generate(domain: str, entity_type: str, estate: str, text: str, version: int = 1) -> "FlexOID":
        """
        I create a new deterministic Flex-O ID.

        I combine the domain, entity type, and canonicalized *text*
        into a stable BLAKE2s hash.  The hash therefore does not depend
        on the state or the version.  My date part is today's UTC date.

        Args:
            domain: domain code placed before the first '-'.
            entity_type: entity-type code placed before the date.
            estate: one capital letter naming the lifecycle state.
            text: the content to hash; it is passed through `canonical_seed`.
            version: lineage counter, 1..MAX_VERSION.

        Raises:
            ValueError: if *version* is out of range, *estate* is not a single
                capital letter, or the assembled string is not a valid FlexOID.
        """
        if version < 1:
            raise ValueError(f"Version {version} out of bounds (1..{FlexOID.MAX_VERSION}).")
        if version > FlexOID.MAX_VERSION:
            raise ValueError(f"Version {version} exceeds limit; mark obsolete.")

        if not FlexOID._is_state_code(estate):
            raise ValueError("estate must be a single capital letter.")

        date_part = datetime.now(timezone.utc).strftime("%y%m%d")
        hash_seed = canonical_seed(f"{domain}:{entity_type}:{canonical_seed(text)}")
        base_hash = FlexOID._blake_hash(hash_seed)
        return FlexOID(f"{domain}-{entity_type}{date_part}-{base_hash}@{version:03d}{estate}")

    @staticmethod
    def _lookup(repo, oid):
        """
        I fetch whatever *repo* stores under *oid*, or None when nothing is there.

        I prefer a ``get`` method and fall back to subscript access for
        repositories that only implement ``__getitem__``.
        """
        key = str(oid)
        getter = getattr(repo, "get", None)
        if callable(getter):
            return getter(key)
        try:
            return repo[key]
        except LookupError:
            return None

    @staticmethod
    def _same_seed(existing, text) -> bool:
        """
        I tell whether the stored entity *existing* was built from *text*.

        I compare against its ``text_seed`` attribute and against the result
        of its ``canonical_seed()`` method, whichever of the two it offers.
        """
        try:
            return bool(
                getattr(existing, "text_seed", None) == text
                or getattr(existing, "canonical_seed", lambda: None)() == canonical_seed(text)
            )
        except Exception:
            # Deliberate best effort: an entity I cannot compare is treated
            # as different content, which leads to a salted ID instead of
            # silently sharing one.
            return False

    @staticmethod
    def safe_generate(domain, entity_type, estate, text, version=1, repo=None):
        """
        I create a new deterministic ID like `generate`,
        but I also consult an optional *repo* to avoid hash collisions.

        If a different seed has already produced the same ID,
        I salt my seed with a counter (1, 2, 3, …) and regenerate until
        I find an ID that is free or already belongs to the same seed.
        The same repository contents therefore always give the same answer.

        Args:
            domain: a domain code, or an object whose ``domain`` attribute
                holds the code.
            entity_type, estate, text, version: as for `generate`.
            repo: optional lookup keyed by the full ID string; it needs a
                ``get`` method or subscript access.  (I assume it answers
                the stored entity or a falsy value — confirm against the
                repository implementation.)

        Raises:
            ValueError: for the same reasons as `generate`.
            RuntimeError: if no free ID is found within my retry limit.
        """
        domain_code = getattr(domain, "domain", domain)
        oid = FlexOID.generate(domain_code, entity_type, estate, text, version=version)

        if repo is None:
            return oid

        existing = FlexOID._lookup(repo, oid)
        if not existing or FlexOID._same_seed(existing, text):
            return oid

        logger.warning(f"FlexOID collision detected for {oid}")

        # Salt the canonical form, so equal content gives equal salted seeds
        # even when *text* is a dict whose keys arrive in a different order.
        base_text = canonical_seed(text)
        for attempt in range(1, FlexOID._MAX_SALT_ATTEMPTS + 1):
            salted_text = f"{base_text}|salt:{attempt}"
            candidate = FlexOID.generate(
                domain_code, entity_type, estate, salted_text, version=version
            )
            occupant = FlexOID._lookup(repo, candidate)
            # A salted ID may itself be taken; accept it only when it is free
            # or was handed out earlier for this very content.
            if not occupant or FlexOID._same_seed(occupant, text):
                return candidate

        raise RuntimeError(
            f"No collision-free FlexOID found for {oid} "
            f"after {FlexOID._MAX_SALT_ATTEMPTS} salted attempts."
        )

    @classmethod
    def next_version(cls, oid: "FlexOID") -> "FlexOID":
        """
        I create the next version within the same lineage.
        I increment the numeric version but keep the same prefix and state.

        I log a warning once the new version is above WARN_THRESHOLD,
        including the very last usable version.

        Raises:
            RuntimeError: if the new version would exceed MAX_VERSION.
        """
        new_ver = oid.version + 1
        if new_ver > cls.MAX_VERSION:
            raise RuntimeError(f"{oid} exceeded {cls.MAX_VERSION}; mark obsolete.")
        if new_ver > cls.WARN_THRESHOLD:
            logger.warning(f"{oid} approaching obsolescence ({new_ver}/{cls.MAX_VERSION}).")
        return FlexOID(f"{oid.prefix}@{new_ver:03d}{oid.state_code}")

    @classmethod
    def from_oid_and_version(cls, oid, version):
        """
        I recreate *oid* with an explicitly given version number.
        I keep its prefix and state, but replace the numeric counter.

        Raises:
            ValueError: if *version* is outside 1..MAX_VERSION.
        """
        if not (1 <= version <= cls.MAX_VERSION):
            raise ValueError(f"Version {version} out of bounds (1..{cls.MAX_VERSION}).")
        return FlexOID(f"{oid.prefix}@{version:03d}{oid.state_code}")

    @staticmethod
    def clone_new_base(domain: str, entity_type: str, estate: str, text: str) -> "FlexOID":
        """
        I start a completely new lineage (version 1) for a derived entity.
        I am used when copying or forking an existing object that should
        not share version history with its origin.

        I pass no repository along, so no collision check takes place here.
        """
        return FlexOID.safe_generate(domain, entity_type, estate, text, version=1)

    def parsed(self) -> dict:
        """
        I answer a dictionary that describes all my components —
        useful for debugging, serialization tests, or human inspection.

        The "date" entry is a `datetime.date`; building it raises
        ValueError when my date digits are not a valid calendar date.
        """
        return {
            "domain": self.domain,
            "entity_type": self.entity_type,
            "date": self.date,
            "hash": self.hash_part,
            "version": self.version,
            "state": self.state_code,
        }
