"""
id_factory.py — Central Flex-O ID generation and lifecycle logic.

I represent the identity backbone of the Flex-O ecosystem.
I define how entities receive, preserve, and evolve their unique identifiers
throughout their lifecycle.

Each Flex-O ID (FlexOID) is a self-contained string that encodes:

    <DOMAIN_ID>-<ENTITY_TYPE><YYMMDD>-<HASH>@<VERSION><STATE>

Example:
    GEN-ITEM251101-3139ACFAE38B@002A

where
    DOMAIN_ID   — a prefix identifying the origin domain (e.g., PY_ARITHM)
    ENTITY_TYPE — a compact entity type single letter code (e.g., "I" for ITEM)
    YYMMDD      — the UTC creation date
    HASH        — a random cryptographic nonce ensuring global uniqueness
    VERSION     — a three-digit lineage counter (001-999)
    STATE       — a single capital letter indicating lifecycle state

───────────────────────────────────────────────────────────────────────────────
Lifecycle semantics
───────────────────────────────────────────────────────────────────────────────

I distinguish clearly between *content evolution* and *workflow state*:

• Draft → Approved:
      I generate a fully new FlexOID
      This step marks the transition from provisional to permanent identity.
      No extra version bump is required.

• Approved → Signed → Published → Obsolete:
      These transitions do not change the content hash.
      They represent confidence or visibility, not content.
      The same FlexOID (same version) remains valid across them.

In short:
      Version changes record evolution of content.
      State changes record evolution of trust.

───────────────────────────────────────────────────────────────────────────────
Implementation notes
───────────────────────────────────────────────────────────────────────────────

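• Hashing:  The HASH part of a newly generated ID is a random 48-bit nonce
            (12 hex digits) drawn from `secrets`; identity is independent
            of content.  A BLAKE2s helper (6-byte digest) remains available
            for deterministic hashing of text.

• Canonicalization:  I flatten seed text into a reproducible form
                     (whitespace runs collapsed, ends trimmed) so equivalent
                     text always canonicalizes to the same string.

• Generation:  I provide factories (FlexOID.generate, FlexOID.safe_generate)
               that create random identities; safe_generate simply retries
               in the rare case that a new ID already exists in the repo.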

• Validation:  Every FlexOID validates itself through a strict regex pattern
               and exposes lightweight accessors (domain_id, entity_type,
               date, hash_part, version, state_code).

───────────────────────────────────────────────────────────────────────────────
Philosophy
───────────────────────────────────────────────────────────────────────────────

I strive to be deterministic, minimal, and auditable.
My design favors immutability and clarity over flexibility.
A FlexOID never lies about its lineage: once created, it is final.

Version marks evolution of content.
State marks evolution of confidence.
"""

import re
import hashlib
import secrets
import json
from datetime import datetime, timezone
from flexoentity import logger

def canonical_seed(text: str) -> str:
    """Answer *text* in canonical identity-seed form.

    Every run of whitespace (spaces, tabs, carriage returns, newlines, ...)
    becomes a single space, and the result carries no leading or trailing
    whitespace.  Falsy input (empty string, None) yields "".
    """
    if text:
        # split() without arguments cuts on whitespace runs and drops the
        # empty pieces at both ends, so joining with one space both
        # collapses and trims in a single pass.
        return " ".join(text.split())
    return ""

class FlexOID(str):
    """
    I represent a canonical textual identifier within the Flex-O ecosystem.
    I am immutable and behave like a normal string, but I understand
    my own internal structure.  I can reveal my parts, create new
    versions, and switch between lifecycle states.

    My textual layout is

        <DOMAIN_ID>-<ENTITY_TYPE><YYMMDD>-<HASH>@<VERSION><STATE>

    and every instance has been checked against OID_PATTERN on creation.
    """

    # Highest version number that fits the three-digit counter.
    MAX_VERSION = 999
    # next_version() logs a warning once the new version exceeds this value.
    WARN_THRESHOLD = 900

    # NOTE(review): this pattern is more lenient than from_strings(): it
    # accepts multi-character entity types (letters and digits) and the
    # version "000".  Left as is so that existing IDs keep parsing.
    OID_PATTERN = re.compile(
        r"^(?P<domain_id>[A-Z0-9_]+)-"
        r"(?P<entity_type>[A-Z0-9]+)"
        r"(?P<date>\d{6})-"
        r"(?P<hash>[A-F0-9]+)@"
        r"(?P<version>\d{3})"
        r"(?P<state>[A-Z])$"
    )

    @classmethod
    def from_strings(cls, domain_id, entity_type, date, hash_part, version, state):
        """
        Construct a FlexOID from raw string components only.
        All parameters must already be strings in the exact expected format.
        No coercions, no guesses, no defaults.

        Required formats:
           domain_id:    [A-Z0-9_]+
           entity_type:  [A-Z]
           date:         YYMMDD (6 digits)
           hash_part:    uppercase hex, length >= 1
           version:      001..999 as 3-digit string
           state:        single capital letter

        Raises:
            ValueError: if any component is not a string or does not match
                its required format, or if the version is outside
                1..MAX_VERSION.
        """

        # Domain format
        if not isinstance(domain_id, str) or not re.fullmatch(r"[A-Z0-9_]+", domain_id):
            raise ValueError(f"Invalid domain_id string: {domain_id}")

        # Entity type format
        if not isinstance(entity_type, str) or not re.fullmatch(r"[A-Z]", entity_type):
            raise ValueError(f"Invalid entity_type string: {entity_type}")

        # Date format
        if not isinstance(date, str) or not re.fullmatch(r"\d{6}", date):
            raise ValueError(f"Invalid date string: {date}")

        # Hash format
        if not isinstance(hash_part, str) or not re.fullmatch(r"[A-F0-9]+", hash_part):
            raise ValueError(f"Invalid hash string: {hash_part}")

        # Version
        if not isinstance(version, str) or not re.fullmatch(r"\d{3}", version):
            raise ValueError(f"Invalid version string: {version}")

        version_int = int(version)
        if not 1 <= version_int <= cls.MAX_VERSION:
            raise ValueError(f"Version {version} out of range.")

        # State
        if not isinstance(state, str) or not re.fullmatch(r"[A-Z]", state):
            raise ValueError(f"Invalid state: {state}")

        oid_str = f"{domain_id}-{entity_type}{date}-{hash_part}@{version}{state}"
        return cls(oid_str)

    @classmethod
    def from_dict(cls, d):
        """
        I rebuild a FlexOID from a mapping with the keys
        'domain_id', 'entity_type', 'date', 'hash', 'version' and 'state'.

        Besides the strict string forms required by from_strings(), I also
        accept the two non-string shapes that to_dict() produces, so that
        ``FlexOID.from_dict(oid.to_dict())`` round-trips:

           date:     a `datetime.date` (rendered as YYMMDD)
           version:  an int (rendered as a zero-padded 3-digit string)

        Everything else is passed to from_strings() untouched.

        NOTE(review): from_strings() only accepts single-letter entity
        types, so an ID whose entity type is longer still cannot be
        rebuilt this way.

        Raises:
            ValueError: if a key is missing or a component is invalid.
        """
        # Imported locally under an alias: `date` is also the name of one
        # of my properties and of a from_strings() parameter.
        from datetime import date as _date

        try:
            domain_id = d["domain_id"]
            entity_type = d["entity_type"]
            date_value = d["date"]
            hash_part = d["hash"]
            version_value = d["version"]
            state = d["state"]
        except KeyError as e:
            raise ValueError(f"Missing required FlexOID field: {e}") from e

        if isinstance(date_value, _date):
            date_value = date_value.strftime("%y%m%d")

        # bool is a subclass of int; leave it alone so from_strings() rejects it.
        if isinstance(version_value, int) and not isinstance(version_value, bool):
            # Out-of-range numbers are still caught by from_strings().
            version_value = f"{version_value:03d}"

        return cls.from_strings(
            domain_id=domain_id,
            entity_type=entity_type,
            date=date_value,
            hash_part=hash_part,
            version=version_value,
            state=state,
        )

    def __new__(cls, value: str):
        """
        I create a new validated Flex-O ID from *value*.
        I verify that the given string matches the required pattern
        and remember the parsed match for later access.

        Raises:
            ValueError: if *value* does not match OID_PATTERN.
        """
        # fullmatch() rather than match(): with match(), the trailing "$"
        # also succeeds just before a final newline, which would let
        # "...@001A\n" through and keep the newline inside the ID.
        m = cls.OID_PATTERN.fullmatch(value)
        if not m:
            raise ValueError(f"Invalid FlexOID format: {value}")
        obj = super().__new__(cls, value)
        obj._m = m
        return obj

    def __reduce__(self):
        """
        I describe how to rebuild me for pickling: from my plain string value.
        The cached match object in `_m` cannot be pickled, so I let the
        constructor re-parse the string instead of shipping my __dict__.
        """
        return (type(self), (str(self),))

    # ───────────────────────────────────────────
    # Parsed accessors
    # ───────────────────────────────────────────
    @property
    def domain_id(self) -> str:
        """I answer the domain prefix (e.g., 'PY_ARITH')."""
        return self._m.group("domain_id")

    @property
    def entity_type(self) -> str:
        """I answer the entity-type code that precedes the date (e.g., 'ITEM')."""
        return self._m.group("entity_type")

    @property
    def date_str(self) -> str:
        """I answer the YYMMDD creation date as a string."""
        return self._m.group("date")

    @property
    def date(self):
        """
        I answer the creation date as a `datetime.date` instance.

        The pattern only guarantees six digits, so this raises ValueError
        when they do not form a valid calendar date.
        """
        return datetime.strptime(self.date_str, "%y%m%d").date()

    @property
    def hash_part(self) -> str:
        """
        I answer the uppercase-hex part between the date and the '@'.
        IDs created by generate() carry a 12-digit random nonce here.
        """
        return self._m.group("hash")

    @property
    def version(self) -> int:
        """I answer my numeric version (the three digits after the '@')."""
        return int(self._m.group("version"))

    @property
    def state_code(self) -> str:
        """I answer my one-letter lifecycle state code (e.g., 'D', 'A', 'S')."""
        return self._m.group("state")

    @property
    def prefix(self) -> str:
        """I answer everything before the '@' symbol — my immutable lineage prefix."""
        return self.split('@', 1)[0]

    # ───────────────────────────────────────────
    # Transformations
    # ───────────────────────────────────────────
    def with_state(self, new_state: str) -> "FlexOID":
        """
        I create a copy of myself with the same version but a new state letter.
        The given *new_state* must be a single capital letter.

        Raises:
            ValueError: if *new_state* is not exactly one letter A-Z.
        """
        # Checked against the same character class OID_PATTERN uses, so a
        # lowercase or non-ASCII letter is reported as a bad state instead
        # of surfacing later as a generic "Invalid FlexOID format".
        if not (isinstance(new_state, str) and re.fullmatch(r"[A-Z]", new_state)):
            raise ValueError("State must be a single capital letter.")
        return FlexOID(f"{self.prefix}@{self.version:03d}{new_state}")

    def __repr__(self):
        """I display myself in a developer-friendly representation."""
        return f"FlexOID({str(self)})"

    # ───────────────────────────────────────────
    # Generation helpers
    # ───────────────────────────────────────────
    @staticmethod
    def _blake_hash(text: str) -> str:
        """
        I compute a 12-hex-digit BLAKE2s digest for *text*.

        My output is deterministic: the same text always yields the same
        uppercase hex string.  generate() does not call me; it uses a
        random nonce instead.
        """

        return hashlib.blake2s(text.encode("utf-8"), digest_size=6).hexdigest().upper()

    @staticmethod
    def generate(domain: str, entity_type: str, state: str, text: str = "", version: int = 1) -> "FlexOID":
        """
        I create a new Flex-O ID.

        Identity is independent from content.
        The hash part is now a cryptographic random nonce.

        Args:
            domain: domain prefix; validated by the FlexOID constructor.
            entity_type: entity-type code; validated by the constructor.
            state: single capital letter for the lifecycle state.
            text: accepted for backward compatibility; not used.
            version: version number, 1..MAX_VERSION.

        Raises:
            ValueError: if version or state is invalid, or if the assembled
                string does not match OID_PATTERN.
        """

        if version > FlexOID.MAX_VERSION:
            raise ValueError(f"Version {version} exceeds limit; mark obsolete.")
        if version < 1:
            raise ValueError(f"Version {version} out of bounds (1..{FlexOID.MAX_VERSION}).")

        if not (isinstance(state, str) and re.fullmatch(r"[A-Z]", state)):
            raise ValueError("state must be a single capital letter.")

        date_part = datetime.now(timezone.utc).strftime("%y%m%d")

        # 12 hex characters = 48-bit nonce (same visible size as before)
        nonce = secrets.token_hex(6).upper()

        return FlexOID(f"{domain}-{entity_type}{date_part}-{nonce}@{version:03d}{state}")

    @staticmethod
    def safe_generate(domain_id, entity_type, state, text="", version=1, repo=None):
        """
        Generate a random identity and retry only if it already exists in repo.

        Args:
            domain_id, entity_type, state, version: passed on to generate().
            text: passed on to generate(), which does not use it.
            repo: optional collection of existing IDs.  If it has a `get`
                method, I call ``repo.get(str(oid))`` and treat a truthy
                result as "already taken"; otherwise I fall back to
                ``oid in repo``.  With repo=None no collision check is made.
        """

        if repo is None:
            return FlexOID.generate(domain_id, entity_type, state, text=text, version=version)

        # Decide once how to ask the repo whether an ID is taken.
        has_get = hasattr(repo, "get")

        while True:
            oid = FlexOID.generate(domain_id, entity_type, state, text=text, version=version)
            taken = repo.get(str(oid)) if has_get else (oid in repo)
            if not taken:
                return oid

    @classmethod
    def next_version(cls, oid: "FlexOID") -> "FlexOID":
        """
        I create the next version within the same lineage.
        I increment the numeric version but keep the same prefix and state.

        I log a warning when the new version is above WARN_THRESHOLD
        (including MAX_VERSION itself, the last one available).

        Raises:
            RuntimeError: if the new version would exceed MAX_VERSION.
        """
        new_ver = oid.version + 1
        if new_ver > cls.MAX_VERSION:
            raise RuntimeError(f"{oid} exceeded {cls.MAX_VERSION}; mark obsolete.")
        if new_ver > cls.WARN_THRESHOLD:
            logger.warning(f"{oid} approaching obsolescence ({new_ver}/{cls.MAX_VERSION}).")
        return FlexOID(f"{oid.prefix}@{new_ver:03d}{oid.state_code}")

    @classmethod
    def from_oid_and_version(cls, oid, version):
        """
        I recreate *oid* with an explicitly given version number.
        I keep its prefix and state, but replace the numeric counter.

        Raises:
            ValueError: if *version* is outside 1..MAX_VERSION.
        """
        if not (1 <= version <= cls.MAX_VERSION):
            raise ValueError(f"Version {version} out of bounds (1..{cls.MAX_VERSION}).")
        return FlexOID(f"{oid.prefix}@{version:03d}{oid.state_code}")

    @staticmethod
    def clone_new_base(domain_id: str, entity_type: str, state: str, text: str) -> "FlexOID":
        """
        I start a completely new lineage (version 1) for a derived entity.
        I am used when copying or forking an existing object that should
        not share version history with its origin.
        """
        return FlexOID.safe_generate(domain_id, entity_type, state, text, version=1)

    def to_dict(self) -> dict:
        """
        I answer a dictionary that describes all my components —
        useful for debugging, serialization tests, or human inspection.

        'date' is a `datetime.date` and 'version' is an int; all other
        values are strings.  from_dict() accepts this shape.
        """
        return {
            "domain_id": self.domain_id,
            "entity_type": self.entity_type,
            "date": self.date,
            "hash": self.hash_part,
            "version": self.version,
            "state": self.state_code,
        }
