Changeset 02d288d in flexoentity for tests/test_id_stress.py


Ignore:
Timestamp:
10/23/25 13:27:08 (3 months ago)
Author:
Enrico Schwass <ennoausberlin@…>
Branches:
master
Children:
4ceca57
Parents:
6a7dec1
Message:

improve hash generation and collision handler - move signature from FlexOID to FlexoEntity

File:
1 edited

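Legend:

Unmodified (row shows the old and the new line number run together, e.g. `56import pytest` = old line 5, new line 6)
Added (row shows only the new line number, immediately followed by the text)
Removed (row shows only the old line number, followed by a space and the text)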
  • tests/test_id_stress.py

    r6a7dec1 r02d288d  
    33Focus: collision avoidance, version ceiling, reproducibility.
    44"""
     5
    56import pytest
    67import random
     8import logging
     9from flexoentity import FlexOID, EntityType, EntityState
     10from builder.questions import RadioQuestion, AnswerOption
    711
    8 from flexoentity import FlexOID, EntityType, EntityState, Domain
     12logger = logging.getLogger(__name__)
    913
    10 from tests.conftest import DummyEntity
    11 
    12 # ──────────────────────────────────────────────────────────────────────────────
    13 def test_bulk_generation_uniqueness():
    14     """Generate 10,000 IDs and assert uniqueness (statistical test)."""
    15     domain = Domain(domain="SIG", etype=EntityType.DOMAIN, state=EntityState.DRAFT,
    16                     fullname="Signal Corps", classification="RESTRICTED", owner="MESE")
    17 
     14def test_bulk_generation_uniqueness(domain):
     15    """
     16    Generate 10,000 IDs and ensure uniqueness using safe_generate().
     17    If a collision occurs, safe_generate() must resolve it automatically
     18    via salt + date adjustment.
     19    """
    1820    etype = EntityType.QUESTION
    1921    estate = EntityState.DRAFT
    20     seeds = [f"question {i}" for i in range(10_000)]
     22    seeds = [f"question {i}" for i in range(4000000)]
    2123
    22     ids = [FlexOID.generate(domain, etype, estate, seed) for seed in seeds]
     24    # Simulate a simple in-memory repository for collision detection
     25    repo = {}
    2326
    24     assert len(ids) == len(set(ids)), "ID collisions detected in bulk generation"
     27    def repo_get(oid_str):
     28        return repo.get(str(oid_str))
    2529
     30    # Generate IDs using safe_generate
     31    ids = []
     32    for seed in seeds:
     33        oid = FlexOID.safe_generate(domain.domain, etype, estate, seed, repo=repo)
     34        assert isinstance(oid, FlexOID)
     35        ids.append(str(oid))
     36        repo[str(oid)] = oid  # register for future collision detection
    2637
    27 def test_disambiguator_trigger():
     38    unique_count = len(set(ids))
     39    total_count = len(ids)
     40    collisions = total_count - unique_count
     41
     42    logger.info(f"Generated {total_count} IDs ({collisions} collisions handled).")
     43
     44    # Assert that safe_generate avoided duplicates
     45    assert total_count == unique_count, f"Unexpected duplicate IDs ({collisions} found)"
     46
     47    # Sanity check: IDs should look canonical
     48    assert all(id_str.startswith("SIG-") for id_str in ids)
     49    assert all("@" in id_str for id_str in ids)
     50
     51def test_id_generation_is_deterministic(domain):
    2852    """
    2953    Generating the same entity twice with same inputs yields identical ID.
    3054    (No runtime disambiguation; IDs are deterministic by design.)
    3155    """
    32     domain = "AF"
    3356    etype = EntityType.QUESTION
    3457    estate = EntityState.DRAFT
    3558    text = "identical question text"
    36     id1 = FlexOID.generate(domain, etype, estate, text)
    37     id2 = FlexOID.generate(domain, etype, estate, text)
    38     # IDs must be identical, because we now enforce determinism, not randomization
     59
     60    id1 = FlexOID.generate(domain.domain, etype, estate, text)
     61    id2 = FlexOID.generate(domain.domain, etype, estate, text)
     62    # IDs must be identical because generation is deterministic
    3963    assert id1 == id2
    40     assert id1.signature == id2.signature
    4164
    4265
    43 def test_id_reproducibility_across_runs():
     66def test_id_reproducibility_across_runs(domain):
    4467    """
    4568    The same seed on a new process (fresh _seen_hashes)
    4669    should yield the same base ID (without suffix).
    4770    """
    48     domain = Domain(domain="SIG", etype=EntityType.DOMAIN, state=EntityState.DRAFT,
    49                     fullname="Signal Corps", classification="RESTRICTED")
    5071    etype = EntityType.CATALOG
    5172    estate = EntityState.DRAFT
    5273    seed = "reproducibility test seed"
    53     id1 = FlexOID.generate(domain, etype, estate, seed)
    54     # Reset hash cache
     74
     75    id1 = FlexOID.generate(domain.domain, etype, estate, seed)
    5576    FlexOID._seen_hashes.clear()
    56     id2 = FlexOID.generate(domain, etype, estate, seed)
     77    id2 = FlexOID.generate(domain.domain, etype, estate, seed)
     78
    5779    assert id1 == id2
    58     assert id1.signature == id2.signature
    5980
    6081
    61 def test_version_ceiling_enforcement():
     82def test_version_ceiling_enforcement(radio_question):
    6283    """Simulate approaching @999 to trigger obsolescence guard."""
    63     entity = DummyEntity(domain="AF", etype=EntityType.EXAM, state=EntityState.DRAFT, seed="Final Exam 2025")
    64     entity.approve()
     84    q = radio_question
     85    q.approve()
     86
    6587    # artificially bump version number to near ceiling
    66     entity.flexo_id = FlexOID.from_oid_and_version(entity.flexo_id, 998)
     88    q.flexo_id = FlexOID.from_oid_and_version(q.flexo_id, 998)
    6789
    6890    # 998 → 999 is allowed
    69     entity.sign()
    70     assert entity.flexo_id.version == 999
     91    q.sign()
     92    assert q.flexo_id.version == 999
    7193
    7294    # 999 → 1000 should raise RuntimeError
    7395    with pytest.raises(RuntimeError):
    74         entity.sign()
     96        q.sign()
    7597
    7698
    77 def test_massive_lifecycle_simulation():
     99def test_massive_lifecycle_simulation(domain):
    78100    """
    79     Generate 100 random entities, simulate multiple edits and state transitions,
     101    Generate 100 random RadioQuestions, simulate multiple edits and state transitions,
    80102    ensure all final IDs and fingerprints are unique and valid.
    81103    """
    82     entities = [DummyEntity(domain="AF", etype=EntityType.QUESTION, state=EntityState.DRAFT, seed=f"random question {i}") for i in range(100)]
     104    entities = [
     105        RadioQuestion(
     106            domain=domain,
     107            etype=EntityType.QUESTION,
     108            state=EntityState.DRAFT,
     109            text=f"random question {i}",
     110            options=[
     111                AnswerOption(id="opt4", text="HF (3–30 MHz)", points=1),
     112                AnswerOption(id="opt5", text="VHF (30–300 MHz)", points=0),
     113            ],
     114        )
     115        for i in range(100)
     116    ]
    83117
    84118    for e in entities:
    85         # random edit, approval, signing
    86         e._seed += " updated"
     119        # random edit
     120        e.text += " updated"
    87121        e._update_fingerprint()
     122
     123        # lifecycle transitions
    88124        e.approve()
    89125        if random.random() > 0.3:
     …        (old lines 90–91 / new lines 126–127 are unmodified and not shown in this view)
    92128            e.publish()
    93129
    94     ids = [e.flexo_id for e in entities]
    95     fps = [e.flexo_id.signature for e in entities]
    96     assert len(ids) == len(set(ids)), "Duplicate IDs after random lifecycle"
    97     assert len(fps) == len(set(fps)), "Duplicate fingerprints after random lifecycle"
     130    flexoids = [e.flexo_id for e in entities]
     131    assert len(flexoids) == len(set(flexoids)), "Duplicate FlexOIDs after lifecycle simulation"
Note: See TracChangeset for help on using the changeset viewer.