Source code for pasted._atoms

"""
pasted._atoms
=============
Atomic data (Z → symbol mapping, Pyykkö covalent radii) and all
input-parsing / validation helpers that do not depend on numpy.
"""

from __future__ import annotations

import math
from collections import Counter

# ---------------------------------------------------------------------------
# Atomic data  Z = 1–106
# ---------------------------------------------------------------------------

# Element symbols in order of increasing atomic number, grouped by period.
# The position in this tuple defines Z (the first entry is Z = 1), so the
# numbering derived below cannot drift out of step with the symbols.
_SYMBOLS_BY_Z = (
    # Period 1
    "H", "He",
    # Period 2
    "Li", "Be", "B", "C", "N", "O", "F", "Ne",
    # Period 3
    "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar",
    # Period 4
    "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co",
    "Ni", "Cu", "Zn", "Ga", "Ge", "As", "Se", "Br", "Kr",
    # Period 5
    "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh",
    "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I", "Xe",
    # Period 6 (lanthanides listed in place)
    "Cs", "Ba",
    "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd",
    "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu",
    "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg",
    "Tl", "Pb", "Bi", "Po", "At", "Rn",
    # Period 7, up to Sg (actinides listed in place)
    "Fr", "Ra",
    "Ac", "Th", "Pa", "U", "Np", "Pu", "Am", "Cm",
    "Bk", "Cf", "Es", "Fm", "Md", "No", "Lr",
    "Rf", "Db", "Sg",
)

# Element symbol → atomic number, in ascending-Z insertion order.
ATOMIC_NUMBERS: dict[str, int] = {
    symbol: z for z, symbol in enumerate(_SYMBOLS_BY_Z, start=1)
}

# Reverse lookup (atomic number → symbol) and the ascending list of supported Z.
_Z_TO_SYM: dict[int, str] = dict(zip(ATOMIC_NUMBERS.values(), ATOMIC_NUMBERS.keys()))
_ALL_Z: list[int] = sorted(_Z_TO_SYM.keys())

# Metric names accepted in the METRIC field of a "METRIC:MIN:MAX" filter.
ALL_METRICS: frozenset[str] = frozenset(
    (
        "H_atom", "H_spatial", "H_total",
        "RDF_dev",
        "shape_aniso",
        "Q4", "Q6", "Q8",
        "graph_lcc", "graph_cc",
        # MM-level structural descriptors (added in 0.1.9)
        "ring_fraction", "charge_frustration", "moran_I_chi",
    )
)

# ---------------------------------------------------------------------------
# Pyykkö single-bond covalent radii (Å), Z = 1–86
# Reference: Pyykkö & Atsumi, Chem. Eur. J. 15 (2009) 186–197
# ---------------------------------------------------------------------------

# Element symbol → single-bond covalent radius in Å, laid out one period
# (or part of a period) per group of lines.
_COV_RADII_ANG: dict[str, float] = {
    # Period 1
    "H": 0.32, "He": 0.46,
    # Period 2
    "Li": 1.33, "Be": 1.02, "B": 0.85, "C": 0.75,
    "N": 0.71, "O": 0.63, "F": 0.64, "Ne": 0.67,
    # Period 3
    "Na": 1.55, "Mg": 1.39, "Al": 1.26, "Si": 1.16,
    "P": 1.11, "S": 1.03, "Cl": 0.99, "Ar": 0.96,
    # Period 4
    "K": 1.96, "Ca": 1.71, "Sc": 1.48, "Ti": 1.36, "V": 1.34, "Cr": 1.22,
    "Mn": 1.19, "Fe": 1.16, "Co": 1.11, "Ni": 1.10, "Cu": 1.12, "Zn": 1.18,
    # Ge is 1.21 Å in the cited Pyykkö & Atsumi table; the previous 1.24
    # duplicated the neighbouring Ga value.
    "Ga": 1.24, "Ge": 1.21, "As": 1.21, "Se": 1.16, "Br": 1.14, "Kr": 1.17,
    # Period 5
    "Rb": 2.10, "Sr": 1.85, "Y": 1.63, "Zr": 1.54, "Nb": 1.47, "Mo": 1.38,
    "Tc": 1.28, "Ru": 1.25, "Rh": 1.25, "Pd": 1.20, "Ag": 1.28, "Cd": 1.36,
    "In": 1.42, "Sn": 1.40, "Sb": 1.40, "Te": 1.36, "I": 1.33, "Xe": 1.31,
    # Period 6: s-block and lanthanides
    "Cs": 2.32, "Ba": 1.96,
    "La": 1.80, "Ce": 1.63, "Pr": 1.76, "Nd": 1.74, "Pm": 1.73,
    "Sm": 1.72, "Eu": 1.68, "Gd": 1.69, "Tb": 1.68, "Dy": 1.67,
    "Ho": 1.66, "Er": 1.65, "Tm": 1.64, "Yb": 1.70, "Lu": 1.62,
    # Period 6: d- and p-block
    "Hf": 1.52, "Ta": 1.46, "W": 1.37, "Re": 1.31, "Os": 1.29,
    "Ir": 1.22, "Pt": 1.23, "Au": 1.24, "Hg": 1.33,
    "Tl": 1.44, "Pb": 1.44, "Bi": 1.51, "Po": 1.45, "At": 1.47, "Rn": 1.42,
}

# Elements with Z > 86 have no entry in _COV_RADII_ANG.  Each of them borrows
# the radius of a lighter stand-in element: the next lighter member of the same
# group, or, for the actinides Th–Lr, the lanthanide in the matching position
# (Ce–Lu).  The two tuples below are aligned column by column.
# NOTE(review): the paper cited for the radius table is believed to tabulate
# elements beyond Rn as well — confirm whether direct values should replace
# these proxies.
_COV_RADII_PROXY: dict[str, str] = dict(
    zip(
        # Heavy element ...
        (
            "Fr", "Ra", "Ac",                               # groups 1, 2, 3
            "Th", "Pa", "U", "Np", "Pu", "Am", "Cm",        # actinides
            "Bk", "Cf", "Es", "Fm", "Md", "No", "Lr",
            "Rf", "Db", "Sg",                               # period-7 d-block
        ),
        # ... and the lighter element whose radius it uses.
        (
            "Cs", "Ba", "La",
            "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd",       # lanthanides
            "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu",
            "Hf", "Ta", "W",                                # period-6 d-block
        ),
    )
)



[docs]
def cov_radius_ang(sym: str) -> float:
    """Look up the single-bond covalent radius (Å) used for element *sym*.

    Resolution order:

    1. the value tabulated for *sym* itself, when there is one;
    2. otherwise the value tabulated for the lighter stand-in element
       registered for *sym* (e.g. Fr → Cs, U → Nd, Rf → Hf);
    3. otherwise a flat 1.50.

    Parameters
    ----------
    sym:
        Element symbol; the lookup is an exact, case-sensitive match.

    Returns
    -------
    float
        Radius in Å.
    """
    if sym in _COV_RADII_ANG:
        return _COV_RADII_ANG[sym]
    if sym in _COV_RADII_PROXY:
        stand_in = _COV_RADII_PROXY[sym]
        return _COV_RADII_ANG[stand_in]
    # Symbol is in neither table: ultimate fallback (should never be reached
    # for Z ≤ 106).
    return 1.50



# Keep the private alias used in other modules that were not yet updated.
_cov_radius_ang = cov_radius_ang


# ---------------------------------------------------------------------------
# Pauling electronegativity  (Allen scale not used — Pauling per user choice)
# Reference: Pauling, L. The Nature of the Chemical Bond, 3rd ed. (1960).
#            IUPAC 2016 recommended values used for updates.
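# All supported elements (Z = 1–106) have an explicit entry in the table below,
# including assigned values for the noble gases and the actinides; a symbol
# that is not in the table falls back to 1.0.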
# ---------------------------------------------------------------------------

# Element symbol → electronegativity on the Pauling scale.  The first sixteen
# rows cover Z = 1–85 without the noble gases, five elements per row in order
# of increasing Z; the heavier elements and the noble gases follow in
# separately commented groups.
_PAULING_EN: dict[str, float] = {
    "H": 2.20, "Li": 0.98, "Be": 1.57, "B": 2.04, "C": 2.55,
    "N": 3.04, "O": 3.44, "F": 3.98, "Na": 0.93, "Mg": 1.31,
    "Al": 1.61, "Si": 1.90, "P": 2.19, "S": 2.58, "Cl": 3.16,
    "K": 0.82, "Ca": 1.00, "Sc": 1.36, "Ti": 1.54, "V": 1.63,
    "Cr": 1.66, "Mn": 1.55, "Fe": 1.83, "Co": 1.88, "Ni": 1.91,
    "Cu": 1.90, "Zn": 1.65, "Ga": 1.81, "Ge": 2.01, "As": 2.18,
    "Se": 2.55, "Br": 2.96, "Rb": 0.82, "Sr": 0.95, "Y": 1.22,
    "Zr": 1.33, "Nb": 1.60, "Mo": 2.16, "Tc": 1.90, "Ru": 2.20,
    "Rh": 2.28, "Pd": 2.20, "Ag": 1.93, "Cd": 1.69, "In": 1.78,
    "Sn": 1.96, "Sb": 2.05, "Te": 2.10, "I": 2.66, "Cs": 0.79,
    "Ba": 0.89, "La": 1.10, "Ce": 1.12, "Pr": 1.13, "Nd": 1.14,
    "Pm": 1.13, "Sm": 1.17, "Eu": 1.20, "Gd": 1.20, "Tb": 1.10,
    "Dy": 1.22, "Ho": 1.23, "Er": 1.24, "Tm": 1.25, "Yb": 1.10,
    "Lu": 1.27, "Hf": 1.30, "Ta": 1.50, "W": 2.36, "Re": 1.90,
    "Os": 2.20, "Ir": 2.20, "Pt": 2.28, "Au": 2.54, "Hg": 2.00,
    "Tl": 1.62, "Pb": 2.33, "Bi": 2.02, "Po": 2.00, "At": 2.20,
    # Z > 86: values below are assigned rather than taken from the main table.
    # Fr, Ra: alkali / alkaline-earth metals.
    # NOTE(review): 0.70 and 0.90 are close to, but not equal to, the Cs (0.79)
    # and Ba (0.89) entries above — confirm the source of these two values.
    "Fr": 0.70, "Ra": 0.90,
    # Ac-series (Ac–Lr).
    # NOTE(review): only Ac equals its lanthanide analogue (La = 1.10); the
    # other entries differ from the Ce–Lu values above, so they do not appear
    # to be copied from the lanthanides — confirm where they come from.
    "Ac": 1.10, "Th": 1.30, "Pa": 1.50, "U": 1.38, "Np": 1.36,
    "Pu": 1.28, "Am": 1.13, "Cm": 1.28, "Bk": 1.30, "Cf": 1.30,
    "Es": 1.30, "Fm": 1.30, "Md": 1.30, "No": 1.30, "Lr": 1.30,
    # Period-7 d-block: no published values — each entry repeats its Period-6
    # analogue (Rf = Hf, Db = Ta, Sg = W).
    "Rf": 1.30, "Db": 1.50, "Sg": 2.36,
    # Noble gases: He, Ne, Ar — no experimental Pauling value;
    # assigned 4.0 (the largest value in this table) to model complete
    # resistance to electron donation.
    # Kr and Xe can form compounds (e.g. XeF2, KrF2) and have literature
    # estimates on the Allen/Allred-Rochow scale: Kr ≈ 3.0, Xe ≈ 2.6.
    # Rn: no reliable data; conservatively set to 4.0.
    "He": 4.0, "Ne": 4.0, "Ar": 4.0, "Kr": 3.0, "Xe": 2.6, "Rn": 4.0,
}

#: Fallback Pauling electronegativity returned for any symbol that is not a
#: key of the table above (all noble gases, including Kr and Xe, have explicit
#: entries and therefore never use it).
PAULING_EN_FALLBACK: float = 1.0



[docs]
def pauling_electronegativity(sym: str) -> float:
    """Look up the Pauling electronegativity tabulated for element *sym*.

    Values follow Pauling (1960) with IUPAC 2016 updates.  The noble gases
    have assigned values: He, Ne, Ar and Rn are set to 4.0 to model complete
    resistance to electron donation, while Kr (≈ 3.0) and Xe (≈ 2.6) use
    literature estimates from the Allen / Allred-Rochow scale, reflecting
    their known tendency to form compounds (KrF₂, XeF₂, etc.).

    Parameters
    ----------
    sym:
        Element symbol (case-sensitive, e.g. ``"Fe"``).

    Returns
    -------
    float
        The tabulated value when *sym* is in the table (for the noble
        gases: 4.0 for He, Ne, Ar and Rn, 3.0 for Kr, 2.6 for Xe);
        otherwise :data:`PAULING_EN_FALLBACK` (1.0).
    """
    if sym in _PAULING_EN:
        return _PAULING_EN[sym]
    return PAULING_EN_FALLBACK


# ---------------------------------------------------------------------------
# Element-pool helpers
# ---------------------------------------------------------------------------



[docs]
def parse_element_spec(spec: str) -> list[str]:
    """Parse an atomic-number spec string into a sorted list of element symbols.

    Syntax
    ------
    ``"1-30"``       Z = 1 through 30
    ``"6,7,8"``      Z = 6, 7, 8
    ``"1-10,26,28"`` Z = 1–10 plus Z = 26 and 28

    Whitespace around tokens is ignored, empty tokens (e.g. a trailing
    comma) are skipped, and duplicates / overlapping ranges are merged.

    Parameters
    ----------
    spec:
        Comma-separated list of atomic numbers and/or ``LO-HI`` ranges.

    Returns
    -------
    list[str]
        Element symbols, ordered by increasing atomic number, no duplicates.

    Raises
    ------
    ValueError
        On malformed input (non-integer token, reversed range), on
        unsupported Z values, or when the spec selects no element at all.
    """

    def unsupported(z: int) -> ValueError:
        # Single place that formats the "out of table" error.
        return ValueError(
            f"Z={z} not supported (supported range: {_ALL_Z[0]}–{_ALL_Z[-1]})"
        )

    z_set: set[int] = set()
    for raw in spec.split(","):
        token = raw.strip()
        if not token:
            continue
        # "LO-HI" splits at the first dash; a bare "Z" leaves `dash` empty.
        lo_s, dash, hi_s = token.partition("-")
        try:
            lo = int(lo_s)
            hi = int(hi_s) if dash else lo
        except ValueError:
            raise ValueError(
                f"Invalid token {token!r}: expected an atomic number 'Z' "
                f"or a range 'LO-HI'."
            ) from None
        if lo > hi:
            raise ValueError(f"Range {token!r}: lower > upper.")
        # Check the endpoints *before* expanding the range, so that a typo
        # such as "1-9999999999" fails immediately instead of first
        # materialising billions of integers.
        for z in (lo, hi):
            if z not in _Z_TO_SYM:
                raise unsupported(z)
        z_set.update(range(lo, hi + 1))

    if not z_set:
        raise ValueError("Element specification resolved to an empty pool.")

    symbols: list[str] = []
    for z in sorted(z_set):
        # Interior values of a range are re-checked in case the table ever
        # has gaps between its smallest and largest Z.
        if z not in _Z_TO_SYM:
            raise unsupported(z)
        symbols.append(_Z_TO_SYM[z])
    return symbols




[docs]
def default_element_pool() -> list[str]:
    """Return the symbol of every supported element, in ascending-Z order.

    A new list is built on each call, so callers may modify the result.
    """
    return list(map(_Z_TO_SYM.__getitem__, _ALL_Z))



# ---------------------------------------------------------------------------
# Range / filter parsers
# ---------------------------------------------------------------------------



[docs]
def parse_lo_hi(s: str, name: str = "range") -> tuple[float, float]:
    """Parse ``"LO:HI"`` → ``(float, float)``.

    Parameters
    ----------
    s:
        Text of the form ``"LO:HI"``; each bound is anything ``float()``
        accepts except NaN (so ``inf`` / ``-inf`` are allowed).
    name:
        Option name used in error messages (rendered as ``--name``).

    Returns
    -------
    tuple[float, float]
        ``(lo, hi)`` exactly as given; the two bounds are not compared
        with each other.

    Raises
    ------
    ValueError
        If *s* does not contain exactly one ``":"``, or if a bound is not
        a number or is NaN.
    """
    parts = s.split(":")
    if len(parts) != 2:
        raise ValueError(f"--{name} must be 'LO:HI', got {s!r}")
    try:
        lo, hi = float(parts[0]), float(parts[1])
    except ValueError:
        # Re-raise with the option name so the user can tell which argument
        # was malformed.
        raise ValueError(
            f"--{name} must be 'LO:HI' with numeric bounds, got {s!r}"
        ) from None
    # NaN compares False against everything, so it can never act as a bound.
    if math.isnan(lo) or math.isnan(hi):
        raise ValueError(f"--{name} bounds must not be NaN, got {s!r}")
    return lo, hi




[docs]
def parse_int_range(s: str) -> tuple[int, int]:
    """Parse ``"MIN:MAX"`` into a pair of integers.

    Returns
    -------
    tuple[int, int]
        ``(MIN, MAX)`` with ``1 ≤ MIN ≤ MAX``.

    Raises
    ------
    ValueError
        If *s* does not have exactly two ``":"``-separated fields, if a
        field is not an integer, or if the ordering constraint is violated.
    """
    pieces = s.split(":")
    if len(pieces) != 2:
        raise ValueError(f"Must be 'MIN:MAX', got {s!r}")
    minimum, maximum = (int(piece) for piece in pieces)
    if not 1 <= minimum <= maximum:
        raise ValueError(f"MIN must be ≥ 1 and ≤ MAX, got {s!r}")
    return minimum, maximum




[docs]
def parse_filter(f: str) -> tuple[str, float, float]:
    """Parse ``"METRIC:MIN:MAX"`` → ``(metric, lo, hi)``.

    Use ``"-"`` for an open bound: an open MIN becomes ``-inf`` and an open
    MAX becomes ``+inf``.

    Parameters
    ----------
    f:
        Filter specification.  METRIC must be one of :data:`ALL_METRICS`
        (exact match); MIN and MAX are numbers or ``"-"``.

    Returns
    -------
    tuple[str, float, float]
        ``(metric, lo, hi)`` with ``lo ≤ hi``.

    Raises
    ------
    ValueError
        On unknown metric or malformed string (wrong number of fields,
        non-numeric or NaN bound, MIN > MAX).
    """
    parts = f.split(":")
    if len(parts) != 3:
        raise ValueError(f"Expected 'METRIC:MIN:MAX', got {f!r}")
    metric, lo_s, hi_s = parts
    if metric not in ALL_METRICS:
        raise ValueError(f"Unknown metric {metric!r}. Valid metrics: {sorted(ALL_METRICS)}")
    try:
        lo = -math.inf if lo_s.strip() == "-" else float(lo_s)
        hi = math.inf if hi_s.strip() == "-" else float(hi_s)
    except ValueError:
        # Re-raise with the full filter text instead of float()'s bare message.
        raise ValueError(f"Filter {f!r}: MIN and MAX must be numbers or '-'.") from None
    # NaN makes every comparison False, so it would pass the MIN > MAX check
    # below and yield a filter that can never match; reject it explicitly.
    if math.isnan(lo) or math.isnan(hi):
        raise ValueError(f"Filter {f!r}: NaN is not a valid bound.")
    if lo > hi:
        raise ValueError(f"Filter {f!r}: MIN > MAX.")
    return metric, lo, hi



# ---------------------------------------------------------------------------
# Charge / multiplicity validation
# ---------------------------------------------------------------------------



[docs]
def validate_charge_mult(atoms_list: list[str], charge: int, mult: int) -> tuple[bool, str]:
    """Check electron count and spin multiplicity for *atoms_list*.

    The checks, in order:

    1. the system has at least one electron (``sum(Z) - charge > 0``);
    2. the multiplicity is at least 1;
    3. the number of unpaired electrons (``mult - 1``) has the same parity
       as the electron count;
    4. the number of unpaired electrons does not exceed the electron count.

    Parameters
    ----------
    atoms_list:
        Element symbols, one per atom (repeats allowed).
    charge:
        Total charge of the system.
    mult:
        Spin multiplicity (2S + 1).

    Returns
    -------
    (ok, message)
        *ok* is ``True`` when every check passes; *message* is then a
        one-line summary of the system.  When a check fails, *ok* is
        ``False`` and *message* names the failing condition.

    Raises
    ------
    KeyError
        If *atoms_list* contains a symbol that is not in ``ATOMIC_NUMBERS``.
    """
    total_z = sum(ATOMIC_NUMBERS[a] for a in atoms_list)
    n_e = total_z - charge
    if n_e <= 0:
        return False, (f"n_electrons={n_e} (total_Z={total_z}, charge={charge:+d}).")
    # Without this guard mult=0 gives n_up=-1, and Python's (-1 % 2) == 1
    # would let it pass the parity test for any odd electron count.
    if mult < 1:
        return False, f"invalid multiplicity: mult={mult} (must be ≥ 1)."
    n_up = mult - 1
    if (n_e % 2) != (n_up % 2):
        return False, (
            f"parity mismatch: n_electrons={n_e} (charge={charge:+d}), "
            f"mult={mult} → n_unpaired={n_up}."
        )
    # Matching parity is not sufficient: there cannot be more unpaired
    # electrons than electrons (e.g. a one-electron system with mult=4).
    if n_up > n_e:
        return False, (
            f"too many unpaired electrons: mult={mult} → n_unpaired={n_up} "
            f"> n_electrons={n_e}."
        )
    comp = " ".join(f"{s}:{c}" for s, c in sorted(Counter(atoms_list).items()))
    return True, (
        f"n_atoms={len(atoms_list)} total_Z={total_z} "
        f"n_electrons={n_e} charge={charge:+d} mult={mult} comp=[{comp}]"
    )