Source code for pasted._atoms

"""
pasted._atoms
=============
Atomic data (Z → symbol mapping, Pyykkö covalent radii) and all
input-parsing / validation helpers that do not depend on numpy.
"""

from __future__ import annotations

import math
from collections import Counter

# ---------------------------------------------------------------------------
# Atomic data  Z = 1–106
# ---------------------------------------------------------------------------

# Symbol → atomic number.  The symbols are listed one period per line in order
# of increasing Z, so a symbol's 1-based position in the sequence is its Z.
ATOMIC_NUMBERS: dict[str, int] = {
    symbol: z
    for z, symbol in enumerate(
        (
            "H He "
            "Li Be B C N O F Ne "
            "Na Mg Al Si P S Cl Ar "
            "K Ca Sc Ti V Cr Mn Fe Co Ni Cu Zn Ga Ge As Se Br Kr "
            "Rb Sr Y Zr Nb Mo Tc Ru Rh Pd Ag Cd In Sn Sb Te I Xe "
            "Cs Ba La Ce Pr Nd Pm Sm Eu Gd Tb Dy Ho Er Tm Yb Lu "
            "Hf Ta W Re Os Ir Pt Au Hg Tl Pb Bi Po At Rn "
            "Fr Ra Ac Th Pa U Np Pu Am Cm Bk Cf Es Fm Md No Lr "
            "Rf Db Sg"
        ).split(),
        start=1,
    )
}

# Reverse lookup (atomic number → symbol) and the ascending list of every
# supported atomic number.
_Z_TO_SYM: dict[int, str] = dict(zip(ATOMIC_NUMBERS.values(), ATOMIC_NUMBERS))
_ALL_Z: list[int] = sorted(ATOMIC_NUMBERS.values())

# Names of every metric accepted by the filter parser.
ALL_METRICS: frozenset[str] = frozenset(
    (
        "H_atom",
        "H_spatial",
        "H_total",
        "RDF_dev",
        "shape_aniso",
        "Q4",
        "Q6",
        "Q8",
        "graph_lcc",
        "graph_cc",
        # MM-level structural descriptors (added in 0.1.9)
        "ring_fraction",
        "charge_frustration",
        "moran_I_chi",
    )
)

# ---------------------------------------------------------------------------
# Pyykkö single-bond covalent radii (Å), Z = 1–86
# Reference: Pyykkö & Atsumi, Chem. Eur. J. 15 (2009) 186–197
# ---------------------------------------------------------------------------

_COV_RADII_ANG: dict[str, float] = {
    "H": 0.32,
    "He": 0.46,
    "Li": 1.33,
    "Be": 1.02,
    "B": 0.85,
    "C": 0.75,
    "N": 0.71,
    "O": 0.63,
    "F": 0.64,
    "Ne": 0.67,
    "Na": 1.55,
    "Mg": 1.39,
    "Al": 1.26,
    "Si": 1.16,
    "P": 1.11,
    "S": 1.03,
    "Cl": 0.99,
    "Ar": 0.96,
    "K": 1.96,
    "Ca": 1.71,
    "Sc": 1.48,
    "Ti": 1.36,
    "V": 1.34,
    "Cr": 1.22,
    "Mn": 1.19,
    "Fe": 1.16,
    "Co": 1.11,
    "Ni": 1.10,
    "Cu": 1.12,
    "Zn": 1.18,
    "Ga": 1.24,
    # Corrected from 1.24 (which duplicated Ga): the cited Pyykkö & Atsumi
    # table gives 121 pm for Ge.
    "Ge": 1.21,
    "As": 1.21,
    "Se": 1.16,
    "Br": 1.14,
    "Kr": 1.17,
    "Rb": 2.10,
    "Sr": 1.85,
    "Y": 1.63,
    "Zr": 1.54,
    "Nb": 1.47,
    "Mo": 1.38,
    "Tc": 1.28,
    "Ru": 1.25,
    "Rh": 1.25,
    "Pd": 1.20,
    "Ag": 1.28,
    "Cd": 1.36,
    "In": 1.42,
    "Sn": 1.40,
    "Sb": 1.40,
    "Te": 1.36,
    "I": 1.33,
    "Xe": 1.31,
    "Cs": 2.32,
    "Ba": 1.96,
    "La": 1.80,
    "Ce": 1.63,
    "Pr": 1.76,
    "Nd": 1.74,
    "Pm": 1.73,
    "Sm": 1.72,
    "Eu": 1.68,
    "Gd": 1.69,
    "Tb": 1.68,
    "Dy": 1.67,
    "Ho": 1.66,
    "Er": 1.65,
    "Tm": 1.64,
    "Yb": 1.70,
    "Lu": 1.62,
    "Hf": 1.52,
    "Ta": 1.46,
    "W": 1.37,
    "Re": 1.31,
    "Os": 1.29,
    "Ir": 1.22,
    "Pt": 1.23,
    "Au": 1.24,
    "Hg": 1.33,
    "Tl": 1.44,
    "Pb": 1.44,
    "Bi": 1.51,
    "Po": 1.45,
    "At": 1.47,
    "Rn": 1.42,
}

# Z > 86: no radii are tabulated in this module.
# Proxy: same-group nearest lighter element.
_COV_RADII_PROXY: dict[str, str] = {
    "Fr": "Cs",  # group  1
    "Ra": "Ba",  # group  2
    "Ac": "La",  # group  3
    # Actinides (Th–Lr) → corresponding lanthanides (Ce–Lu)
    "Th": "Ce",
    "Pa": "Pr",
    "U": "Nd",
    "Np": "Pm",
    "Pu": "Sm",
    "Am": "Eu",
    "Cm": "Gd",
    "Bk": "Tb",
    "Cf": "Dy",
    "Es": "Ho",
    "Fm": "Er",
    "Md": "Tm",
    "No": "Yb",
    "Lr": "Lu",
    # Period-7 d-block → Period-6 d-block (same group)
    "Rf": "Hf",
    "Db": "Ta",
    "Sg": "W",
}


def cov_radius_ang(sym: str) -> float:
    """Return the Pyykkö single-bond covalent radius in Å for *sym*.

    Lookup order:

    1. the tabulated radius for *sym* (Z = 1–86);
    2. for Z > 86, the radius of the same-group nearest lighter element
       (e.g. Fr → Cs, U → Nd, Rf → Hf);
    3. a constant 1.50 Å for any symbol found in neither table.

    Parameters
    ----------
    sym:
        Element symbol (case-sensitive, e.g. ``"Fe"``).

    Returns
    -------
    float
        Covalent radius in Å.
    """
    r = _COV_RADII_ANG.get(sym)
    if r is not None:
        return r
    proxy = _COV_RADII_PROXY.get(sym)
    if proxy is not None:
        return _COV_RADII_ANG[proxy]
    return 1.50  # ultimate fallback (should never be reached for Z ≤ 106)
# Keep the private alias used in other modules that were not yet updated.
_cov_radius_ang = cov_radius_ang

# ---------------------------------------------------------------------------
# Pauling electronegativity (Allen scale not used — Pauling per user choice)
# Reference: Pauling, L. The Nature of the Chemical Bond, 3rd ed. (1960).
# IUPAC 2016 recommended values used for updates.
# Noble gases and Z > 86 elements have explicit modelling/proxy entries
# below; only symbols absent from this table fall back to
# PAULING_EN_FALLBACK (1.0).
# ---------------------------------------------------------------------------

_PAULING_EN: dict[str, float] = {
    "H": 2.20,
    "Li": 0.98,
    "Be": 1.57,
    "B": 2.04,
    "C": 2.55,
    "N": 3.04,
    "O": 3.44,
    "F": 3.98,
    "Na": 0.93,
    "Mg": 1.31,
    "Al": 1.61,
    "Si": 1.90,
    "P": 2.19,
    "S": 2.58,
    "Cl": 3.16,
    "K": 0.82,
    "Ca": 1.00,
    "Sc": 1.36,
    "Ti": 1.54,
    "V": 1.63,
    "Cr": 1.66,
    "Mn": 1.55,
    "Fe": 1.83,
    "Co": 1.88,
    "Ni": 1.91,
    "Cu": 1.90,
    "Zn": 1.65,
    "Ga": 1.81,
    "Ge": 2.01,
    "As": 2.18,
    "Se": 2.55,
    "Br": 2.96,
    "Rb": 0.82,
    "Sr": 0.95,
    "Y": 1.22,
    "Zr": 1.33,
    "Nb": 1.60,
    "Mo": 2.16,
    "Tc": 1.90,
    "Ru": 2.20,
    "Rh": 2.28,
    "Pd": 2.20,
    "Ag": 1.93,
    "Cd": 1.69,
    "In": 1.78,
    "Sn": 1.96,
    "Sb": 2.05,
    "Te": 2.10,
    "I": 2.66,
    "Cs": 0.79,
    "Ba": 0.89,
    "La": 1.10,
    "Ce": 1.12,
    "Pr": 1.13,
    "Nd": 1.14,
    "Pm": 1.13,
    "Sm": 1.17,
    "Eu": 1.20,
    "Gd": 1.20,
    "Tb": 1.10,
    "Dy": 1.22,
    "Ho": 1.23,
    "Er": 1.24,
    "Tm": 1.25,
    "Yb": 1.10,
    "Lu": 1.27,
    "Hf": 1.30,
    "Ta": 1.50,
    "W": 2.36,
    "Re": 1.90,
    "Os": 2.20,
    "Ir": 2.20,
    "Pt": 2.28,
    "Au": 2.54,
    "Hg": 2.00,
    "Tl": 1.62,
    "Pb": 2.33,
    "Bi": 2.02,
    "Po": 2.00,
    "At": 2.20,
    # Z > 86: no reliable Pauling values — use group-based proxies
    # Fr, Ra: alkali / alkaline-earth entries.
    # NOTE(review): these are close to, but not equal to, the Cs (0.79) and
    # Ba (0.89) entries above — confirm the intended source of 0.70 / 0.90.
    "Fr": 0.70,
    "Ra": 0.90,
    # Ac-series: approximate from lanthanide analogues
    "Ac": 1.10,
    "Th": 1.30,
    "Pa": 1.50,
    "U": 1.38,
    "Np": 1.36,
    "Pu": 1.28,
    "Am": 1.13,
    "Cm": 1.28,
    "Bk": 1.30,
    "Cf": 1.30,
    "Es": 1.30,
    "Fm": 1.30,
    "Md": 1.30,
    "No": 1.30,
    "Lr": 1.30,
    # Period-7 d-block: no published values — use Period-6 analogue
    "Rf": 1.30,
    "Db": 1.50,
    "Sg": 2.36,
    # Noble gases: He, Ne, Ar — no experimental Pauling value;
    # assigned 4.0 (maximum) to model complete resistance to electron donation.
    # Kr and Xe can form compounds (e.g. XeF2, KrF2) and have literature
    # estimates on the Allen/Allred-Rochow scale: Kr ≈ 3.0, Xe ≈ 2.6.
    # Rn: no reliable data; conservatively set to 4.0.
    "He": 4.0,
    "Ne": 4.0,
    "Ar": 4.0,
    "Kr": 3.0,
    "Xe": 2.6,
    "Rn": 4.0,
}

#: Fallback Pauling electronegativity for elements without a literature value
#: (any symbol not in the table; Kr/Xe/other noble gases have explicit entries).
PAULING_EN_FALLBACK: float = 1.0
def pauling_electronegativity(sym: str) -> float:
    """Return the Pauling electronegativity for element *sym*.

    Values follow Pauling (1960) with IUPAC 2016 updates.  Noble gases
    with no known compounds (He, Ne, Ar, Rn) are assigned 4.0 to model
    complete resistance to electron donation.  Kr (≈ 3.0) and Xe (≈ 2.6)
    use literature estimates from the Allen / Allred-Rochow scale,
    reflecting their known tendency to form compounds (KrF₂, XeF₂, etc.).
    Any symbol not in the table returns :data:`PAULING_EN_FALLBACK` (1.0).

    Parameters
    ----------
    sym:
        Element symbol (case-sensitive, e.g. ``"Fe"``).

    Returns
    -------
    float
        Pauling electronegativity.  He, Ne, Ar and Rn return 4.0, Kr
        returns 3.0 and Xe returns 2.6; any symbol missing from the
        table returns 1.0.
    """
    return _PAULING_EN.get(sym, PAULING_EN_FALLBACK)
# ---------------------------------------------------------------------------
# Element-pool helpers
# ---------------------------------------------------------------------------
def parse_element_spec(spec: str) -> list[str]:
    """Parse an atomic-number spec string into a sorted list of element symbols.

    Syntax
    ------
    ``"1-30"``        Z = 1 through 30
    ``"6,7,8"``       Z = 6, 7, 8
    ``"1-10,26,28"``  Z = 1–10 plus Z = 26 and 28

    Empty tokens (e.g. from a trailing comma) are ignored and duplicate
    atomic numbers are collapsed.

    Parameters
    ----------
    spec:
        Comma-separated list of atomic numbers and/or ``LO-HI`` ranges.

    Returns
    -------
    list[str]
        Element symbols ordered by increasing atomic number.

    Raises
    ------
    ValueError
        On malformed input, a reversed range, unsupported Z values, or a
        spec that selects no element at all.
    """
    z_set: set[int] = set()
    for raw_token in spec.split(","):
        token = raw_token.strip()
        if not token:
            continue
        try:
            if "-" in token:
                lo_s, hi_s = token.split("-", 1)
                lo, hi = int(lo_s), int(hi_s)
            else:
                lo = hi = int(token)
        except ValueError:
            # Re-raise with the offending token instead of the bare int() text.
            raise ValueError(
                f"Malformed token {token!r} in element specification {spec!r}."
            ) from None
        if lo > hi:
            raise ValueError(f"Range {token!r}: lower > upper.")
        z_set.update(range(lo, hi + 1))

    symbols: list[str] = []
    for z in sorted(z_set):
        if z not in _Z_TO_SYM:
            raise ValueError(
                f"Z={z} not supported (supported range: {_ALL_Z[0]}–{_ALL_Z[-1]})"
            )
        symbols.append(_Z_TO_SYM[z])

    if not symbols:
        raise ValueError("Element specification resolved to an empty pool.")
    return symbols
def default_element_pool() -> list[str]:
    """Return all supported element symbols (Z = 1–106), sorted by Z.

    Returns
    -------
    list[str]
        A new list with one symbol per entry of ``_ALL_Z``, in that order.
    """
    return [_Z_TO_SYM[z] for z in _ALL_Z]
# ---------------------------------------------------------------------------
# Range / filter parsers
# ---------------------------------------------------------------------------
def parse_lo_hi(s: str, name: str = "range") -> tuple[float, float]:
    """Parse ``"LO:HI"`` → ``(float, float)``.

    Parameters
    ----------
    s:
        Text of the form ``"LO:HI"``.
    name:
        Option name used in error messages (rendered as ``--name``).

    Returns
    -------
    tuple[float, float]
        ``(lo, hi)`` exactly as given; no ordering check is applied.

    Raises
    ------
    ValueError
        If *s* does not contain exactly one ``":"`` or a bound is not a
        valid float literal.
    """
    parts = s.split(":")
    if len(parts) != 2:
        raise ValueError(f"--{name} must be 'LO:HI', got {s!r}")
    try:
        return float(parts[0]), float(parts[1])
    except ValueError:
        # Name the option so CLI users can tell which argument was wrong.
        raise ValueError(f"--{name} bounds must be numbers, got {s!r}") from None
def parse_int_range(s: str) -> tuple[int, int]:
    """Parse ``"MIN:MAX"`` → ``(int, int)`` with MIN ≥ 1 and MIN ≤ MAX.

    Parameters
    ----------
    s:
        Text of the form ``"MIN:MAX"``.

    Returns
    -------
    tuple[int, int]
        ``(lo, hi)`` with ``1 <= lo <= hi``.

    Raises
    ------
    ValueError
        If *s* does not contain exactly one ``":"``, a bound is not an
        integer literal, MIN < 1, or MIN > MAX.
    """
    parts = s.split(":")
    if len(parts) != 2:
        raise ValueError(f"Must be 'MIN:MAX', got {s!r}")
    try:
        lo, hi = int(parts[0]), int(parts[1])
    except ValueError:
        # Replace the bare int() message with one that shows the full input.
        raise ValueError(f"MIN and MAX must be integers, got {s!r}") from None
    if lo < 1 or lo > hi:
        raise ValueError(f"MIN must be ≥ 1 and ≤ MAX, got {s!r}")
    return lo, hi
def parse_filter(f: str) -> tuple[str, float, float]:
    """Parse ``"METRIC:MIN:MAX"`` → ``(metric, lo, hi)``.

    Use ``"-"`` for an open bound: an open MIN becomes ``-inf`` and an
    open MAX becomes ``+inf``.

    Parameters
    ----------
    f:
        Filter text of the form ``"METRIC:MIN:MAX"``.

    Returns
    -------
    tuple[str, float, float]
        The metric name and its lower and upper bounds.

    Raises
    ------
    ValueError
        On unknown metric, malformed string, non-numeric or NaN bounds,
        or MIN > MAX.
    """
    parts = f.split(":")
    if len(parts) != 3:
        raise ValueError(f"Expected 'METRIC:MIN:MAX', got {f!r}")
    metric, lo_s, hi_s = parts
    if metric not in ALL_METRICS:
        raise ValueError(f"Unknown metric {metric!r}. Valid metrics: {sorted(ALL_METRICS)}")
    try:
        lo = -math.inf if lo_s.strip() == "-" else float(lo_s)
        hi = math.inf if hi_s.strip() == "-" else float(hi_s)
    except ValueError:
        raise ValueError(f"Filter {f!r}: MIN and MAX must be numbers or '-'.") from None
    # NaN compares False against everything, so it would slip past the
    # MIN > MAX check below; reject it explicitly.
    if math.isnan(lo) or math.isnan(hi):
        raise ValueError(f"Filter {f!r}: NaN is not a valid bound.")
    if lo > hi:
        raise ValueError(f"Filter {f!r}: MIN > MAX.")
    return metric, lo, hi
# ---------------------------------------------------------------------------
# Charge / multiplicity validation
# ---------------------------------------------------------------------------
def validate_charge_mult(atoms_list: list[str], charge: int, mult: int) -> tuple[bool, str]:
    """Check electron count and spin-parity for *atoms_list*.

    The electron count is the sum of atomic numbers minus *charge*; the
    number of unpaired electrons implied by *mult* is ``mult - 1``.

    Parameters
    ----------
    atoms_list:
        Element symbols, one per atom (repeats allowed).
    charge:
        Total charge of the system.
    mult:
        Spin multiplicity (2S + 1).

    Returns
    -------
    (ok, message)
        *ok* is ``True`` when all conditions pass: at least one electron,
        ``mult >= 1``, matching parity between the electron count and the
        unpaired count, and no more unpaired electrons than electrons.
        *message* is a summary on success or the reason for failure.

    Raises
    ------
    KeyError
        If a symbol in *atoms_list* is not a key of ``ATOMIC_NUMBERS``.
    """
    total_z = sum(ATOMIC_NUMBERS[a] for a in atoms_list)
    n_e = total_z - charge
    if n_e <= 0:
        return False, f"n_electrons={n_e} (total_Z={total_z}, charge={charge:+d})."

    # A multiplicity below 1 is meaningless, but could still pass the parity
    # test below because Python's % maps negative values to 0 or 1.
    if mult < 1:
        return False, f"mult={mult} is invalid: multiplicity must be ≥ 1."

    n_up = mult - 1
    if (n_e % 2) != (n_up % 2):
        return False, (
            f"parity mismatch: n_electrons={n_e} (charge={charge:+d}), "
            f"mult={mult} → n_unpaired={n_up}."
        )

    # Matching parity is not sufficient: there cannot be more unpaired
    # electrons than electrons (e.g. a lone H atom with mult=4).
    if n_up > n_e:
        return False, (
            f"mult={mult} → n_unpaired={n_up} exceeds n_electrons={n_e} "
            f"(charge={charge:+d})."
        )

    comp = " ".join(f"{s}:{c}" for s, c in sorted(Counter(atoms_list).items()))
    return True, (
        f"n_atoms={len(atoms_list)}  total_Z={total_z}  "
        f"n_electrons={n_e}  charge={charge:+d}  mult={mult}  comp=[{comp}]"
    )