Source code for pasted._io

"""
pasted._io
==========
XYZ format serialisation helpers.
"""

from __future__ import annotations

import math
import re
from collections import Counter
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from ._placement import Vec3


def _fmt(v: float) -> str:
    """Format a metric value: ``nan`` as the string ``'nan'``, else 4 d.p."""
    return "nan" if math.isnan(v) else f"{v:.4f}"


[docs] def format_xyz( atoms: list[str], positions: list[Vec3], charge: int, mult: int, metrics: dict[str, float], prefix: str = "", ) -> str: """Serialise a structure to the extended XYZ format. The second line (comment line) encodes *prefix*, charge, multiplicity, composition, and all metric values. Parameters ---------- atoms: Element symbols. positions: Cartesian coordinates (Å), one per atom. charge: Total system charge. mult: Spin multiplicity 2S+1. metrics: Dict of computed disorder metrics. prefix: Prepended to the comment line (e.g. ``"sample=1 mode=gas"``). Returns ------- A multi-line string (no trailing newline). """ comp = ",".join(f"{s}:{c}" for s, c in sorted(Counter(atoms).items())) metric_str = " ".join(f"{k}={_fmt(v)}" for k, v in metrics.items()) comment = (f"{prefix} charge={charge:+d} mult={mult} comp=[{comp}] {metric_str}").strip() lines = [str(len(atoms)), comment] for atom, (x, y, z) in zip(atoms, positions, strict=False): lines.append(f"{atom:<4s} {x:12.6f} {y:12.6f} {z:12.6f}") return "\n".join(lines)
[docs] def parse_xyz(text: str) -> list[tuple[list[str], list[Vec3], int, int, dict[str, float]]]: """Parse a (possibly multi-frame) XYZ string — standard or extended format. Supports both: * **Standard XYZ** — atom count line, comment line, then coordinate lines. ``charge`` defaults to 0, ``mult`` to 1, ``metrics`` is empty. * **Extended XYZ** (as written by PASTED) — the comment line may contain ``charge=+0``, ``mult=1``, and ``KEY=VALUE`` metric tokens. Parameters ---------- text: Full contents of one or more XYZ frames (concatenated). Returns ------- list of ``(atoms, positions, charge, mult, metrics)`` tuples, one per frame. Raises ------ ValueError When the atom-count line or a coordinate line cannot be parsed. """ frames = [] lines = text.splitlines() i = 0 while i < len(lines): # Skip blank lines between frames if not lines[i].strip(): i += 1 continue # --- atom count line --- try: n_atoms = int(lines[i].strip()) except ValueError as exc: raise ValueError( f"Expected atom count on line {i + 1}, got {lines[i]!r}" ) from exc i += 1 if i >= len(lines): raise ValueError("Unexpected end of file after atom count line.") # --- comment line (extended XYZ fields) --- comment = lines[i] i += 1 charge = 0 mult = 1 metrics: dict[str, float] = {} m_charge = re.search(r"\bcharge=([+-]?\d+)", comment) if m_charge: charge = int(m_charge.group(1)) m_mult = re.search(r"\bmult=(\d+)", comment) if m_mult: mult = int(m_mult.group(1)) # Parse KEY=FLOAT tokens for metrics (skip charge/mult already captured) pat = r"\b([A-Za-z_][A-Za-z0-9_]*)=([+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)" for tok in re.findall(pat, comment): key, val_str = tok if key in ("charge", "mult"): continue try: metrics[key] = float(val_str) except ValueError: pass # --- coordinate lines --- atoms: list[str] = [] positions: list[tuple[float, float, float]] = [] for _ in range(n_atoms): if i >= len(lines): raise ValueError( f"Unexpected end of file: expected {n_atoms} coordinate lines." ) parts = lines[i].split() i += 1 if len(parts) < 4: raise ValueError(f"Malformed coordinate line: {lines[i - 1]!r}") atoms.append(parts[0]) try: positions.append((float(parts[1]), float(parts[2]), float(parts[3]))) except ValueError as exc: raise ValueError(f"Non-numeric coordinate in: {lines[i - 1]!r}") from exc frames.append((atoms, positions, charge, mult, metrics)) return frames