"""
pasted._io
==========
XYZ format serialisation helpers.
"""
from __future__ import annotations
import math
import re
from collections import Counter
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from ._placement import Vec3
def _fmt(v: float) -> str:
    """Render a metric value as text.

    NaN is written as the literal string ``'nan'``; every other value is
    formatted in fixed-point notation with four decimal places.
    """
    if math.isnan(v):
        return "nan"
    return format(v, ".4f")
[docs]
# Comment-line patterns, compiled once at import time rather than per frame.
_XYZ_CHARGE_RE = re.compile(r"\bcharge=([+-]?\d+)")
_XYZ_MULT_RE = re.compile(r"\bmult=(\d+)")
# KEY=FLOAT metric tokens.  Besides ordinary decimal / exponent literals the
# value may be ``nan`` or ``inf`` (optionally signed): that is how Python's
# float formatting spells non-finite values, so they must survive a
# write -> parse round trip instead of being silently dropped.
_XYZ_METRIC_RE = re.compile(
    r"\b([A-Za-z_][A-Za-z0-9_]*)="
    r"([+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?|[+-]?(?:nan|inf)\b)"
)


def _parse_xyz_comment(comment: str) -> tuple[int, int, dict[str, float]]:
    """Extract ``(charge, mult, metrics)`` from one XYZ comment line.

    Missing ``charge=`` / ``mult=`` tokens fall back to 0 and 1 respectively.
    """
    m_charge = _XYZ_CHARGE_RE.search(comment)
    charge = int(m_charge.group(1)) if m_charge else 0

    m_mult = _XYZ_MULT_RE.search(comment)
    mult = int(m_mult.group(1)) if m_mult else 1

    # The pattern only matches literals that ``float`` accepts, so no
    # conversion error can occur here.  A repeated key keeps its last value.
    metrics = {
        key: float(val_str)
        for key, val_str in _XYZ_METRIC_RE.findall(comment)
        if key not in ("charge", "mult")  # already captured above
    }
    return charge, mult, metrics


def parse_xyz(text: str) -> list[tuple[list[str], list[Vec3], int, int, dict[str, float]]]:
    """Parse a (possibly multi-frame) XYZ string — standard or extended format.

    Supports both:

    * **Standard XYZ** — atom count line, comment line, then coordinate lines.
      ``charge`` defaults to 0, ``mult`` to 1, ``metrics`` is empty.
    * **Extended XYZ** (as written by PASTED) — the comment line may contain
      ``charge=+0``, ``mult=1``, and ``KEY=VALUE`` metric tokens.  Metric
      values may be ordinary float literals or ``nan`` / ``inf``.

    Blank lines between frames are skipped.  Only the first four
    whitespace-separated fields of a coordinate line are used (element symbol
    followed by x, y, z); any further columns are ignored.

    Parameters
    ----------
    text:
        Full contents of one or more XYZ frames (concatenated).

    Returns
    -------
    list of ``(atoms, positions, charge, mult, metrics)`` tuples, one per frame.

    Raises
    ------
    ValueError
        When the atom-count line is not a non-negative integer, when the
        input ends before a frame is complete, or when a coordinate line
        cannot be parsed.
    """
    frames = []
    lines = text.splitlines()
    n_lines = len(lines)
    i = 0
    while i < n_lines:
        # Skip blank lines between frames
        if not lines[i].strip():
            i += 1
            continue

        # --- atom count line ---
        try:
            n_atoms = int(lines[i].strip())
        except ValueError as exc:
            raise ValueError(
                f"Expected atom count on line {i + 1}, got {lines[i]!r}"
            ) from exc
        if n_atoms < 0:
            # range() over a negative count would silently yield an empty
            # frame, so reject it explicitly.
            raise ValueError(
                f"Negative atom count on line {i + 1}: {lines[i]!r}"
            )
        i += 1
        if i >= n_lines:
            raise ValueError("Unexpected end of file after atom count line.")

        # --- comment line (extended XYZ fields) ---
        charge, mult, metrics = _parse_xyz_comment(lines[i])
        i += 1

        # --- coordinate lines ---
        atoms: list[str] = []
        positions: list[tuple[float, float, float]] = []
        for _ in range(n_atoms):
            if i >= n_lines:
                raise ValueError(
                    f"Unexpected end of file: expected {n_atoms} coordinate lines."
                )
            line = lines[i]
            i += 1
            parts = line.split()
            if len(parts) < 4:
                raise ValueError(f"Malformed coordinate line: {line!r}")
            try:
                xyz = (float(parts[1]), float(parts[2]), float(parts[3]))
            except ValueError as exc:
                raise ValueError(f"Non-numeric coordinate in: {line!r}") from exc
            atoms.append(parts[0])
            positions.append(xyz)

        frames.append((atoms, positions, charge, mult, metrics))
    return frames