Source code for xraylabtool.calculators.scattering_data

"""Scattering factor data loading and element path management."""

from __future__ import annotations

from pathlib import Path
from typing import Any

import numpy as np

# Pre-computed element file paths for faster access.
# Maps a capitalized element symbol (e.g. "Si") to the path of its ``.nff``
# file; filled in by ``_initialize_element_paths()``.
_AVAILABLE_ELEMENTS: dict[str, Path] = {}


def _initialize_element_paths() -> None:
    """
    Pre-compute all available element file paths at module load time.

    Scans each candidate data directory for ``*.nff`` files and records them
    in ``_AVAILABLE_ELEMENTS`` under the capitalized file stem (``si.nff`` ->
    ``"Si"``). This optimization eliminates repeated file system checks.

    Directories are searched in priority order and the first file found for
    an element wins; entries already present in the registry are never
    overwritten.
    """
    package_dir = Path(__file__).parent.parent

    base_paths = [
        Path.cwd() / "src" / "AtomicScatteringFactor",
        # For old structure compatibility
        package_dir.parent / "src" / "AtomicScatteringFactor",
        # New structure
        package_dir / "data" / "AtomicScatteringFactor",
    ]

    for base_path in base_paths:
        # is_dir() rather than exists(): a plain file with this name is not a
        # usable data directory.
        if not base_path.is_dir():
            continue

        # glob() yields entries in arbitrary order. Sorting makes the choice
        # deterministic when two files map to the same symbol (e.g. "Si.nff"
        # and "si.nff" on a case-sensitive file system).
        for nff_file in sorted(base_path.glob("*.nff")):
            # Skip directories that merely happen to end in ".nff".
            if not nff_file.is_file():
                continue
            element = nff_file.stem.capitalize()
            _AVAILABLE_ELEMENTS.setdefault(element, nff_file)



[docs]
def load_scattering_factor_data(element: str) -> Any:
    """
    Load f1/f2 scattering factor data for a specific element from .nff files.

    This function reads .nff files using CSV parsing and caches the results
    in a module-level dictionary keyed by element symbol. Returns a pandas-compatible
    data structure for accessing columns E, f1, f2.

    Args:
        element: Element symbol (e.g., 'H', 'C', 'N', 'O', 'Si', 'Ge')

    Returns:
        ScatteringData object with pandas-like interface containing columns: E (energy), f1, f2

    Raises:
        FileNotFoundError: If the .nff file for the element is not found
        ValueError: If the element symbol is invalid, empty, or file format is invalid
            (missing columns, no data rows, or rows narrower than the header)
        RuntimeError: If an unexpected, non-parsing error occurs while loading

    Examples:
        >>> from xraylabtool.calculators.scattering_data import load_scattering_factor_data
        >>> data = load_scattering_factor_data('Si')
        >>> print(data.columns)
        ['E', 'f1', 'f2']
        >>> print(len(data) > 100)  # Verify we have enough data points
        True
    """
    # Validate input first; the type check precedes the truthiness check so
    # that objects with unusual truth values are rejected cleanly.
    if not isinstance(element, str) or not element:
        raise ValueError(f"Element symbol must be a non-empty string, got: {element!r}")

    # Function-scope import, performed only once the argument is known good.
    from xraylabtool.calculators.cache import _scattering_factor_cache

    # Normalize element symbol (capitalize first letter, lowercase rest)
    element = element.capitalize()

    # Check if already cached
    if element in _scattering_factor_cache:
        return _scattering_factor_cache[element]

    # Use pre-computed element paths for faster access
    if element not in _AVAILABLE_ELEMENTS:
        raise FileNotFoundError(
            f"Scattering factor data file not found for element '{element}'. "
            f"Available elements: {sorted(_AVAILABLE_ELEMENTS.keys())}"
        )

    file_path = _AVAILABLE_ELEMENTS[element]

    try:
        data_array = _read_nff_table(file_path, element)
        scattering_data = ScatteringData(data_array, ["E", "f1", "f2"])

        # Cache the data
        _scattering_factor_cache[element] = scattering_data

        return scattering_data

    except (OSError, ValueError) as e:
        # I/O and format problems are both reported as ValueError, with the
        # original error chained and quoted in the message.
        raise ValueError(
            "Error parsing scattering factor data file for element "
            f"'{element}': {file_path}. "
            f"Expected CSV format with columns: E,f1,f2. Error: {e}"
        ) from e
    except Exception as e:
        raise RuntimeError(
            "Unexpected error loading scattering factor data for element "
            f"'{element}' from {file_path}: {e}"
        ) from e


def _read_nff_table(file_path: Path, element: str) -> np.ndarray:
    """
    Parse one .nff file into a 2-D float64 array with columns [E, f1, f2].

    The first line is a comma-separated header that must contain ``E``,
    ``f1`` and ``f2`` (in any order, extra columns allowed); the remaining
    lines are comma-separated numbers.

    Args:
        file_path: Location of the .nff file
        element: Element symbol, used only in error messages

    Returns:
        Array of shape (n_rows, 3) ordered as E, f1, f2

    Raises:
        OSError: If the file cannot be opened
        ValueError: If required columns are missing, the file has no data
            rows, the rows are narrower than the header, or a value is not
            numeric
    """
    # Read and validate header line
    with open(file_path, encoding="utf-8") as _f:
        header_line = _f.readline().strip()

    header = [col.strip() for col in header_line.split(",")]
    expected_columns = {"E", "f1", "f2"}
    actual_columns = set(header)

    if not expected_columns.issubset(actual_columns):
        missing_cols = expected_columns - actual_columns
        raise ValueError(
            f"Invalid .nff file format for element '{element}'. "
            f"Missing required columns: {missing_cols}. "
            f"Found columns: {list(actual_columns)}"
        )

    # Get column indices for correct ordering
    column_order = [header.index(name) for name in ("E", "f1", "f2")]

    # Load entire file at C-level via np.loadtxt. ndmin=2 keeps the result
    # two-dimensional, so a single data row stays (1, n_cols) and a file with
    # one value per line stays (n_rows, 1) instead of being mistaken for one
    # wide row.
    raw = np.loadtxt(
        file_path, delimiter=",", skiprows=1, dtype=np.float64, ndmin=2
    )

    # Test for "no data" via size, which is zero for any empty shape.
    if raw.size == 0:
        raise ValueError(
            "Empty scattering factor data file for element "
            f"'{element}': {file_path}"
        )

    # Rows narrower than the header would otherwise surface as an IndexError.
    if raw.shape[1] <= max(column_order):
        raise ValueError(
            f"Invalid .nff file format for element '{element}'. "
            f"Header declares {len(header)} columns but data rows have "
            f"{raw.shape[1]}"
        )

    # Re-order columns to canonical [E, f1, f2] if needed
    return raw[:, column_order]




[docs]
class ScatteringData:
    """
    Pandas-like interface for scattering factor data arrays.

    Wraps a 2-D array together with its column names so that
    ``data["f1"].values`` returns the matching column, the way a DataFrame
    would.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, data_array: np.ndarray, column_names: list[str]) -> None:
        """
        Args:
            data_array: 2-D array indexed as ``[row, column]``
            column_names: Name of each column, in array order
        """
        # The array is stored as given (no copy).
        self.data = data_array
        # Copied so that later changes to the caller's list cannot drift out
        # of sync with the lookup table below.
        self.columns = list(column_names)
        self._column_indices = {name: i for i, name in enumerate(self.columns)}

    def __len__(self) -> int:
        """Return the number of rows."""
        return len(self.data)

    def __repr__(self) -> str:
        return f"{type(self).__name__}(rows={len(self)}, columns={self.columns!r})"

    def __getitem__(self, column: str) -> Any:
        """
        Return the named column wrapped in an object exposing ``.values``.

        Raises:
            KeyError: If ``column`` is not one of the column names
        """
        idx = self._column_indices[column]

        # Return object with .values attribute for compatibility
        return _ColumnProxy(self.data[:, idx])


class _ColumnProxy:
    """Holds one column and exposes it as ``.values``, like a pandas Series."""

    def __init__(self, data: np.ndarray) -> None:
        self.values = data




[docs]
class AtomicScatteringFactor:
    """
    Class for handling atomic scattering factors.

    This class loads and manages atomic scattering factor data
    from NFF files using the module-level cache.
    """

    def __init__(self) -> None:
        """Set up an empty per-instance store and the packaged data directory path."""
        # Maintain backward compatibility with existing tests
        self.data: dict[str, Any] = {}
        self.data_path = (
            Path(__file__).parent.parent / "data" / "AtomicScatteringFactor"
        )

        # Create data directory if it doesn't exist (for test compatibility).
        # Best effort only: on a read-only installation the directory cannot
        # be created, and that must not make the class unusable.
        try:
            self.data_path.mkdir(parents=True, exist_ok=True)
        except OSError:
            pass

    def load_element_data(self, element: str) -> Any:
        """
        Load scattering factor data for a specific element.

        Thin wrapper around ``load_scattering_factor_data``.

        Args:
            element: Element symbol (e.g., 'H', 'C', 'N', 'O', 'Si', 'Ge')

        Returns:
            Whatever ``load_scattering_factor_data`` returns for the element:
            scattering factor data with columns E, f1, f2

        Raises:
            FileNotFoundError: If the .nff file for the element is not found
            ValueError: If the element symbol is invalid
        """
        return load_scattering_factor_data(element)

    def get_scattering_factor(self, _element: str, q_values: np.ndarray) -> np.ndarray:
        """
        Calculate scattering factors for given q values.

        Placeholder: every q value currently maps to 1, regardless of the
        element.

        Args:
            _element: Element symbol (currently unused)
            q_values: Array of momentum transfer values

        Returns:
            Array of ones with the same shape and dtype as ``q_values``
        """
        # Placeholder implementation
        return np.ones_like(q_values)





[docs]
class CrystalStructure:
    """
    Class for representing and manipulating crystal structures.

    Holds the six lattice parameters and a list of atoms, each stored as a
    dict with the keys ``"element"``, ``"position"`` and ``"occupancy"``.
    """

    def __init__(
        self, lattice_parameters: tuple[float, float, float, float, float, float]
    ) -> None:
        """
        Initialize crystal structure.

        Args:
            lattice_parameters: (a, b, c, alpha, beta, gamma) in Angstroms and degrees

        Raises:
            ValueError: If ``lattice_parameters`` does not hold exactly six values
        """
        self.a, self.b, self.c, self.alpha, self.beta, self.gamma = lattice_parameters
        self.atoms: list[dict[str, Any]] = []

    def add_atom(
        self, element: str, position: tuple[float, float, float], occupancy: float = 1.0
    ) -> None:
        """
        Add an atom to the crystal structure.

        Args:
            element: Element symbol
            position: Fractional coordinates (x, y, z)
            occupancy: Site occupancy factor

        Raises:
            ValueError: If ``position`` does not hold exactly three coordinates
        """
        # Reject malformed positions here rather than letting them fail later
        # inside calculate_structure_factor().
        if len(position) != 3:
            raise ValueError(
                f"position must contain exactly 3 coordinates, got: {position!r}"
            )

        self.atoms.append(
            {"element": element, "position": position, "occupancy": occupancy}
        )

    def calculate_structure_factor(self, hkl: tuple[int, int, int]) -> complex:
        """
        Calculate structure factor for given Miller indices.

        F(hkl) = sum_j  f_j * occ_j * exp(2*pi*i*(h*x_j + k*y_j + l*z_j))

        where the sum runs over all atoms in the unit cell. The atomic form
        factor f_j is currently taken as 1.0 for all atoms (geometric structure
        factor).

        Args:
            hkl: Miller indices (h, k, l)

        Returns:
            Complex structure factor; ``0j`` when no atoms have been added
        """
        import cmath
        import math

        h, k, miller_l = hkl
        result = complex(0.0, 0.0)
        for atom in self.atoms:
            x, y, z = atom["position"]
            occ = atom["occupancy"]
            phase = 2.0 * math.pi * (h * x + k * y + miller_l * z)
            result += occ * cmath.exp(1j * phase)
        return result





[docs]
def load_data_file(filename: str) -> Any:
    """
    Load data from various file formats commonly used in X-ray analysis.

    Files ending in ``.csv`` (case-insensitive) are read as comma-separated
    values; every other file, including ``.txt`` and ``.dat``, is read as
    whitespace-delimited text.

    Args:
        filename: Path to the data file

    Returns:
        DataFrame containing the loaded data

    Raises:
        FileNotFoundError: If ``filename`` does not exist
    """
    file_path = Path(filename)

    if not file_path.exists():
        raise FileNotFoundError(f"Data file not found: {filename}")

    # Lazy import pandas only when needed
    import pandas as pd

    if file_path.suffix.lower() == ".csv":
        return pd.read_csv(file_path)

    # .txt, .dat and unknown extensions are all treated as generic
    # whitespace-delimited text. sep=r"\s+" is the documented replacement for
    # delim_whitespace=True, which pandas deprecated in 2.2.
    return pd.read_csv(file_path, sep=r"\s+")



# Initialize element paths at module import time for performance: the
# directory scan runs once here, and load_scattering_factor_data() then only
# consults the resulting _AVAILABLE_ELEMENTS mapping.
_initialize_element_paths()