# Source code for xraylabtool.calculators.scattering_data

"""Scattering factor data loading and element path management."""

from __future__ import annotations

from pathlib import Path
from typing import Any

import numpy as np

# Pre-computed element file paths for faster access.
# Keys are capitalized element symbols taken from the .nff file stem
# (e.g. "Si"); values are the paths of the matching .nff files. The mapping
# starts empty and is filled in by _initialize_element_paths().
_AVAILABLE_ELEMENTS: dict[str, Path] = {}


def _initialize_element_paths() -> None:
    """
    Register every ``*.nff`` file found in the known data directories.

    Three candidate directories are scanned in order: one relative to the
    current working directory, one for the old source layout, and one for
    the new package layout. Each file is stored in ``_AVAILABLE_ELEMENTS``
    under its capitalized stem. When the same element shows up in more than
    one directory, the first one encountered is kept.
    """
    module_dir = Path(__file__).parent

    candidate_dirs = (
        # Running from a checkout: <cwd>/src/AtomicScatteringFactor
        Path.cwd() / "src" / "AtomicScatteringFactor",
        # Old structure compatibility
        module_dir.parent.parent / "src" / "AtomicScatteringFactor",
        # New structure
        module_dir.parent / "data" / "AtomicScatteringFactor",
    )

    for directory in candidate_dirs:
        if not directory.exists():
            continue
        for nff_path in directory.glob("*.nff"):
            # setdefault keeps an earlier registration for the same element.
            _AVAILABLE_ELEMENTS.setdefault(nff_path.stem.capitalize(), nff_path)


def load_scattering_factor_data(element: str) -> Any:
    """
    Load f1/f2 scattering factor data for a specific element from .nff files.

    The file is looked up in the pre-computed ``_AVAILABLE_ELEMENTS``
    mapping, its comma-separated header line is validated, and the numeric
    rows are parsed with ``np.loadtxt``. The result is stored in the
    module-level ``_scattering_factor_cache`` dictionary keyed by the
    normalized element symbol, so repeated calls return the same object.

    Args:
        element: Element symbol (e.g., 'H', 'C', 'N', 'O', 'Si', 'Ge').
            It is normalized with ``str.capitalize()`` before lookup.

    Returns:
        ScatteringData object with pandas-like interface containing columns:
        E (energy), f1, f2

    Raises:
        FileNotFoundError: If no .nff file is registered for the element.
        ValueError: If the element symbol is empty or not a string, or if
            the file cannot be read or parsed, lacks a required column, has
            data rows narrower than its header, or contains no data rows.
        RuntimeError: If any other unexpected error occurs while loading.

    Examples:
        >>> from xraylabtool.calculators.scattering_data import load_scattering_factor_data
        >>> data = load_scattering_factor_data('Si')
        >>> print(data.columns)
        ['E', 'f1', 'f2']
        >>> print(len(data) > 100)  # Verify we have enough data points
        True
    """
    # Function-scope import, kept as in the original.
    # NOTE(review): presumably avoids a circular import with the cache
    # module -- confirm before moving it to the top of the file.
    from xraylabtool.calculators.cache import _scattering_factor_cache

    # Validate input
    if not element or not isinstance(element, str):
        raise ValueError(f"Element symbol must be a non-empty string, got: {element!r}")

    # Normalize element symbol (capitalize first letter, lowercase rest)
    element = element.capitalize()

    # Check if already cached
    if element in _scattering_factor_cache:
        return _scattering_factor_cache[element]

    # Use pre-computed element paths for faster access
    if element not in _AVAILABLE_ELEMENTS:
        raise FileNotFoundError(
            f"Scattering factor data file not found for element '{element}'. "
            f"Available elements: {sorted(_AVAILABLE_ELEMENTS.keys())}"
        )

    file_path = _AVAILABLE_ELEMENTS[element]

    try:
        # Read and validate header line
        with open(file_path) as _f:
            header_line = _f.readline().strip()
        header = [col.strip() for col in header_line.split(",")]

        expected_columns = {"E", "f1", "f2"}
        actual_columns = set(header)

        if not expected_columns.issubset(actual_columns):
            missing_cols = expected_columns - actual_columns
            raise ValueError(
                f"Invalid .nff file format for element '{element}'. "
                f"Missing required columns: {missing_cols}. "
                f"Found columns: {list(actual_columns)}"
            )

        # Get column indices for correct ordering
        e_idx = header.index("E")
        f1_idx = header.index("f1")
        f2_idx = header.index("f2")

        # Load entire file at C-level via np.loadtxt
        raw = np.loadtxt(file_path, delimiter=",", skiprows=1, dtype=np.float64)

        # Emptiness must be tested BEFORE the 1-D -> 2-D reshape: a file with
        # no data rows comes back as an empty 1-D array, and reshaping that
        # to (1, -1) produces one zero-width row whose len() is 1, which
        # would let the empty file slip past a len()-based check.
        if raw.size == 0:
            raise ValueError(
                "Empty scattering factor data file for element "
                f"'{element}': {file_path}"
            )

        # A single data row is returned as a 1-D array; promote it to 2-D.
        if raw.ndim == 1:
            raw = raw.reshape(1, -1)

        # Rows narrower than the header would otherwise surface as an
        # IndexError from the fancy indexing below.
        if max(e_idx, f1_idx, f2_idx) >= raw.shape[1]:
            raise ValueError(
                f"Invalid .nff file format for element '{element}'. "
                f"Data rows have {raw.shape[1]} column(s) but the header "
                f"declares {len(header)}"
            )

        # Re-order columns to canonical [E, f1, f2] if needed
        data_array = raw[:, [e_idx, f1_idx, f2_idx]]

        scattering_data = ScatteringData(data_array, ["E", "f1", "f2"])

        # Cache the data
        _scattering_factor_cache[element] = scattering_data

        return scattering_data

    except (OSError, ValueError) as e:
        # Read and format problems (including the ValueErrors raised above)
        # are reported uniformly as a ValueError that names the file.
        raise ValueError(
            "Error parsing scattering factor data file for element "
            f"'{element}': {file_path}. "
            f"Expected CSV format with columns: E,f1,f2. Error: {e}"
        ) from e
    except Exception as e:
        raise RuntimeError(
            "Unexpected error loading scattering factor data for element "
            f"'{element}' from {file_path}: {e}"
        ) from e
class _ColumnProxy:
    """Minimal stand-in for a pandas Series: exposes the column as ``.values``."""

    def __init__(self, data: np.ndarray) -> None:
        self.values = data


class ScatteringData:
    """
    Pandas-like interface for scattering factor data arrays.

    Wraps a 2-D array together with its column names so that callers can
    write ``data["f1"].values`` and ``len(data)``.

    Attributes are set in ``__init__``:
        data: the array passed in (stored as-is, not copied)
        columns: the list of column names passed in
    """

    def __init__(self, data_array: np.ndarray, column_names: list[str]) -> None:
        """
        Store the array and build the name -> column-index lookup.

        Args:
            data_array: Array indexed as ``data_array[:, i]`` per column.
            column_names: Column names, in the same order as the columns.
        """
        self.data = data_array
        self.columns = column_names
        self._column_indices = {name: i for i, name in enumerate(column_names)}

    def __len__(self) -> int:
        """Return ``len()`` of the underlying array (its first dimension)."""
        return len(self.data)

    def __getitem__(self, column: str) -> Any:
        """
        Return the named column wrapped in an object with a ``.values`` attribute.

        Args:
            column: One of the names given at construction time.

        Returns:
            Proxy object whose ``.values`` is ``self.data[:, idx]``.

        Raises:
            KeyError: If ``column`` is not a known column name.
        """
        idx = self._column_indices[column]
        # The proxy class is defined once at module level rather than being
        # re-created on every lookup; the returned object still only exposes
        # ``.values`` for compatibility.
        return _ColumnProxy(self.data[:, idx])
class AtomicScatteringFactor:
    """
    Object-oriented front end for atomic scattering factor data.

    Element tables are obtained through ``load_scattering_factor_data``;
    the instance itself only holds a ``data`` dict and the ``data_path``
    directory, both kept for backward compatibility with existing tests.
    """

    def __init__(self) -> None:
        """Set up the compatibility attributes and make sure ``data_path`` exists."""
        package_dir = Path(__file__).parent.parent

        # Maintain backward compatibility with existing tests
        self.data: dict[str, Any] = {}
        self.data_path = package_dir / "data" / "AtomicScatteringFactor"

        # Create data directory if it doesn't exist (for test compatibility)
        self.data_path.mkdir(parents=True, exist_ok=True)

    def load_element_data(self, element: str) -> Any:
        """
        Load scattering factor data for a specific element.

        Delegates directly to ``load_scattering_factor_data``.

        Args:
            element: Element symbol (e.g., 'H', 'C', 'N', 'O', 'Si', 'Ge')

        Returns:
            Whatever ``load_scattering_factor_data`` returns for ``element``.

        Raises:
            FileNotFoundError: If the .nff file for the element is not found
            ValueError: If the element symbol is invalid
        """
        table = load_scattering_factor_data(element)
        return table

    def get_scattering_factor(self, _element: str, q_values: np.ndarray) -> np.ndarray:
        """
        Calculate scattering factors for given q values.

        Placeholder implementation: the element is ignored and every q value
        maps to 1.

        Args:
            _element: Element symbol (currently unused)
            q_values: Array of momentum transfer values

        Returns:
            Array of ones with the same shape and dtype as ``q_values``
        """
        placeholder = np.ones_like(q_values)
        return placeholder
class CrystalStructure:
    """
    A unit cell: six lattice parameters plus a list of atom sites.
    """

    def __init__(
        self, lattice_parameters: tuple[float, float, float, float, float, float]
    ) -> None:
        """
        Initialize crystal structure.

        Args:
            lattice_parameters: (a, b, c, alpha, beta, gamma) in Angstroms
                and degrees
        """
        # Unpack first so a wrong-length tuple fails before any attribute is set.
        a, b, c, alpha, beta, gamma = lattice_parameters
        self.a, self.b, self.c = a, b, c
        self.alpha, self.beta, self.gamma = alpha, beta, gamma
        self.atoms: list[dict[str, Any]] = []

    def add_atom(
        self, element: str, position: tuple[float, float, float], occupancy: float = 1.0
    ) -> None:
        """
        Append one atom site to ``self.atoms``.

        Args:
            element: Element symbol
            position: Fractional coordinates (x, y, z)
            occupancy: Site occupancy factor
        """
        site = {"element": element, "position": position, "occupancy": occupancy}
        self.atoms.append(site)

    def calculate_structure_factor(self, hkl: tuple[int, int, int]) -> complex:
        """
        Calculate structure factor for given Miller indices.

        F(hkl) = sum_j f_j * occ_j * exp(2*pi*i*(h*x_j + k*y_j + l*z_j))

        The sum runs over every entry of ``self.atoms``. The atomic form
        factor f_j is taken as 1.0 for all atoms (geometric structure
        factor), so only occupancy and position contribute.

        Args:
            hkl: Miller indices (h, k, l)

        Returns:
            Complex structure factor; ``0j`` when no atoms have been added
        """
        import cmath
        import math

        h, k, miller_l = hkl
        two_pi = 2.0 * math.pi

        total = complex(0.0, 0.0)
        for site in self.atoms:
            x, y, z = site["position"]
            weight = site["occupancy"]
            angle = two_pi * (h * x + k * y + miller_l * z)
            total += weight * cmath.exp(1j * angle)

        return total
def load_data_file(filename: str) -> Any:
    """
    Load data from various file formats commonly used in X-ray analysis.

    Files with a ``.csv`` suffix (case-insensitive) are read as
    comma-separated; every other file, including ``.txt`` and ``.dat``, is
    read as whitespace-delimited text.

    Args:
        filename: Path to the data file

    Returns:
        DataFrame containing the loaded data

    Raises:
        FileNotFoundError: If ``filename`` does not exist
    """
    file_path = Path(filename)

    if not file_path.exists():
        raise FileNotFoundError(f"Data file not found: {filename}")

    # Lazy import pandas only when needed
    import pandas as pd

    if file_path.suffix.lower() == ".csv":
        return pd.read_csv(file_path)

    # .txt, .dat and any unrecognized suffix: generic whitespace-delimited text.
    # sep=r"\s+" is the documented replacement for the deprecated
    # delim_whitespace=True keyword.
    return pd.read_csv(file_path, sep=r"\s+")
# Initialize element paths at module import time for performance _initialize_element_paths()