Source code for xraylabtool.data_handling.atomic_cache

"""
High-performance atomic data cache system.

This module provides a pre-populated cache of atomic data for common elements
to eliminate expensive database queries to the Mendeleev library during runtime.
"""

from __future__ import annotations

from functools import lru_cache
import types
from typing import TYPE_CHECKING, Any

import numpy as np

from xraylabtool.exceptions import UnknownElementError

# Cache metrics imports are done lazily to avoid circular imports

if TYPE_CHECKING:
    from xraylabtool.typing_extensions import ComplexArray, EnergyArray

# Pre-populated atomic data for elements H through U (atomic numbers 1-92),
# keyed by element symbol. Each record holds an integer "atomic_number" and a
# float "atomic_weight". Lookups served from this table avoid expensive
# Mendeleev database queries.
# NOTE(review): the whole-number weights (e.g. Tc, Pm, Po through Ac) look like
# nominal values for elements without a standard atomic weight — confirm the
# data source before relying on them.
_ATOMIC_DATA_PRELOADED = {
    "H": {"atomic_number": 1, "atomic_weight": 1.008},
    "He": {"atomic_number": 2, "atomic_weight": 4.0026},
    "Li": {"atomic_number": 3, "atomic_weight": 6.94},
    "Be": {"atomic_number": 4, "atomic_weight": 9.0122},
    "B": {"atomic_number": 5, "atomic_weight": 10.81},
    "C": {"atomic_number": 6, "atomic_weight": 12.011},
    "N": {"atomic_number": 7, "atomic_weight": 14.007},
    "O": {"atomic_number": 8, "atomic_weight": 15.999},
    "F": {"atomic_number": 9, "atomic_weight": 18.998},
    "Ne": {"atomic_number": 10, "atomic_weight": 20.180},
    "Na": {"atomic_number": 11, "atomic_weight": 22.990},
    "Mg": {"atomic_number": 12, "atomic_weight": 24.305},
    "Al": {"atomic_number": 13, "atomic_weight": 26.982},
    "Si": {"atomic_number": 14, "atomic_weight": 28.085},
    "P": {"atomic_number": 15, "atomic_weight": 30.974},
    "S": {"atomic_number": 16, "atomic_weight": 32.06},
    "Cl": {"atomic_number": 17, "atomic_weight": 35.45},
    "Ar": {"atomic_number": 18, "atomic_weight": 39.948},
    "K": {"atomic_number": 19, "atomic_weight": 39.098},
    "Ca": {"atomic_number": 20, "atomic_weight": 40.078},
    "Sc": {"atomic_number": 21, "atomic_weight": 44.956},
    "Ti": {"atomic_number": 22, "atomic_weight": 47.867},
    "V": {"atomic_number": 23, "atomic_weight": 50.942},
    "Cr": {"atomic_number": 24, "atomic_weight": 51.996},
    "Mn": {"atomic_number": 25, "atomic_weight": 54.938},
    "Fe": {"atomic_number": 26, "atomic_weight": 55.845},
    "Co": {"atomic_number": 27, "atomic_weight": 58.933},
    "Ni": {"atomic_number": 28, "atomic_weight": 58.693},
    "Cu": {"atomic_number": 29, "atomic_weight": 63.546},
    "Zn": {"atomic_number": 30, "atomic_weight": 65.38},
    "Ga": {"atomic_number": 31, "atomic_weight": 69.723},
    "Ge": {"atomic_number": 32, "atomic_weight": 72.630},
    "As": {"atomic_number": 33, "atomic_weight": 74.922},
    "Se": {"atomic_number": 34, "atomic_weight": 78.971},
    "Br": {"atomic_number": 35, "atomic_weight": 79.904},
    "Kr": {"atomic_number": 36, "atomic_weight": 83.798},
    "Rb": {"atomic_number": 37, "atomic_weight": 85.468},
    "Sr": {"atomic_number": 38, "atomic_weight": 87.62},
    "Y": {"atomic_number": 39, "atomic_weight": 88.906},
    "Zr": {"atomic_number": 40, "atomic_weight": 91.224},
    "Nb": {"atomic_number": 41, "atomic_weight": 92.906},
    "Mo": {"atomic_number": 42, "atomic_weight": 95.95},
    "Tc": {"atomic_number": 43, "atomic_weight": 98.0},
    "Ru": {"atomic_number": 44, "atomic_weight": 101.07},
    "Rh": {"atomic_number": 45, "atomic_weight": 102.91},
    "Pd": {"atomic_number": 46, "atomic_weight": 106.42},
    "Ag": {"atomic_number": 47, "atomic_weight": 107.87},
    "Cd": {"atomic_number": 48, "atomic_weight": 112.41},
    "In": {"atomic_number": 49, "atomic_weight": 114.82},
    "Sn": {"atomic_number": 50, "atomic_weight": 118.71},
    "Sb": {"atomic_number": 51, "atomic_weight": 121.76},
    "Te": {"atomic_number": 52, "atomic_weight": 127.60},
    "I": {"atomic_number": 53, "atomic_weight": 126.90},
    "Xe": {"atomic_number": 54, "atomic_weight": 131.29},
    "Cs": {"atomic_number": 55, "atomic_weight": 132.91},
    "Ba": {"atomic_number": 56, "atomic_weight": 137.33},
    "La": {"atomic_number": 57, "atomic_weight": 138.91},
    "Ce": {"atomic_number": 58, "atomic_weight": 140.12},
    "Pr": {"atomic_number": 59, "atomic_weight": 140.91},
    "Nd": {"atomic_number": 60, "atomic_weight": 144.24},
    "Pm": {"atomic_number": 61, "atomic_weight": 145.0},
    "Sm": {"atomic_number": 62, "atomic_weight": 150.36},
    "Eu": {"atomic_number": 63, "atomic_weight": 151.96},
    "Gd": {"atomic_number": 64, "atomic_weight": 157.25},
    "Tb": {"atomic_number": 65, "atomic_weight": 158.93},
    "Dy": {"atomic_number": 66, "atomic_weight": 162.50},
    "Ho": {"atomic_number": 67, "atomic_weight": 164.93},
    "Er": {"atomic_number": 68, "atomic_weight": 167.26},
    "Tm": {"atomic_number": 69, "atomic_weight": 168.93},
    "Yb": {"atomic_number": 70, "atomic_weight": 173.05},
    "Lu": {"atomic_number": 71, "atomic_weight": 174.97},
    "Hf": {"atomic_number": 72, "atomic_weight": 178.49},
    "Ta": {"atomic_number": 73, "atomic_weight": 180.95},
    "W": {"atomic_number": 74, "atomic_weight": 183.84},
    "Re": {"atomic_number": 75, "atomic_weight": 186.21},
    "Os": {"atomic_number": 76, "atomic_weight": 190.23},
    "Ir": {"atomic_number": 77, "atomic_weight": 192.22},
    "Pt": {"atomic_number": 78, "atomic_weight": 195.08},
    "Au": {"atomic_number": 79, "atomic_weight": 196.97},
    "Hg": {"atomic_number": 80, "atomic_weight": 200.59},
    "Tl": {"atomic_number": 81, "atomic_weight": 204.38},
    "Pb": {"atomic_number": 82, "atomic_weight": 207.2},
    "Bi": {"atomic_number": 83, "atomic_weight": 208.98},
    "Po": {"atomic_number": 84, "atomic_weight": 209.0},
    "At": {"atomic_number": 85, "atomic_weight": 210.0},
    "Rn": {"atomic_number": 86, "atomic_weight": 222.0},
    "Fr": {"atomic_number": 87, "atomic_weight": 223.0},
    "Ra": {"atomic_number": 88, "atomic_weight": 226.0},
    "Ac": {"atomic_number": 89, "atomic_weight": 227.0},
    "Th": {"atomic_number": 90, "atomic_weight": 232.04},
    "Pa": {"atomic_number": 91, "atomic_weight": 231.04},
    "U": {"atomic_number": 92, "atomic_weight": 238.03},
}

# Read-only views over the preloaded records. They are built exactly once, at
# import time, so every preloaded hit in get_atomic_data_fast hands back the
# same proxy object rather than allocating a new wrapper per call.
_ATOMIC_DATA_PROXIES: dict[str, types.MappingProxyType[str, float]] = {
    symbol: types.MappingProxyType(record)
    for symbol, record in _ATOMIC_DATA_PRELOADED.items()
}

# Runtime cache for elements that are absent from the preloaded table.
# NOTE: get_atomic_data_fast stores MappingProxyType objects in here, even
# though the annotation below says plain dict (it silences the resulting
# type-checker complaints at the call sites).
_RUNTIME_CACHE: dict[str, dict[str, float]] = {}



[docs]
def get_atomic_data_fast(element: str) -> types.MappingProxyType[str, float]:
    """
    Look up atomic number and weight for an element, cheapest source first.

    The symbol is normalised with ``str.capitalize()`` and then resolved, in
    order, against the preloaded proxies, the runtime cache, and finally the
    Mendeleev library. A successful Mendeleev lookup is stored in the runtime
    cache so the same proxy object is returned on later calls.

    Args:
        element: Element symbol (e.g., 'H', 'C', 'Si')

    Returns:
        Read-only mapping with 'atomic_number' and 'atomic_weight' keys

    Raises:
        UnknownElementError: If the Mendeleev lookup raises a ValueError whose
            message contains "not found" or "unknown"
        ValueError: If the Mendeleev lookup fails for any other reason
    """
    symbol = element.capitalize()

    # Tier 1: preloaded table — returns the pre-built proxy, no allocation.
    preloaded = _ATOMIC_DATA_PROXIES.get(symbol)
    if preloaded is not None:
        return preloaded

    # Tier 2: entries cached by an earlier Mendeleev fallback (already proxies).
    remembered = _RUNTIME_CACHE.get(symbol)
    if remembered is not None:
        return remembered  # type: ignore[return-value]

    # Tier 3: Mendeleev (slowest) — one query supplies both fields.
    try:
        from mendeleev import element as _mendeleev_element

        record = _mendeleev_element(symbol)
        view = types.MappingProxyType(
            {
                "atomic_number": int(record.atomic_number),
                "atomic_weight": float(record.atomic_weight),
            }
        )
        # Store the proxy itself so later lookups return the identical object.
        _RUNTIME_CACHE[symbol] = view  # type: ignore[assignment]
    except UnknownElementError:
        # Already the right exception type: let it through unwrapped.
        raise
    except ValueError as e:
        # A ValueError that reads like "unknown element" is translated to
        # UnknownElementError; any other ValueError is re-wrapped with context.
        lowered = str(e).lower()
        if "not found" in lowered or "unknown" in lowered:
            raise UnknownElementError(f"Unknown element symbol: '{symbol}'") from e
        raise ValueError(
            f"Cannot retrieve atomic data for element '{element}': {e}"
        ) from e
    except Exception as e:
        raise ValueError(
            f"Cannot retrieve atomic data for element '{element}': {e}"
        ) from e
    else:
        return view




[docs]
@lru_cache(maxsize=256)
def get_bulk_atomic_data_fast(
    elements_tuple: tuple[str, ...],
) -> dict[str, types.MappingProxyType[str, float]]:
    """
    Resolve atomic data for several elements in one memoised call.

    Each symbol is looked up through ``get_atomic_data_fast``. Results are
    memoised per input tuple by ``lru_cache`` (up to 256 distinct tuples), so
    repeated calls with an equal tuple return the *same* dict object; callers
    should treat the returned dict as read-only.

    Args:
        elements_tuple: Tuple of element symbols

    Returns:
        Dictionary mapping each symbol, exactly as given, to its atomic data
        (as immutable views)
    """
    return {symbol: get_atomic_data_fast(symbol) for symbol in elements_tuple}




[docs]
def warm_up_cache(elements: list[str]) -> None:
    """
    Best-effort pre-warming of the atomic data cache.

    Every symbol is pushed through ``get_atomic_data_fast``; lookups that fail
    with ValueError, KeyError, ImportError or OSError are skipped silently.

    Args:
        elements: List of element symbols to preload
    """
    for symbol in elements:
        try:
            get_atomic_data_fast(symbol)
        except (ValueError, KeyError, ImportError, OSError):
            # Warming is optional: a failed lookup must not abort the rest.
            continue




[docs]
def warm_cache_for_compounds(
    formulas: list[str],
    include_similar: bool = True,
    include_family: bool = True,
    timing_info: bool = False,
) -> dict[str, Any]:
    """
    Intelligently warm cache for compounds and their related elements.

    The formulas are parsed into their constituent elements, optionally
    extended with the elements of up to 3 similar compounds and up to 5
    members of the same compound family. For the resulting element set the
    function then warms, in order: the atomic data cache, the scattering
    factor interpolators, and (when more than one element was collected) the
    bulk atomic data cache for three element combinations.

    All warming steps are best-effort: a formula that cannot be processed is
    recorded with an ``"error: ..."`` status, and elements that fail with
    KeyError, ValueError or ImportError are simply not counted as successes.

    Args:
        formulas: List of chemical formulas to warm cache for
        include_similar: Whether to include similar compounds
        include_family: Whether to include compound family members
        timing_info: Whether to return timing information

    Returns:
        Dictionary with warming results and statistics. Keys:
        ``elements_warmed`` (sorted list of symbols), ``compounds_processed``,
        ``atomic_cache``, ``interpolator_cache``, ``bulk_cache``, ``timing``
        (empty unless ``timing_info`` is true), ``success_rate`` and
        ``performance_metrics``.

    Examples:
        >>> result = warm_cache_for_compounds(
        ...     ["SiO2", "Al2O3"], include_similar=False, include_family=False
        ... )
        >>> result["elements_warmed"]
        ['Al', 'O', 'Si']
        >>> result["atomic_cache"]["total"]
        3
    """
    import time

    from xraylabtool.data_handling.compound_analysis import (
        COMPOUND_FAMILIES,
        find_similar_compounds,
        get_compound_family,
        get_elements_for_compound,
    )

    # Failures of these types are expected during warming and are skipped.
    skippable = (KeyError, ValueError, ImportError)

    # Always take the start stamp (it is cheap); it is only used when
    # timing_info is requested.
    start_time = time.perf_counter()

    elements_to_warm: set[str] = set()
    compound_info: dict[str, dict[str, Any]] = {}

    def absorb_related(related_formulas: list[str]) -> None:
        """Add the elements of each related formula, skipping invalid ones."""
        for related in related_formulas:
            try:
                elements_to_warm.update(get_elements_for_compound(related))
            except skippable:
                continue

    # Phase 1: collect the elements of every formula and of its relatives.
    for formula in formulas:
        try:
            elements = get_elements_for_compound(formula)
            elements_to_warm.update(elements)

            info: dict[str, Any] = {
                "elements": elements,
                "status": "parsed",
                "similar_compounds": [],
                "family_compounds": [],
            }
            compound_info[formula] = info

            if include_similar:
                similar = find_similar_compounds(formula, similarity_threshold=0.3)
                closest = similar[:3]  # Limit to 3
                info["similar_compounds"] = closest
                absorb_related(closest)

            if include_family:
                family = get_compound_family(formula)
                if family and family in COMPOUND_FAMILIES:
                    family_compounds = COMPOUND_FAMILIES[family][:5]  # Limit to 5
                    info["family_compounds"] = family_compounds
                    absorb_related(family_compounds)

        except Exception as e:
            # Deliberately broad: one bad formula must not stop the warm-up.
            # Elements gathered before the failure stay in elements_to_warm.
            compound_info[formula] = {
                "elements": [],
                "status": f"error: {e}",
                "similar_compounds": [],
                "family_compounds": [],
            }

    # Phase 2: warm the atomic data cache.
    atomic_total = len(elements_to_warm)
    atomic_success = 0
    for element in elements_to_warm:
        try:
            get_atomic_data_fast(element)
        except skippable:
            continue
        atomic_success += 1

    # Phase 3: warm the scattering factor interpolators. The import is lazy
    # (and attempted only when there is something to warm) but is resolved
    # once instead of once per element.
    interpolator_total = len(elements_to_warm)
    interpolator_success = 0
    if elements_to_warm:
        try:
            from xraylabtool.calculators.core import (
                create_scattering_factor_interpolators,
            )
        except skippable:
            # Interpolator backend unavailable: every element counts as a miss.
            pass
        else:
            for element in elements_to_warm:
                try:
                    create_scattering_factor_interpolators(element)
                except skippable:
                    continue
                interpolator_success += 1

    # Phase 4: warm the bulk cache. Combinations are cut from the *sorted*
    # element list so the memoised tuples are the same on every run; set
    # iteration order is arbitrary and would make the keys unpredictable.
    sorted_elements = sorted(elements_to_warm)
    bulk_success = 0
    bulk_attempts = 0
    if len(sorted_elements) > 1:
        common_combos = [
            tuple(sorted_elements[:3]),  # First 3 elements
            tuple(sorted_elements[:5]),  # First 5 elements
            tuple(sorted_elements),  # All elements
        ]
        bulk_attempts = len(common_combos)
        for combo in common_combos:
            try:
                get_bulk_atomic_data_fast(combo)
            except skippable:
                continue
            except Exception:
                # Best-effort: an unexpected failure ends bulk warming early
                # without failing the whole call.
                break
            bulk_success += 1

    # Timing is reported only on request; otherwise it stays at 0.0.
    total_time_ms = (
        (time.perf_counter() - start_time) * 1000.0 if timing_info else 0.0
    )

    # Success rates (0.0 when there was nothing to warm).
    atomic_success_rate = atomic_success / atomic_total if atomic_total > 0 else 0.0
    interpolator_success_rate = (
        interpolator_success / interpolator_total if interpolator_total > 0 else 0.0
    )
    combined_total = atomic_total + interpolator_total
    overall_success_rate = (
        (atomic_success + interpolator_success) / combined_total
        if combined_total > 0
        else 0.0
    )

    return {
        "elements_warmed": sorted_elements,
        "compounds_processed": compound_info,
        "atomic_cache": {
            "success": atomic_success,
            "total": atomic_total,
            "success_rate": atomic_success_rate,
        },
        "interpolator_cache": {
            "success": interpolator_success,
            "total": interpolator_total,
            "success_rate": interpolator_success_rate,
        },
        "bulk_cache": {
            "success": bulk_success,
            "attempts": bulk_attempts,
        },
        "timing": (
            {
                "total_time_ms": total_time_ms,
                "time_per_element_ms": (
                    total_time_ms / len(elements_to_warm) if elements_to_warm else 0.0
                ),
            }
            if timing_info
            else {}
        ),
        "success_rate": overall_success_rate,
        "performance_metrics": {
            "elements_per_second": (
                len(elements_to_warm) / (total_time_ms / 1000.0)
                if total_time_ms > 0
                else 0.0
            ),
            "within_target": (
                total_time_ms < 100.0 if timing_info else True
            ),  # Target: <100ms
        },
    }




[docs]
def get_cache_stats() -> dict[str, int]:
    """
    Report how many elements each cache tier currently holds.

    Returns:
        Dictionary with the preloaded count, the runtime-cached count, and
        their sum under 'total_cached_elements'
    """
    preloaded_count = len(_ATOMIC_DATA_PRELOADED)
    runtime_count = len(_RUNTIME_CACHE)
    return {
        "preloaded_elements": preloaded_count,
        "runtime_cached_elements": runtime_count,
        "total_cached_elements": preloaded_count + runtime_count,
    }




[docs]
def is_element_preloaded(element: str) -> bool:
    """
    Tell whether an element is served from the preloaded table.

    The symbol is normalised with ``str.capitalize()`` before the lookup, so
    'si', 'SI' and 'Si' are treated alike.

    Args:
        element: Element symbol

    Returns:
        True if element is preloaded, False otherwise
    """
    normalized = element.capitalize()
    return normalized in _ATOMIC_DATA_PRELOADED



# =====================================================================================
# AtomicDataProvider Protocol Implementation
# =====================================================================================



[docs]
class FastAtomicDataProvider:
    """
    High-performance atomic data provider implementing AtomicDataProvider protocol.

    Atomic properties come from this module's preloaded/runtime caches, and
    scattering factors are obtained from the interpolators supplied by
    ``xraylabtool.calculators.core``. That module is imported lazily inside
    the methods that need it.
    """

    def __init__(self) -> None:
        """Initialize the atomic data provider."""
        # Per-instance store keyed by element symbol.
        # NOTE(review): none of the methods defined in this class read or
        # write this dict — confirm whether it is still needed.
        self._scattering_cache: dict[
            str, tuple[np.ndarray, np.ndarray, np.ndarray]
        ] = {}

    def get_scattering_factors(
        self, element: str, energies: EnergyArray
    ) -> ComplexArray:
        """
        Get atomic scattering factors for element at given energies.

        The element's f1 and f2 interpolators are evaluated at the requested
        energies and combined into complex scattering factors (f1 + if2).

        Parameters
        ----------
        element : str
            Chemical element symbol (e.g., 'Si', 'O')
        energies : EnergyArray
            X-ray energies in keV

        Returns
        -------
        ComplexArray
            Complex scattering factors (f1 + if2) as a complex128 array
        """
        from xraylabtool.calculators.core import create_scattering_factor_interpolators

        # Accept any array-like input; work in float64.
        energies_arr = np.asarray(energies, dtype=np.float64)

        # The interpolators are evaluated in eV, the inputs are given in keV.
        energies_ev = energies_arr * 1000.0

        f1_interp, f2_interp = create_scattering_factor_interpolators(element)

        f1_values = f1_interp(energies_ev)
        f2_values = f2_interp(energies_ev)

        # Real part f1, imaginary part f2.
        complex_factors = f1_values + 1j * f2_values

        return np.asarray(complex_factors, dtype=np.complex128)

    def is_element_cached(self, element: str) -> bool:
        """
        Check if element data is cached for fast access.

        Parameters
        ----------
        element : str
            Element symbol to check

        Returns
        -------
        bool
            True if the element is in the preloaded atomic data table or is
            reported as cached by the core calculator module
        """
        from xraylabtool.calculators.core import is_element_cached as is_core_cached

        # Check both our preloaded data and core module cache
        return is_element_preloaded(element) or is_core_cached(element)

    def preload_elements(self, elements: list[str]) -> None:
        """
        Preload scattering factor data for elements.

        Elements whose interpolators cannot be created (KeyError, ValueError
        or ImportError) are skipped silently.

        Parameters
        ----------
        elements : list[str]
            List of element symbols to preload
        """
        from xraylabtool.calculators.core import create_scattering_factor_interpolators

        for element in elements:
            try:
                # Called for its side effect; the returned interpolators are
                # discarded here.
                create_scattering_factor_interpolators(element)
            except (KeyError, ValueError, ImportError):
                # Skip elements that can't be loaded during prewarming
                continue

    def get_atomic_properties(self, element: str) -> types.MappingProxyType[str, float]:
        """
        Get basic atomic properties for an element.

        Thin wrapper around the module-level ``get_atomic_data_fast``.

        Parameters
        ----------
        element : str
            Element symbol

        Returns
        -------
        types.MappingProxyType[str, float]
            Immutable mapping with atomic properties
        """
        return get_atomic_data_fast(element)




# Module-wide shared provider instance. Starts as None and is created on the
# first call to get_atomic_data_provider(), which then reuses it.
_GLOBAL_PROVIDER: FastAtomicDataProvider | None = None



[docs]
def get_atomic_data_provider() -> FastAtomicDataProvider:
    """
    Return the module-wide atomic data provider, creating it on first use.

    Returns
    -------
    FastAtomicDataProvider
        Shared atomic data provider instance
    """
    global _GLOBAL_PROVIDER
    provider = _GLOBAL_PROVIDER
    if provider is None:
        # First request: build the singleton and remember it for later calls.
        provider = FastAtomicDataProvider()
        _GLOBAL_PROVIDER = provider
    return provider