Source code for xraylabtool.data_handling.atomic_cache

"""
High-performance atomic data cache system.

This module provides a pre-populated cache of atomic data for common elements
to eliminate expensive database queries to the Mendeleev library during runtime.
"""

from __future__ import annotations

from functools import lru_cache
import types
from typing import TYPE_CHECKING, Any

import numpy as np

from xraylabtool.exceptions import UnknownElementError

# Cache metrics imports are done lazily to avoid circular imports

if TYPE_CHECKING:
    from xraylabtool.typing_extensions import ComplexArray, EnergyArray

# Pre-populated atomic data for elements 1-92 (hydrogen through uranium),
# keyed by capitalized element symbol. Each entry holds the element's
# "atomic_number" and "atomic_weight". Serving these from memory eliminates
# the need for expensive Mendeleev database queries.
_ATOMIC_DATA_PRELOADED = {
    "H": {"atomic_number": 1, "atomic_weight": 1.008},
    "He": {"atomic_number": 2, "atomic_weight": 4.0026},
    "Li": {"atomic_number": 3, "atomic_weight": 6.94},
    "Be": {"atomic_number": 4, "atomic_weight": 9.0122},
    "B": {"atomic_number": 5, "atomic_weight": 10.81},
    "C": {"atomic_number": 6, "atomic_weight": 12.011},
    "N": {"atomic_number": 7, "atomic_weight": 14.007},
    "O": {"atomic_number": 8, "atomic_weight": 15.999},
    "F": {"atomic_number": 9, "atomic_weight": 18.998},
    "Ne": {"atomic_number": 10, "atomic_weight": 20.180},
    "Na": {"atomic_number": 11, "atomic_weight": 22.990},
    "Mg": {"atomic_number": 12, "atomic_weight": 24.305},
    "Al": {"atomic_number": 13, "atomic_weight": 26.982},
    "Si": {"atomic_number": 14, "atomic_weight": 28.085},
    "P": {"atomic_number": 15, "atomic_weight": 30.974},
    "S": {"atomic_number": 16, "atomic_weight": 32.06},
    "Cl": {"atomic_number": 17, "atomic_weight": 35.45},
    "Ar": {"atomic_number": 18, "atomic_weight": 39.948},
    "K": {"atomic_number": 19, "atomic_weight": 39.098},
    "Ca": {"atomic_number": 20, "atomic_weight": 40.078},
    "Sc": {"atomic_number": 21, "atomic_weight": 44.956},
    "Ti": {"atomic_number": 22, "atomic_weight": 47.867},
    "V": {"atomic_number": 23, "atomic_weight": 50.942},
    "Cr": {"atomic_number": 24, "atomic_weight": 51.996},
    "Mn": {"atomic_number": 25, "atomic_weight": 54.938},
    "Fe": {"atomic_number": 26, "atomic_weight": 55.845},
    "Co": {"atomic_number": 27, "atomic_weight": 58.933},
    "Ni": {"atomic_number": 28, "atomic_weight": 58.693},
    "Cu": {"atomic_number": 29, "atomic_weight": 63.546},
    "Zn": {"atomic_number": 30, "atomic_weight": 65.38},
    "Ga": {"atomic_number": 31, "atomic_weight": 69.723},
    "Ge": {"atomic_number": 32, "atomic_weight": 72.630},
    "As": {"atomic_number": 33, "atomic_weight": 74.922},
    "Se": {"atomic_number": 34, "atomic_weight": 78.971},
    "Br": {"atomic_number": 35, "atomic_weight": 79.904},
    "Kr": {"atomic_number": 36, "atomic_weight": 83.798},
    "Rb": {"atomic_number": 37, "atomic_weight": 85.468},
    "Sr": {"atomic_number": 38, "atomic_weight": 87.62},
    "Y": {"atomic_number": 39, "atomic_weight": 88.906},
    "Zr": {"atomic_number": 40, "atomic_weight": 91.224},
    "Nb": {"atomic_number": 41, "atomic_weight": 92.906},
    "Mo": {"atomic_number": 42, "atomic_weight": 95.95},
    "Tc": {"atomic_number": 43, "atomic_weight": 98.0},
    "Ru": {"atomic_number": 44, "atomic_weight": 101.07},
    "Rh": {"atomic_number": 45, "atomic_weight": 102.91},
    "Pd": {"atomic_number": 46, "atomic_weight": 106.42},
    "Ag": {"atomic_number": 47, "atomic_weight": 107.87},
    "Cd": {"atomic_number": 48, "atomic_weight": 112.41},
    "In": {"atomic_number": 49, "atomic_weight": 114.82},
    "Sn": {"atomic_number": 50, "atomic_weight": 118.71},
    "Sb": {"atomic_number": 51, "atomic_weight": 121.76},
    "Te": {"atomic_number": 52, "atomic_weight": 127.60},
    "I": {"atomic_number": 53, "atomic_weight": 126.90},
    "Xe": {"atomic_number": 54, "atomic_weight": 131.29},
    "Cs": {"atomic_number": 55, "atomic_weight": 132.91},
    "Ba": {"atomic_number": 56, "atomic_weight": 137.33},
    "La": {"atomic_number": 57, "atomic_weight": 138.91},
    "Ce": {"atomic_number": 58, "atomic_weight": 140.12},
    "Pr": {"atomic_number": 59, "atomic_weight": 140.91},
    "Nd": {"atomic_number": 60, "atomic_weight": 144.24},
    "Pm": {"atomic_number": 61, "atomic_weight": 145.0},
    "Sm": {"atomic_number": 62, "atomic_weight": 150.36},
    "Eu": {"atomic_number": 63, "atomic_weight": 151.96},
    "Gd": {"atomic_number": 64, "atomic_weight": 157.25},
    "Tb": {"atomic_number": 65, "atomic_weight": 158.93},
    "Dy": {"atomic_number": 66, "atomic_weight": 162.50},
    "Ho": {"atomic_number": 67, "atomic_weight": 164.93},
    "Er": {"atomic_number": 68, "atomic_weight": 167.26},
    "Tm": {"atomic_number": 69, "atomic_weight": 168.93},
    "Yb": {"atomic_number": 70, "atomic_weight": 173.05},
    "Lu": {"atomic_number": 71, "atomic_weight": 174.97},
    "Hf": {"atomic_number": 72, "atomic_weight": 178.49},
    "Ta": {"atomic_number": 73, "atomic_weight": 180.95},
    "W": {"atomic_number": 74, "atomic_weight": 183.84},
    "Re": {"atomic_number": 75, "atomic_weight": 186.21},
    "Os": {"atomic_number": 76, "atomic_weight": 190.23},
    "Ir": {"atomic_number": 77, "atomic_weight": 192.22},
    "Pt": {"atomic_number": 78, "atomic_weight": 195.08},
    "Au": {"atomic_number": 79, "atomic_weight": 196.97},
    "Hg": {"atomic_number": 80, "atomic_weight": 200.59},
    "Tl": {"atomic_number": 81, "atomic_weight": 204.38},
    "Pb": {"atomic_number": 82, "atomic_weight": 207.2},
    "Bi": {"atomic_number": 83, "atomic_weight": 208.98},
    "Po": {"atomic_number": 84, "atomic_weight": 209.0},
    "At": {"atomic_number": 85, "atomic_weight": 210.0},
    "Rn": {"atomic_number": 86, "atomic_weight": 222.0},
    "Fr": {"atomic_number": 87, "atomic_weight": 223.0},
    "Ra": {"atomic_number": 88, "atomic_weight": 226.0},
    "Ac": {"atomic_number": 89, "atomic_weight": 227.0},
    "Th": {"atomic_number": 90, "atomic_weight": 232.04},
    "Pa": {"atomic_number": 91, "atomic_weight": 231.04},
    "U": {"atomic_number": 92, "atomic_weight": 238.03},
}

# Pre-built MappingProxyType wrappers for preloaded data — created once at
# import time so get_atomic_data_fast returns the same object on every hit
# instead of constructing a new wrapper per call. Each proxy is a read-only
# view that wraps (does not copy) the matching inner dict of
# _ATOMIC_DATA_PRELOADED.
_ATOMIC_DATA_PROXIES: dict[str, types.MappingProxyType[str, float]] = {
    k: types.MappingProxyType(v) for k, v in _ATOMIC_DATA_PRELOADED.items()
}

# Runtime cache for elements not in the preloaded data. Entries are stored
# already wrapped in MappingProxyType, so every later lookup returns the same
# read-only object that the first lookup produced.
_RUNTIME_CACHE: dict[str, types.MappingProxyType[str, float]] = {}


def get_atomic_data_fast(element: str) -> types.MappingProxyType[str, float]:
    """
    Fast atomic data lookup with preloaded cache and fallback to Mendeleev.

    The symbol is normalized with ``str.capitalize`` and then resolved against
    the preloaded proxies first, the runtime cache second, and only as a last
    resort through a Mendeleev query, whose result is stored in the runtime
    cache.

    Args:
        element: Element symbol (e.g., 'H', 'C', 'Si'); letter case is
            normalized before the lookup

    Returns:
        Read-only mapping with 'atomic_number' and 'atomic_weight' keys. The
        same mapping object is returned on every call for a given element.

    Raises:
        UnknownElementError: If the Mendeleev lookup fails with a ValueError
            whose message contains "not found" or "unknown"
        ValueError: If the atomic data cannot be retrieved for any other
            reason (the original exception is chained as the cause)
    """
    element_key = element.capitalize()

    # Preloaded data first (fastest) — pre-built proxy, no allocation.
    preloaded = _ATOMIC_DATA_PROXIES.get(element_key)
    if preloaded is not None:
        return preloaded

    # Runtime cache second — entries were wrapped when they were inserted.
    cached = _RUNTIME_CACHE.get(element_key)
    if cached is not None:
        return cached

    # Fall back to Mendeleev (slowest) — single query for both fields.
    try:
        from mendeleev import element as _mendeleev_element

        elem = _mendeleev_element(element_key)
        proxy = types.MappingProxyType(
            {
                "atomic_number": int(elem.atomic_number),
                "atomic_weight": float(elem.atomic_weight),
            }
        )
    except UnknownElementError:
        # Re-raise UnknownElementError without wrapping
        raise
    except ValueError as e:
        # Mendeleev raises ValueError for unknown elements — map to
        # UnknownElementError when the message says so.
        error_str = str(e).lower()
        if "not found" in error_str or "unknown" in error_str:
            raise UnknownElementError(f"Unknown element symbol: '{element_key}'") from e
        raise ValueError(
            f"Cannot retrieve atomic data for element '{element}': {e}"
        ) from e
    except Exception as e:
        # Anything else (failed import, missing or non-numeric fields, ...)
        # is reported uniformly as a ValueError.
        raise ValueError(
            f"Cannot retrieve atomic data for element '{element}': {e}"
        ) from e

    # Cache the proxy itself so future lookups return the same object.
    _RUNTIME_CACHE[element_key] = proxy
    return proxy
@lru_cache(maxsize=256)
def _bulk_atomic_data_cached(
    elements_tuple: tuple[str, ...],
) -> dict[str, types.MappingProxyType[str, float]]:
    """Build and memoize the symbol -> atomic data mapping for one tuple."""
    return {element: get_atomic_data_fast(element) for element in elements_tuple}


def get_bulk_atomic_data_fast(
    elements_tuple: tuple[str, ...],
) -> dict[str, types.MappingProxyType[str, float]]:
    """
    High-performance bulk atomic data loader with caching.

    Loads atomic data for several elements through ``get_atomic_data_fast``
    and memoizes the result per input tuple (LRU, 256 entries). The tuple is
    the cache key, so the same symbols in a different order form a separate
    entry.

    Args:
        elements_tuple: Tuple of element symbols (must be hashable)

    Returns:
        New dictionary mapping each symbol, exactly as given, to its atomic
        data (as immutable views). The dictionary is a fresh shallow copy on
        every call, so callers may modify it without affecting later calls.

    Raises:
        UnknownElementError, ValueError: Propagated from
            ``get_atomic_data_fast``; failed lookups are not cached.
    """
    # The memoized dict is shared by every cache hit; hand out a copy so one
    # caller's mutation cannot leak into another caller's result.
    return dict(_bulk_atomic_data_cached(elements_tuple))


# Keep the lru_cache management helpers reachable under the public name, as
# they were when the decorator was applied to this function directly.
get_bulk_atomic_data_fast.cache_info = _bulk_atomic_data_cached.cache_info  # type: ignore[attr-defined]
get_bulk_atomic_data_fast.cache_clear = _bulk_atomic_data_cached.cache_clear  # type: ignore[attr-defined]
def warm_up_cache(elements: list[str]) -> None:
    """
    Pre-warm the cache with specific elements.

    Each symbol is looked up once through ``get_atomic_data_fast``. Lookups
    that fail with ValueError, KeyError, ImportError or OSError are ignored
    so one bad symbol does not stop the rest from being warmed.

    Args:
        elements: List of element symbols to preload
    """
    ignorable = (ValueError, KeyError, ImportError, OSError)
    for symbol in elements:
        try:
            get_atomic_data_fast(symbol)
        except ignorable:
            # Best-effort warm-up: move on to the next symbol.
            continue
# Errors that mean "this item cannot be warmed". Cache warming is best-effort,
# so these are skipped instead of being propagated to the caller.
_WARM_SKIPPABLE_ERRORS = (KeyError, ValueError, ImportError)


def _count_warmed(elements: Any, loader: Any) -> int:
    """Call ``loader`` on every element and count the calls that succeed."""
    warmed = 0
    for element in elements:
        try:
            loader(element)
        except _WARM_SKIPPABLE_ERRORS:
            continue
        warmed += 1
    return warmed


def _collect_related_elements(
    related_formulas: Any, get_elements: Any, into: set[str]
) -> None:
    """Add the elements of each parsable related formula to ``into``."""
    for related in related_formulas:
        try:
            into.update(get_elements(related))
        except _WARM_SKIPPABLE_ERRORS:
            # Skip invalid compounds during cache warming
            continue


def _success_ratio(successes: int, total: int) -> float:
    """Return ``successes / total``, or 0.0 when nothing was attempted."""
    return successes / total if total > 0 else 0.0


def warm_cache_for_compounds(
    formulas: list[str],
    include_similar: bool = True,
    include_family: bool = True,
    timing_info: bool = False,
) -> dict[str, Any]:
    """
    Intelligently warm cache for compounds and their related elements.

    The formulas are parsed into their constituent elements, optionally
    extended with the elements of up to 3 similar compounds and up to 5
    members of the same compound family. For the resulting element set the
    function then loads atomic data, creates scattering factor interpolators,
    and (when more than one element was collected) warms the bulk atomic data
    cache for three element combinations. Every step is best-effort: items
    that fail are counted as failures rather than raised.

    Args:
        formulas: List of chemical formulas to warm cache for
        include_similar: Whether to include similar compounds
        include_family: Whether to include compound family members
        timing_info: Whether to measure and return timing information

    Returns:
        Dictionary with warming results and statistics:

        - ``elements_warmed``: sorted list of every element collected
        - ``compounds_processed``: per-formula dict with ``elements``,
          ``status`` ("parsed" or "error: ..."), ``similar_compounds`` and
          ``family_compounds``
        - ``atomic_cache`` / ``interpolator_cache``: ``success``, ``total``
          and ``success_rate`` of the respective warming step
        - ``bulk_cache``: ``success`` and ``attempts`` of the bulk warming
        - ``timing``: ``total_time_ms`` and ``time_per_element_ms`` when
          ``timing_info`` is true, otherwise an empty dict
        - ``success_rate``: combined atomic + interpolator success rate
        - ``performance_metrics``: ``elements_per_second`` (0.0 without
          timing) and ``within_target`` (total time under 100 ms; always
          True without timing)

    Examples:
        >>> result = warm_cache_for_compounds(
        ...     ["SiO2", "Al2O3"], include_similar=False, include_family=False
        ... )
        >>> result["elements_warmed"]
        ['Al', 'O', 'Si']
        >>> result["success_rate"] > 0.9
        True
    """
    import time

    from xraylabtool.data_handling.compound_analysis import (
        COMPOUND_FAMILIES,
        find_similar_compounds,
        get_compound_family,
        get_elements_for_compound,
    )

    start_time = time.perf_counter() if timing_info else 0.0

    # Collect all elements to warm
    elements_to_warm: set[str] = set()
    compound_info: dict[Any, dict[str, Any]] = {}

    # Process each formula
    for formula in formulas:
        try:
            # Get constituent elements
            elements = get_elements_for_compound(formula)
            elements_to_warm.update(elements)
            entry: dict[str, Any] = {
                "elements": elements,
                "status": "parsed",
                "similar_compounds": [],
                "family_compounds": [],
            }
            compound_info[formula] = entry

            # Find similar compounds if requested
            if include_similar:
                similar = find_similar_compounds(formula, similarity_threshold=0.3)
                closest = similar[:3]  # Limit to 3
                entry["similar_compounds"] = closest
                _collect_related_elements(
                    closest, get_elements_for_compound, elements_to_warm
                )

            # Find compound family members if requested
            if include_family:
                family = get_compound_family(formula)
                if family and family in COMPOUND_FAMILIES:
                    family_compounds = COMPOUND_FAMILIES[family][:5]  # Limit to 5
                    entry["family_compounds"] = family_compounds
                    _collect_related_elements(
                        family_compounds, get_elements_for_compound, elements_to_warm
                    )
        except Exception as e:
            # Any failure while analysing a formula is recorded, not raised.
            # Elements gathered before the failure stay in the warm set.
            compound_info[formula] = {
                "elements": [],
                "status": f"error: {e}",
                "similar_compounds": [],
                "family_compounds": [],
            }

    # Warm atomic data cache
    atomic_total = len(elements_to_warm)
    atomic_success = _count_warmed(elements_to_warm, get_atomic_data_fast)

    # Warm scattering factor interpolators. The import is resolved once; if
    # it fails, every element counts as an interpolator failure.
    interpolator_total = len(elements_to_warm)
    interpolator_success = 0
    if elements_to_warm:
        try:
            from xraylabtool.calculators.core import (
                create_scattering_factor_interpolators,
            )
        except _WARM_SKIPPABLE_ERRORS:
            pass
        else:
            interpolator_success = _count_warmed(
                elements_to_warm, create_scattering_factor_interpolators
            )

    # Warm bulk data cache for common combinations. The combinations are
    # taken from the sorted element list so the warmed tuples are the same on
    # every run (set iteration order is not stable across processes).
    common_combos: list[tuple[str, ...]] = []
    if len(elements_to_warm) > 1:
        ordered = sorted(elements_to_warm)
        common_combos = [
            tuple(ordered[:3]),  # First 3 elements
            tuple(ordered[:5]),  # First 5 elements
            tuple(ordered),  # All elements sorted
        ]

    bulk_success = 0
    for combo in common_combos:
        try:
            get_bulk_atomic_data_fast(combo)
        except Exception:
            # Deliberate best-effort: a combination that cannot be loaded
            # only means the bulk cache stays cold for that tuple.
            continue
        bulk_success += 1

    # Calculate timing
    total_time_ms = (
        (time.perf_counter() - start_time) * 1000.0 if timing_info else 0.0
    )

    # Calculate success rates
    atomic_success_rate = _success_ratio(atomic_success, atomic_total)
    interpolator_success_rate = _success_ratio(
        interpolator_success, interpolator_total
    )
    overall_success_rate = _success_ratio(
        atomic_success + interpolator_success, atomic_total + interpolator_total
    )

    return {
        "elements_warmed": sorted(elements_to_warm),
        "compounds_processed": compound_info,
        "atomic_cache": {
            "success": atomic_success,
            "total": atomic_total,
            "success_rate": atomic_success_rate,
        },
        "interpolator_cache": {
            "success": interpolator_success,
            "total": interpolator_total,
            "success_rate": interpolator_success_rate,
        },
        "bulk_cache": {
            "success": bulk_success,
            "attempts": len(common_combos),
        },
        "timing": (
            {
                "total_time_ms": total_time_ms,
                "time_per_element_ms": (
                    total_time_ms / len(elements_to_warm) if elements_to_warm else 0.0
                ),
            }
            if timing_info
            else {}
        ),
        "success_rate": overall_success_rate,
        "performance_metrics": {
            "elements_per_second": (
                len(elements_to_warm) / (total_time_ms / 1000.0)
                if total_time_ms > 0
                else 0.0
            ),
            "within_target": (
                total_time_ms < 100.0 if timing_info else True
            ),  # Target: <100ms
        },
    }
def get_cache_stats() -> dict[str, int]:
    """
    Get cache statistics for monitoring.

    Returns:
        Dictionary with the number of preloaded elements, the number of
        elements currently held in the runtime cache, and the sum of both
    """
    preloaded_count = len(_ATOMIC_DATA_PRELOADED)
    runtime_count = len(_RUNTIME_CACHE)
    return {
        "preloaded_elements": preloaded_count,
        "runtime_cached_elements": runtime_count,
        "total_cached_elements": preloaded_count + runtime_count,
    }
def is_element_preloaded(element: str) -> bool:
    """
    Check if an element is in the preloaded cache.

    The symbol is normalized with ``str.capitalize`` before the membership
    test, so letter case does not matter.

    Args:
        element: Element symbol

    Returns:
        True if element is preloaded, False otherwise
    """
    normalized_symbol = element.capitalize()
    return normalized_symbol in _ATOMIC_DATA_PRELOADED
# ===================================================================================== # AtomicDataProvider Protocol Implementation # =====================================================================================
class FastAtomicDataProvider:
    """
    High-performance atomic data provider implementing AtomicDataProvider protocol.

    Atomic properties are served from the module's preloaded/runtime caches,
    and scattering factors are obtained by evaluating the per-element
    interpolators supplied by ``xraylabtool.calculators.core``.
    """

    def __init__(self) -> None:
        """Initialize the atomic data provider."""
        # Per-instance store keyed by element symbol. No method of this class
        # reads or writes it after construction.
        self._scattering_cache: dict[
            str, tuple[np.ndarray, np.ndarray, np.ndarray]
        ] = {}

    def get_scattering_factors(
        self, element: str, energies: EnergyArray
    ) -> ComplexArray:
        """
        Get atomic scattering factors for element at given energies.

        The requested energies are converted from keV to eV and passed to the
        element's f1 and f2 interpolators; the two results are combined into
        complex scattering factors (f1 + if2).

        Parameters
        ----------
        element : str
            Chemical element symbol (e.g., 'Si', 'O')
        energies : EnergyArray
            X-ray energies in keV

        Returns
        -------
        ComplexArray
            Complex scattering factors (f1 + if2) as complex128
        """
        from xraylabtool.calculators.core import create_scattering_factor_interpolators

        # keV -> eV, the unit the interpolators are evaluated in
        energies_ev = np.asarray(energies, dtype=np.float64) * 1000.0

        interp_f1, interp_f2 = create_scattering_factor_interpolators(element)
        real_part = interp_f1(energies_ev)
        imag_part = interp_f2(energies_ev)

        return np.asarray(real_part + 1j * imag_part, dtype=np.complex128)

    def is_element_cached(self, element: str) -> bool:
        """
        Check if element data is cached for fast access.

        Parameters
        ----------
        element : str
            Element symbol to check

        Returns
        -------
        bool
            True if the element is in the preloaded atomic data or is
            reported as cached by the core calculator module
        """
        from xraylabtool.calculators.core import is_element_cached as is_core_cached

        # Preloaded atomic data wins; otherwise defer to the core module cache
        if is_element_preloaded(element):
            return True
        return is_core_cached(element)

    def preload_elements(self, elements: list[str]) -> None:
        """
        Preload scattering factor data for elements.

        Elements whose interpolators cannot be created (KeyError, ValueError
        or ImportError) are skipped silently.

        Parameters
        ----------
        elements : list[str]
            List of element symbols to preload
        """
        from xraylabtool.calculators.core import create_scattering_factor_interpolators

        skippable = (KeyError, ValueError, ImportError)
        for symbol in elements:
            try:
                # Creating the interpolators is what populates the cache
                create_scattering_factor_interpolators(symbol)
            except skippable:
                # Prewarming is best-effort; try the next element
                pass

    def get_atomic_properties(self, element: str) -> types.MappingProxyType[str, float]:
        """
        Get basic atomic properties for an element.

        Parameters
        ----------
        element : str
            Element symbol

        Returns
        -------
        types.MappingProxyType[str, float]
            Immutable mapping with atomic properties, as returned by
            ``get_atomic_data_fast``
        """
        properties = get_atomic_data_fast(element)
        return properties
# Shared provider instance; stays None until the first call to
# get_atomic_data_provider creates it.
_GLOBAL_PROVIDER: FastAtomicDataProvider | None = None


def get_atomic_data_provider() -> FastAtomicDataProvider:
    """
    Get the global atomic data provider instance.

    The instance is created on the first call and reused afterwards.

    Returns
    -------
    FastAtomicDataProvider
        Shared atomic data provider instance
    """
    global _GLOBAL_PROVIDER
    provider = _GLOBAL_PROVIDER
    if provider is None:
        provider = FastAtomicDataProvider()
        _GLOBAL_PROVIDER = provider
    return provider