Source code for hqs_nmr.spectrumio

# Copyright © 2025 HQS Quantum Simulations GmbH. All Rights Reserved.

"""Deserialization and serialization from varios NMR spectrum formats."""

from __future__ import annotations

from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Any, Optional, Union

import nmrglue
import numpy as np
from hqs_nmr_parameters import GYROMAGNETIC_RATIOS, Isotope

from hqs_nmr.datatypes import DEFAULT_UNKNOWN, NMRExperimentalSpectrum1D


[docs] def _extract_solvent_jcampdx(data: dict[str, Any]) -> list[str]: """Extracts solvent string from a JCAMP-DX data dictionary if available. With JCAMP-DX version 5.01 a new optional ".SOLVENT NAME" key has been added to store a description of the solvent. According to the specification this may include pH, ionic strength, if relevant. This function does not distinguish the solvent name from such additional data. A few notes regarding the parsing via the `nmrglue` package: Since a JCAMP-DX file may contain more than one block (ends with "##END"), `nmrglue` collects all values of a key in a list. Furthermore, `nmrglue` normalizes the key in the data dictionary, i.e. while the JCAMP-DX file may contain "##.SOLVENTNAME" or "##.SOLVENT NAME", the resulting data dictionary only contains a ".SOLVENTNAME" key in both cases. Args: data: Data dictionary resulting from parsing with `nmrglue`. Returns: Solvent string(s) or an empty list if no solvent information was found. """ solvent: list[str] = [] output: Union[list[str], None] = data.get(".SOLVENTNAME", None) if isinstance(output, list): solvent = output return solvent
[docs] def _extract_solvent_bruker(data: dict[str, Any]) -> str: """Extracts solvent string from a Bruker data dictionary if available. The Bruker directory format stores the solvent name in the acquisition file (`acqus` or `acqu`) under the non-standard key "##$SOLVENT". The solvent name is usually enclosed in angle brackets (e.g. `<DMSO>`), which are stripped of with this function. Args: data: Data dictionary resulting from parsing with `nmrglue`. Returns: Solvent string or an empty string if no solvent information was found. """ solvent = data.get("acqus", {}).get("SOLVENT", "") return solvent.replace("<", "").replace(">", "")
[docs] def _extract_temperature_jcampdx(data: dict[str, Any]) -> list[float]: """Extracts temperature (in Kelvin) from a JCAMP-DX data dictionary if available. JCAMP-DX files store the temperature in degrees Celsius in the "##TEMPERATURE" field. Note that this function returns the temperature in Kelvin. A few notes regarding the parsing via the `nmrglue` package: Since a JCAMP-DX file may contain more than one block (ends with "##END"), `nmrglue` collects all values of a key in a list. Args: data: Data dictionary resulting from parsing with `nmrglue`. Returns: Temperature(s) in Kelvin or an empty list if no temperature was found. """ temperature: list[float] = [] output = data.get("TEMPERATURE", None) if isinstance(output, list): temperature = [float(t) + 273.15 for t in output] return temperature
[docs] def _extract_temperature_bruker(data: dict[str, Any]) -> Optional[float]: """Extracts temperature (in Kelvin) from a Bruker data dictionary if available. The Bruker directory format stores the temperature (in Kelvin) in the acquisition file (`acqus` or `acqu`) under the non-standard key "##$TE". The data dictionary produced by `nmrglue.bruker.read_pdata` holds the temperature already as a `float`. Args: data: Data dictionary resulting from parsing with `nmrglue`. Returns: Temperature in Kelvin or `None` if no solvent information was found. """ return data.get("acqus", {}).get("TE", None)
[docs] def _extract_isotope_jcampdx(data: dict[str, Any]) -> list[Isotope]: """Extracts isotope from a JCAMP-DX data dictionary. JCAMP-DX files store the isotope in the "##.OBSERVE NUCLEUS" field. The corresponding value usually contains a leading caret (e.g. `^13C`), which is removed in this function. A few notes regarding the parsing via the `nmrglue` package: Since a JCAMP-DX file may contain more than one block (ends with "##END"), `nmrglue` collects all values of a key in a list. Furthermore, `nmrglue` normalizes the key in the data dictionary, i.e. while the JCAMP-DX file may contain "##.OBSERVENUCLEUS" or "##.OBSERVE NUCLEUS", the resulting data dictionary only contains a ".OBSERVENUCLEUS" key in both cases. Args: data: Data dictionary resulting from parsing with `nmrglue`. Returns: Isotope of observed nucleus. """ try: output = data[".OBSERVENUCLEUS"] return [Isotope.from_str(iso.replace("^", "")) for iso in output] except Exception as e: raise RuntimeError("Failed to extract isotope from NMR data dictionary.") from e
[docs] def _extract_isotope_bruker(data: dict[str, Any]) -> Isotope: """Extracts isotope from a Bruker data dictionary. The Bruker directory format stores the isotope in the processing file (`procs` or `proc`) "##$AXNUC". The isotope string is usually enclosed in angle brackets (e.g. `<1H>`). Contrary to the solvent name, the isotope string stored in the data dictionary does not include the angle brackets. Args: data: Data dictionary resulting from parsing with `nmrglue`. Returns: Isotope of observed nucleus. """ try: output: str = data["procs"]["AXNUC"] return Isotope.from_str(output) except Exception as e: raise RuntimeError("Failed to extract isotope from NMR data dictionary.") from e
def read_jdx(
    file: Union[Path, str],
    source: str = DEFAULT_UNKNOWN,
    license_id: str = DEFAULT_UNKNOWN,
) -> NMRExperimentalSpectrum1D:
    """Read spectral data from a JCAMP-DX (JDX) file.

    NOTE: it is assumed that the JCAMP-DX file contains only one block, i.e. one spectrum and
    one isotope. The x-axis limits ("MINX"/"MAXX") are divided by the observe frequency in MHz
    to obtain chemical shifts in ppm.

    Args:
        file: Path to the JDX file (suffix `.jdx`, `.dx` or `.jcamp`, case-insensitive).
        source: Owner or producer of the spectrum.
        license_id: License associated with the spectrum (as SPDX identifier if possible).

    Raises:
        RuntimeError: Parsing yielded no spectral data, or the isotope could not be extracted.
        ValueError: Wrong file extension, spectral data that is not a NumPy array, or
            inconsistent JDX parameters.
        KeyError: A required JDX field is missing from the parsed dictionary.

    Returns:
        A `NMRExperimentalSpectrum1D` object with chemical shifts (in ppm) and intensities.
    """
    jdx_path = Path(file)
    if jdx_path.suffix.lower() not in [".jdx", ".dx", ".jcamp"]:
        raise ValueError("Incorrect file extension. Please provide a valid JDX file.")

    data_dict, data_values = nmrglue.jcampdx.read(filename=str(jdx_path))

    solvent = _extract_solvent_jcampdx(data_dict)
    temperature = _extract_temperature_jcampdx(data_dict)
    isotope = _extract_isotope_jcampdx(data_dict)

    if data_values is None:
        raise RuntimeError(f"Error parsing {file!s}, could not extract spectral data.")
    if not isinstance(data_values, np.ndarray):
        raise ValueError(f"Unsupported spectral data of type {type(data_values)}.")

    # All checks are evaluated up front (no short-circuiting), so a missing key surfaces
    # as a KeyError regardless of the outcome of the other checks.
    consistency_checks = (
        # The axis is expected to run from the largest x value down to the smallest one.
        data_dict["MINX"] == data_dict["LASTX"],
        data_dict["MAXX"] == data_dict["FIRSTX"],
        # Exactly one block: every per-block list holds a single entry.
        len(data_dict[".OBSERVEFREQUENCY"]) == 1,
        len(data_dict["MINX"]) == 1,
        len(data_dict["MAXX"]) == 1,
        len(isotope) == 1,
        # Solvent and temperature are optional, but must not be ambiguous.
        len(solvent) <= 1,
        len(temperature) <= 1,
    )
    if not all(consistency_checks):
        raise ValueError("Invalid JDX data detected.")

    freq_MHz = float(data_dict[".OBSERVEFREQUENCY"][0])
    lower_hz = float(data_dict["MINX"][0])
    upper_hz = float(data_dict["MAXX"][0])

    # Hz divided by MHz yields ppm.
    lower_ppm = lower_hz / freq_MHz
    upper_ppm = upper_hz / freq_MHz
    shifts_ppm = np.linspace(upper_ppm, lower_ppm, len(data_values))

    return NMRExperimentalSpectrum1D(
        omegas_ppm=shifts_ppm,
        intensity=data_values,
        frequency_MHz=freq_MHz,
        solvent=solvent[0] if solvent else "",
        temperature=temperature[0] if temperature else None,
        isotope=isotope[0],
        source=source,
        license=license_id,
    )
def read_jdx_string(
    file_content: str, source: str = DEFAULT_UNKNOWN, license_id: str = DEFAULT_UNKNOWN
) -> tuple[np.ndarray, np.ndarray]:
    """Parse JDX spectral data from a string.

    Note this function creates a temporary file in order to match the function interface of
    `nmrglue.jcampdx.read`. The file is written and closed before it is parsed, and removed
    afterwards (also when parsing fails).

    Args:
        file_content: Content of a JCAMP-DX file.
        source: Owner or producer of the spectrum.
        license_id: License associated with the spectrum (as SPDX identifier if possible).

    Returns:
        A tuple of chemical shifts (in ppm) and intensities.
    """
    # A NamedTemporaryFile cannot be reopened by name on Windows while it is still open.
    # Hence the file is created with delete=False, closed after writing, and removed
    # explicitly once parsing has finished.
    tf = NamedTemporaryFile(mode="w", encoding="utf-8", suffix=".jdx", delete=False)
    tmp_path = Path(tf.name)
    try:
        with tf:
            tf.write(file_content)
        spectrum = read_jdx(tmp_path, source=source, license_id=license_id)
    finally:
        tmp_path.unlink(missing_ok=True)
    return spectrum.omegas_ppm, spectrum.intensity
def read_bruker_dir(
    directory: Union[Path, str],
    source: str = DEFAULT_UNKNOWN,
    license_id: str = DEFAULT_UNKNOWN,
) -> NMRExperimentalSpectrum1D:
    """Read spectral data from a Bruker directory.

    The processed data is read with `nmrglue.bruker.read_pdata`. The ppm axis is rebuilt
    from the processing parameters ("OFFSET", "SF", "SW_p" and "SI") as an evenly spaced
    grid running from the left to the right axis limit.

    Args:
        directory: Path to the Bruker directory.
        source: Owner or producer of the spectrum.
        license_id: License associated with the spectrum (as SPDX identifier if possible).

    Returns:
        A `NMRExperimentalSpectrum1D` object containing chemical shifts (in ppm) and
        intensities.
    """
    params, ydata = nmrglue.bruker.read_pdata(str(Path(directory)))
    processing = params["procs"]

    left_ppm = processing["OFFSET"]  # left axis limit in ppm
    freq_MHz = processing["SF"]  # spectrometer frequency in MHz
    bandwidth_hz = processing["SW_p"]  # width of the frequency band in Hz
    # Hz divided by MHz gives the band width in ppm; subtract it to get the right limit.
    right_ppm = left_ppm - bandwidth_hz / freq_MHz
    num_data_points = processing["SI"]  # number of data points

    return NMRExperimentalSpectrum1D(
        omegas_ppm=np.linspace(left_ppm, right_ppm, num_data_points),
        intensity=ydata,
        frequency_MHz=freq_MHz,
        solvent=_extract_solvent_bruker(params),
        temperature=_extract_temperature_bruker(params),
        isotope=_extract_isotope_bruker(params),
        source=source,
        license=license_id,
    )
def write_jdx(
    x_data: np.ndarray,
    y_data: np.ndarray,
    frequency_MHz: float,
    molecule_name: str,
    solvent: str = "CDCl3",
    isotope: str = "1H",
    manual_shift: Optional[float] = None,
) -> None:
    """Write a JCAMP-DX file.

    This function writes a JCAMP-DX file from given x and y data into the current working
    directory. The file name is built from the molecule name, isotope, solvent and frequency.
    The x-axis can be moved by a specified amount, which is useful when the experimental
    spectrum is wrongly referenced. The input arrays are never modified.

    Args:
        x_data: X data, shifts in ppm.
        y_data: Y data, intensity.
        frequency_MHz: Observation frequency in MHz (depends on the isotope).
        molecule_name: Molecule name.
        solvent: Solvent name. Defaults to "CDCl3".
        isotope: String representation of the isotope as [atomic mass number][symbol].
            Defaults to "1H".
        manual_shift: Manual shift in ppm. Defaults to None. A shift of `None` or zero
            leaves the axis untouched and produces the "original" file.

    Raises:
        ValueError: `x_data` and `y_data` differ in length.
    """
    # Validate before touching the file system; otherwise a length mismatch would either
    # abort half-way through writing or produce a file whose NPOINTS disagrees with its rows.
    if len(x_data) != len(y_data):
        raise ValueError(
            f"x_data and y_data must have the same length, got {len(x_data)} and {len(y_data)}."
        )

    jdx_name = f"{molecule_name:s}_{isotope:s}_{solvent:s}_{frequency_MHz:0.2f}"
    if manual_shift:
        # Build a new array instead of `x_data += ...` so the caller's data stays untouched
        # (and integer-typed axes do not fail on the in-place float addition).
        x_data = x_data + manual_shift
        jdx_name += "MHz_shifted.jdx"
        title = "##TITLE=Experimental NMR data but shifted by the user.\n"
    else:
        jdx_name += "MHz_original.jdx"
        title = "##TITLE=Experimental NMR data.\n"

    # JDX file: x data in Hz. We need to do a conversion from ppm to Hz using the
    # gyromagnetic ratio and the observe frequency.
    # NOTE(review): `read_jdx` converts back with Hz / .OBSERVEFREQUENCY only, so for isotopes
    # other than 1H this extra gyromagnetic factor makes a write/read round trip differ from
    # the input - confirm which frequency convention is intended.
    x_data_hz = (
        x_data
        * frequency_MHz
        * GYROMAGNETIC_RATIOS[Isotope.from_str("1H")]
        / GYROMAGNETIC_RATIOS[Isotope.from_str(isotope)]
    )

    minx = float(x_data_hz.min())
    lastx = x_data_hz[-1]
    maxx = float(x_data_hz.max())
    firstx = x_data_hz[0]
    firsty = y_data[0]
    miny = float(y_data.min())
    maxy = float(y_data.max())
    npoints = len(x_data_hz)

    header = [
        title,
        "1D - NMR E:\\ SBL 11\n",
        "\n",
        "##JCAMP-DX=5.01\n",
        "##DATA TYPE=NMR SPECTRUM\n",
        "##DATA CLASS=XYDATA\n",
        "##ORIGIN=hqs-nmr\n",
        "##OWNER=HQS Quantum Simulations GmbH\n",
        "##LONGDATE=Xxx, xx Xxx xxxx yy:yy:yy +0100 # export date from JSpecView\n",
        f"##.SHIFTREFERENCE=INTERNAL, {solvent}, 1, dd.dddd\n",
        "##SPECTROMETER/DATA SYSTEM=spect\n",
        f"##.OBSERVE NUCLEUS={isotope}\n",
        "##XUNITS=HZ\n",
        "##YUNITS=ARBITRARY UNITS\n",
        "##XFACTOR=1\n",
        "##YFACTOR=1\n",
        f"##.OBSERVEFREQUENCY={frequency_MHz}\n",
        f"##.SOLVENT NAME={solvent}\n",
        f"##FIRSTX={firstx}\n",
        f"##MAXX={maxx}\n",
        f"##FIRSTY={firsty}\n",
        f"##LASTX={lastx}\n",
        f"##MINX={minx}\n",
        f"##NPOINTS={npoints}\n",
        f"##MINY={miny}\n",
        f"##MAXY={maxy}\n",
        "##XYDATA=((X..X)(Y..Y))\n",
    ]

    # Explicit encoding keeps the written bytes independent of the platform default.
    with open(jdx_name, "w", encoding="utf-8") as jdx_file:
        jdx_file.writelines(header)
        jdx_file.writelines(f"{x_hz}, {value}\n" for x_hz, value in zip(x_data_hz, y_data))
        jdx_file.write("##END=")
def bruker_dir_to_jdx(
    directory: Union[Path, str],
    molecule_name: str,
) -> None:
    """Convert a Bruker directory into a JCAMP-DX file.

    The spectrum is read with `read_bruker_dir` and handed to `write_jdx` together with the
    solvent and isotope found in the Bruker data. No manual shift is applied.

    Args:
        directory: Path to the Bruker directory.
        molecule_name: Molecule name.
    """
    bruker_spectrum = read_bruker_dir(directory)
    write_jdx(
        bruker_spectrum.omegas_ppm,
        bruker_spectrum.intensity,
        bruker_spectrum.frequency_MHz,
        molecule_name,
        solvent=bruker_spectrum.solvent,
        # `write_jdx` expects the isotope as a string.
        isotope=str(bruker_spectrum.isotope),
    )