Source code for hqs_nmr.spectrumio

# Copyright © 2025 HQS Quantum Simulations GmbH. All Rights Reserved.

"""Deserialization and serialization from varios NMR spectrum formats."""

from __future__ import annotations

from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Any, Optional, Union

import nmrglue
import numpy as np
from hqs_nmr_parameters import GYROMAGNETIC_RATIOS, Isotope

from hqs_nmr.datatypes import DEFAULT_UNKNOWN, NMRExperimentalSpectrum1D



[docs]
def _extract_solvent_jcampdx(data: dict[str, Any]) -> list[str]:
    """Extracts solvent string from a JCAMP-DX data dictionary if available.

    With JCAMP-DX version 5.01 a new optional ".SOLVENT NAME" key has been added to store a
    description of the solvent. According to the specification this may include pH, ionic strength,
    if relevant. This function does not distinguish the solvent name from such additional data.

    A few notes regarding the parsing via the `nmrglue` package:
    Since a JCAMP-DX file may contain more than one block (ends with "##END"), `nmrglue` collects
    all values of a key in a list. Furthermore, `nmrglue` normalizes the key in the data
    dictionary, i.e. while the JCAMP-DX file may contain "##.SOLVENTNAME" or "##.SOLVENT NAME", the
    resulting data dictionary only contains a ".SOLVENTNAME" key in both cases.

    Args:
        data: Data dictionary resulting from parsing with `nmrglue`.

    Returns:
        Solvent string(s) or an empty list if no solvent information was found.
    """
    solvent: list[str] = []
    output: Union[list[str], None] = data.get(".SOLVENTNAME", None)
    if isinstance(output, list):
        solvent = output

    return solvent




[docs]
def _extract_solvent_bruker(data: dict[str, Any]) -> str:
    """Extracts solvent string from a Bruker data dictionary if available.

    The Bruker directory format stores the solvent name in the acquisition file (`acqus` or `acqu`)
    under the non-standard key "##$SOLVENT". The solvent name is usually enclosed in angle brackets
    (e.g. `<DMSO>`), which are stripped of with this function.

    Args:
        data: Data dictionary resulting from parsing with `nmrglue`.

    Returns:
        Solvent string or an empty string if no solvent information was found.
    """
    solvent = data.get("acqus", {}).get("SOLVENT", "")
    return solvent.replace("<", "").replace(">", "")




[docs]
def _extract_temperature_jcampdx(data: dict[str, Any]) -> list[float]:
    """Extracts temperature (in Kelvin) from a JCAMP-DX data dictionary if available.

    JCAMP-DX files store the temperature in degrees Celsius in the "##TEMPERATURE" field. Note that
    this function returns the temperature in Kelvin.

    A few notes regarding the parsing via the `nmrglue` package:
    Since a JCAMP-DX file may contain more than one block (ends with "##END"), `nmrglue` collects
    all values of a key in a list.

    Args:
        data: Data dictionary resulting from parsing with `nmrglue`.

    Returns:
        Temperature(s) in Kelvin or an empty list if no temperature was found.
    """
    temperature: list[float] = []
    output = data.get("TEMPERATURE", None)
    if isinstance(output, list):
        temperature = [float(t) + 273.15 for t in output]

    return temperature




[docs]
def _extract_temperature_bruker(data: dict[str, Any]) -> Optional[float]:
    """Extracts temperature (in Kelvin) from a Bruker data dictionary if available.

    The Bruker directory format stores the temperature (in Kelvin) in the acquisition file
    (`acqus` or `acqu`) under the non-standard key "##$TE". The data dictionary produced by
    `nmrglue.bruker.read_pdata` holds the temperature already as a `float`.

    Args:
        data: Data dictionary resulting from parsing with `nmrglue`.

    Returns:
        Temperature in Kelvin or `None` if no solvent information was found.
    """
    return data.get("acqus", {}).get("TE", None)




[docs]
def _extract_isotope_jcampdx(data: dict[str, Any]) -> list[Isotope]:
    """Extracts isotope from a JCAMP-DX data dictionary.

    JCAMP-DX files store the isotope in the "##.OBSERVE NUCLEUS" field. The corresponding value
    usually contains a leading caret (e.g. `^13C`), which is removed in this function.

    A few notes regarding the parsing via the `nmrglue` package:
    Since a JCAMP-DX file may contain more than one block (ends with "##END"), `nmrglue` collects
    all values of a key in a list. Furthermore, `nmrglue` normalizes the key in the data
    dictionary, i.e. while the JCAMP-DX file may contain "##.OBSERVENUCLEUS" or
    "##.OBSERVE NUCLEUS", the resulting data dictionary only contains a ".OBSERVENUCLEUS" key in
    both cases.

    Args:
        data: Data dictionary resulting from parsing with `nmrglue`.

    Returns:
        Isotope of observed nucleus.
    """
    try:
        output = data[".OBSERVENUCLEUS"]
        return [Isotope.from_str(iso.replace("^", "")) for iso in output]
    except Exception as e:
        raise RuntimeError("Failed to extract isotope from NMR data dictionary.") from e




[docs]
def _extract_isotope_bruker(data: dict[str, Any]) -> Isotope:
    """Extracts isotope from a Bruker data dictionary.

    The Bruker directory format stores the isotope in the processing file (`procs` or `proc`)
    "##$AXNUC". The isotope string is usually enclosed in angle brackets (e.g. `<1H>`). Contrary to
    the solvent name, the isotope string stored in the data dictionary does not include the angle
    brackets.

    Args:
        data: Data dictionary resulting from parsing with `nmrglue`.

    Returns:
        Isotope of observed nucleus.
    """
    try:
        output: str = data["procs"]["AXNUC"]
        return Isotope.from_str(output)
    except Exception as e:
        raise RuntimeError("Failed to extract isotope from NMR data dictionary.") from e




[docs]
def read_jdx(
    file: Union[Path, str],
    source: str = DEFAULT_UNKNOWN,
    license_id: str = DEFAULT_UNKNOWN,
) -> NMRExperimentalSpectrum1D:
    """Read spectral data from a JCAMP-DX (JDX) file.

    NOTE: it is assumed that the JCAMP-DX file contains only one block, i.e. one spectrum and one
    isotope. The spectrum must be stored in descending x order (FIRSTX equals MAXX and LASTX
    equals MINX), and the x limits in the header are converted to ppm by dividing them by the
    observe frequency in MHz, i.e. they are treated as Hz.

    Args:
        file:       Path to the JDX file.
        source:     Owner or producer of the spectrum.
        license_id: License associated with the spectrum (as SPDX identifier if possible).

    Raises:
        RuntimeError: Parsing failed (no spectral data or no isotope could be extracted).
        ValueError:   Wrong file extension, unsupported type of spectral data, or missing or
                      inconsistent JDX parameters.

    Returns:
        A `NMRExperimentalSpectrum1D` object with chemical shifts (in ppm) and intensities.
    """
    jdx_path = Path(file)
    if jdx_path.suffix.lower() not in [".jdx", ".dx", ".jcamp"]:
        raise ValueError("Incorrect file extension. Please provide a valid JDX file.")

    data_dict, data_values = nmrglue.jcampdx.read(filename=str(jdx_path))

    solvent = _extract_solvent_jcampdx(data_dict)
    temperature = _extract_temperature_jcampdx(data_dict)
    isotope = _extract_isotope_jcampdx(data_dict)

    if data_values is None:
        raise RuntimeError(f"Error parsing {file!s}, could not extract spectral data.")
    if not isinstance(data_values, np.ndarray):
        raise ValueError(f"Unsupported spectral data of type {type(data_values)}.")

    # Report absent header fields as the documented ValueError instead of letting a bare
    # KeyError escape from the consistency check below.
    required_keys = ("MINX", "MAXX", "FIRSTX", "LASTX", ".OBSERVEFREQUENCY")
    missing_keys = [key for key in required_keys if key not in data_dict]
    if missing_keys:
        raise ValueError(
            f"Invalid JDX data detected, missing field(s): {', '.join(missing_keys)}."
        )

    # Header values are compared exactly as returned by `nmrglue` (one list per key). Each list
    # must hold exactly one entry because only single-block files are supported.
    if not all(
        [
            data_dict["MINX"] == data_dict["LASTX"],
            data_dict["MAXX"] == data_dict["FIRSTX"],
            len(data_dict[".OBSERVEFREQUENCY"]) == 1,
            len(data_dict["MINX"]) == 1,
            len(data_dict["MAXX"]) == 1,
            len(isotope) == 1,
            len(solvent) <= 1,
            len(temperature) <= 1,
        ]
    ):
        raise ValueError("Invalid JDX data detected.")

    freq_MHz = float(data_dict[".OBSERVEFREQUENCY"][0])
    min_hz = float(data_dict["MINX"][0])
    max_hz = float(data_dict["MAXX"][0])
    # Hz / MHz = ppm. The "XUNITS" field is not inspected here.
    min_ppm = min_hz / freq_MHz
    max_ppm = max_hz / freq_MHz
    # Equidistant axis running from the maximum (first point) down to the minimum (last point).
    delta = np.linspace(max_ppm, min_ppm, len(data_values))

    return NMRExperimentalSpectrum1D(
        omegas_ppm=delta,
        intensity=data_values,
        frequency_MHz=freq_MHz,
        solvent="" if not solvent else solvent[0],
        temperature=None if not temperature else temperature[0],
        isotope=isotope[0],
        source=source,
        license=license_id,
    )




[docs]
def read_jdx_string(
    file_content: str, source: str = DEFAULT_UNKNOWN, license_id: str = DEFAULT_UNKNOWN
) -> tuple[np.ndarray, np.ndarray]:
    """Parses JDX spectral data from string.

    Note this function creates a temporary file in order to match the function interface of
    `nmrglue.jcampdx.read`. The file is written, closed, parsed via `read_jdx` and removed again,
    also when parsing fails.

    Args:
        file_content: Content of a JCAMP-DX file.
        source:       Owner or producer of the spectrum.
        license_id:   License associated with the spectrum (as SPDX identifier if possible).

    Raises:
        RuntimeError: Parsing failed (propagated from `read_jdx`).
        ValueError:   Invalid JDX data (propagated from `read_jdx`).

    Returns:
        A tuple of chemical shifts (in ppm) and intensities.
    """
    # A named temporary file cannot be opened a second time by name on every platform while it
    # is still open (notably not on Windows). Hence the file is created with `delete=False`,
    # closed before it is read back, and removed explicitly afterwards.
    tf = NamedTemporaryFile(mode="w", encoding="utf-8", suffix=".jdx", delete=False)
    tmp_path = Path(tf.name)
    try:
        with tf:
            tf.write(file_content)
        spectrum = read_jdx(tmp_path, source=source, license_id=license_id)
    finally:
        tmp_path.unlink()
    return spectrum.omegas_ppm, spectrum.intensity




[docs]
def read_bruker_dir(
    directory: Union[Path, str],
    source: str = DEFAULT_UNKNOWN,
    license_id: str = DEFAULT_UNKNOWN,
) -> NMRExperimentalSpectrum1D:
    """Load a processed spectrum from a Bruker directory.

    The data is read with `nmrglue.bruker.read_pdata`. The chemical shift axis is rebuilt from
    the processing parameters "OFFSET", "SF", "SW_p" and "SI"; solvent, temperature and isotope
    are taken from the parameter dictionary as well.

    Args:
        directory:  Path to the Bruker directory.
        source:     Owner or producer of the spectrum.
        license_id: License associated with the spectrum (as SPDX identifier if possible).

    Raises:
        RuntimeError: The isotope could not be extracted from the parameters.

    Returns:
        A `NMRExperimentalSpectrum1D` object containing chemical shifts (in ppm) and intensities.
    """
    bruker_path = str(Path(directory))
    parameters, intensities = nmrglue.bruker.read_pdata(bruker_path)

    # Processing parameters needed to reconstruct the x axis:
    #   OFFSET - left axis limit in ppm
    #   SF     - spectrometer frequency in MHz
    #   SW_p   - width of the frequency band in Hz
    #   SI     - number of data points
    ppm_left = parameters["procs"]["OFFSET"]
    spectrometer_MHz = parameters["procs"]["SF"]
    width_hz = parameters["procs"]["SW_p"]
    # Right axis limit in ppm: the band width is converted from Hz to ppm first.
    ppm_right = ppm_left - width_hz / spectrometer_MHz
    point_count = parameters["procs"]["SI"]

    # NOTE(review): `linspace` places the last point exactly on `ppm_right`; confirm that this
    # matches the point spacing intended by the Bruker parameters.
    shifts_ppm = np.linspace(ppm_left, ppm_right, point_count)

    solvent_name = _extract_solvent_bruker(parameters)
    temperature_K = _extract_temperature_bruker(parameters)
    nucleus = _extract_isotope_bruker(parameters)

    return NMRExperimentalSpectrum1D(
        omegas_ppm=shifts_ppm,
        intensity=intensities,
        frequency_MHz=spectrometer_MHz,
        solvent=solvent_name,
        temperature=temperature_K,
        isotope=nucleus,
        source=source,
        license=license_id,
    )




[docs]
def write_jdx(
    x_data: np.ndarray,
    y_data: np.ndarray,
    frequency_MHz: float,
    molecule_name: str,
    solvent: str = "CDCl3",
    isotope: str = "1H",
    manual_shift: Optional[float] = None,
) -> None:
    """Write a JCAMP-DX file.

    This function writes a JCAMP-DX file from given x and y data. It can shift the x-axis by a
    specified amount, which is useful when the experimental spectrum is incorrectly referenced.
    The input arrays are never modified.

    The file is created relative to the current working directory and is named
    `{molecule_name}_{isotope}_{solvent}_{frequency_MHz:0.2f}MHz_shifted.jdx` if a shift was
    applied and `..._original.jdx` otherwise.

    Args:
        x_data: X data, shifts in ppm.
        y_data: Y data, intensity.
        frequency_MHz: Observation frequency in MHz (depends on the isotope).
        molecule_name: Molecule name.
        solvent: Solvent name. Defaults to "CDCl3".
        isotope: String representation of the isotope as [atomic mass number][symbol].
                 Defaults to "1H".
        manual_shift: Manual shift in ppm. `None` or zero means that no shift is applied.
                      Defaults to None.

    Raises:
        ValueError: `x_data` and `y_data` differ in length.
    """
    if len(x_data) != len(y_data):
        # Writing the file anyway would produce a header (NPOINTS, LASTX) that contradicts the
        # data table, or fail half-way through with a partially written file.
        raise ValueError(
            f"x_data and y_data must have the same length, got {len(x_data)} and {len(y_data)}."
        )

    jdx_name = f"{molecule_name:s}_{isotope:s}_{solvent:s}_{frequency_MHz:0.2f}"
    if manual_shift:
        # Create a new array instead of using `+=`, which would alter the caller's data.
        x_data = x_data + manual_shift

        jdx_name += "MHz_shifted.jdx"
    else:
        jdx_name += "MHz_original.jdx"

    # JDX file: x data in Hz. We need to do a conversion from ppm to Hz using the
    # gyromagnetic ratio and the observe frequency.
    # NOTE(review): `read_jdx` converts Hz back to ppm by dividing by the observe frequency only,
    # without this gyromagnetic-ratio factor - confirm the intended frequency convention for
    # isotopes other than 1H.
    x_data_hz = (
        x_data
        * frequency_MHz
        * GYROMAGNETIC_RATIOS[Isotope.from_str("1H")]
        / GYROMAGNETIC_RATIOS[Isotope.from_str(isotope)]
    )
    minx = float(x_data_hz.min())
    lastx = x_data_hz[-1]
    maxx = float(x_data_hz.max())
    firstx = x_data_hz[0]
    firsty = y_data[0]
    miny = float(y_data.min())
    maxy = float(y_data.max())
    npoints = len(x_data_hz)

    with open(jdx_name, "w") as jdx_file:
        if not manual_shift:
            jdx_file.write("##TITLE=Experimental NMR data.\n")
        else:
            jdx_file.write("##TITLE=Experimental NMR data but shifted by the user.\n")
        jdx_file.write("1D - NMR E:\\ SBL 11\n")
        jdx_file.write("\n")
        jdx_file.write("##JCAMP-DX=5.01\n")
        jdx_file.write("##DATA TYPE=NMR SPECTRUM\n")
        jdx_file.write("##DATA CLASS=XYDATA\n")
        jdx_file.write("##ORIGIN=hqs-nmr\n")
        jdx_file.write("##OWNER=HQS Quantum Simulations GmbH\n")
        jdx_file.write(
            "##LONGDATE=Xxx, xx Xxx xxxx yy:yy:yy +0100  # export date from JSpecView\n"
        )
        jdx_file.write(f"##.SHIFTREFERENCE=INTERNAL, {solvent}, 1, dd.dddd\n")
        jdx_file.write("##SPECTROMETER/DATA SYSTEM=spect\n")
        jdx_file.write(f"##.OBSERVE NUCLEUS={isotope}\n")
        jdx_file.write("##XUNITS=HZ\n")
        jdx_file.write("##YUNITS=ARBITRARY UNITS\n")
        jdx_file.write("##XFACTOR=1\n")
        jdx_file.write("##YFACTOR=1\n")
        jdx_file.write(f"##.OBSERVEFREQUENCY={frequency_MHz}\n")
        jdx_file.write(f"##.SOLVENT NAME={solvent}\n")
        jdx_file.write(f"##FIRSTX={firstx}\n")
        jdx_file.write(f"##MAXX={maxx}\n")
        jdx_file.write(f"##FIRSTY={firsty}\n")
        jdx_file.write(f"##LASTX={lastx}\n")
        jdx_file.write(f"##MINX={minx}\n")
        jdx_file.write(f"##NPOINTS={npoints}\n")
        jdx_file.write(f"##MINY={miny}\n")
        jdx_file.write(f"##MAXY={maxy}\n")
        jdx_file.write("##XYDATA=((X..X)(Y..Y))\n")
        # One "x, y" pair per line. The lengths were verified above, so `zip` drops nothing.
        jdx_file.writelines(
            f"{x_value}, {y_value}\n" for x_value, y_value in zip(x_data_hz, y_data)
        )
        jdx_file.write("##END=")




[docs]
def bruker_dir_to_jdx(
    directory: Union[Path, str],
    molecule_name: str,
) -> None:
    """Convert the spectrum of a Bruker directory into a JCAMP-DX file.

    The spectrum is loaded with `read_bruker_dir` and handed to `write_jdx` without a manual
    shift; `write_jdx` decides on the name and location of the output file.

    Args:
        directory: Path to the Bruker directory.
        molecule_name: Molecule name.
    """
    bruker_spectrum = read_bruker_dir(directory)

    # Collect the keyword arguments first; the isotope is passed on as its string form.
    jdx_arguments = {
        "x_data": bruker_spectrum.omegas_ppm,
        "y_data": bruker_spectrum.intensity,
        "frequency_MHz": bruker_spectrum.frequency_MHz,
        "molecule_name": molecule_name,
        "solvent": bruker_spectrum.solvent,
        "isotope": str(bruker_spectrum.isotope),
    }
    write_jdx(**jdx_arguments)