Source code for textkernel

##########################################################################################
# textkernel/__init__.py
##########################################################################################
"""PDS Ring-Moon Systems Node, SETI Institute

This is a set of routines for parsing SPICE text kernels. This module implements the
complete syntax specification as discussed in the SPICE Kernel Required Reading document,
"kernel.req": https://naif.jpl.nasa.gov/pub/naif/toolkit_docs/C/req/kernel.html

The `textkernel` module provides two functions for reading text kernels:

- `from_text`: Given a string representing the contents of a text kernel, return a
  dictionary of the values found.
- `from_file`: Given the path to a text kernel, read the contents and return a dictionary
  of the values found.

and two functions for manipulating text kernels:

- `continued_value`: Interpret a list of strings as one or more continued strings.
- `update_dict`: Merge the contents of two text kernel dictionaries, preserving nested
  values.
"""

# Names exported by `from textkernel import *`; these are the module's public functions.
__all__ = ['from_text', 'from_file', 'continued_value', 'update_dict']

import re

import filecache

from textkernel._DATA_GRAMMAR          import _DATA_GRAMMAR
from textkernel._NAME_GRAMMAR          import _NAME_GRAMMAR
from textkernel._PREDEFINED_BODY_INFO  import _PREDEFINED_BODY_INFO
from textkernel._PREDEFINED_FRAME_INFO import _PREDEFINED_FRAME_INFO

# Use the version recorded in the package's _version module when it can be imported;
# otherwise fall back to a placeholder string so that `__version__` is always defined.
# NOTE(review): _version.py is presumably generated at build/install time -- confirm.
try:
    from ._version import __version__
except ImportError:  # pragma: nocover
    __version__ = 'Version unspecified'


# Regular expressions matching the \begindata and \begintext markers that delimit the data
# and comment sections of a text kernel. A marker must be alone on its line, optionally
# surrounded by spaces or tabs, and the line may end with either LF or CRLF. Both patterns
# start with "\n", so a marker is only matched when a newline character precedes it.
# NOTE: Each marker contains a single backslash. It is written as "\\" inside the raw
# pattern strings because a literal backslash must be escaped in a regular expression.
_BEGINDATA = re.compile(r'\n[ \t]*\\begindata[ \t]*\r?\n', re.S)
_BEGINTEXT = re.compile(r'\n[ \t]*\\begintext[ \t]*\r?\n', re.S)


def from_text(text, tkdict=None, *, commented=True, contin=''):
    """Parse a string as the contents of a text kernel and return a dict of values found.

    Args:
        text (str): The contents as a SPICE text kernel. It can be represented as a single
            string with embedded newlines or as a list of strings.
        tkdict (dict, optional): An optional starting dictionary. If provided, the new
            content is merged into the one provided; otherwise, a new dictionary is
            returned.
        commented (bool, optional): True if the kernel text contains comments delimited by
            `\\\\begintext` and `\\\\begindata`.
        contin (str, optional): Optional sequence of characters indicating that a string
            is "continued", meaning that its value should be concatenated with the next
            string in the list. See the rules for continued strings here:

            https://naif.jpl.nasa.gov/pub/naif/toolkit_docs/C/req/kernel.html#Additional%20Text%20Kernel%20Syntax%20Rules

            If a text kernel uses multiple different continuation sequences (which is
            exceedingly unlikely), you can only specify one sequence here; use
            continued_value() to interpret the values of other continued strings. If no
            sequence is given and the text contains "KERNELS_TO_LOAD" (i.e., it is a
            metakernel), "+" is used.

    Returns:
        dict: A dictionary containing all the parameters in the given string.

        The returned dictionary is keyed by all the parameter names (on the left side of
        an equal sign) in the text kernel, and each associated dictionary value is that
        found on the right side. Values are Python ints, floats, strings, datetime
        objects, or lists of one or more of these.

        For convenience, the returned dictionary adds additional, "hierarchical" keys that
        provide alternative access to the same values. Hierarchical keys are substrings
        from the original parameter name, which return a sub-dictionary keyed by part or
        all of the remainder of that parameter name.

        - Parameter names with a slash are split apart as if they represented components
          of a file directory tree, so these are equivalent:

          - tkdict["DELTET/EB"] == tkdict["DELTET"]["EB"]

        - When a body or frame ID is embedded inside a parameter name, it is extracted,
          converted to integer, and used as a piece of the hierarchy, making these
          equivalent:

          - tkdict["BODY399_POLE_RA"] == tkdict["BODY"][399]["POLE_RA"]
          - tkdict["SCLK01_MODULI_32"] == tkdict["SCLK"][-32]["01_MODULI"]

          Leading and trailing underscores before and after the embedded numeric ID are
          stripped from the hierarchical keys, as you can see in the examples above.

        - When the name associated with a body or frame ID is known, that name can be used
          in the place of the integer ID:

          - tkdict["BODY"][399] == tkdict["BODY"]["EARTH"]
          - tkdict["FRAME"][10013] == tkdict["FRAME"]["IAU_EARTH"]
          - tkdict["SCLK"][-32] == tkdict["SCLK"]["VOYAGER 2"]

        - If a frame is associated with a particular central body, and it is the only
          frame in the dictionary with that center, the body's ID can also be used in
          place of the frame's ID:

          - tkdict["FRAME"][399] == tkdict["FRAME"]["IAU_EARTH"]

        - Note that the "BODY" and "FRAME" dictionaries also have an additional entry
          keyed by "ID", which returns the associated integer ID:

          - tkdict["FRAME"][623]["ID"] = 623
          - tkdict["FRAME"]["IAU_SUTTUNGR"]["ID"] = 623

          This ensures that you can look up a body or frame by name and readily obtain its
          ID.
    """

    tkdict_is_new = (tkdict is None)
    if tkdict_is_new:
        tkdict = {}

    if not isinstance(text, str):
        text = '\n'.join(text)

    # Fill in the default continuation character if this is a metakernel
    if not contin and 'KERNELS_TO_LOAD' in text:
        contin = '+'

    # Pre-process commented text: keep only what lies between each \begindata marker and
    # the following \begintext marker.
    if commented:
        # Both marker patterns require a newline in front of the marker. Prefixing one
        # lets a marker on the very first line of the text (or of a data section) match.
        sections = _BEGINDATA.split('\n' + text)[1:] + ['']     # blank at end restores
                                                                # final newline
        data_parts = []
        for section in sections:
            data = _BEGINTEXT.split('\n' + section)[0]
            data_parts.append(data[1:])     # drop the newline that was prefixed above
        text = '\n'.join(data_parts)

    # Parse into a list of (name, operator, value) entries
    parsed = _DATA_GRAMMAR.parse_string(text).as_list()

    # Track new sub-dictionaries and new name/ID pairs
    indices = []            # a list of tuples (before-text, idcode or name)
    new_body_names = []     # a list of new NAIF_BODY_NAME values
    new_body_codes = []     # a list of new NAIF_BODY_CODE values

    # Insert each value into the dictionary
    for (name, op, value) in parsed:

        # Catch new name/idcode pairs (before merging lists)
        if name == 'NAIF_BODY_NAME':
            new_body_names += value if isinstance(value, list) else [value]
        if name == 'NAIF_BODY_CODE':
            new_body_codes += value if isinstance(value, list) else [value]

        # Merge continued strings if necessary; any other value is returned as is
        value = continued_value(value, contin)

        # Merge list with previous value if operator is "+="
        if op == '+=':
            if not isinstance(value, list):
                value = [value]
            if name in tkdict:
                old_value = tkdict[name]
                if isinstance(old_value, list):
                    value = old_value + value
                else:
                    value = [old_value] + value

        # Insert into the dictionary under the full name
        tkdict[name] = value

        # Identify the alternative names
        parsed_name = _NAME_GRAMMAR.parse_string(name).as_list()[0]

        # Unless it's a nested or indexed name, we're done
        if isinstance(parsed_name, str):
            continue

        # Put the nested or indexed value into a sub-dictionary
        subdict = tkdict
        for subname in parsed_name[:-1]:
            subdict = subdict.setdefault(subname, {})

        # Keep track of indexed sub-dictionaries
        if isinstance(parsed_name, tuple):
            indices.append(parsed_name[:2])

        subdict[parsed_name[-1]] = value

    # Key any pre-existing sub-dictionaries by a new name
    if new_body_codes and not tkdict_is_new:

        # For each sub-dictionary...
        for subdict in tkdict.values():
            if not isinstance(subdict, dict):
                continue

            # ... if the idcode is a key, use the name as a key as well. The names and
            # codes are paired by position; zip() ignores any unpaired trailing entries
            # rather than failing on a kernel that defines unequal numbers of each.
            for body_name, idcode in zip(new_body_names, new_body_codes):
                if idcode in subdict:
                    subdict[body_name] = subdict[idcode]

    # Key any new indexed sub-dictionaries by name(s) as well as ID
    for (prefix, key) in indices:
        prefix_dict = tkdict[prefix]
        prefix_subdict = prefix_dict[key]
        allkeys = [key]

        # Determine whether this is a body or a frame; get predefined values if any
        if prefix in ('BODY', 'OBJECT', 'SCLK'):
            bf_key = 'BODY'
            (name, idcode) = _PREDEFINED_BODY_INFO.get(key, ('', 0))
        else:
            bf_key = 'FRAME'
            (name, idcode, center) = _PREDEFINED_FRAME_INFO.get(key, ('', 0, 0))

        # See if this already has an associated ID and name
        bf_dict = tkdict.setdefault(bf_key, {})     # tkdict['BODY'] or tkdict['FRAME']
        bf_subdict = bf_dict.setdefault(key, {})
        allkeys.append(bf_subdict.get('NAME', ''))
        allkeys.append(bf_subdict.get('ID', 0))

        # Include the pre-defined values if any
        allkeys += [name, idcode]

        # This gets the body name in some "rocks" files. Example:
        #   OBJECT_65040_FRAME = 'IAU_S12_2004'
        # implies name='S12_2004'. The value may also be an integer frame ID, in which
        # case there is no name to extract.
        if bf_key == 'BODY':
            frame_name = prefix_subdict.get('FRAME', '')
            if isinstance(frame_name, str) and frame_name.startswith('IAU_'):
                allkeys.append(frame_name[4:])

        # This is how the name is embedded in some instrument kernels
        if bf_key == 'FRAME' and 'INS' in tkdict:
            try:
                allkeys.append(tkdict['INS'][key]['FOV_FRAME'])
            except KeyError:    # pragma: no cover
                pass

        # Remove duplicate, blank, and zero keys, preserving order
        newkeys = []
        for k in allkeys:
            if k and k not in newkeys:
                newkeys.append(k)
        allkeys = newkeys

        # Identify the first ID and the first name
        idcodes = [k for k in allkeys if isinstance(k, int)]
        first_id = idcodes[0] if idcodes else 0

        names = [k for k in allkeys if isinstance(k, str)]
        first_name = names[0] if names else ''

        # Make sure each "BODY"/"FRAME" dictionary has an entry for the ID and NAME
        if first_id and 'ID' not in bf_subdict:
            bf_subdict['ID'] = first_id
        if first_name and 'NAME' not in bf_subdict:
            bf_subdict['NAME'] = first_name

        # Insert additional dictionary keys
        for k in allkeys[1:]:       # first item in the list is the current key
            bf_dict[k] = bf_subdict
            prefix_dict[k] = prefix_subdict

        # We're done with body IDs
        if bf_key == 'BODY':
            continue

        # Identify the frame center
        if not center:
            center = bf_subdict.get('CENTER', 0)

        # We can derive the center ID from the frame ID for instruments
        if not center and isinstance(key, int):
            center = -((-key) // 1000)
            if not (0 > center > -1000):
                center = 0      # pragma: no cover

        if center and 'CENTER' not in bf_subdict:
            bf_subdict['CENTER'] = center

    # Insert a "FRAME" dictionary key for each body center ID that has exactly one frame.
    # The same frame sub-dictionary is reachable through several keys (its ID and its
    # names), so distinct frames are counted by object identity, not once per key.
    frame_dict = tkdict.get('FRAME', {})
    frames_by_center = {}       # center ID -> list of distinct frame sub-dictionaries
    for frame_subdict in frame_dict.values():
        if not isinstance(frame_subdict, dict) or 'CENTER' not in frame_subdict:
            continue
        frames = frames_by_center.setdefault(frame_subdict['CENTER'], [])
        if not any(frame_subdict is known for known in frames):
            frames.append(frame_subdict)

    for center_id, frames in frames_by_center.items():
        if len(frames) == 1 and center_id not in frame_dict:
            frame_dict[center_id] = frames[0]

    return tkdict
def from_file(path, tkdict=None, *, contin=''):
    """Read a text kernel file and return a dict of the values found.

    The file is read as Latin-1 text and then handed to `from_text` with comment
    processing enabled, so only the content of the file's data sections is parsed.

    Args:
        path (str or Path or FCPath): The path to a kernel file as a string,
            `pathlib.Path`, or `filecache.FCPath`.
        tkdict (dict, optional): An optional starting dictionary. If provided, the new
            content is merged into the one provided; otherwise, a new dictionary is
            returned.
        contin (str, optional): Optional sequence of characters indicating that a string
            is "continued", meaning that its value should be concatenated with the next
            string in the list. See the rules for continued strings here:

            https://naif.jpl.nasa.gov/pub/naif/toolkit_docs/C/req/kernel.html#Additional%20Text%20Kernel%20Syntax%20Rules

            Only one sequence can be specified here; use continued_value() to interpret
            the values of strings continued with any other sequence. The default value is
            "+" for all metakernels.

    Returns:
        dict: A dictionary containing all the parameters in the given file, keyed by each
        parameter name (the left side of an equal sign) with the value found on the right
        side. Values are Python ints, floats, strings, datetime objects, or lists of one
        or more of these.

        The dictionary also contains "hierarchical" keys that give alternative access to
        the same values:

        - Parameter names with a slash are split like a directory tree:

          - tkdict["DELTET/EB"] == tkdict["DELTET"]["EB"]

        - A body or frame ID embedded in a parameter name is extracted, converted to
          integer, and used as a level of the hierarchy, with the surrounding underscores
          stripped:

          - tkdict["BODY399_POLE_RA"] == tkdict["BODY"][399]["POLE_RA"]
          - tkdict["SCLK01_MODULI_32"] == tkdict["SCLK"][-32]["01_MODULI"]

        - A known body or frame name can be used in place of the integer ID:

          - tkdict["BODY"][399] == tkdict["BODY"]["EARTH"]
          - tkdict["FRAME"][10013] == tkdict["FRAME"]["IAU_EARTH"]
          - tkdict["SCLK"][-32] == tkdict["SCLK"]["VOYAGER 2"]

        - If a frame is associated with a particular central body, the body's ID can also
          be used in place of the frame's ID:

          - tkdict["FRAME"][399] == tkdict["FRAME"]["IAU_EARTH"]

        - Entries of the "BODY" and "FRAME" dictionaries carry an "ID" item, so a body or
          frame looked up by name readily yields its integer ID:

          - tkdict["FRAME"][623]["ID"] = 623
          - tkdict["FRAME"]["IAU_SUTTUNGR"]["ID"] = 623
    """

    kernel_path = filecache.FCPath(path)
    contents = kernel_path.read_text(encoding='latin1')

    # Files always go through comment processing
    return from_text(contents, tkdict=tkdict, commented=True, contin=contin)
##########################################################################################
# Kernel dictionary management
##########################################################################################
def continued_value(value, contin='+'):
    """Interpret a list of strings as one or more continued strings.

    Use this function if you did not specify the string's continuation sequence when you
    created the dictionary.

    A string is "continued" when, after its trailing whitespace is removed, it ends with
    the continuation sequence and the next item in the list is also a string. The
    sequence and the trailing whitespace are dropped and the next string is appended.

    Args:
        value (Any): A value from a text kernel.
        contin (str, optional): A sequence of characters indicating that a string is
            "continued", meaning that its value should be concatenated with the next
            string in the list. See the rules for continued strings here:

            https://naif.jpl.nasa.gov/pub/naif/toolkit_docs/C/req/kernel.html#Additional%20Text%20Kernel%20Syntax%20Rules

            If this is empty, the value is returned as is.

    Returns:
        Any: The same value after the continuation sequence has been applied. If the list
        now contains only a single value, that string is returned instead of a list
        containing the string. If nothing was merged, the original list object is
        returned. If any other type of value is given as input (or the list is empty),
        that value is returned as is.
    """

    # Nothing to do without a continuation sequence, for non-lists, or for an empty list
    # (which has no first element to start from)
    if not contin or not isinstance(value, list) or not value:
        return value

    merged_items = [value[0]]
    any_merged = False
    for item in value[1:]:
        previous = merged_items[-1]
        if isinstance(item, str) and isinstance(previous, str):
            stripped = previous.rstrip()
            if stripped.endswith(contin):
                # Drop the continuation sequence and join with the following string
                merged_items[-1] = stripped[:-len(contin)] + item
                any_merged = True
                continue

        merged_items.append(item)

    # If a list was not modified, return the original in case this matters
    if not any_merged:
        return value

    # If the new list contains a single merged string, just return the string
    if len(merged_items) == 1:
        return merged_items[0]

    return merged_items
def update_dict(tkdict, newdict):
    """Merge the contents of two text kernel dictionaries, preserving nested values.

    Items of `newdict` are merged into `tkdict` as follows:

    - A key not yet in `tkdict` is inserted with the new value.
    - Sub-dictionaries are merged recursively. When one sub-dictionary is reachable via
      several alternative keys (e.g., an ID and a name), it is merged exactly once and
      every alternative key of `newdict` ends up referring to the merged sub-dictionary.
    - A value equal to the existing value is left alone.
    - Any other value is concatenated with the existing one into a new list, with the
      existing value(s) first.

    In addition, if `newdict` defines NAIF_BODY_CODE and NAIF_BODY_NAME, every existing
    sub-dictionary of `tkdict` that is keyed by one of the codes is also keyed by the
    associated name.

    Args:
        tkdict (dict): A text kernel dictionary. It is modified in place.
        newdict (dict): A second text kernel dictionary.

    Returns:
        dict: The input `tkdict`, updated with the contents of `newdict`.
    """

    def alt_dict_keys(d):
        """Create a dict that maps each key to its alt keys including itself.

        Only keys whose value is a dictionary are included; two keys are alternatives of
        one another if they refer to the very same dictionary object.
        """

        keys_for_dict_id = {}
        for key, value in d.items():
            if isinstance(value, dict):
                keys_for_dict_id.setdefault(id(value), set()).add(key)

        alt_keys = {}
        for alt_key_set in keys_for_dict_id.values():
            for key in alt_key_set:
                alt_keys[key] = alt_key_set

        return alt_keys

    def as_list(value):
        """Return a new list holding the value(s); a list input is copied."""

        return list(value) if isinstance(value, list) else [value]

    # Use NAIF_BODY_CODE/NAME to define new keys. A kernel defining a single body may
    # store these as scalars rather than lists.
    new_body_codes = as_list(newdict.get('NAIF_BODY_CODE', []))
    if new_body_codes:
        new_body_names = as_list(newdict.get('NAIF_BODY_NAME', []))
        for subdict in tkdict.values():
            if not isinstance(subdict, dict):
                continue
            for idcode, body_name in zip(new_body_codes, new_body_names):
                if idcode in subdict:
                    subdict[body_name] = subdict[idcode]

    # Identify each dictionary's alternative keys
    new_dict_keys = alt_dict_keys(newdict)
    old_dict_keys = alt_dict_keys(tkdict)

    # Copy/merge dictionary items
    keys_handled = set()
    merged_pairs = set()    # (id of old sub-dict, id of new sub-dict) already merged
    for key, new_value in newdict.items():

        # Merge dictionaries
        if isinstance(new_value, dict):
            if key in keys_handled:
                continue

            old_keys = old_dict_keys.get(key, set())
            new_keys = new_dict_keys[key] - old_keys
            if old_keys:
                # All of the old keys refer to the same sub-dictionary, so any will do
                updated = tkdict[next(iter(old_keys))]

                # The same pair of sub-dictionaries is encountered once for every
                # alternative key they share; merging it again would append the same
                # values a second time.
                pair = (id(updated), id(new_value))
                if pair not in merged_pairs:
                    merged_pairs.add(pair)
                    update_dict(updated, new_value)
            else:
                updated = new_value

            for alt_key in new_keys:
                tkdict[alt_key] = updated

            keys_handled |= new_keys
            continue

        # Insert new values
        if key not in tkdict:
            tkdict[key] = new_value
            continue

        # Leave identical values alone
        tk_value = tkdict[key]
        if tk_value == new_value:
            continue

        # Otherwise, convert to list if necessary and concatenate. A new list is always
        # built: the existing list may also be referenced from a nested sub-dictionary,
        # and extending it in place would change that entry before it is merged itself.
        tkdict[key] = as_list(tk_value) + as_list(new_value)

    return tkdict
##########################################################################################