##########################################################################################
# textkernel/__init__.py
##########################################################################################
"""PDS Ring-Moon Systems Node, SETI Institute
This is a set of routines for parsing SPICE text kernels. This module implements the
complete syntax specification as discussed in the SPICE Kernel Required Reading document,
"kernel.req": https://naif.jpl.nasa.gov/pub/naif/toolkit_docs/C/req/kernel.html
The `textkernel` module provides two functions for reading text kernels:
- `from_text`: Given a string representing the contents of a text kernel, return a
dictionary of the values found.
- `from_file`: Given the path to a text kernel, read the contents and return a dictionary
of the values found.
and two functions for manipulating text kernels:
- `continued_value`: Interpret a list of strings as one or more continued strings.
- `update_dict`: Merge the contents of two text kernel dictionaries, preserving nested
values.
"""
__all__ = ['from_text', 'from_file', 'continued_value', 'update_dict']
import re
import filecache
from textkernel._DATA_GRAMMAR import _DATA_GRAMMAR
from textkernel._NAME_GRAMMAR import _NAME_GRAMMAR
from textkernel._PREDEFINED_BODY_INFO import _PREDEFINED_BODY_INFO
from textkernel._PREDEFINED_FRAME_INFO import _PREDEFINED_FRAME_INFO
try:
from ._version import __version__
except ImportError: # pragma: nocover
__version__ = 'Version unspecified'
# Regular expressions matching the \begindata and \begintext section markers. A marker
# must be alone on its line, optionally surrounded by blanks or tabs. NOTE: The marker
# itself has only one backslash in front of the "b". It is doubled inside these raw-string
# patterns so that the regular expression engine matches a literal backslash instead of
# reading "\b" as an escape sequence. Each pattern also consumes the newline that
# precedes the marker line and the line terminator that ends it, so a marker is only
# recognized when at least one newline character comes before it.
_BEGINDATA = re.compile(r'\n[ \t]*\\begindata[ \t]*\r?\n', re.S)
_BEGINTEXT = re.compile(r'\n[ \t]*\\begintext[ \t]*\r?\n', re.S)
[docs]
def from_text(text, tkdict=None, *, commented=True, contin=''):
    """Parse a string as the contents of a text kernel and return a dict of values found.

    Args:
        text (str or list): The contents of a SPICE text kernel. It can be represented
            as a single string with embedded newlines or as a list of strings, which
            are joined with newlines before parsing.
        tkdict (dict, optional): An optional starting dictionary. If provided, the new
            content is merged into the one provided (which is modified in place and
            returned); otherwise, a new dictionary is returned.
        commented (bool, optional): True if the kernel text contains comments delimited
            by `\\\\begintext` and `\\\\begindata`. In this case, only the text that
            follows a `\\\\begindata` marker, up to the next `\\\\begintext` marker, is
            parsed. Use False if the entire text is kernel data.
        contin (str, optional): Optional sequence of characters indicating that a string
            is "continued", meaning that its value should be concatenated with the
            next string in the list. See the rules for continued strings here:
            https://naif.jpl.nasa.gov/pub/naif/toolkit_docs/C/req/kernel.html#Additional%20Text%20Kernel%20Syntax%20Rules
            If a text kernel uses multiple different continuation sequences (which
            is exceedingly unlikely), you can only specify one sequence here; use
            continued_value() to interpret the values of other continued strings.
            If no sequence is given and the text contains "KERNELS_TO_LOAD" (i.e., it
            is a metakernel), "+" is used.

    Returns:
        dict: A dictionary containing all the parameters in the given string.

        The returned dictionary is keyed by all the parameter names (on the left
        side of an equal sign) in the text kernel, and each associated
        dictionary value is that found on the right side. Values are Python
        ints, floats, strings, datetime objects, or lists of one or more of
        these.

        For convenience, the returned dictionary adds additional, "hierarchical"
        keys that provide alternative access to the same values. Hierarchical
        keys are substrings from the original parameter name, which return a
        sub-dictionary keyed by part or all of the remainder of that parameter
        name.

        - Parameter names with a slash are split apart as if they represented
          components of a file directory tree, so these are equivalent:

          - tkdict["DELTET/EB"] == tkdict["DELTET"]["EB"]

        - When a body or frame ID is embedded inside a parameter name, it is
          extracted, converted to integer, and used as a piece of the hierarchy,
          making these equivalent:

          - tkdict["BODY399_POLE_RA"] == tkdict["BODY"][399]["POLE_RA"]
          - tkdict["SCLK01_MODULI_32"] == tkdict["SCLK"][-32]["01_MODULI"]

          Leading and trailing underscores before and after the embedded numeric ID
          are stripped from the hierarchical keys, as you can see in the examples
          above.

        - When the name associated with a body or frame ID is known, that name can be
          used in the place of the integer ID:

          - tkdict["BODY"][399] == tkdict["BODY"]["EARTH"]
          - tkdict["FRAME"][10013] == tkdict["FRAME"]["IAU_EARTH"]
          - tkdict["SCLK"][-32] == tkdict["SCLK"]["VOYAGER 2"]

        - If a frame is the only one associated with a particular central body, the
          body's ID can also be used in place of the frame's ID:

          - tkdict["FRAME"][399] == tkdict["FRAME"]["IAU_EARTH"]

        - Note that the "BODY" and "FRAME" dictionaries also have an additional entry
          keyed by "ID", which returns the associated integer ID:

          - tkdict["FRAME"][623]["ID"] = 623
          - tkdict["FRAME"]["IAU_SUTTUNGR"]["ID"] = 623

          This ensures that you can look up a body or frame by name and readily
          obtain its ID.
    """

    tkdict_is_new = (tkdict is None)
    if tkdict_is_new:
        tkdict = {}

    if not isinstance(text, str):
        text = '\n'.join(text)

    # Fill in the default continuation character if this is a metakernel
    if not contin and 'KERNELS_TO_LOAD' in text:
        contin = '+'

    # Pre-process commented text, keeping only the data sections
    if commented:
        # Both marker patterns require a newline in front of the marker line. A newline
        # is therefore prepended before each split so that a \begindata on the very
        # first line of the text, or a \begintext immediately following a \begindata
        # line, is still recognized. The extra character never reaches the parser: it
        # lands in the discarded leading part of the first split and is sliced off the
        # front of each data section after the second split.
        parts = _BEGINDATA.split('\n' + text)[1:] + ['']    # blank at end restores
                                                            # final newline
        parts = [_BEGINTEXT.split('\n' + p)[0][1:] for p in parts]
        text = '\n'.join(parts)

    # Parse into a list of (name, operator, value) triples
    parsed = _DATA_GRAMMAR.parse_string(text).as_list()

    # Track new sub-dictionaries and new name/ID pairs
    indices = []            # a list of tuples (before-text, idcode or name)
    new_body_names = []     # a list of new NAIF_BODY_NAME values
    new_body_codes = []     # a list of new NAIF_BODY_CODE values

    # Insert each value into the dictionary
    for (name, op, value) in parsed:

        # Catch new name/idcode pairs (before merging lists)
        if name == 'NAIF_BODY_NAME':
            new_body_names += value if isinstance(value, list) else [value]
        if name == 'NAIF_BODY_CODE':
            new_body_codes += value if isinstance(value, list) else [value]

        # Merge continued strings if necessary; any other value is returned as is
        value = continued_value(value, contin)

        # Merge list with previous value if operator is "+="
        if op == '+=':
            if not isinstance(value, list):
                value = [value]
            if name in tkdict:
                old_value = tkdict[name]
                if isinstance(old_value, list):
                    value = old_value + value
                else:
                    value = [old_value] + value

        # Insert into the dictionary under the full name
        tkdict[name] = value

        # Identify the alternative names
        parsed_name = _NAME_GRAMMAR.parse_string(name).as_list()[0]

        # Unless it's a nested or indexed name, we're done
        if isinstance(parsed_name, str):
            continue

        # Put the nested or indexed value into a sub-dictionary
        subdict = tkdict
        for subname in parsed_name[:-1]:
            subdict = subdict.setdefault(subname, {})

        # Keep track of indexed sub-dictionaries
        if isinstance(parsed_name, tuple):
            indices.append(parsed_name[:2])

        subdict[parsed_name[-1]] = value

    # Key any pre-existing sub-dictionaries by a new name
    if new_body_codes and not tkdict_is_new:

        # For each sub-dictionary...
        for subdict in tkdict.values():
            if not isinstance(subdict, dict):
                continue

            # ... if the idcode is a key, use the name as a key as well. Names and
            # codes are paired by position; zip() stops at the shorter list, so a
            # kernel with unequal numbers of names and codes cannot raise IndexError.
            for body_name, idcode in zip(new_body_names, new_body_codes):
                if idcode in subdict:
                    subdict[body_name] = subdict[idcode]

    # Key any new indexed sub-dictionaries by name(s) as well as ID
    for (prefix, key) in indices:
        prefix_dict = tkdict[prefix]
        prefix_subdict = prefix_dict[key]
        allkeys = [key]

        # Determine whether this is a body or a frame; get predefined values if any.
        # Note that "center" is only defined (and only used below) for frames.
        if prefix in ('BODY', 'OBJECT', 'SCLK'):
            bf_key = 'BODY'
            (name, idcode) = _PREDEFINED_BODY_INFO.get(key, ('', 0))
        else:
            bf_key = 'FRAME'
            (name, idcode, center) = _PREDEFINED_FRAME_INFO.get(key, ('', 0, 0))

        # See if this already has an associated ID and name
        bf_dict = tkdict.setdefault(bf_key, {})     # tkdict['BODY'] or tkdict['FRAME']
        bf_subdict = bf_dict.setdefault(key, {})
        allkeys.append(bf_subdict.get('NAME', ''))
        allkeys.append(bf_subdict.get('ID', 0))

        # Include the pre-defined values if any
        allkeys += [name, idcode]

        # This gets the body name in some "rocks" files. Example:
        #   OBJECT_65040_FRAME = 'IAU_S12_2004'
        # implies name='S12_2004'. The frame can also be given as an integer frame ID
        # instead of a name, in which case there is no name to extract.
        if bf_key == 'BODY':
            frame_name = prefix_subdict.get('FRAME', '')
            if isinstance(frame_name, str) and frame_name.startswith('IAU_'):
                allkeys.append(frame_name[4:])

        # This is how the name is embedded in some instrument kernels
        if bf_key == 'FRAME' and 'INS' in tkdict:
            try:
                allkeys.append(tkdict['INS'][key]['FOV_FRAME'])
            except KeyError:    # pragma: no cover
                pass

        # Remove duplicate, blank, and zero keys, preserving order
        newkeys = []
        for k in allkeys:
            if k and k not in newkeys:
                newkeys.append(k)
        allkeys = newkeys

        # Identify the first ID and the first name
        idcodes = [k for k in allkeys if isinstance(k, int)]
        first_id = idcodes[0] if idcodes else 0

        names = [k for k in allkeys if isinstance(k, str)]
        first_name = names[0] if names else ''

        # Make sure each "BODY"/"FRAME" dictionary has an entry for the ID and NAME
        if first_id and 'ID' not in bf_subdict:
            bf_subdict['ID'] = first_id
        if first_name and 'NAME' not in bf_subdict:
            bf_subdict['NAME'] = first_name

        # Insert additional dictionary keys
        for k in allkeys[1:]:       # first item in the list is the current key
            bf_dict[k] = bf_subdict
            prefix_dict[k] = prefix_subdict

        # We're done with body IDs
        if bf_key == 'BODY':
            continue

        # Identify the frame center
        if not center:
            center = bf_subdict.get('CENTER', 0)

        # We can derive the center ID from the frame ID for instruments
        if not center and isinstance(key, int):
            center = -((-key) // 1000)
            if not (0 > center > -1000):
                center = 0      # pragma: no cover

        if center and 'CENTER' not in bf_subdict:
            bf_subdict['CENTER'] = center

    # Insert a "FRAME" dictionary key for each body center ID that has a single frame.
    # A frame's sub-dictionary appears in frame_dict once per alias key (ID, name, ...),
    # so the frame IDs are collected into a set; otherwise a frame with any alias would
    # be counted more than once and would never qualify as the center's only frame.
    frame_dict = tkdict.get('FRAME', {})
    frame_ids = {}      # center ID -> set of distinct frame IDs
    for frame_subdict in frame_dict.values():
        if 'CENTER' in frame_subdict:   # pragma: no branch
            center_id = frame_subdict['CENTER']
            frame_ids.setdefault(center_id, set()).add(frame_subdict['ID'])

    for center_id, frame_id_set in frame_ids.items():
        if len(frame_id_set) == 1 and center_id not in frame_dict:
            (frame_id,) = frame_id_set
            frame_dict[center_id] = frame_dict[frame_id]

    return tkdict
[docs]
def from_file(path, tkdict=None, *, contin=''):
    """Read a text kernel file and return a dict of the values found.

    The file is read as Latin-1 text and handed to `from_text` with `commented=True`,
    so only the data sections of the file (those introduced by a begindata marker) are
    parsed.

    Args:
        path (str or Path or FCPath): The path to a kernel file as a string,
            `pathlib.Path`, or `filecache.FCPath`.
        tkdict (dict, optional): An optional starting dictionary. If provided, the new
            content is merged into the one provided; otherwise, a new dictionary is
            returned.
        contin (str, optional): Optional sequence of characters indicating that a string
            is "continued", meaning that its value should be concatenated with the
            next string in the list. See the rules for continued strings here:
            https://naif.jpl.nasa.gov/pub/naif/toolkit_docs/C/req/kernel.html#Additional%20Text%20Kernel%20Syntax%20Rules
            Only one sequence can be given here; use continued_value() afterward to
            interpret strings that use a different sequence. The default value is "+"
            for all metakernels.

    Returns:
        dict: A dictionary containing all the parameters in the file.

        The dictionary is keyed by every parameter name (the left side of an equal
        sign) in the kernel, with the right side as the value. Values are Python ints,
        floats, strings, datetime objects, or lists of one or more of these.

        The dictionary also carries "hierarchical" keys that give alternative access
        to the same values:

        - Parameter names containing a slash are split like a directory path:

          - tkdict["DELTET/EB"] == tkdict["DELTET"]["EB"]

        - A body or frame ID embedded in a parameter name is extracted, converted to
          an integer, and used as one level of the hierarchy. Underscores adjacent to
          the embedded ID are stripped:

          - tkdict["BODY399_POLE_RA"] == tkdict["BODY"][399]["POLE_RA"]
          - tkdict["SCLK01_MODULI_32"] == tkdict["SCLK"][-32]["01_MODULI"]

        - When the name associated with a body or frame ID is known, the name can be
          used in place of the integer ID:

          - tkdict["BODY"][399] == tkdict["BODY"]["EARTH"]
          - tkdict["FRAME"][10013] == tkdict["FRAME"]["IAU_EARTH"]
          - tkdict["SCLK"][-32] == tkdict["SCLK"]["VOYAGER 2"]

        - If a frame is associated with a particular central body, the body's ID can
          also be used in place of the frame's ID:

          - tkdict["FRAME"][399] == tkdict["FRAME"]["IAU_EARTH"]

        - The "BODY" and "FRAME" sub-dictionaries each have an extra entry keyed by
          "ID" holding the integer ID, so a body or frame looked up by name readily
          yields its ID:

          - tkdict["FRAME"][623]["ID"] = 623
          - tkdict["FRAME"]["IAU_SUTTUNGR"]["ID"] = 623
    """

    kernel_path = filecache.FCPath(path)
    contents = kernel_path.read_text(encoding='latin1')
    return from_text(contents, tkdict=tkdict, commented=True, contin=contin)
##########################################################################################
# Kernel dictionary management
##########################################################################################
[docs]
def continued_value(value, contin='+'):
    """Interpret a list of strings as one or more continued strings.

    Use this function if you did not specify the string's continuation sequence when you
    created the dictionary.

    A string is "continued" when, ignoring trailing whitespace, it ends with the
    continuation sequence and the next item in the list is also a string. The
    sequence and the trailing whitespace are removed and the next string is appended.
    A merged string can itself be continued by the item that follows.

    Args:
        value (Any): A value from a text kernel.
        contin (str, optional): A sequence of characters indicating that a string is
            "continued", meaning that its value should be concatenated with the next
            string in the list. See the rules for continued strings here:
            https://naif.jpl.nasa.gov/pub/naif/toolkit_docs/C/req/kernel.html#Additional%20Text%20Kernel%20Syntax%20Rules
            If empty, no merging is performed.

    Returns:
        Any: The same value after the continuation sequence has been applied.

        If the list now contains only a single value, that string is returned
        instead of a list containing the string.

        If nothing was merged, the original list object is returned. If any other
        type of value (or an empty list) is given as input, that value is returned
        as is.
    """

    # Nothing to do without a continuation sequence, a list, or at least one item.
    # The emptiness check keeps value[0] below from raising IndexError.
    if not contin or not isinstance(value, list) or not value:
        return value

    marker_len = len(contin)
    newlist = [value[0]]
    merged = False
    for item in value[1:]:
        previous = newlist[-1]
        if isinstance(item, str) and isinstance(previous, str):
            # Trailing whitespace after the continuation sequence is ignored
            stripped = previous.rstrip()
            if stripped.endswith(contin):
                newlist[-1] = stripped[:-marker_len] + item
                merged = True
                continue

        newlist.append(item)

    # If a list was not modified, return the original in case this matters
    if not merged:
        return value

    # If the new list contains a single merged string, just return the string
    if len(newlist) == 1:
        return newlist[0]

    return newlist
[docs]
def update_dict(tkdict, newdict):
    """Merge the contents of two text kernel dictionaries, preserving nested values.

    The merge modifies `tkdict` in place according to these rules:

    - A key found only in `newdict` is copied into `tkdict`. A copied sub-dictionary
      is shared with `newdict`, not duplicated.
    - Sub-dictionaries found in both are merged recursively. Keys that refer to the
      same sub-dictionary object (e.g., a body's ID and its name) are treated as
      aliases: the sub-dictionary is merged once, and any alias keys that exist only
      in `newdict` are added to `tkdict`.
    - A non-dictionary value found in both is left alone if the two values are equal;
      otherwise, the two values are concatenated into a new list, with the value(s)
      from `tkdict` first.
    - If `newdict` defines NAIF_BODY_CODE and NAIF_BODY_NAME, each new name is added
      as an alias key wherever its code is already a key in a sub-dictionary of
      `tkdict`. Each may be a single value or a list; they are paired by position.

    This is meant to approximate reading the first text kernel and then using its
    return value as the `tkdict` input when reading the second text kernel.

    Args:
        tkdict (dict): A text kernel dictionary. It is modified in place.
        newdict (dict): A second text kernel dictionary.

    Returns:
        dict: The input `tkdict`, updated with the contents of `newdict`.
    """

    def as_list(item):
        """Return the item itself if it is a list; otherwise wrap it in a list."""
        return item if isinstance(item, list) else [item]

    def alt_dict_keys(d):
        """Create a dict that maps each key to its alt keys including itself.

        Only keys whose values are dictionaries are included. Keys are alternatives
        of one another if their values are the very same dictionary object.
        """
        keys_for_dict_id = {}
        for key, value in d.items():
            if isinstance(value, dict):
                keys_for_dict_id.setdefault(id(value), set()).add(key)

        alt_keys = {}
        for alt_key_set in keys_for_dict_id.values():
            for key in alt_key_set:
                alt_keys[key] = alt_key_set

        return alt_keys

    # Use NAIF_BODY_CODE/NAME to define new keys. A kernel with a single code or name
    # stores it as a scalar, so both are normalized to lists before pairing. zip()
    # stops at the shorter of the two lists.
    new_body_codes = as_list(newdict.get('NAIF_BODY_CODE', []))
    new_body_names = as_list(newdict.get('NAIF_BODY_NAME', []))
    if new_body_codes:
        for subdict in tkdict.values():
            if not isinstance(subdict, dict):
                continue
            for idcode, body_name in zip(new_body_codes, new_body_names):
                if idcode in subdict:
                    subdict[body_name] = subdict[idcode]

    # Identify each dictionary's alternative keys
    new_dict_keys = alt_dict_keys(newdict)
    old_dict_keys = alt_dict_keys(tkdict)

    # Copy/merge dictionary items
    keys_handled = set()
    for key, new_value in newdict.items():

        # Merge dictionaries
        if isinstance(new_value, dict):
            if key in keys_handled:
                continue

            alias_keys = new_dict_keys[key]
            old_keys = old_dict_keys.get(key, set())
            if old_keys:
                old_key = next(iter(old_keys))
                updated = update_dict(tkdict[old_key], new_value)
            else:
                updated = new_value

            # Add any alias keys that the old dictionary did not already have
            for alias in alias_keys - old_keys:
                tkdict[alias] = updated

            # Every alias of this sub-dictionary is now done. Marking them all (not
            # just the newly added ones) prevents the same sub-dictionary from being
            # merged again when a later alias key is reached.
            keys_handled |= alias_keys
            continue

        # Insert new values
        if key not in tkdict:
            tkdict[key] = new_value
            continue

        # Leave identical values alone
        tk_value = tkdict[key]
        if tk_value == new_value:
            continue

        # Otherwise, convert to list if necessary and concatenate. A new list is built
        # rather than extending the old one in place, because the same list object
        # can also be stored under a hierarchical key and would otherwise be extended
        # a second time when that nested dictionary is merged.
        tkdict[key] = as_list(tk_value) + as_list(new_value)

    return tkdict
##########################################################################################