Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
4228cad
Add wrangler
AreWeDreaming Mar 17, 2026
7faa192
Fix errors in test data and handle 2d ragged data
AreWeDreaming Mar 18, 2026
17ad3b8
Extend tests to strings
AreWeDreaming Mar 18, 2026
066a1ae
Update imas/wrangler.py
AreWeDreaming Mar 18, 2026
098418c
Update imas/wrangler.py
AreWeDreaming Mar 18, 2026
19c0f77
Make awkward import more selective
AreWeDreaming Mar 18, 2026
2504a69
Merge branch 'awkward_array_support' of github.com:AreWeDreaming/IMAS…
AreWeDreaming Mar 18, 2026
e990c5f
Fix messed up indent
AreWeDreaming Mar 19, 2026
1314972
Handle missing fields
AreWeDreaming Mar 19, 2026
31bee40
Fix incorrect return type hint
AreWeDreaming Mar 19, 2026
b091f4f
Fix wrong check in test assertion
AreWeDreaming Mar 19, 2026
dda7533
Be verbose on what version means
AreWeDreaming Mar 19, 2026
bddaa9b
fixed issue with 2D data and blackify
prasad-sawantdesai Mar 19, 2026
5ec40eb
version is not optional and convert_ids function when target_version …
prasad-sawantdesai Mar 19, 2026
0b78428
Merge branch 'develop' into awkward_array_support
prasad-sawantdesai Apr 1, 2026
694f588
added awkward as optional dependency
prasad-sawantdesai Apr 1, 2026
88acc28
checked version before converting
prasad-sawantdesai Apr 1, 2026
4367c85
fixed github actions -> readthedocs, tests and optional dependency in…
prasad-sawantdesai Apr 3, 2026
82e1f03
Merge branch 'develop' into awkward_array_support
prasad-sawantdesai Apr 3, 2026
a442bf2
fixed naming of the methods
prasad-sawantdesai Apr 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 44 additions & 1 deletion imas/backends/netcdf/ids_tensorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
"""Tensorization logic to convert IDSs to netCDF files and/or xarray Datasets."""

from collections import deque
from typing import List
from typing import List, Tuple

import numpy
import awkward as ak
Comment thread
prasad-sawantdesai marked this conversation as resolved.
Outdated

from imas.backends.netcdf.iterators import indexed_tree_iter
from imas.backends.netcdf.nc_metadata import NCMetadata
Expand Down Expand Up @@ -203,3 +204,45 @@ def tensorize(self, path, fillvalue):
tmp_var[aos_coords + tuple(map(slice, node.shape))] = node.value

return tmp_var

def recursively_convert_to_list(self, path: str, inactive_index: Tuple,
                                shape: Tuple, i_dim: int):
    """Build a nested list of node values for ``path``.

    Walks the index space described by ``shape`` one dimension at a time,
    starting at dimension ``i_dim``. ``inactive_index`` holds the indices
    already fixed by the outer recursion levels.

    Args:
        path: Key into ``self.filled_data``.
        inactive_index: Tuple of indices chosen for dimensions ``< i_dim``.
        shape: Extent of every dimension to iterate over.
        i_dim: Dimension handled by this call.

    Returns:
        A list with ``shape[i_dim]`` entries. On the last dimension each
        entry is the ``.value`` of the node stored under the full index
        tuple; otherwise each entry is the nested list of the next dimension.
    """
    extent = shape[i_dim]
    if i_dim == len(shape) - 1:
        # Innermost dimension: the index tuple is complete, read the values.
        return [
            self.filled_data[path][inactive_index + (position,)].value
            for position in range(extent)
        ]
    # Outer dimension: fix one more index and descend.
    return [
        self.recursively_convert_to_list(
            path, inactive_index + (position,), shape, i_dim + 1
        )
        for position in range(extent)
    ]

def awkward_tensorize(self, path: str):
    """Collect the data stored at ``path`` without padding it to a tensor.

    Args:
        path: The path to the data in the IDS.

    Returns:
        The raw ``.value`` of the single stored node when the data is keyed
        by an empty index tuple; otherwise an ``ak.Array`` built from the
        nested list of node values.

    Raises:
        NotImplementedError: If the shape array recorded for ``path`` has
            more than two dimensions.
        IndexError: If no filled data is recorded for ``path``.
    """
    if path in self.shapes:
        shape = self.shapes[path]
        if shape.ndim > 2:
            raise NotImplementedError(
                "Dimensions higher than 2 are not yet implemented."
            )
        shape = shape.shape
        hdf5_dim = 1
    else:
        dimensions = self.ncmeta.get_dimensions(path, self.homogeneous_time)
        shape = tuple(self.dimension_size[dim] for dim in dimensions)
        # Get the split between HDF5 indices and stored matrices
        # i.e. equilibrium.time_slice.profiles_2d <-> psi
        # The length of any index key gives the number of leading index
        # dimensions; peek at the first key without copying all of them.
        first_index = next(iter(self.filled_data[path]), None)
        if first_index is None:
            raise IndexError(f"No filled data available for path '{path}'.")
        hdf5_dim = len(first_index)
    if hdf5_dim == 0:
        # Keyed by the empty tuple: a single node, return its value as-is.
        return self.filled_data[path][()].value
    return ak.Array(
        self.recursively_convert_to_list(path, tuple(), shape[:hdf5_dim], 0)
    )
Comment thread
prasad-sawantdesai marked this conversation as resolved.
Outdated

155 changes: 155 additions & 0 deletions imas/test/test_wrangle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import pytest
import numpy as np
import awkward as ak

from imas.wrangler import wrangle, unwrangle
from imas.ids_factory import IDSFactory
from imas.util import idsdiffgen

@pytest.fixture
def test_data():
    """Raw quantities from which the flat and IDS test representations are built.

    Returns:
        A dict with an ``"equilibrium"`` entry (regular data: time base, 1D
        and 2D psi on an r/z grid) and a ``"thomson_scattering"`` entry
        (ragged data: two groups of channels with different numbers of time
        points).
    """
    n_time = 100
    n_radial = 100
    equilibrium = {
        "N_time": n_time,
        "N_radial": n_radial,
        "N_grid": 1,
        "time": np.linspace(0.0, 5.0, n_time),
        "psi_1d": np.linspace(0.0, 1.0, n_radial),
        "r": np.linspace(1.0, 2.0, n_radial),
        "z": np.linspace(-1.0, 1.0, n_radial),
    }
    r_grid, z_grid = np.meshgrid(equilibrium["r"], equilibrium["z"], indexing="ij")
    equilibrium["psi_2d"] = (r_grid - 1.5) ** 2 + z_grid**2

    # Two channel groups; the second group has a different time base length.
    n_ch = (20, 10)
    n_ch_time = (100, 300)
    n_total = sum(n_ch)
    # Consecutive labels 1..N. A linspace from 1 to N+1 with N points would
    # skip N and end on N+1 after integer truncation.
    labels = np.arange(1, n_total + 1).astype("U2")
    z_channels = np.concatenate([np.linspace(-1.0, 1.0, count) for count in n_ch])
    thomson_scattering = {
        "N_ch": n_ch,
        # np.char.add concatenates string arrays on NumPy 1.x and 2.x alike.
        "identifier": np.char.add("channel_", labels).astype("U10"),
        "N_time": n_ch_time,
        "r": np.concatenate([np.ones(n_ch[0]) * 1.6, np.ones(n_ch[1]) * 1.7]),
        "z": z_channels,
        "t_e": z_channels**2 * 5.0e3,
        "n_e": z_channels**2 * 5.0e19,
        "time": tuple(np.linspace(0, 5.0, count) for count in n_ch_time),
    }
    return {"equilibrium": equilibrium, "thomson_scattering": thomson_scattering}

@pytest.fixture
def flat(test_data):
    """Flat ``{"<ids>.<dotted.path>": array}`` view of ``test_data``.

    Equilibrium entries are regular numpy arrays whose leading axes run over
    time slices (and 2D profiles). Thomson scattering time traces are ragged
    awkward arrays because the two channel groups have different lengths.
    """
    eq = test_data["equilibrium"]
    ts = test_data["thomson_scattering"]
    n_time, n_grid, n_radial = eq["N_time"], eq["N_grid"], eq["N_radial"]
    n_first, n_second = ts["N_ch"]
    len_first, len_second = ts["N_time"]
    time_first, time_second = ts["time"]

    def broadcast_into(shape, values):
        # Allocate a zero array of `shape` and let numpy broadcast `values` in.
        target = np.zeros(shape)
        target[:] = values
        return target

    def ragged_time():
        # One copy of the group's time base per channel, groups stacked.
        return ak.concatenate(
            [
                np.tile(time_first, (n_first, 1)),
                np.tile(time_second, (n_second, 1)),
            ]
        )

    def ragged_signal(name):
        # Each channel's scalar repeated along its group's time axis.
        per_channel = ts[name]
        return ak.concatenate(
            [
                np.repeat(per_channel[:n_first, None], len_first, axis=1),
                np.repeat(per_channel[n_first:, None], len_second, axis=1),
            ]
        )

    grid_shape = (n_time, n_grid, n_radial)
    return {
        # Equilibrium test data
        "equilibrium.time": eq["time"],
        "equilibrium.time_slice.time": eq["time"],
        "equilibrium.ids_properties.homogeneous_time": 1,
        "equilibrium.time_slice.profiles_1d.psi": broadcast_into(
            (n_time, n_radial), eq["psi_1d"]
        ),
        "equilibrium.time_slice.profiles_2d.grid.dim1": broadcast_into(
            grid_shape, eq["r"][None, :]
        ),
        "equilibrium.time_slice.profiles_2d.grid.dim2": broadcast_into(
            grid_shape, eq["z"][None, :]
        ),
        "equilibrium.time_slice.profiles_2d.psi": broadcast_into(
            (n_time, n_grid, n_radial, n_radial), eq["psi_2d"][None, ...]
        ),
        # Thomson scattering test data (ragged)
        "thomson_scattering.channel.identifier": ts["identifier"],
        "thomson_scattering.ids_properties.homogeneous_time": 0,
        "thomson_scattering.channel.t_e.time": ragged_time(),
        "thomson_scattering.channel.t_e.data": ragged_signal("t_e"),
        "thomson_scattering.channel.n_e.time": ragged_time(),
        "thomson_scattering.channel.n_e.data": ragged_signal("n_e"),
        "thomson_scattering.channel.position.r": ts["r"],
        "thomson_scattering.channel.position.z": ts["z"],
    }

@pytest.fixture
def test_ids_dict(test_data):
    """Reference IDSs filled node by node from ``test_data``.

    Returns:
        ``{"equilibrium": ..., "thomson_scattering": ...}`` holding IDSs
        created with the ``"3.41.0"`` factory.
    """
    factory = IDSFactory("3.41.0")
    eq_data = test_data["equilibrium"]
    ts_data = test_data["thomson_scattering"]

    equilibrium = factory.equilibrium()
    equilibrium.time = eq_data["time"]
    equilibrium.time_slice.resize(eq_data["N_time"])
    equilibrium.ids_properties.homogeneous_time = 1
    for i_time in range(eq_data["N_time"]):
        time_slice = equilibrium.time_slice[i_time]
        time_slice.time = eq_data["time"][i_time]
        time_slice.profiles_1d.psi = eq_data["psi_1d"]
        time_slice.profiles_2d.resize(1)
        profile = time_slice.profiles_2d[0]
        profile.grid.dim1 = eq_data["r"]
        profile.grid.dim2 = eq_data["z"]
        profile.psi = eq_data["psi_2d"]

    thomson_scattering = factory.thomson_scattering()
    thomson_scattering.ids_properties.homogeneous_time = 0
    n_first, n_second = ts_data["N_ch"][0], ts_data["N_ch"][1]
    n_channels = n_first + n_second
    thomson_scattering.channel.resize(n_channels)
    for i_ch in range(n_channels):
        # Channels before n_first use the first time base, the rest the second.
        group = 0 if i_ch < n_first else 1
        time_base = ts_data["time"][group]
        n_points = ts_data["N_time"][group]
        channel = thomson_scattering.channel[i_ch]
        channel.identifier = ts_data["identifier"][i_ch]
        channel.t_e.time = time_base
        channel.t_e.data = np.tile(ts_data["t_e"][i_ch], n_points)
        channel.n_e.time = time_base
        channel.n_e.data = np.tile(ts_data["n_e"][i_ch], n_points)
        channel.position.r = ts_data["r"][i_ch]
        channel.position.z = ts_data["z"][i_ch]

    return {"equilibrium": equilibrium, "thomson_scattering": thomson_scattering}


def test_wrangle(test_ids_dict, flat):
    """Wrangling the flat mapping must reproduce the reference IDSs exactly."""
    wrangled = wrangle(flat)
    for key, expected in test_ids_dict.items():
        # Materialise the differences once so that a failing assertion can
        # show them; passing the diff object itself as the message would
        # report nothing useful if it is a one-shot iterator.
        differences = list(idsdiffgen(wrangled[key], expected))
        assert not differences, differences

def get_dtype(arr):
    """Get dtype from either numpy or awkward array.

    Args:
        arr: An awkward array, an object exposing ``dtype``, or any other
            value.

    Returns:
        For an ``ak.Array``, the numpy attribute named by the last
        ``*``-separated token of ``arr.typestr``; for objects with a
        ``dtype`` attribute, that attribute; otherwise ``type(arr)``.

    Raises:
        AttributeError: If the awkward type string does not end in the name
            of a numpy attribute.
    """
    if isinstance(arr, ak.Array):
        # The trailing token of the type string names the element type.
        # Resolve it with getattr instead of eval so that no constructed
        # string is ever executed as code.
        primitive_name = arr.typestr.split("*")[-1].strip()
        return getattr(np, primitive_name)
    if hasattr(arr, "dtype"):
        return arr.dtype
    return type(arr)

def test_unwrangle(test_ids_dict, flat):
    """Unwrangling the reference IDSs must give back every flat entry."""
    requested = list(flat.keys())
    result = unwrangle(requested, test_ids_dict)
    for key, expected in flat.items():
        actual = result[key]
        # Floating point data is compared with a tolerance, the rest exactly.
        is_float = np.issubdtype(get_dtype(actual), np.floating)
        if is_float:
            assert ak.almost_equal(actual, expected)
        else:
            assert ak.array_equal(actual, expected)
76 changes: 76 additions & 0 deletions imas/wrangler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from typing import Dict, List
import awkward as ak
import numpy as np
from . import IDSFactory
from .ids_toplevel import IDSToplevel
from .backends.netcdf.ids_tensorizer import IDSTensorizer

def recursively_put(location, value, ids):
    """Store ``value`` in ``ids`` at the dotted ``location``.

    The first component of ``location`` is resolved with ``getattr``. When
    the resolved node exposes a ``size`` attribute it is treated as an
    indexable container: it is resized to ``len(value)`` if still empty, and
    ``value[i]`` is stored into element ``i`` for every index. Otherwise the
    remainder of the location is resolved on the node itself.

    Args:
        location: Dotted path relative to ``ids``, e.g.
            ``time_slice.profiles_1d.psi``.
        value: Data to store; needs one leading axis per sized container
            along the path.
        ids: Object to write into.

    Returns:
        ``ids``, after modification in place.

    Raises:
        ValueError: If a non-empty container's size differs from
            ``len(value)``.
    """
    if "." not in location:
        setattr(ids, location, value)
        return ids

    position, sub_location = location.split(".", 1)
    sub_ids = getattr(ids, position)
    if not hasattr(sub_ids, "size"):
        # Plain structure: descend without consuming an axis of `value`.
        recursively_put(sub_location, value, sub_ids)
        return ids

    N = len(value)
    if sub_ids.size == 0:
        sub_ids.resize(N)
    elif sub_ids.size != N:
        raise ValueError(
            f"Inconsistent size across flat entries {location}, "
            f"{N} (flat) vs. ids {sub_ids.size}!"
        )
    # Need to iterate over indices (e.g. equilibrium.time_slice[:].)
    for index in range(N):
        recursively_put(sub_location, value[index], sub_ids[index])
    return ids


def wrangle(flat: Dict, version="3.41.0") -> Dict[str, IDSToplevel]:
    """Build IDSs from a flat ``{"<ids>.<dotted.path>": value}`` mapping.

    Args:
        flat: Mapping whose keys start with the IDS name, followed by a dot
            and the dotted path of the node inside that IDS.
        version: Version string handed to ``IDSFactory``.

    Returns:
        One IDS per distinct IDS name found in the keys of ``flat``.
    """
    factory = IDSFactory(version)
    wrangled = {}
    for key, value in flat.items():
        ids_name, location = key.split(".", 1)
        try:
            toplevel = wrangled[ids_name]
        except KeyError:
            # First entry for this IDS: create an empty one from the factory.
            toplevel = getattr(factory, ids_name)()
        wrangled[ids_name] = recursively_put(location, value, toplevel)
    return wrangled

def split_location_across_ids(locations: List[str]) -> Dict[str, List[str]]:
    """Group dotted locations by their leading IDS name.

    Args:
        locations: Entries of the form ``<ids>.<dotted.path>``.

    Returns:
        Mapping from IDS name to its paths, in input order, with the dots of
        each path replaced by ``/``.
    """
    grouped: Dict[str, List[str]] = {}
    for entry in locations:
        ids_name, dotted_path = entry.split(".", 1)
        slash_path = "/".join(dotted_path.split("."))
        grouped.setdefault(ids_name, []).append(slash_path)
    return grouped

def unwrangle(
    locations: List[str], ids_dict: Dict[str, IDSToplevel], version="3.41.0"
) -> Dict[str, ak.Array | np.ndarray]:
    """Extract the requested locations from IDSs into a flat mapping.

    Args:
        locations: Entries of the form ``<ids>.<dotted.path>``.
        ids_dict: IDSs to read from, keyed by IDS name.
        version: Accepted for interface symmetry with ``wrangle``; not used
            by this function body yet.

    Returns:
        Mapping from each requested location to its data. Values produced as
        awkward arrays are converted to numpy arrays when that conversion
        succeeds and are kept as awkward arrays when it raises
        ``ValueError``. Other values are passed through unchanged.
    """
    flat = {}
    for ids_name, paths in split_location_across_ids(locations).items():
        tensorizer = IDSTensorizer(ids_dict[ids_name], paths)
        tensorizer.include_coordinate_paths()
        tensorizer.collect_filled_data()
        tensorizer.determine_data_shapes()
        # Add IDS conversion
        for path in paths:
            location = f"{ids_name}.{path.replace('/', '.')}"
            values = tensorizer.awkward_tensorize(path)
            if not hasattr(values, "__getattr__"):
                # Plain value (e.g. homogeneous_time): store it untouched.
                flat[location] = values
                continue
            try:
                flat[location] = np.asarray(values)
            except ValueError:
                # Conversion to a regular numpy array failed: keep it awkward.
                flat[location] = ak.Array(values)
    return flat