Skip to content

Commit 655eabd

Browse files
committed
Fixed #339 Refactored subspace
1 parent 2a7de8e commit 655eabd

3 files changed

Lines changed: 78 additions & 52 deletions

File tree

docs/api.rst

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ STUMPY API
2121
stumpy.aampi
2222
stumpy.maamp
2323
stumpy.maamped
24-
stumpy.maamp_subspace
2524
stumpy.atsc
2625
stumpy.allc
2726
stumpy.fluss
@@ -108,11 +107,6 @@ maamped
108107

109108
.. autofunction:: stumpy.maamped
110109

111-
maamp_subspace
112-
==============
113-
114-
.. autofunction:: stumpy.maamp_subspace
115-
116110
atsc
117111
====
118112

stumpy/maamp.py

Lines changed: 8 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,13 @@
88
from numba import njit, prange
99

1010
from . import core, config
11-
from .mstump import _apply_include, _preprocess_include, _get_multi_QT, _compute_PI
11+
from .mstump import (
12+
_apply_include,
13+
_preprocess_include,
14+
_get_multi_QT,
15+
_compute_PI,
16+
subspace,
17+
)
1218

1319
logger = logging.getLogger(__name__)
1420

@@ -102,31 +108,7 @@ def maamp_subspace(T, m, subseq_idx, nn_idx, k, include=None, discords=False):
102108
An array that contains the `k`th-dimensional subspace for the subsequence
103109
with index equal to `subseq_idx`
104110
"""
105-
T, _ = core.preprocess_non_normalized(T, m)
106-
107-
subseqs = T[:, subseq_idx : subseq_idx + m]
108-
neighbors = T[:, nn_idx : nn_idx + m]
109-
D = np.linalg.norm(subseqs - neighbors, axis=1)
110-
111-
if discords:
112-
sorted_idx = D[::-1].argsort(axis=0, kind="mergesort")
113-
else:
114-
sorted_idx = D.argsort(axis=0, kind="mergesort")
115-
116-
# `include` processing occur here since we are dealing with indices, not distances
117-
if include is not None:
118-
include = _preprocess_include(include)
119-
mask = np.in1d(sorted_idx, include)
120-
include_idx = mask.nonzero()[0]
121-
exclude_idx = (~mask).nonzero()[0]
122-
sorted_idx[: include_idx.shape[0]], sorted_idx[include_idx.shape[0] :] = (
123-
sorted_idx[include_idx],
124-
sorted_idx[exclude_idx],
125-
)
126-
127-
S = sorted_idx[: k + 1]
128-
129-
return S
111+
return subspace(T, m, subseq_idx, nn_idx, k, include, discords, normalize=False)
130112

131113

132114
def _query_maamp_profile(

stumpy/mstump.py

Lines changed: 70 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -192,25 +192,15 @@ def _discretize(a, bins, right=True):
192192
return np.digitize(a, bins, right=right)
193193

194194

195-
def subspace(T, m, subseq_idx, nn_idx, k, include=None, discords=False):
195+
def _subspace(D, k, include=None, discords=False):
196196
"""
197-
Compute the k-dimensional matrix profile subspace for a given subsequence index and
197+
Compute the k-dimensional matrix profile subspace for a given subsequence index and
198198
its nearest neighbor index
199199
200200
Parameters
201201
----------
202-
T : ndarray
203-
The time series or sequence for which the multi-dimensional matrix profile,
204-
multi-dimensional matrix profile indices were computed
205-
206-
m : int
207-
Window size
208-
209-
subseq_idx : int
210-
The subsequence index in T
211-
212-
nn_idx : int
213-
The nearest neighbor index in T
202+
D : ndarray
203+
The multi-dimensional distance profile
214204
215205
k : int
216206
The subset number of dimensions out of `D = T.shape[0]`-dimensions to return
@@ -234,12 +224,6 @@ def subspace(T, m, subseq_idx, nn_idx, k, include=None, discords=False):
234224
An array that contains the `k`th-dimensional subspace for the subsequence
235225
with index equal to `motif_idx`
236226
"""
237-
T, _, _ = core.preprocess(T, m)
238-
239-
subseqs = core.z_norm(T[:, subseq_idx : subseq_idx + m], axis=1)
240-
neighbors = core.z_norm(T[:, nn_idx : nn_idx + m], axis=1)
241-
D = np.linalg.norm(subseqs - neighbors, axis=1)
242-
243227
if discords:
244228
sorted_idx = D[::-1].argsort(axis=0, kind="mergesort")
245229
else:
@@ -258,6 +242,72 @@ def subspace(T, m, subseq_idx, nn_idx, k, include=None, discords=False):
258242

259243
S = sorted_idx[: k + 1]
260244

245+
return S
246+
247+
248+
def subspace(T, m, subseq_idx, nn_idx, k, include=None, discords=False, normalize=True):
249+
"""
250+
Compute the k-dimensional matrix profile subspace for a given subsequence index and
251+
its nearest neighbor index
252+
253+
Parameters
254+
----------
255+
T : ndarray
256+
The time series or sequence for which the multi-dimensional matrix profile,
257+
multi-dimensional matrix profile indices were computed
258+
259+
m : int
260+
Window size
261+
262+
subseq_idx : int
263+
The subsequence index in T
264+
265+
nn_idx : int
266+
The nearest neighbor index in T
267+
268+
k : int
269+
The subset number of dimensions out of `D = T.shape[0]`-dimensions to return
270+
the subspace for
271+
272+
include : ndarray, default None
273+
A list of (zero-based) indices corresponding to the dimensions in `T` that
274+
must be included in the constrained multidimensional motif search.
275+
For more information, see Section IV D in:
276+
277+
`DOI: 10.1109/ICDM.2017.66 \
278+
<https://www.cs.ucr.edu/~eamonn/Motif_Discovery_ICDM.pdf>`__
279+
280+
discords : bool, default False
281+
When set to `True`, this reverses the distance profile to favor discords rather
282+
than motifs. Note that indices in `include` are still maintained and respected.
283+
284+
normalize : bool, default True
285+
When set to `True`, this z-normalizes subsequences prior to computing nearest
286+
neighbor distances. Z-normalization must be used when the corresponding
287+
multi-dimensional matrix profile is computed by `mstump` or `mstumped`. This
288+
should be set to `False` when the corresponding multi-dimensional matrix profile
289+
is computed by `maamp` or `maamped`.
290+
291+
Returns
292+
-------
293+
S : ndarray
294+
An array that contains the `k`th-dimensional subspace for the subsequence
295+
with index equal to `subseq_idx`
296+
"""
297+
if normalize:
298+
T, _, _ = core.preprocess(T, m)
299+
subseqs = core.z_norm(T[:, subseq_idx : subseq_idx + m], axis=1)
300+
neighbors = core.z_norm(T[:, nn_idx : nn_idx + m], axis=1)
301+
else:
302+
T, _ = core.preprocess_non_normalized(T, m)
303+
subseqs = T[:, subseq_idx : subseq_idx + m]
304+
neighbors = T[:, nn_idx : nn_idx + m]
305+
306+
D = np.linalg.norm(subseqs - neighbors, axis=1)
307+
308+
S = _subspace(D, k, include=include, discords=discords)
309+
310+
# MDL
261311
n_bit = 8
262312
bins = _inverse_norm()
263313
disc_subseqs = _discretize(subseqs[S], bins)

0 commit comments

Comments
 (0)