@@ -192,25 +192,15 @@ def _discretize(a, bins, right=True):
192192 return np .digitize (a , bins , right = right )
193193
194194
195- def subspace ( T , m , subseq_idx , nn_idx , k , include = None , discords = False ):
195+ def _subspace ( D , k , include = None , discords = False ):
196196 """
197- Compute the k-dimensional matrix profile subspace for a given subsequence index and
197+ Compute the k-dimensional matrix profile subspace for a given subsequence index and
198198 its nearest neighbor index
199199
200200 Parameters
201201 ----------
202- T : ndarray
203- The time series or sequence for which the multi-dimensional matrix profile,
204- multi-dimensional matrix profile indices were computed
205-
206- m : int
207- Window size
208-
209- subseq_idx : int
210- The subsequence index in T
211-
212- nn_idx : int
213- The nearest neighbor index in T
202+ D : ndarray
203+ The multi-dimensional distance profile
214204
215205 k : int
216206 The subset number of dimensions out of `D = T.shape[0]`-dimensions to return
@@ -234,12 +224,6 @@ def subspace(T, m, subseq_idx, nn_idx, k, include=None, discords=False):
234224 An array that contains the `k`th-dimensional subspace for the subsequence
235225 with index equal to `motif_idx`
236226 """
237- T , _ , _ = core .preprocess (T , m )
238-
239- subseqs = core .z_norm (T [:, subseq_idx : subseq_idx + m ], axis = 1 )
240- neighbors = core .z_norm (T [:, nn_idx : nn_idx + m ], axis = 1 )
241- D = np .linalg .norm (subseqs - neighbors , axis = 1 )
242-
243227 if discords :
244228 sorted_idx = D [::- 1 ].argsort (axis = 0 , kind = "mergesort" )
245229 else :
@@ -258,6 +242,72 @@ def subspace(T, m, subseq_idx, nn_idx, k, include=None, discords=False):
258242
259243 S = sorted_idx [: k + 1 ]
260244
245+ return S
246+
247+
248+ def subspace (T , m , subseq_idx , nn_idx , k , include = None , discords = False , normalize = True ):
249+ """
250+ Compute the k-dimensional matrix profile subspace for a given subsequence index and
251+ its nearest neighbor index
252+
253+ Parameters
254+ ----------
255+ T : ndarray
256+ The time series or sequence for which the multi-dimensional matrix profile,
257+ multi-dimensional matrix profile indices were computed
258+
259+ m : int
260+ Window size
261+
262+ subseq_idx : int
263+ The subsequence index in T
264+
265+ nn_idx : int
266+ The nearest neighbor index in T
267+
268+ k : int
269+ The subset number of dimensions out of `D = T.shape[0]`-dimensions to return
270+ the subspace for
271+
272+ include : ndarray, default None
273+ A list of (zero-based) indices corresponding to the dimensions in `T` that
274+ must be included in the constrained multidimensional motif search.
275+ For more information, see Section IV D in:
276+
277+ `DOI: 10.1109/ICDM.2017.66 \
278+ <https://www.cs.ucr.edu/~eamonn/Motif_Discovery_ICDM.pdf>`__
279+
280+ discords : bool, default False
281+ When set to `True`, this reverses the distance profile to favor discords rather
282+ than motifs. Note that indices in `include` are still maintained and respected.
283+
284+ normalize : bool, default True
285+ When set to `True`, this z-normalizes subsequences prior to computing nearest
286+ neighbor distances. Z-normalization must be used when the corresponding
287+ multi-dimensional matrix profile is computed by `mstump` or `mstumped`. This
288+ should be set to `False` when the corresponding multi-dimensional matrix profile
289+ is computed by `maamp` or `maamped`.
290+
291+ Returns
292+ -------
293+ S : ndarray
294+ An array that contains the `k`th-dimensional subspace for the subsequence
295+ with index equal to `motif_idx`
296+ """
297+ if normalize :
298+ T , _ , _ = core .preprocess (T , m )
299+ subseqs = core .z_norm (T [:, subseq_idx : subseq_idx + m ], axis = 1 )
300+ neighbors = core .z_norm (T [:, nn_idx : nn_idx + m ], axis = 1 )
301+ else :
302+ T , _ = core .preprocess_non_normalized (T , m )
303+ subseqs = T [:, subseq_idx : subseq_idx + m ]
304+ neighbors = T [:, nn_idx : nn_idx + m ]
305+
306+ D = np .linalg .norm (subseqs - neighbors , axis = 1 )
307+
308+ S = _subspace (D , k , include = include , discords = discords )
309+
310+ # MDL
261311 n_bit = 8
262312 bins = _inverse_norm ()
263313 disc_subseqs = _discretize (subseqs [S ], bins )
0 commit comments