1212logger = logging .getLogger (__name__ )
1313
1414
15+ def _preprocess_include (include ):
16+ """
17+ A utility function for processing the `include` input
18+
19+ Parameters
20+ ----------
21+ include : ndarray
22+ A list of (zero-based) indices corresponding to the dimensions in `T` that
23+ must be included in the constrained multidimensional motif search.
24+ For more information, see Section IV D in:
25+
26+ `DOI: 10.1109/ICDM.2017.66 \
27+ <https://www.cs.ucr.edu/~eamonn/Motif_Discovery_ICDM.pdf>`__
28+
29+ Returns
30+ -------
31+ include : ndarray
32+ Process `include` and remove any redundant index values
33+ """
34+ include = np .asarray (include )
35+ _ , idx = np .unique (include , return_index = True )
36+ if include .shape [0 ] != idx .shape [0 ]: # pragma: no cover
37+ logger .warning ("Removed repeating indices in `include`" )
38+ include = include [np .sort (idx )]
39+
40+ return include
41+
42+
1543def _multi_mass (Q , T , m , M_T , Σ_T , μ_Q , σ_Q ):
1644 """
1745 A multi-dimensional wrapper around "Mueen's Algorithm for Similarity Search"
@@ -99,6 +127,8 @@ def _apply_include(
99127 tmp_swap : ndarray, default None
100128 A reusable array to aid in array element swapping
101129 """
130+ include = _preprocess_include (include )
131+
102132 if restricted_indices is None :
103133 restricted_indices = include [include < include .shape [0 ]]
104134
@@ -118,27 +148,92 @@ def _apply_include(
118148 D [unrestricted_indices ] = tmp_swap [mask ]
119149
120150
def _get_subspace(T, m, motif_idx, nn_idx, include=None, discords=False):
    """
    Compute the multi-dimensional matrix profile subspace for a given motif index and
    its nearest neighbor index

    Parameters
    ----------
    T : ndarray
        The time series or sequence for which the multi-dimensional matrix profile,
        multi-dimensional matrix profile indices were computed

    m : int
        Window size

    motif_idx : int
        The motif index in T

    nn_idx : int
        The nearest neighbor index in T

    include : ndarray, default None
        A list of (zero-based) indices corresponding to the dimensions in `T` that
        must be included in the constrained multidimensional motif search.
        For more information, see Section IV D in:

        `DOI: 10.1109/ICDM.2017.66 \
        <https://www.cs.ucr.edu/~eamonn/Motif_Discovery_ICDM.pdf>`__

    discords : bool, default False
        When set to `True`, this reverses the distance profile to favor discords rather
        than motifs. Note that indices in `include` are still maintained and respected.

    Returns
    -------
    S : ndarray
        A ragged array of ndarrays that contain the multi-dimensional subspace for the
        window with index equal to `motif_idx`. The `len(S)` will be equal to the total
        number of dimensions, `d`, and where `S[i]` corresponds to the list of subspace
        indices for the `i+1`th subspace dimension (i.e., `S[1]` corresponds to the
        subspace with dimension `2` and with `len(S[1]) == 2`).
    """
    T, _, _ = core.preprocess(T, m)
    d = T.shape[0]

    # One distance per dimension (row of `T`) between the z-normalized motif window
    # and the z-normalized nearest neighbor window
    D = np.linalg.norm(
        core.z_norm(T[:, motif_idx : motif_idx + m], axis=1)
        - core.z_norm(T[:, nn_idx : nn_idx + m], axis=1),
        axis=1,
    )

    if include is not None:
        include = _preprocess_include(include)
    else:
        include = []

    if discords:
        # Force the `include` dimensions to rank first in a descending ordering
        D[include] = np.inf
        # Stable argsort of the negated distances yields the indices of `D` in
        # descending order. Note that `D[::-1].argsort()` would instead return
        # positions within the reversed array (in ascending order), which are not
        # valid dimension indices for `D` and would push `include` to the end.
        sorted_idx = np.argsort(-D, kind="mergesort")
    else:
        # Force the `include` dimensions to rank first in an ascending ordering
        D[include] = 0.0
        sorted_idx = D.argsort(axis=0, kind="mergesort")

    # `S[k]` holds the first `k + 1` ranked dimensions
    S = np.empty(d, dtype=object)
    for k in range(d):
        S[k] = sorted_idx[: k + 1]

    return S
217+
218+
121219def _query_mstump_profile (
122220 query_idx , T_A , T_B , m , excl_zone , M_T , Σ_T , μ_Q , σ_Q , include = None , discords = False
123221):
124222 """
125- Multi-dimensional wrapper to compute the multi-dimensional matrix profile,
126- the multi-dimensional matrix profile index, the multi-dimensional matrix profile
127- subspace for a given query window within the times series or sequence that is
128- denoted by the `query_idx` index. Essentially, this is a convenience wrapper around
129- `_multi_mass`.
223+ Multi-dimensional wrapper to compute the multi-dimensional matrix profile and
224+ the multi-dimensional matrix profile index for a given query window within the time
225+ series or sequence that is denoted by the `query_idx` index. Essentially, this is a
226+ convenience wrapper around `_multi_mass`.
130227
131228 Parameters
132229 ----------
133230 query_idx : int
134- The window index to calculate the first multi-dimensional matrix profile,
135- multi-dimensional matrix profile indices, and multi-dimensional matrix profile
136- subspace.
231+ The window index to calculate the first multi-dimensional matrix profile and
232+ multi-dimensional matrix profile indices
137233
138234 T_A : ndarray
139- The time series or sequence for which the multi-dimensional matrix profile,
140- multi-dimensional matrix profile indices, and multi-dimensional matrix profile
141- subspace will be returned
235+ The time series or sequence for which the multi-dimensional matrix profile and
236+ multi-dimensional matrix profile indices will be returned
142237
143238 T_B : ndarray
144239 The time series or sequence that contains your query subsequences
@@ -182,13 +277,6 @@ def _query_mstump_profile(
182277 I : ndarray
183278 Multi-dimensional matrix profile indices for the window with index
184279 equal to `query_idx`
185-
186- S : ndarray
187- A ragged array of ndarrays that contain the multi-dimensional subspace for the
188- window with index equal to `query_idx`. The `len(S)` will be equal to the total
189- number of dimensions, `d`, and where `S[i]` corresponds to the list of subspace
190- indices for the `i+1`th subspace dimension (i.e., `S[1]` corresponds to the
191- subspace with dimension `2` and with `len(S[1]) == 2`).
192280 """
193281 d , n = T_A .shape
194282 k = n - m + 1
@@ -204,19 +292,14 @@ def _query_mstump_profile(
204292 )
205293
206294 if include is not None :
295+ include = _preprocess_include (include )
207296 _apply_include (D , include )
208297 start_row_idx = include .shape [0 ]
209298
210299 if discords :
211- # D[start_row_idx:][::-1].sort(axis=0)
212- sorted_idx = D [start_row_idx :][::- 1 ].argsort (axis = 0 , kind = "mergesort" )
213- broadcast_idx = np .arange (D [start_row_idx :].shape [1 ])[np .newaxis , :]
214- D [start_row_idx :][::- 1 ] = D [start_row_idx :][::- 1 ][sorted_idx , broadcast_idx ]
300+ D [start_row_idx :][::- 1 ].sort (axis = 0 , kind = "mergesort" )
215301 else :
216- # D[start_row_idx:].sort(axis=0)
217- sorted_idx = D [start_row_idx :].argsort (axis = 0 , kind = "mergesort" )
218- broadcast_idx = np .arange (D [start_row_idx :].shape [1 ])[np .newaxis , :]
219- D [start_row_idx :] = D [start_row_idx :][sorted_idx , broadcast_idx ]
302+ D [start_row_idx :].sort (axis = 0 , kind = "mergesort" )
220303
221304 D_prime = np .zeros (k )
222305 for i in range (d ):
@@ -227,18 +310,15 @@ def _query_mstump_profile(
227310
228311 P = np .full (d , np .inf , dtype = "float64" )
229312 I = np .full (d , - 1 , dtype = "int64" )
230- S = np .empty (d , dtype = object )
231313
232314 for i in range (d ):
233315 min_index = np .argmin (D [i ])
234316 I [i ] = min_index
235317 P [i ] = D [i , min_index ]
236- S [i ] = sorted_idx [: i + 1 , min_index ]
237318 if np .isinf (P [i ]): # pragma nocover
238319 I [i ] = - 1
239- S [i ][:] = - 1
240320
241- return P , I , S
321+ return P , I
242322
243323
244324def _get_first_mstump_profile (
@@ -306,7 +386,7 @@ def _get_first_mstump_profile(
306386 Multi-dimensional matrix profile indices for the window with index
307387 equal to `start`
308388 """
309- P , I , _ = _query_mstump_profile (
389+ P , I = _query_mstump_profile (
310390 start , T_A , T_B , m , excl_zone , M_T , Σ_T , μ_Q , σ_Q , include , discords
311391 )
312392 return P , I
@@ -706,11 +786,7 @@ def mstump(T, m, include=None, discords=False):
706786 core .check_window_size (m )
707787
708788 if include is not None :
709- include = np .asarray (include )
710- _ , idx = np .unique (include , return_index = True )
711- if include .shape [0 ] != idx .shape [0 ]: # pragma: no cover
712- logger .warning ("Removed repeating indices in `include`" )
713- include = include [np .sort (idx )]
789+ include = _preprocess_include (include )
714790
715791 d , n = T_B .shape
716792 k = n - m + 1
0 commit comments