@@ -40,53 +40,6 @@ def _preprocess_include(include):
4040 return include
4141
4242
43- def _multi_mass (Q , T , m , M_T , Σ_T , μ_Q , σ_Q ):
44- """
45- A multi-dimensional wrapper around "Mueen's Algorithm for Similarity Search"
46- (MASS) to compute multi-dimensional distance profile.
47-
48- Parameters
49- ----------
50- Q : ndarray
51- Query array or subsequence
52-
53- T : ndarray
54- Time series array or sequence
55-
56- m : int
57- Window size
58-
59- M_T : ndarray
60- Sliding mean for `T_A`
61-
62- Σ_T : ndarray
63- Sliding standard deviation for `T_A`
64-
65- μ_Q : ndarray
66- Mean value of `Q`
67-
68- σ_Q : ndarray
69- Standard deviation of `Q`
70-
71- Returns
72- -------
73- D : ndarray
74- Multi-dimensional distance profile
75- """
76- d , n = T .shape
77- k = n - m + 1
78-
79- D = np .empty ((d , k ), dtype = "float64" )
80-
81- for i in range (d ):
82- if np .isinf (μ_Q [i ]):
83- D [i , :] = np .inf
84- else :
85- D [i , :] = core .mass (Q [i ], T [i ], M_T [i ], Σ_T [i ])
86-
87- return D
88-
89-
9043def _apply_include (
9144 D ,
9245 include ,
@@ -148,7 +101,54 @@ def _apply_include(
148101 D [unrestricted_indices ] = tmp_swap [mask ]
149102
150103
151- def _get_subspace (T , m , motif_idx , nn_idx , include = None , discords = False ):
104+ def _multi_mass (Q , T , m , M_T , Σ_T , μ_Q , σ_Q ):
105+ """
106+ A multi-dimensional wrapper around "Mueen's Algorithm for Similarity Search"
107+ (MASS) to compute multi-dimensional distance profile.
108+
109+ Parameters
110+ ----------
111+ Q : ndarray
112+ Query array or subsequence
113+
114+ T : ndarray
115+ Time series array or sequence
116+
117+ m : int
118+ Window size
119+
120+ M_T : ndarray
120+ Sliding mean for `T`
122+
123+ Σ_T : ndarray
123+ Sliding standard deviation for `T`
125+
126+ μ_Q : ndarray
127+ Mean value of `Q`
128+
129+ σ_Q : ndarray
130+ Standard deviation of `Q`
131+
132+ Returns
133+ -------
134+ D : ndarray
135+ Multi-dimensional distance profile
136+ """
137+ d , n = T .shape
138+ k = n - m + 1
139+
140+ D = np .empty ((d , k ), dtype = "float64" )
141+
142+ for i in range (d ):
143+ if np .isinf (μ_Q [i ]):
144+ D [i , :] = np .inf
145+ else :
146+ D [i , :] = core .mass (Q [i ], T [i ], M_T [i ], Σ_T [i ])
147+
148+ return D
149+
150+
151+ def _get_subspace (T , m , motif_idx , nn_idx , k , include = None , discords = False ):
152152 """
153153 Compute the multi-dimensional matrix profile subspace for a given motif index and
154154 its nearest neighbor index
@@ -168,6 +168,10 @@ def _get_subspace(T, m, motif_idx, nn_idx, include=None, discords=False):
168168 nn_idx : int
169169 The nearest neighbor index in T
170170
171+ k : int
172+ The number of dimensions, out of the `D = T.shape[0]` dimensions of `T`, for
173+ which to return the subspace
174+
171175 include : ndarray, default None
172176 A list of (zero-based) indices corresponding to the dimensions in `T` that
173177 must be included in the constrained multidimensional motif search.
@@ -183,35 +187,32 @@ def _get_subspace(T, m, motif_idx, nn_idx, include=None, discords=False):
183187 Returns
184188 -------
185189 S : ndarray
186- A ragged array of ndarrays that contain the multi-dimensional subspace for the
187- window with index equal to `query_idx`. The `len(S)` will be equal to the total
188- number of dimensions, `d`, and where `S[i]` corresponds to the list of subspace
189- indices for the `i+1`th subspace dimension (i.e., `S[1]` corresponds to the
190- subspace with dimension `2` and with `len(S[1]) == 2`).
190+ An array that contains the `k`th-dimensional subspace for the subsequence
191+ with index equal to `motif_idx`
191192 """
192193 T , _ , _ = core .preprocess (T , m )
193194
194- S = np .empty (T .shape [0 ], dtype = object )
195- D = np .linalg .norm (
196- core .z_norm (T [:, motif_idx : motif_idx + m ], axis = 1 )
197- - core .z_norm (T [:, nn_idx : nn_idx + m ], axis = 1 ),
198- axis = 1 ,
199- )
200-
201- if include is not None :
202- include = _preprocess_include (include )
203- else :
204- include = []
195+ motif = core .z_norm (T [:, motif_idx : motif_idx + m ], axis = 1 )
196+ neighbor = core .z_norm (T [:, nn_idx : nn_idx + m ], axis = 1 )
197+ D = np .linalg .norm (motif - neighbor , axis = 1 )
205198
206199 if discords :
207- D [include ] = np .inf
208200 sorted_idx = D [::- 1 ].argsort (axis = 0 , kind = "mergesort" )
209201 else :
210- D [include ] = 0.0
211202 sorted_idx = D .argsort (axis = 0 , kind = "mergesort" )
212203
213- for k in range (T .shape [0 ]):
214- S [k ] = sorted_idx [: k + 1 ]
204+ # `include` processing occurs here since we are dealing with indices, not distances
205+ if include is not None :
206+ include = _preprocess_include (include )
207+ mask = np .in1d (sorted_idx , include )
208+ include_idx = mask .nonzero ()[0 ]
209+ exclude_idx = (~ mask ).nonzero ()[0 ]
210+ sorted_idx [: include_idx .shape [0 ]], sorted_idx [include_idx .shape [0 ] :] = (
211+ sorted_idx [include_idx ],
212+ sorted_idx [exclude_idx ],
213+ )
214+
215+ S = sorted_idx [: k + 1 ]
215216
216217 return S
217218
@@ -292,7 +293,6 @@ def _query_mstump_profile(
292293 )
293294
294295 if include is not None :
295- include = _preprocess_include (include )
296296 _apply_include (D , include )
297297 start_row_idx = include .shape [0 ]
298298
@@ -700,6 +700,7 @@ def _mstump(
700700 σ_Q ,
701701 )
702702
703+ # `include` processing must occur here since we are dealing with distances
703704 if include is not None :
704705 _apply_include (
705706 D ,
0 commit comments