1212from .maamp import maamp , maamp_mdl , maamp_multi_distance_profile , maamp_subspace
1313
1414
15- def _multi_mass (Q , T , m , M_T , Σ_T , μ_Q , σ_Q , T_subseq_isconstant ):
15+ def _multi_mass (
16+ Q ,
17+ T ,
18+ m ,
19+ M_T ,
20+ Σ_T ,
21+ μ_Q ,
22+ σ_Q ,
23+ T_subseq_isconstant ,
24+ Q_subseq_isconstant ,
25+ query_idx = None ,
26+ ):
1627 """
1728 A multi-dimensional wrapper around "Mueen's Algorithm for Similarity Search"
1829 (MASS) to compute multi-dimensional distance profile.
@@ -43,6 +54,16 @@ def _multi_mass(Q, T, m, M_T, Σ_T, μ_Q, σ_Q, T_subseq_isconstant):
4354 T_subseq_isconstant : numpy.ndarray
4455 A boolean array that indicates whether a subsequence in `T` is constant (True)
4556
57+ Q_subseq_isconstant : numpy.ndarray
58+ A boolean array that indicates whether a subsequence in `Q` is constant (True)
59+
60+ query_idx : int, default None
61+ This is the index position along each of the time series in `T`, where
62+ the query subsequence, `Q`, is located. `query_idx` should be set to None
63+ if `Q` is not a subsequence of `T`. If `Q` is a subsequence of `T`, providing
64+ this argument is optional. If query_idx is provided, the distance between Q
65+ and `T[:, query_idx : query_idx + m]` will automatically be set to zero.
66+
4667 Returns
4768 -------
4869 D : numpy.ndarray
@@ -58,13 +79,19 @@ def _multi_mass(Q, T, m, M_T, Σ_T, μ_Q, σ_Q, T_subseq_isconstant):
5879 D [i , :] = np .inf
5980 else :
6081 D [i , :] = core .mass (
61- Q [i ], T [i ], M_T [i ], Σ_T [i ], T_subseq_isconstant = T_subseq_isconstant [i ]
82+ Q [i ],
83+ T [i ],
84+ M_T [i ],
85+ Σ_T [i ],
86+ T_subseq_isconstant = T_subseq_isconstant [i ],
87+ Q_subseq_isconstant = Q_subseq_isconstant [i ],
88+ query_idx = query_idx ,
6289 )
6390
6491 return D
6592
6693
67- @core .non_normalized (maamp_subspace )
94+ @core .non_normalized (maamp_subspace , exclude = [ "normalize" , "T_subseq_isconstant" ] )
6895def subspace (
6996 T ,
7097 m ,
@@ -77,6 +104,7 @@ def subspace(
77104 n_bit = 8 ,
78105 normalize = True ,
79106 p = 2.0 ,
107+ T_subseq_isconstant = None ,
80108):
81109 """
82110 Compute the k-dimensional matrix profile subspace for a given subsequence index and
@@ -141,6 +169,15 @@ def subspace(
141169 and the Euclidean distance, respectively. This parameter is ignored when
142170 `normalize == True`.
143171
172+ T_subseq_isconstant : numpy.ndarray, function, or list, default None
173+ A parameter that is used to show whether a subsequence of a time series in `T`
173+ is constant (True) or not. T_subseq_isconstant can be a 2D boolean numpy.ndarray
175+ or a function that can be applied to each time series in `T`. Alternatively, for
176+ maximum flexibility, a list (with length equal to the total number of time
177+ series) may also be used. In this case, T_subseq_isconstant[i] corresponds to
178+ the i-th time series T[i] and each element in the list can either be a 1D
179+ boolean np.ndarray, a function, or None.
180+
144181 Returns
145182 -------
146183 S : numpy.ndarray
@@ -177,15 +214,19 @@ def subspace(
177214 """
178215 T = core ._preprocess (T )
179216 core .check_window_size (m , max_size = T .shape [- 1 ])
217+ T_subseq_isconstant = core .process_isconstant (T , m , T_subseq_isconstant )
180218
181219 if discretize_func is None :
182220 bins = _inverse_norm (n_bit )
183221 discretize_func = partial (_discretize , bins = bins )
184222
185223 subseqs , _ , _ , _ = core .preprocess (T [:, subseq_idx : subseq_idx + m ], m )
186224 subseqs = core .z_norm (subseqs , axis = 1 )
225+ subseqs [T_subseq_isconstant [:, subseq_idx ]] = 0.0
226+
187227 neighbors , _ , _ , _ = core .preprocess (T [:, nn_idx : nn_idx + m ], m )
188228 neighbors = core .z_norm (neighbors , axis = 1 )
229+ neighbors [T_subseq_isconstant [:, nn_idx ]] = 0.0
189230
190231 disc_subseqs = discretize_func (subseqs )
191232 disc_neighbors = discretize_func (neighbors )
@@ -243,7 +284,7 @@ def _discretize(a, bins, right=True): # pragma: no cover
243284 return np .digitize (a , bins , right = right )
244285
245286
246- @core .non_normalized (maamp_mdl )
287+ @core .non_normalized (maamp_mdl , exclude = [ "normalize" , "T_subseq_isconstant" ] )
247288def mdl (
248289 T ,
249290 m ,
@@ -255,6 +296,7 @@ def mdl(
255296 n_bit = 8 ,
256297 normalize = True ,
257298 p = 2.0 ,
299+ T_subseq_isconstant = None ,
258300):
259301 """
260302 Compute the multi-dimensional number of bits needed to compress one
@@ -316,6 +358,15 @@ def mdl(
316358 and the Euclidean distance, respectively. This parameter is ignored when
317359 `normalize == True`.
318360
361+ T_subseq_isconstant : numpy.ndarray, function, or list, default None
362+ A parameter that is used to show whether a subsequence of a time series in `T`
363+ is constant (True) or not. T_subseq_isconstant can be a 2D boolean numpy.ndarray
364+ or a function that can be applied to each time series in `T`. Alternatively, for
365+ maximum flexibility, a list (with length equal to the total number of time
366+ series) may also be used. In this case, T_subseq_isconstant[i] corresponds to
367+ the i-th time series T[i] and each element in the list can either be a 1D
368+ boolean np.ndarray, a function, or None.
369+
319370 Returns
320371 -------
321372 bit_sizes : numpy.ndarray
@@ -352,6 +403,7 @@ def mdl(
352403 """
353404 T = core ._preprocess (T )
354405 core .check_window_size (m , max_size = T .shape [- 1 ])
406+ T_subseq_isconstant = core .process_isconstant (T , m , T_subseq_isconstant )
355407
356408 if discretize_func is None :
357409 bins = _inverse_norm (n_bit )
@@ -362,8 +414,11 @@ def mdl(
362414 for k in range (T .shape [0 ]):
363415 subseqs , _ , _ , _ = core .preprocess (T [:, subseq_idx [k ] : subseq_idx [k ] + m ], m )
364416 subseqs = core .z_norm (subseqs , axis = 1 )
417+ subseqs [T_subseq_isconstant [:, subseq_idx [k ]]] = 0.0
418+
365419 neighbors , _ , _ , _ = core .preprocess (T [:, nn_idx [k ] : nn_idx [k ] + m ], m )
366420 neighbors = core .z_norm (neighbors , axis = 1 )
421+ neighbors [T_subseq_isconstant [:, nn_idx [k ]]] = 0.0
367422
368423 disc_subseqs = discretize_func (subseqs )
369424 disc_neighbors = discretize_func (neighbors )
@@ -387,6 +442,7 @@ def _multi_distance_profile(
387442 μ_Q ,
388443 σ_Q ,
389444 T_subseq_isconstant ,
445+ Q_subseq_isconstant ,
390446 include = None ,
391447 discords = False ,
392448 excl_zone = None ,
@@ -399,7 +455,7 @@ def _multi_distance_profile(
399455 Parameters
400456 ----------
401457 query_idx : int
402- The window index to calculate the multi-dimensional distance profile for
458+ The start index of the (multi-dimensional) query subsequence in `T_B`
403459
404460 T_A : numpy.ndarray
405461 The time series or sequence for which the multi-dimensional distance profile
@@ -426,6 +482,10 @@ def _multi_distance_profile(
426482 T_subseq_isconstant : numpy.ndarray
427483 A boolean array that indicates whether a subsequence in `T_A` is constant (True)
428484
485+ Q_subseq_isconstant : numpy.ndarray
486+ A boolean array that indicates whether a subsequence in `T_B` is
487+ constant (True)
488+
429489 include : numpy.ndarray, default None
430490 A list of (zero-based) indices corresponding to the dimensions in `T` that
431491 must be included in the constrained multidimensional motif search.
@@ -450,6 +510,7 @@ def _multi_distance_profile(
450510 d , n = T_A .shape
451511 k = n - m + 1
452512 start_row_idx = 0
513+
453514 D = _multi_mass (
454515 T_B [:, query_idx : query_idx + m ],
455516 T_A ,
@@ -459,6 +520,8 @@ def _multi_distance_profile(
459520 μ_Q [:, query_idx ],
460521 σ_Q [:, query_idx ],
461522 T_subseq_isconstant ,
523+ np .expand_dims (Q_subseq_isconstant [:, query_idx ], 1 ),
524+ query_idx = query_idx ,
462525 )
463526
464527 if include is not None :
@@ -481,9 +544,18 @@ def _multi_distance_profile(
481544 return D
482545
483546
484- @core .non_normalized (maamp_multi_distance_profile )
547+ @core .non_normalized (
548+ maamp_multi_distance_profile , exclude = ["normalize" , "T_subseq_isconstant" ]
549+ )
485550def multi_distance_profile (
486- query_idx , T , m , include = None , discords = False , normalize = True , p = 2.0
551+ query_idx ,
552+ T ,
553+ m ,
554+ include = None ,
555+ discords = False ,
556+ normalize = True ,
557+ p = 2.0 ,
558+ T_subseq_isconstant = None ,
487559):
488560 """
489561 Multi-dimensional wrapper to compute the multi-dimensional distance profile for a
@@ -525,13 +597,24 @@ def multi_distance_profile(
525597 and the Euclidean distance, respectively. This parameter is ignored when
526598 `normalize == True`.
527599
600+ T_subseq_isconstant : numpy.ndarray, function, or list, default None
601+ A parameter that is used to show whether a subsequence of a time series in `T`
602+ is constant (True) or not. T_subseq_isconstant can be a 2D boolean numpy.ndarray
603+ or a function that can be applied to each time series in `T`. Alternatively, for
604+ maximum flexibility, a list (with length equal to the total number of time
605+ series) may also be used. In this case, T_subseq_isconstant[i] corresponds to
606+ the i-th time series T[i] and each element in the list can either be a 1D
607+ boolean np.ndarray, a function, or None.
608+
528609 Returns
529610 -------
530611 D : numpy.ndarray
531612 Multi-dimensional distance profile for the window with index equal to
532613 `query_idx`
533614 """
534- T , M_T , Σ_T , T_subseq_isconstant = core .preprocess (T , m )
615+ T , M_T , Σ_T , T_subseq_isconstant = core .preprocess (
616+ T , m , T_subseq_isconstant = T_subseq_isconstant
617+ )
535618
536619 if T .ndim <= 1 : # pragma: no cover
537620 err = f"T is { T .ndim } -dimensional and must be at least 1-dimensional"
@@ -556,6 +639,7 @@ def multi_distance_profile(
556639 M_T ,
557640 Σ_T ,
558641 T_subseq_isconstant ,
642+ T_subseq_isconstant ,
559643 include ,
560644 discords ,
561645 excl_zone ,
@@ -575,6 +659,7 @@ def _get_first_mstump_profile(
575659 μ_Q ,
576660 σ_Q ,
577661 T_subseq_isconstant ,
662+ Q_subseq_isconstant ,
578663 include = None ,
579664 discords = False ,
580665):
@@ -621,6 +706,10 @@ def _get_first_mstump_profile(
621706 T_subseq_isconstant : numpy.ndarray
622707 A boolean array that indicates whether a subsequence in `T_A` is constant (True)
623708
709+ Q_subseq_isconstant : numpy.ndarray
710+ A boolean array that indicates whether a (query) subsequence in `T_B` is
711+ constant (True)
712+
624713 include : numpy.ndarray, default None
625714 A list of (zero-based) indices corresponding to the dimensions in `T` that
626715 must be included in the constrained multidimensional motif search.
@@ -653,6 +742,7 @@ def _get_first_mstump_profile(
653742 μ_Q ,
654743 σ_Q ,
655744 T_subseq_isconstant ,
745+ Q_subseq_isconstant ,
656746 include ,
657747 discords ,
658748 excl_zone ,
@@ -1013,8 +1103,10 @@ def _mstump(
10131103 return P , I
10141104
10151105
1016- @core .non_normalized (maamp )
1017- def mstump (T , m , include = None , discords = False , normalize = True , p = 2.0 ):
1106+ @core .non_normalized (maamp , exclude = ["normalize" , "T_subseq_isconstant" ])
1107+ def mstump (
1108+ T , m , include = None , discords = False , normalize = True , p = 2.0 , T_subseq_isconstant = None
1109+ ):
10181110 """
10191111 Compute the multi-dimensional z-normalized matrix profile
10201112
@@ -1059,6 +1151,15 @@ def mstump(T, m, include=None, discords=False, normalize=True, p=2.0):
10591151 and the Euclidean distance, respectively. This parameter is ignored when
10601152 `normalize == True`.
10611153
1154+ T_subseq_isconstant : numpy.ndarray, function, or list, default None
1155+ A parameter that is used to show whether a subsequence of a time series in `T`
1156+ is constant (True) or not. T_subseq_isconstant can be a 2D boolean numpy.ndarray
1157+ or a function that can be applied to each time series in `T`. Alternatively, for
1158+ maximum flexibility, a list (with length equal to the total number of time
1159+ series) may also be used. In this case, T_subseq_isconstant[i] corresponds to
1160+ the i-th time series T[i] and each element in the list can either be a 1D
1161+ boolean np.ndarray, a function, or None.
1162+
10621163 Returns
10631164 -------
10641165 P : numpy.ndarray
@@ -1100,8 +1201,19 @@ def mstump(T, m, include=None, discords=False, normalize=True, p=2.0):
11001201 T_A = T
11011202 T_B = T_A
11021203
1103- T_A , M_T , Σ_T , T_subseq_isconstant = core .preprocess (T_A , m )
1104- T_B , μ_Q , σ_Q , Q_subseq_isconstant = core .preprocess (T_B , m )
1204+ T_A = core ._preprocess (T_A )
1205+ T_B = core ._preprocess (T_B )
1206+
1207+ T_A_subseq_isconstant = T_subseq_isconstant
1208+ T_A_subseq_isconstant = core .process_isconstant (T_A , m , T_A_subseq_isconstant )
1209+ T_B_subseq_isconstant = T_A_subseq_isconstant
1210+
1211+ T_A , M_T , Σ_T , T_subseq_isconstant = core .preprocess (
1212+ T_A , m , T_subseq_isconstant = T_A_subseq_isconstant
1213+ )
1214+ T_B , μ_Q , σ_Q , Q_subseq_isconstant = core .preprocess (
1215+ T_B , m , T_subseq_isconstant = T_B_subseq_isconstant
1216+ )
11051217
11061218 if T_A .ndim <= 1 : # pragma: no cover
11071219 err = f"T is { T_A .ndim } -dimensional and must be at least 1-dimensional"
@@ -1135,6 +1247,7 @@ def mstump(T, m, include=None, discords=False, normalize=True, p=2.0):
11351247 μ_Q ,
11361248 σ_Q ,
11371249 T_subseq_isconstant ,
1250+ Q_subseq_isconstant ,
11381251 include ,
11391252 discords ,
11401253 )
0 commit comments