Skip to content

Commit 9b69a75

Browse files
committed
Fixed #298 Added _mpdist_vect and _aampdist_vect
1 parent 4f81430 commit 9b69a75

6 files changed

Lines changed: 409 additions & 55 deletions

File tree

stumpy/aampdist.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,59 @@
33
# STUMPY is a trademark of TD Ameritrade IP Company, Inc. All rights reserved.
44

55
from . import aamp, aamped
6-
from .mpdist import _mpdist
6+
from .core import _mass_absolute_distance_matrix
7+
from .mpdist import _mpdist, _mpdist_vect
8+
9+
10+
def _aampdist_vect(
11+
Q,
12+
T,
13+
m,
14+
percentage=0.05,
15+
k=None,
16+
custom_func=None,
17+
):
18+
"""
19+
Compute the non-normalized (i.e., without z-normalization) matrix profile distance
20+
measure vector between `Q` and each subsequence, `T[i : i + len(Q)]`, within `T`.
21+
22+
Parameters
23+
----------
24+
Q : ndarray
25+
Query array
26+
27+
T : ndarray
28+
Time series or sequence
29+
30+
m : int
31+
Window size
32+
33+
percentage : float, 0.05
34+
The percentage of distances that will be used to report `mpdist`. The value
35+
is between 0.0 and 1.0. This parameter is ignored when `k` is not `None` or when
36+
`k_func` is not None.
37+
38+
k : int, default None
39+
Specify the `k`th value in the concatenated matrix profiles to return. When `k`
40+
is not `None`, then the `percentage` parameter is ignored. This parameter is
41+
ignored when `k_func` is not None.
42+
43+
custom_func : object, default None
44+
A custom user defined function for selecting the desired value from the
45+
sorted `P_ABBA` array. This function may need to leverage `functools.partial`
46+
and should take `P_ABBA` as its only input parameter and return a single
47+
`MPdist` value. The `percentage` and `k` parameters are ignored when
48+
`custom_func` is not None.
49+
"""
50+
return _mpdist_vect(
51+
Q,
52+
T,
53+
m,
54+
distance_matrix_func=_mass_absolute_distance_matrix,
55+
percentage=percentage,
56+
k=k,
57+
custom_func=custom_func,
58+
)
759

860

961
def aampdist(T_A, T_B, m, percentage=0.05, k=None):

stumpy/core.py

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,9 @@ def _rolling_nanmin_1d(a, w=None):
468468
w = a.shape[0]
469469

470470
half_window_size = int(math.ceil((w - 1) / 2))
471-
return minimum_filter1d(a, size=w)[half_window_size : half_window_size - w + 1]
471+
return minimum_filter1d(a, size=w)[
472+
half_window_size : half_window_size + a.shape[0] - w + 1
473+
]
472474

473475

474476
def _rolling_nanmax_1d(a, w=None):
@@ -496,7 +498,9 @@ def _rolling_nanmax_1d(a, w=None):
496498
w = a.shape[0]
497499

498500
half_window_size = int(math.ceil((w - 1) / 2))
499-
return maximum_filter1d(a, size=w)[half_window_size : half_window_size - w + 1]
501+
return maximum_filter1d(a, size=w)[
502+
half_window_size : half_window_size + a.shape[0] - w + 1
503+
]
500504

501505

502506
def rolling_nanmin(a, w):
@@ -976,6 +980,36 @@ def mass(Q, T, M_T=None, Σ_T=None):
976980
return distance_profile
977981

978982

983+
def _mass_distance_matrix(Q, T, m, distance_matrix):
984+
"""
985+
Compute the full distance matrix between all of the subsequences of `Q` and `T`
986+
using the MASS algorithm
987+
988+
Parameters
989+
----------
990+
Q : ndarray
991+
Query array
992+
993+
T : ndarray
994+
Time series or sequence
995+
996+
m : int
997+
Window size
998+
999+
distance_matrix : ndarray
1000+
The full output distance matrix. This is mandatory since it may be reused.
1001+
1002+
Returns
1003+
-------
1004+
None
1005+
"""
1006+
k, l = distance_matrix.shape
1007+
T, M_T, Σ_T = preprocess(T, m)
1008+
1009+
for i in range(k):
1010+
distance_matrix[i, :] = mass(Q[i : i + m], T, M_T, Σ_T)
1011+
1012+
9791013
@njit(fastmath=True)
9801014
def _mass_absolute(Q_squared, T_squared, QT):
9811015
"""
@@ -1070,6 +1104,35 @@ def mass_absolute(Q, T):
10701104
return distance_profile
10711105

10721106

1107+
def _mass_absolute_distance_matrix(Q, T, m, distance_matrix):
1108+
"""
1109+
Compute the full non-normalized (i.e., without z-normalization) distance matrix
1110+
between all of the subsequences of `Q` and `T` using the MASS absolute algorithm
1111+
1112+
Parameters
1113+
----------
1114+
Q : ndarray
1115+
Query array
1116+
1117+
T : ndarray
1118+
Time series or sequence
1119+
1120+
m : int
1121+
Window size
1122+
1123+
distance_matrix : ndarray
1124+
The full output distance matrix. This is mandatory since it may be reused.
1125+
1126+
Returns
1127+
-------
1128+
None
1129+
"""
1130+
k, l = distance_matrix.shape
1131+
1132+
for i in range(k):
1133+
distance_matrix[i, :] = mass_absolute(Q[i : i + m], T)
1134+
1135+
10731136
def _get_QT(start, T_A, T_B, m):
10741137
"""
10751138
Compute the sliding dot product between the query, `T_A`, (from

stumpy/mpdist.py

Lines changed: 77 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
import numpy as np
66
import math
77

8-
from . import stump, stumped
8+
from . import stump, stumped, core
9+
from .core import _mass_distance_matrix
910

1011

1112
def _compute_P_ABBA(
@@ -84,7 +85,7 @@ def _compute_P_ABBA(
8485
P_ABBA[n_A - m + 1 :] = mp_func(T_B, m, T_A, ignore_trivial=False)[:, 0]
8586

8687

87-
def _select_P_ABBA_value(P_ABBA, k=None, custom_func=None):
88+
def _select_P_ABBA_value(P_ABBA, k, custom_func=None):
8889
"""
8990
A convenience function for returning the `k`th smallest value from the `P_ABBA`
9091
array or use a custom function to specify what `P_ABBA` value to return.
@@ -102,7 +103,7 @@ def _select_P_ABBA_value(P_ABBA, k=None, custom_func=None):
102103
A pre-sorted array resulting from the concatenation of the outputs from an
103104
AB-joinand BA-join for two time series, `T_A` and `T_B`
104105
105-
k : int, default None
106+
k : int
106107
Specify the `k`th value in the concatenated matrix profiles to return. This
107108
parameter is ignored when `k_func` is not None.
108109
@@ -118,6 +119,7 @@ def _select_P_ABBA_value(P_ABBA, k=None, custom_func=None):
118119
MPdist : float
119120
The matrix profile distance
120121
"""
122+
k = min(int(k), P_ABBA.shape[0] - 1)
121123
if custom_func is not None:
122124
MPdist = custom_func(P_ABBA)
123125
else:
@@ -214,7 +216,7 @@ def _mpdist(
214216
P_ABBA.sort()
215217

216218
if k is not None:
217-
k = int(k)
219+
k = min(int(k), P_ABBA.shape[0] - 1)
218220
else:
219221
percentage = min(percentage, 1.0)
220222
percentage = max(percentage, 0.0)
@@ -225,6 +227,77 @@ def _mpdist(
225227
return MPdist
226228

227229

230+
def _mpdist_vect(
231+
Q,
232+
T,
233+
m,
234+
distance_matrix_func=_mass_distance_matrix,
235+
percentage=0.05,
236+
k=None,
237+
custom_func=None,
238+
):
239+
"""
240+
Compute the matrix profile distance measure vector between `Q` and each subsequence,
241+
`T[i : i + len(Q)]`, within `T`.
242+
243+
Parameters
244+
----------
245+
Q : ndarray
246+
Query array
247+
248+
T : ndarray
249+
Time series or sequence
250+
251+
m : int
252+
Window size
253+
254+
distance_matrix_func : object, default _mass_distance_matrix
255+
The function to use to compute the distance matrix between `Q` and `T`
256+
257+
percentage : float, 0.05
258+
The percentage of distances that will be used to report `mpdist`. The value
259+
is between 0.0 and 1.0. This parameter is ignored when `k` is not `None` or when
260+
`k_func` is not None.
261+
262+
k : int, default None
263+
Specify the `k`th value in the concatenated matrix profiles to return. When `k`
264+
is not `None`, then the `percentage` parameter is ignored. This parameter is
265+
ignored when `k_func` is not None.
266+
267+
custom_func : object, default None
268+
A custom user defined function for selecting the desired value from the
269+
sorted `P_ABBA` array. This function may need to leverage `functools.partial`
270+
and should take `P_ABBA` as its only input parameter and return a single
271+
`MPdist` value. The `percentage` and `k` parameters are ignored when
272+
`custom_func` is not None.
273+
"""
274+
j = Q.shape[0] - m + 1 # `k` is reserved for `P_ABBA` selection
275+
l = T.shape[0] - m + 1
276+
MPdist_vect = np.empty(T.shape[0] - Q.shape[0] + 1)
277+
distance_matrix = np.full((j, l), np.inf)
278+
P_ABBA = np.empty(2 * j)
279+
280+
if k is None:
281+
percentage = min(percentage, 1.0)
282+
percentage = max(percentage, 0.0)
283+
k = min(math.ceil(percentage * (2 * Q.shape[0])), 2 * j - 1)
284+
285+
k = min(int(k), P_ABBA.shape[0] - 1)
286+
287+
distance_matrix_func(Q, T, m, distance_matrix)
288+
289+
rolling_row_min = core.rolling_nanmin(distance_matrix, j)
290+
col_min = np.nanmin(distance_matrix, axis=0)
291+
292+
for i in range(MPdist_vect.shape[0]):
293+
P_ABBA[:j] = rolling_row_min[:, i]
294+
P_ABBA[j:] = col_min[i : i + j]
295+
P_ABBA.sort()
296+
MPdist_vect[i] = _select_P_ABBA_value(P_ABBA, k, custom_func)
297+
298+
return MPdist_vect
299+
300+
228301
def mpdist(T_A, T_B, m, percentage=0.05, k=None):
229302
"""
230303
Compute the matrix profile distance (MPdist) measure between any two time series

tests/test_aampdist.py

Lines changed: 76 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
import math
12
import numpy as np
23
import numpy.testing as npt
34
from stumpy import aampdist, aampdisted
5+
from stumpy.aampdist import _aampdist_vect
46
from dask.distributed import Client, LocalCluster
57
import pytest
68
import naive
@@ -28,6 +30,77 @@ def dask_cluster():
2830
k = [0, 1, 2, 3, 4]
2931

3032

33+
@pytest.mark.parametrize("T_A, T_B", test_data)
34+
def test_aampdist_vect(T_A, T_B):
35+
m = 3
36+
n_A = T_A.shape[0]
37+
n_B = T_B.shape[0]
38+
j = n_A - m + 1 # `k` is reserved for `P_ABBA` selection
39+
P_ABBA = np.empty(2 * j, dtype=np.float64)
40+
ref_mpdist_vect = np.empty(n_B - n_A + 1)
41+
42+
percentage = 0.05
43+
k = min(math.ceil(percentage * (2 * n_A)), 2 * j - 1)
44+
k = min(int(k), P_ABBA.shape[0] - 1)
45+
46+
for i in range(n_B - n_A + 1):
47+
P_ABBA[:j] = naive.aamp(T_A, m, T_B[i : i + n_A])[:, 0]
48+
P_ABBA[j:] = naive.aamp(T_B[i : i + n_A], m, T_A)[:, 0]
49+
P_ABBA.sort()
50+
ref_mpdist_vect[i] = P_ABBA[k]
51+
52+
comp_mpdist_vect = _aampdist_vect(T_A, T_B, m)
53+
54+
npt.assert_almost_equal(ref_mpdist_vect, comp_mpdist_vect)
55+
56+
57+
@pytest.mark.parametrize("T_A, T_B", test_data)
58+
@pytest.mark.parametrize("percentage", percentage)
59+
def test_aampdist_vect_percentage(T_A, T_B, percentage):
60+
m = 3
61+
n_A = T_A.shape[0]
62+
n_B = T_B.shape[0]
63+
j = n_A - m + 1 # `k` is reserved for `P_ABBA` selection
64+
P_ABBA = np.empty(2 * j, dtype=np.float64)
65+
ref_mpdist_vect = np.empty(n_B - n_A + 1)
66+
67+
k = min(math.ceil(percentage * (2 * n_A)), 2 * j - 1)
68+
k = min(int(k), P_ABBA.shape[0] - 1)
69+
70+
for i in range(n_B - n_A + 1):
71+
P_ABBA[:j] = naive.aamp(T_A, m, T_B[i : i + n_A])[:, 0]
72+
P_ABBA[j:] = naive.aamp(T_B[i : i + n_A], m, T_A)[:, 0]
73+
P_ABBA.sort()
74+
ref_mpdist_vect[i] = P_ABBA[min(k, P_ABBA.shape[0] - 1)]
75+
76+
comp_mpdist_vect = _aampdist_vect(T_A, T_B, m, percentage=percentage)
77+
78+
npt.assert_almost_equal(ref_mpdist_vect, comp_mpdist_vect)
79+
80+
81+
@pytest.mark.parametrize("T_A, T_B", test_data)
82+
@pytest.mark.parametrize("k", k)
83+
def test_aampdist_vect_k(T_A, T_B, k):
84+
m = 3
85+
n_A = T_A.shape[0]
86+
n_B = T_B.shape[0]
87+
j = n_A - m + 1 # `k` is reserved for `P_ABBA` selection
88+
P_ABBA = np.empty(2 * j, dtype=np.float64)
89+
ref_mpdist_vect = np.empty(n_B - n_A + 1)
90+
91+
k = min(int(k), P_ABBA.shape[0] - 1)
92+
93+
for i in range(n_B - n_A + 1):
94+
P_ABBA[:j] = naive.aamp(T_A, m, T_B[i : i + n_A])[:, 0]
95+
P_ABBA[j:] = naive.aamp(T_B[i : i + n_A], m, T_A)[:, 0]
96+
P_ABBA.sort()
97+
ref_mpdist_vect[i] = P_ABBA[min(k, P_ABBA.shape[0] - 1)]
98+
99+
comp_mpdist_vect = _aampdist_vect(T_A, T_B, m, k=k)
100+
101+
npt.assert_almost_equal(ref_mpdist_vect, comp_mpdist_vect)
102+
103+
31104
@pytest.mark.parametrize("T_A, T_B", test_data)
32105
def test_aampdist(T_A, T_B):
33106
m = 3
@@ -39,7 +112,7 @@ def test_aampdist(T_A, T_B):
39112

40113
@pytest.mark.parametrize("T_A, T_B", test_data)
41114
@pytest.mark.parametrize("percentage", percentage)
42-
def test_mpdist_percentage(T_A, T_B, percentage):
115+
def test_aampdist_percentage(T_A, T_B, percentage):
43116
m = 3
44117
ref_mpdist = naive.aampdist(T_A, T_B, m, percentage=percentage)
45118
comp_mpdist = aampdist(T_A, T_B, m, percentage=percentage)
@@ -49,7 +122,7 @@ def test_mpdist_percentage(T_A, T_B, percentage):
49122

50123
@pytest.mark.parametrize("T_A, T_B", test_data)
51124
@pytest.mark.parametrize("k", k)
52-
def test_mpdist_k(T_A, T_B, k):
125+
def test_aampdist_k(T_A, T_B, k):
53126
m = 3
54127
ref_mpdist = naive.aampdist(T_A, T_B, m, k=k)
55128
comp_mpdist = aampdist(T_A, T_B, m, k=k)
@@ -62,7 +135,7 @@ def test_mpdist_k(T_A, T_B, k):
62135
@pytest.mark.filterwarnings("ignore:numpy.ndarray size changed")
63136
@pytest.mark.filterwarnings("ignore:\\s+Port 8787 is already in use:UserWarning")
64137
@pytest.mark.parametrize("T_A, T_B", test_data)
65-
def test_mpdisted(T_A, T_B, dask_cluster):
138+
def test_aampdisted(T_A, T_B, dask_cluster):
66139
with Client(dask_cluster) as dask_client:
67140
m = 3
68141
ref_mpdist = naive.aampdist(T_A, T_B, m)

0 commit comments

Comments
 (0)