Skip to content

Commit 0df20d1

Browse files
committed
Fixed #258 Correct AB-Join Definition
1 parent 98995a2 commit 0df20d1

13 files changed

Lines changed: 192 additions & 198 deletions

File tree

stumpy/aamp.py

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ def _compute_diagonal(
3838
The time series or sequence for which to compute the matrix profile
3939
4040
T_B : ndarray
41-
The time series or sequence that contain your query subsequences
42-
of interest
41+
The time series or sequence that will be used to annotate T_A. For every
42+
subsequence in T_A, its nearest neighbor in T_B will be recorded.
4343
4444
m : int
4545
Window size
@@ -85,24 +85,24 @@ def _compute_diagonal(
8585
k = diags[diag_idx]
8686

8787
if k >= 0:
88-
iter_range = range(0, min(n_B - m + 1, n_A - m + 1 - k))
88+
iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - k))
8989
else:
90-
iter_range = range(-k, min(n_B - m + 1, n_A - m + 1 - k))
90+
iter_range = range(-k, min(n_A - m + 1, n_B - m + 1 - k))
9191

9292
for i in iter_range:
9393
if i == 0 or i == k or (k < 0 and i == -k):
94-
D_squared = np.linalg.norm(T_A[i + k : i + k + m] - T_B[i : i + m]) ** 2
94+
D_squared = np.linalg.norm(T_B[i + k : i + k + m] - T_A[i : i + m]) ** 2
9595
else:
9696
D_squared = np.abs(
9797
D_squared
98-
- (T_A[i + k - 1] - T_B[i - 1]) ** 2
99-
+ (T_A[i + k + m - 1] - T_B[i + m - 1]) ** 2
98+
- (T_B[i + k - 1] - T_A[i - 1]) ** 2
99+
+ (T_B[i + k + m - 1] - T_A[i + m - 1]) ** 2
100100
)
101101

102102
if D_squared < STUMPY_D_SQUARED_THRESHOLD:
103103
D_squared = 0.0
104104

105-
if T_A_subseq_isfinite[i + k] and T_B_subseq_isfinite[i]:
105+
if T_A_subseq_isfinite[i] and T_B_subseq_isfinite[i + k]:
106106
# Neither subsequence contains NaNs
107107
if D_squared < P[thread_idx, i, 0]:
108108
P[thread_idx, i, 0] = D_squared
@@ -147,8 +147,8 @@ def _aamp(
147147
The time series or sequence for which to compute the matrix profile
148148
149149
T_B : ndarray
150-
The time series or sequence that contain your query subsequences
151-
of interest
150+
The time series or sequence that will be used to annotate T_A. For every
151+
subsequence in T_A, its nearest neighbor in T_B will be recorded.
152152
153153
m : int
154154
Window size
@@ -185,7 +185,7 @@ def _aamp(
185185
"""
186186
n_A = T_A.shape[0]
187187
n_B = T_B.shape[0]
188-
l = n_B - m + 1
188+
l = n_A - m + 1
189189
n_threads = config.NUMBA_NUM_THREADS
190190
P = np.full((n_threads, l, 3), np.inf)
191191
I = np.full((n_threads, l, 3), -1, np.int64)
@@ -244,8 +244,9 @@ def aamp(T_A, m, T_B=None, ignore_trivial=True):
244244
Window size
245245
246246
T_B : ndarray
247-
The time series or sequence that contain your query subsequences
248-
of interest. Default is `None` which corresponds to a self-join.
247+
The time series or sequence that will be used to annotate T_A. For every
248+
subsequence in T_A, its nearest neighbor in T_B will be recorded. Default is
249+
`None` which corresponds to a self-join.
249250
250251
ignore_trivial : bool
251252
Set to `True` if this is a self-join. Otherwise, for AB-join, set this
@@ -291,15 +292,15 @@ def aamp(T_A, m, T_B=None, ignore_trivial=True):
291292

292293
n_A = T_A.shape[0]
293294
n_B = T_B.shape[0]
294-
l = n_B - m + 1
295+
l = n_A - m + 1
295296

296297
excl_zone = int(np.ceil(m / 4))
297298
out = np.empty((l, 4), dtype=object)
298299

299300
if ignore_trivial:
300-
diags = np.arange(excl_zone + 1, n_B - m + 1)
301+
diags = np.arange(excl_zone + 1, n_A - m + 1)
301302
else:
302-
diags = np.arange(-(n_B - m + 1) + 1, n_A - m + 1)
303+
diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1)
303304

304305
P, I = _aamp(
305306
T_A,

stumpy/aamped.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,9 @@ def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True):
3434
Window size
3535
3636
T_B : ndarray
37-
The time series or sequence that contain your query subsequences
38-
of interest. Default is `None` which corresponds to a self-join.
37+
The time series or sequence that will be used to annotate T_A. For every
38+
subsequence in T_A, its nearest neighbor in T_B will be recorded. Default is
39+
`None` which corresponds to a self-join.
3940
4041
ignore_trivial : bool
4142
Set to `True` if this is a self-join. Otherwise, for AB-join, set this
@@ -81,7 +82,7 @@ def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True):
8182

8283
n_A = T_A.shape[0]
8384
n_B = T_B.shape[0]
84-
l = n_B - m + 1
85+
l = n_A - m + 1
8586

8687
excl_zone = int(np.ceil(m / 4))
8788
out = np.empty((l, 4), dtype=object)
@@ -90,9 +91,9 @@ def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True):
9091
nworkers = len(hosts)
9192

9293
if ignore_trivial:
93-
diags = np.arange(excl_zone + 1, n_B - m + 1)
94+
diags = np.arange(excl_zone + 1, n_A - m + 1)
9495
else:
95-
diags = np.arange(-(n_B - m + 1) + 1, n_A - m + 1)
96+
diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1)
9697

9798
ndist_counts = core._count_diagonal_ndist(diags, m, n_A, n_B)
9899
diags_ranges = core._get_array_ranges(ndist_counts, nworkers)

stumpy/core.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -783,8 +783,8 @@ def mass_absolute(Q, T):
783783

784784
def _get_QT(start, T_A, T_B, m):
785785
"""
786-
Compute the sliding dot product between the query, `T_B`, (from
787-
[start:start+m]) and the time series, `T_A`. Additionally, compute
786+
Compute the sliding dot product between the query, `T_A`, (from
787+
[start:start+m]) and the time series, `T_B`. Additionally, compute
788788
QT for the first window.
789789
790790
Parameters
@@ -796,8 +796,8 @@ def _get_QT(start, T_A, T_B, m):
796796
The time series or sequence for which to compute the dot product
797797
798798
T_B : ndarray
799-
The time series or sequence that contain your query subsequence
800-
of interest
799+
The time series or sequence that will be used to annotate T_A. For every
800+
subsequence in T_A, its nearest neighbor in T_B will be recorded.
801801
802802
m : int
803803
Window size
@@ -1047,9 +1047,9 @@ def _count_diagonal_ndist(diags, m, n_A, n_B):
10471047
for diag_idx in prange(diags.shape[0]):
10481048
k = diags[diag_idx]
10491049
if k >= 0:
1050-
diag_ndist_counts[diag_idx] = min(n_A - m + 1 - k, n_B - m + 1)
1050+
diag_ndist_counts[diag_idx] = min(n_B - m + 1 - k, n_A - m + 1)
10511051
else:
1052-
diag_ndist_counts[diag_idx] = min(n_A - m + 1, n_B - m + 1 + k)
1052+
diag_ndist_counts[diag_idx] = min(n_B - m + 1, n_A - m + 1 + k)
10531053

10541054
return diag_ndist_counts
10551055

stumpy/gpu_aamp.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ def _compute_and_update_PI_kernel(
5050
The time series or sequence for which to compute the dot product
5151
5252
T_B : ndarray
53-
The time series or sequence that contain your query subsequence
54-
of interest
53+
The time series or sequence that will be used to annotate T_A. For every
54+
subsequence in T_A, its nearest neighbor in T_B will be recorded.
5555
5656
m : int
5757
Window size
@@ -196,8 +196,8 @@ def _gpu_aamp(
196196
the matrix profile
197197
198198
T_B_fname : str
199-
The file name for the time series or sequence that contain your
200-
query subsequences of interest
199+
The file name for the time series or sequence that will be used to annotate T_A.
200+
For every subsequence in T_A, its nearest neighbor in T_B will be recorded.
201201
202202
m : int
203203
Window size
@@ -413,12 +413,6 @@ def gpu_aamp(T_A, m, T_B=None, ignore_trivial=True, device_id=0):
413413
T_B = T_A
414414
ignore_trivial = True
415415

416-
# Swap T_A and T_B for GPU implementation
417-
# This keeps the API identical to and compatible with `stumpy.stump`
418-
tmp_T = T_A
419-
T_A = T_B
420-
T_B = tmp_T
421-
422416
T_A, T_A_subseq_isfinite = core.preprocess_non_normalized(T_A, m)
423417
T_B, T_B_subseq_isfinite = core.preprocess_non_normalized(T_B, m)
424418

stumpy/gpu_stump.py

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ def _compute_and_update_PI_kernel(
4949
The time series or sequence for which to compute the dot product
5050
5151
T_B : ndarray
52-
The time series or sequence that contain your query subsequence
53-
of interest
52+
The time series or sequence that will be used to annotate T_A. For every
53+
subsequence in T_A, its nearest neighbor in T_B will be recorded.
5454
5555
m : int
5656
Window size
@@ -197,8 +197,8 @@ def _gpu_stump(
197197
the matrix profile
198198
199199
T_B_fname : str
200-
The file name for the time series or sequence that contain your
201-
query subsequences of interest
200+
The file name for the time series or sequence that will be used to annotate T_A.
201+
For every subsequence in T_A, its nearest neighbor in T_B will be recorded.
202202
203203
m : int
204204
Window size
@@ -266,12 +266,12 @@ def _gpu_stump(
266266
267267
See Table II, Figure 5, and Figure 6
268268
269-
Timeseries, T_B, will be annotated with the distance location
270-
(or index) of all its subsequences in another times series, T_A.
269+
Timeseries, T_A, will be annotated with the distance location
270+
(or index) of all its subsequences in another time series, T_B.
271271
272-
Return: For every subsequence, Q, in T_B, you will get a distance
272+
Return: For every subsequence, Q, in T_A, you will get a distance
273273
and index for the closest subsequence in T_B. Thus, the array
274-
returned will have length T_B.shape[0]-m+1. Additionally, the
274+
returned will have length T_A.shape[0]-m+1. Additionally, the
275275
left and right matrix profiles are also returned.
276276
277277
Note: Unlike in the Table II where T_A.shape is expected to be equal
@@ -387,8 +387,9 @@ def gpu_stump(T_A, m, T_B=None, ignore_trivial=True, device_id=0):
387387
Window size
388388
389389
T_B : (optional) ndarray
390-
The time series or sequence that contain your query subsequences
391-
of interest. Default is `None` which corresponds to a self-join.
390+
The time series or sequence that will be used to annotate T_A. For every
391+
subsequence in T_A, its nearest neighbor in T_B will be recorded. Default is
392+
`None` which corresponds to a self-join.
392393
393394
ignore_trivial : bool
394395
Set to `True` if this is a self-join. Otherwise, for AB-join, set this
@@ -415,12 +416,12 @@ def gpu_stump(T_A, m, T_B=None, ignore_trivial=True, device_id=0):
415416
416417
See Table II, Figure 5, and Figure 6
417418
418-
Timeseries, T_B, will be annotated with the distance location
419-
(or index) of all its subsequences in another times series, T_A.
419+
Timeseries, T_A, will be annotated with the distance location
420+
(or index) of all its subsequences in another time series, T_B.
420421
421-
Return: For every subsequence, Q, in T_B, you will get a distance
422-
and index for the closest subsequence in T_A. Thus, the array
423-
returned will have length T_B.shape[0]-m+1. Additionally, the
422+
Return: For every subsequence, Q, in T_A, you will get a distance
423+
and index for the closest subsequence in T_B. Thus, the array
424+
returned will have length T_A.shape[0]-m+1. Additionally, the
424425
left and right matrix profiles are also returned.
425426
426427
Note: Unlike in the Table II where T_A.shape is expected to be equal
@@ -440,12 +441,6 @@ def gpu_stump(T_A, m, T_B=None, ignore_trivial=True, device_id=0):
440441
T_B = T_A
441442
ignore_trivial = True
442443

443-
# Swap T_A and T_B for GPU implementation
444-
# This keeps the API identical to and compatible with `stumpy.stump`
445-
tmp_T = T_A
446-
T_A = T_B
447-
T_B = tmp_T
448-
449444
T_A, M_T, Σ_T = core.preprocess(T_A, m)
450445
T_B, μ_Q, σ_Q = core.preprocess(T_B, m)
451446

0 commit comments

Comments
 (0)