stumpy-dev
diff --git a/stumpy/aamp.py b/stumpy/aamp.py
Lines changed: 17 additions & 16 deletions
diff --git a/stumpy/aamped.py b/stumpy/aamped.py
Lines changed: 6 additions & 5 deletions
diff --git a/stumpy/core.py b/stumpy/core.py
Lines changed: 6 additions & 6 deletions
diff --git a/stumpy/gpu_aamp.py b/stumpy/gpu_aamp.py
Lines changed: 4 additions & 10 deletions
diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
Lines changed: 16 additions & 21 deletions
@@ -38,8 +38,8 @@ def _compute_diagonal(
         The time series or sequence for which to compute the matrix profile
 
     T_B : ndarray
-        The time series or sequence that contain your query subsequences
-        of interest
+        The time series or sequence that will be used to annotate T_A. For every
+        subsequence in T_A, its nearest neighbor in T_B will be recorded.
 
     m : int
         Window size
@@ -85,24 +85,24 @@ def _compute_diagonal(
         k = diags[diag_idx]
 
         if k >= 0:
-            iter_range = range(0, min(n_B - m + 1, n_A - m + 1 - k))
+            iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - k))
         else:
-            iter_range = range(-k, min(n_B - m + 1, n_A - m + 1 - k))
+            iter_range = range(-k, min(n_A - m + 1, n_B - m + 1 - k))
 
         for i in iter_range:
             if i == 0 or i == k or (k < 0 and i == -k):
-                D_squared = np.linalg.norm(T_A[i + k : i + k + m] - T_B[i : i + m]) ** 2
+                D_squared = np.linalg.norm(T_B[i + k : i + k + m] - T_A[i : i + m]) ** 2
             else:
                 D_squared = np.abs(
                     D_squared
-                    - (T_A[i + k - 1] - T_B[i - 1]) ** 2
-                    + (T_A[i + k + m - 1] - T_B[i + m - 1]) ** 2
+                    - (T_B[i + k - 1] - T_A[i - 1]) ** 2
+                    + (T_B[i + k + m - 1] - T_A[i + m - 1]) ** 2
                 )
 
             if D_squared < STUMPY_D_SQUARED_THRESHOLD:
                 D_squared = 0.0
 
-            if T_A_subseq_isfinite[i + k] and T_B_subseq_isfinite[i]:
+            if T_A_subseq_isfinite[i] and T_B_subseq_isfinite[i + k]:
                 # Neither subsequence contains NaNs
                 if D_squared < P[thread_idx, i, 0]:
                     P[thread_idx, i, 0] = D_squared
@@ -147,8 +147,8 @@ def _aamp(
         The time series or sequence for which to compute the matrix profile
 
     T_B : ndarray
-        The time series or sequence that contain your query subsequences
-        of interest
+        The time series or sequence that will be used to annotate T_A. For every
+        subsequence in T_A, its nearest neighbor in T_B will be recorded.
 
     m : int
         Window size
@@ -185,7 +185,7 @@ def _aamp(
     """
     n_A = T_A.shape[0]
     n_B = T_B.shape[0]
-    l = n_B - m + 1
+    l = n_A - m + 1
     n_threads = config.NUMBA_NUM_THREADS
     P = np.full((n_threads, l, 3), np.inf)
     I = np.full((n_threads, l, 3), -1, np.int64)
@@ -244,8 +244,9 @@ def aamp(T_A, m, T_B=None, ignore_trivial=True):
         Window size
 
     T_B : ndarray
-        The time series or sequence that contain your query subsequences
-        of interest. Default is `None` which corresponds to a self-join.
+        The time series or sequence that will be used to annotate T_A. For every
+        subsequence in T_A, its nearest neighbor in T_B will be recorded. Default is
+        `None` which corresponds to a self-join.
 
     ignore_trivial : bool
         Set to `True` if this is a self-join. Otherwise, for AB-join, set this
@@ -291,15 +292,15 @@ def aamp(T_A, m, T_B=None, ignore_trivial=True):
 
     n_A = T_A.shape[0]
     n_B = T_B.shape[0]
-    l = n_B - m + 1
+    l = n_A - m + 1
 
     excl_zone = int(np.ceil(m / 4))
     out = np.empty((l, 4), dtype=object)
 
     if ignore_trivial:
-        diags = np.arange(excl_zone + 1, n_B - m + 1)
+        diags = np.arange(excl_zone + 1, n_A - m + 1)
     else:
-        diags = np.arange(-(n_B - m + 1) + 1, n_A - m + 1)
+        diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1)
 
     P, I = _aamp(
         T_A,
 
@@ -34,8 +34,9 @@ def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True):
         Window size
 
     T_B : ndarray
-        The time series or sequence that contain your query subsequences
-        of interest. Default is `None` which corresponds to a self-join.
+        The time series or sequence that will be used to annotate T_A. For every
+        subsequence in T_A, its nearest neighbor in T_B will be recorded. Default is
+        `None` which corresponds to a self-join.
 
     ignore_trivial : bool
         Set to `True` if this is a self-join. Otherwise, for AB-join, set this
@@ -81,7 +82,7 @@ def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True):
 
     n_A = T_A.shape[0]
     n_B = T_B.shape[0]
-    l = n_B - m + 1
+    l = n_A - m + 1
 
     excl_zone = int(np.ceil(m / 4))
     out = np.empty((l, 4), dtype=object)
@@ -90,9 +91,9 @@ def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True):
     nworkers = len(hosts)
 
     if ignore_trivial:
-        diags = np.arange(excl_zone + 1, n_B - m + 1)
+        diags = np.arange(excl_zone + 1, n_A - m + 1)
     else:
-        diags = np.arange(-(n_B - m + 1) + 1, n_A - m + 1)
+        diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1)
 
     ndist_counts = core._count_diagonal_ndist(diags, m, n_A, n_B)
     diags_ranges = core._get_array_ranges(ndist_counts, nworkers)
 
@@ -783,8 +783,8 @@ def mass_absolute(Q, T):
 
 def _get_QT(start, T_A, T_B, m):
     """
-    Compute the sliding dot product between the query, `T_B`, (from
-    [start:start+m]) and the time series, `T_A`. Additionally, compute
+    Compute the sliding dot product between the query, `T_A`, (from
+    [start:start+m]) and the time series, `T_B`. Additionally, compute
     QT for the first window.
 
     Parameters
@@ -796,8 +796,8 @@ def _get_QT(start, T_A, T_B, m):
         The time series or sequence for which to compute the dot product
 
     T_B : ndarray
-        The time series or sequence that contain your query subsequence
-        of interest
+        The time series or sequence that will be used to annotate T_A. For every
+        subsequence in T_A, its nearest neighbor in T_B will be recorded.
 
     m : int
         Window size
@@ -1047,9 +1047,9 @@ def _count_diagonal_ndist(diags, m, n_A, n_B):
     for diag_idx in prange(diags.shape[0]):
         k = diags[diag_idx]
         if k >= 0:
-            diag_ndist_counts[diag_idx] = min(n_A - m + 1 - k, n_B - m + 1)
+            diag_ndist_counts[diag_idx] = min(n_B - m + 1 - k, n_A - m + 1)
         else:
-            diag_ndist_counts[diag_idx] = min(n_A - m + 1, n_B - m + 1 + k)
+            diag_ndist_counts[diag_idx] = min(n_B - m + 1, n_A - m + 1 + k)
 
     return diag_ndist_counts
 
 
@@ -50,8 +50,8 @@ def _compute_and_update_PI_kernel(
         The time series or sequence for which to compute the dot product
 
     T_B : ndarray
-        The time series or sequence that contain your query subsequence
-        of interest
+        The time series or sequence that will be used to annotate T_A. For every
+        subsequence in T_A, its nearest neighbor in T_B will be recorded.
 
     m : int
         Window size
@@ -196,8 +196,8 @@ def _gpu_aamp(
         the matrix profile
 
     T_B_fname : str
-        The file name for the time series or sequence that contain your
-        query subsequences of interest
+        The file name for the time series or sequence that will be used to annotate T_A.
+        For every subsequence in T_A, its nearest neighbor in T_B will be recorded.
 
     m : int
         Window size
@@ -413,12 +413,6 @@ def gpu_aamp(T_A, m, T_B=None, ignore_trivial=True, device_id=0):
         T_B = T_A
         ignore_trivial = True
 
-    # Swap T_A and T_B for GPU implementation
-    # This keeps the API identical to and compatible with `stumpy.stump`
-    tmp_T = T_A
-    T_A = T_B
-    T_B = tmp_T
-
     T_A, T_A_subseq_isfinite = core.preprocess_non_normalized(T_A, m)
     T_B, T_B_subseq_isfinite = core.preprocess_non_normalized(T_B, m)
 
 
@@ -49,8 +49,8 @@ def _compute_and_update_PI_kernel(
         The time series or sequence for which to compute the dot product
 
     T_B : ndarray
-        The time series or sequence that contain your query subsequence
-        of interest
+        The time series or sequence that will be used to annotate T_A. For every
+        subsequence in T_A, its nearest neighbor in T_B will be recorded.
 
     m : int
         Window size
@@ -197,8 +197,8 @@ def _gpu_stump(
         the matrix profile
 
     T_B_fname : str
-        The file name for the time series or sequence that contain your
-        query subsequences of interest
+        The file name for the time series or sequence that will be used to annotate T_A.
+        For every subsequence in T_A, its nearest neighbor in T_B will be recorded.
 
     m : int
         Window size
@@ -266,12 +266,12 @@ def _gpu_stump(
 
     See Table II, Figure 5, and Figure 6
 
-    Timeseries, T_B, will be annotated with the distance location
-    (or index) of all its subsequences in another times series, T_A.
+    Timeseries, T_A, will be annotated with the distance location
+    (or index) of all its subsequences in another time series, T_B.
 
-    Return: For every subsequence, Q, in T_B, you will get a distance
+    Return: For every subsequence, Q, in T_A, you will get a distance
-    and index for the closest subsequence in T_A. Thus, the array
+    and index for the closest subsequence in T_B. Thus, the array
-    returned will have length T_B.shape[0]-m+1. Additionally, the
+    returned will have length T_A.shape[0]-m+1. Additionally, the
     left and right matrix profiles are also returned.
 
     Note: Unlike in the Table II where T_A.shape is expected to be equal
@@ -387,8 +387,9 @@ def gpu_stump(T_A, m, T_B=None, ignore_trivial=True, device_id=0):
         Window size
 
     T_B : (optional) ndarray
-        The time series or sequence that contain your query subsequences
-        of interest. Default is `None` which corresponds to a self-join.
+        The time series or sequence that will be used to annotate T_A. For every
+        subsequence in T_A, its nearest neighbor in T_B will be recorded. Default is
+        `None` which corresponds to a self-join.
 
     ignore_trivial : bool
         Set to `True` if this is a self-join. Otherwise, for AB-join, set this
@@ -415,12 +416,12 @@ def gpu_stump(T_A, m, T_B=None, ignore_trivial=True, device_id=0):
 
     See Table II, Figure 5, and Figure 6
 
-    Timeseries, T_B, will be annotated with the distance location
-    (or index) of all its subsequences in another times series, T_A.
+    Timeseries, T_A, will be annotated with the distance location
+    (or index) of all its subsequences in another time series, T_B.
 
-    Return: For every subsequence, Q, in T_B, you will get a distance
-    and index for the closest subsequence in T_A. Thus, the array
-    returned will have length T_B.shape[0]-m+1. Additionally, the
+    Return: For every subsequence, Q, in T_A, you will get a distance
+    and index for the closest subsequence in T_B. Thus, the array
+    returned will have length T_A.shape[0]-m+1. Additionally, the
     left and right matrix profiles are also returned.
 
     Note: Unlike in the Table II where T_A.shape is expected to be equal
@@ -440,12 +441,6 @@ def gpu_stump(T_A, m, T_B=None, ignore_trivial=True, device_id=0):
         T_B = T_A
         ignore_trivial = True
 
-    # Swap T_A and T_B for GPU implementation
-    # This keeps the API identical to and compatible with `stumpy.stump`
-    tmp_T = T_A
-    T_A = T_B
-    T_B = tmp_T
-
     T_A, M_T, Σ_T = core.preprocess(T_A, m)
     T_B, μ_Q, σ_Q = core.preprocess(T_B, m)