stumpy-dev
diff --git a/‎docs/Tutorial_Multidimensional_Motif_Discovery.ipynb‎
Lines changed: 3 additions & 16 deletions b/‎docs/Tutorial_Multidimensional_Motif_Discovery.ipynb‎
Lines changed: 3 additions & 16 deletions
diff --git a/‎stumpy/mstump.py‎
Lines changed: 112 additions & 36 deletions b/‎stumpy/mstump.py‎
Lines changed: 112 additions & 36 deletions
diff --git a/‎tests/naive.py‎
Lines changed: 26 additions & 8 deletions b/‎tests/naive.py‎
Lines changed: 26 additions & 8 deletions
@@ -348,7 +348,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -362,26 +362,13 @@
     }
    ],
    "source": [
-    "from stumpy.mstump import _query_mstump_profile\n",
-    "from stumpy import core\n",
-    "\n",
-    "def subspace(T, m, T_idx, k, include=None, discords=False, M_T=None, Σ_T=None):\n",
-    "    if M_T is None or Σ_T is None:\n",
-    "        T, M_T, Σ_T = stumpy.core.preprocess(T, m)\n",
-    "\n",
-    "    excl_zone = int(np.ceil(m / 4))\n",
-    "\n",
-    "    P, I, S = _query_mstump_profile(\n",
-    "        T_idx, T, T, m, excl_zone, M_T, Σ_T, M_T, Σ_T, include, discords\n",
-    "    )\n",
-    "    \n",
-    "    return S[k]\n",
+    "from stumpy.mstump import _get_subspace\n",
     "\n",
     "for k in range(df.shape[1]):\n",
     "    motif_idx = np.argmin(mp[:, k])\n",
     "    motif_mp_val = np.round(mp[motif_idx, k], 2)\n",
     "    nn_idx = indices[motif_idx, k]\n",
-    "    mp_subspace = subspace(df, m, motif_idx, k)\n",
+    "    mp_subspace = _get_subspace(df, m, motif_idx, nn_idx)[k]\n",
     "    print(f\"k-Dims: {k + 1}, Motif Idx: {motif_idx}, Nearest Neighbor Idx: {nn_idx}, Motif MP Value: {motif_mp_val}, Subspace(s) {mp_subspace}\")"
    ]
   },
 
@@ -12,6 +12,34 @@
 logger = logging.getLogger(__name__)
 
 
+def _preprocess_include(include):
+    """
+    Convert `include` to an ndarray and drop any repeated index values
+
+    Parameters
+    ----------
+    include : ndarray
+        A list of (zero-based) indices corresponding to the dimensions in `T` that
+        must be included in the constrained multidimensional motif search.
+        For more information, see Section IV D in:
+
+        `DOI: 10.1109/ICDM.2017.66 \
+        <https://www.cs.ucr.edu/~eamonn/Motif_Discovery_ICDM.pdf>`__
+
+    Returns
+    -------
+    include : ndarray
+        The processed `include` with repeated index values removed (order is kept)
+    """
+    include = np.asarray(include)
+    _, idx = np.unique(include, return_index=True)  # first-occurrence positions
+    if include.shape[0] != idx.shape[0]:  # pragma: no cover
+        logger.warning("Removed repeating indices in `include`")
+        include = include[np.sort(idx)]  # keep first occurrences in input order
+
+    return include
+
+
 def _multi_mass(Q, T, m, M_T, Σ_T, μ_Q, σ_Q):
     """
     A multi-dimensional wrapper around "Mueen's Algorithm for Similarity Search"
@@ -99,6 +127,8 @@ def _apply_include(
     tmp_swap : ndarray, default None
         A reusable array to aid in array element swapping
     """
+    include = _preprocess_include(include)
+
     if restricted_indices is None:
         restricted_indices = include[include < include.shape[0]]
 
@@ -118,27 +148,92 @@ def _apply_include(
     D[unrestricted_indices] = tmp_swap[mask]
 
 
+def _get_subspace(T, m, motif_idx, nn_idx, include=None, discords=False):
+    """
+    Compute the multi-dimensional matrix profile subspace for a given motif index and
+    its nearest neighbor index
+
+    Parameters
+    ----------
+    T : ndarray
+        The time series or sequence for which the multi-dimensional matrix profile and
+        multi-dimensional matrix profile indices were computed
+
+    m : int
+        Window size
+
+    motif_idx : int
+        The motif index in T
+
+    nn_idx : int
+        The nearest neighbor index in T
+
+    include : ndarray, default None
+        A list of (zero-based) indices corresponding to the dimensions in `T` that
+        must be included in the constrained multidimensional motif search.
+        For more information, see Section IV D in:
+
+        `DOI: 10.1109/ICDM.2017.66 \
+        <https://www.cs.ucr.edu/~eamonn/Motif_Discovery_ICDM.pdf>`__
+
+    discords : bool, default False
+        When set to `True`, this reverses the distance profile to favor discords rather
+        than motifs. Note that indices in `include` are still maintained and respected.
+
+    Returns
+    -------
+    S : ndarray
+        A ragged array of ndarrays that contain the multi-dimensional subspace for the
+        window with index equal to `motif_idx`. The `len(S)` will be equal to the total
+        number of dimensions, `d`, and where `S[i]` corresponds to the list of subspace
+        indices for the `i+1`th subspace dimension (i.e., `S[1]` corresponds to the
+        subspace with dimension `2` and with `len(S[1]) == 2`).
+    """
+    T, _, _ = core.preprocess(T, m)
+
+    S = np.empty(T.shape[0], dtype=object)  # S[k] holds the first k + 1 sorted indices
+    D = np.linalg.norm(  # one distance per row of T (motif window vs. neighbor window)
+        core.z_norm(T[:, motif_idx : motif_idx + m], axis=1)
+        - core.z_norm(T[:, nn_idx : nn_idx + m], axis=1),
+        axis=1,
+    )
+
+    if include is not None:
+        include = _preprocess_include(include)
+    else:
+        include = []  # empty index list: the `D[include]` writes below are no-ops
+
+    if discords:  # NOTE(review): indices come from the reversed view `D[::-1]` - confirm
+        D[include] = np.inf
+        sorted_idx = D[::-1].argsort(axis=0, kind="mergesort")
+    else:
+        D[include] = 0.0  # smallest possible norm, so these sort to the front
+        sorted_idx = D.argsort(axis=0, kind="mergesort")
+
+    for k in range(T.shape[0]):
+        S[k] = sorted_idx[: k + 1]
+
+    return S
+
+
 def _query_mstump_profile(
     query_idx, T_A, T_B, m, excl_zone, M_T, Σ_T, μ_Q, σ_Q, include=None, discords=False
 ):
     """
-    Multi-dimensional wrapper to compute the multi-dimensional matrix profile,
-    the multi-dimensional matrix profile index, the multi-dimensional matrix profile
-    subspace for a given query window within the times series or sequence that is
-    denoted by the `query_idx` index. Essentially, this is a convenience wrapper around
-    `_multi_mass`.
+    Multi-dimensional wrapper to compute the multi-dimensional matrix profile and
+    the multi-dimensional matrix profile index for a given query window within the time
+    series or sequence that is denoted by the `query_idx` index. Essentially, this is a
+    convenience wrapper around `_multi_mass`.
 
     Parameters
     ----------
     query_idx : int
-        The window index to calculate the first multi-dimensional matrix profile,
-        multi-dimensional matrix profile indices, and multi-dimensional matrix profile
-        subspace.
+        The window index to calculate the first multi-dimensional matrix profile and
+        multi-dimensional matrix profile indices
 
     T_A : ndarray
-        The time series or sequence for which the multi-dimensional matrix profile,
-        multi-dimensional matrix profile indices, and multi-dimensional matrix profile
-        subspace will be returned
+        The time series or sequence for which the multi-dimensional matrix profile and
+        multi-dimensional matrix profile indices will be returned
 
     T_B : ndarray
         The time series or sequence that contains your query subsequences
@@ -182,13 +277,6 @@ def _query_mstump_profile(
     I : ndarray
         Multi-dimensional matrix profile indices for the window with index
         equal to `query_idx`
-
-    S : ndarray
-        A ragged array of ndarrays that contain the multi-dimensional subspace for the
-        window with index equal to `query_idx`. The `len(S)` will be equal to the total
-        number of dimensions, `d`, and where `S[i]` corresponds to the list of subspace
-        indices for the `i+1`th subspace dimension (i.e., `S[1]` corresponds to the
-        subspace with dimension `2` and with `len(S[1]) == 2`).
     """
     d, n = T_A.shape
     k = n - m + 1
@@ -204,19 +292,14 @@ def _query_mstump_profile(
     )
 
     if include is not None:
+        include = _preprocess_include(include)
         _apply_include(D, include)
         start_row_idx = include.shape[0]
 
     if discords:
-        # D[start_row_idx:][::-1].sort(axis=0)
-        sorted_idx = D[start_row_idx:][::-1].argsort(axis=0, kind="mergesort")
-        broadcast_idx = np.arange(D[start_row_idx:].shape[1])[np.newaxis, :]
-        D[start_row_idx:][::-1] = D[start_row_idx:][::-1][sorted_idx, broadcast_idx]
+        D[start_row_idx:][::-1].sort(axis=0, kind="mergesort")
     else:
-        # D[start_row_idx:].sort(axis=0)
-        sorted_idx = D[start_row_idx:].argsort(axis=0, kind="mergesort")
-        broadcast_idx = np.arange(D[start_row_idx:].shape[1])[np.newaxis, :]
-        D[start_row_idx:] = D[start_row_idx:][sorted_idx, broadcast_idx]
+        D[start_row_idx:].sort(axis=0, kind="mergesort")
 
     D_prime = np.zeros(k)
     for i in range(d):
@@ -227,18 +310,15 @@ def _query_mstump_profile(
 
     P = np.full(d, np.inf, dtype="float64")
     I = np.full(d, -1, dtype="int64")
-    S = np.empty(d, dtype=object)
 
     for i in range(d):
         min_index = np.argmin(D[i])
         I[i] = min_index
         P[i] = D[i, min_index]
-        S[i] = sorted_idx[: i + 1, min_index]
         if np.isinf(P[i]):  # pragma nocover
             I[i] = -1
-            S[i][:] = -1
 
-    return P, I, S
+    return P, I
 
 
 def _get_first_mstump_profile(
@@ -306,7 +386,7 @@ def _get_first_mstump_profile(
         Multi-dimensional matrix profile indices for the window with index
         equal to `start`
     """
-    P, I, _ = _query_mstump_profile(
+    P, I = _query_mstump_profile(
         start, T_A, T_B, m, excl_zone, M_T, Σ_T, μ_Q, σ_Q, include, discords
     )
     return P, I
@@ -706,11 +786,7 @@ def mstump(T, m, include=None, discords=False):
     core.check_window_size(m)
 
     if include is not None:
-        include = np.asarray(include)
-        _, idx = np.unique(include, return_index=True)
-        if include.shape[0] != idx.shape[0]:  # pragma: no cover
-            logger.warning("Removed repeating indices in `include`")
-            include = include[np.sort(idx)]
+        include = _preprocess_include(include)
 
     d, n = T_B.shape
     k = n - m + 1
 
@@ -2,6 +2,7 @@
 import numpy as np
 from scipy.spatial.distance import cdist
 from stumpy import core
+from stumpy.mstump import _apply_include
 
 
 def z_norm(a, axis=0, threshold=1e-7):
@@ -242,10 +243,6 @@ def mstump(T, m, excl_zone, include=None, discords=False):
 
     P = np.full((d, k), np.inf)
     I = np.ones((d, k), dtype="int64") * -1
-    S = np.empty((d, k), dtype=object)
-    for i in range(d):
-        for j in range(k):
-            S[i, j] = -np.ones(i + 1, dtype=np.int64)
 
     for i in range(k):
         Q = T[:, i : i + m]
@@ -280,14 +277,35 @@ def mstump(T, m, excl_zone, include=None, discords=False):
         P_i, I_i = PI(D_prime_prime, i, excl_zone)
 
         for dim in range(T.shape[0]):
-            for col_idx in range(P.shape[1]):
-                if P[dim, col_idx] > P_i[dim, col_idx]:
-                    S[dim, col_idx] = sorted_idx[: dim + 1, col_idx]
             col_mask = P[dim] > P_i[dim]
             P[dim, col_mask] = P_i[dim, col_mask]
             I[dim, col_mask] = I_i[dim, col_mask]
 
-    return P.T, I.T, S.T
+    return P.T, I.T
+
+
+def subspace(T, m, motif_idx, nn_idx, include=None, discords=False):
+    S = np.empty(T.shape[0], dtype=object)  # returned; S[k] = first k + 1 sorted indices
+    D = distance(  # presumably one distance per row of T - verify `distance` helper
+        z_norm(T[:, motif_idx : motif_idx + m], axis=1),
+        z_norm(T[:, nn_idx : nn_idx + m], axis=1),
+        axis=1,
+    )
+
+    if include is None:  # empty index list: the `D[include]` writes below are no-ops
+        include = []
+
+    if discords:  # NOTE(review): indices come from the reversed view `D[::-1]` - confirm
+        D[include] = np.inf
+        sorted_idx = D[::-1].argsort(axis=0, kind="mergesort")
+    else:
+        D[include] = 0.0  # included rows get distance 0.0 before the ascending argsort
+        sorted_idx = D.argsort(axis=0, kind="mergesort")
+
+    for k in range(T.shape[0]):  # growing prefixes of the same ordering
+        S[k] = sorted_idx[: k + 1]
+
+    return S
 
 
 def get_array_ranges(a, n_chunks, truncate=False):