Skip to content

Commit 173299c

Browse files
committed
Added MDL building blocks
1 parent 525ae29 commit 173299c

1 file changed

Lines changed: 55 additions & 3 deletions

File tree

stumpy/mstump.py

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
import logging
66

77
import numpy as np
8+
from scipy.stats import norm
89
from numba import njit, prange
10+
from functools import lru_cache
911

1012
from . import core
1113

@@ -148,6 +150,48 @@ def _multi_mass(Q, T, m, M_T, Σ_T, μ_Q, σ_Q):
148150
return D
149151

150152

153+
@lru_cache()
def _inverse_norm(n_bit=8):
    """
    Generate bin edges from an inverse normal distribution

    The edges are the standard normal quantiles (computed with
    `scipy.stats.norm.ppf`) evaluated at the probabilities `i / 2 ** n_bit` for
    `i = 1, ..., 2 ** n_bit - 1`.

    Parameters
    ----------
    n_bit : int, default 8
        The number of bits to be used in generating the inverse normal distribution

    Returns
    -------
    out : ndarray
        Read-only array of `2 ** n_bit - 1` bin edges that can be used for data
        discretization

    Notes
    -----
    Results are memoized with `lru_cache`, so every call with the same `n_bit`
    returns the very same array object. The array is therefore marked read-only
    so that an in-place modification by one caller cannot silently corrupt the
    cached bin edges seen by every subsequent caller. Copy the array first if a
    writable version is needed.
    """
    n_bins = 2 ** n_bit
    out = norm.ppf(np.arange(1, n_bins) / n_bins)
    # The cache hands out this exact object on every hit; freeze it.
    out.flags.writeable = False

    return out
169+
170+
171+
def _discretize(a, bins, right=True):
    """
    Map every element of the input array to the index of the bin it falls into

    This is a thin wrapper around `np.digitize`, which operates element-wise and
    returns an array with the same shape as `a`.

    Parameters
    ----------
    a : ndarray
        The input array whose values are to be binned

    bins : ndarray
        The bin edges used to discretize `a`

    right : bool, default True
        Forwarded to `np.digitize`. Indicates whether the intervals for binning
        include the right or the left bin edge.

    Returns
    -------
    out : ndarray
        Array of bin indices, one for each element of `a`
    """
    bin_idx = np.digitize(x=a, bins=bins, right=right)

    return bin_idx
193+
194+
151195
def _get_subspace(T, m, motif_idx, nn_idx, k, include=None, discords=False):
152196
"""
153197
Compute the multi-dimensional matrix profile subspace for a given motif index and
@@ -192,9 +236,9 @@ def _get_subspace(T, m, motif_idx, nn_idx, k, include=None, discords=False):
192236
"""
193237
T, _, _ = core.preprocess(T, m)
194238

195-
motif = core.z_norm(T[:, motif_idx : motif_idx + m], axis=1)
196-
neighbor = core.z_norm(T[:, nn_idx : nn_idx + m], axis=1)
197-
D = np.linalg.norm(motif - neighbor, axis=1)
239+
motifs = core.z_norm(T[:, motif_idx : motif_idx + m], axis=1)
240+
neighbors = core.z_norm(T[:, nn_idx : nn_idx + m], axis=1)
241+
D = np.linalg.norm(motifs - neighbors, axis=1)
198242

199243
if discords:
200244
sorted_idx = D[::-1].argsort(axis=0, kind="mergesort")
@@ -214,6 +258,14 @@ def _get_subspace(T, m, motif_idx, nn_idx, k, include=None, discords=False):
214258

215259
S = sorted_idx[: k + 1]
216260

261+
n_bit = 8
262+
bins = _inverse_norm()
263+
disc_motifs = _discretize(motifs[S], bins)
264+
disc_neighbors = _discretize(neighbors[S], bins)
265+
n_val = np.unique(disc_motifs - disc_neighbors).shape[0]
266+
bit_size = n_bit * (T.shape[0] * m * 2 - k * m)
267+
bit_size = bit_size + k * m * np.log2(n_val) + n_val * n_bit
268+
217269
return S
218270

219271

0 commit comments

Comments
 (0)