55import logging
66
77import numpy as np
8+ from scipy .stats import norm
89from numba import njit , prange
10+ from functools import lru_cache
911
1012from . import core
1113
@@ -148,6 +150,48 @@ def _multi_mass(Q, T, m, M_T, Σ_T, μ_Q, σ_Q):
148150 return D
149151
150152
153+ @lru_cache ()
154+ def _inverse_norm (n_bit = 8 ):
155+ """
156+ Generate bin edges from an inverse normal distribution
157+
158+ Parameters
159+ ----------
160+ n_bit : int, default 8
161+ The number of bits to be used in generating the inverse normal distribution
162+
163+ Returns
164+ -------
165+ out : ndarray
166+ Array of bin edges that can be used for data discretization
167+ """
168+ return norm .ppf (np .arange (1 , (2 ** n_bit )) / (2 ** n_bit ))
169+
170+
171+ def _discretize (a , bins , right = True ):
172+ """
173+ Discretize each row of the input array
174+
175+ Parameters
176+ ----------
177+ a : ndarray
178+ The input array
179+
180+ bins : ndarray
181+ The bin edges used to discretize `a`
182+
183+ right : bool, default True
184+ Indicates whether the intervals for binning include the right or the left bin
185+ edge.
186+
187+ Returns
188+ -------
189+ out : ndarray
190+ Discretized array
191+ """
192+ return np .digitize (a , bins , right = right )
193+
194+
151195def _get_subspace (T , m , motif_idx , nn_idx , k , include = None , discords = False ):
152196 """
153197 Compute the multi-dimensional matrix profile subspace for a given motif index and
@@ -192,9 +236,9 @@ def _get_subspace(T, m, motif_idx, nn_idx, k, include=None, discords=False):
192236 """
193237 T , _ , _ = core .preprocess (T , m )
194238
195- motif = core .z_norm (T [:, motif_idx : motif_idx + m ], axis = 1 )
196- neighbor = core .z_norm (T [:, nn_idx : nn_idx + m ], axis = 1 )
197- D = np .linalg .norm (motif - neighbor , axis = 1 )
239+ motifs = core .z_norm (T [:, motif_idx : motif_idx + m ], axis = 1 )
240+ neighbors = core .z_norm (T [:, nn_idx : nn_idx + m ], axis = 1 )
241+ D = np .linalg .norm (motifs - neighbors , axis = 1 )
198242
199243 if discords :
200244 sorted_idx = D [::- 1 ].argsort (axis = 0 , kind = "mergesort" )
@@ -214,6 +258,14 @@ def _get_subspace(T, m, motif_idx, nn_idx, k, include=None, discords=False):
214258
215259 S = sorted_idx [: k + 1 ]
216260
261+ n_bit = 8
262+ bins = _inverse_norm ()
263+ disc_motifs = _discretize (motifs [S ], bins )
264+ disc_neighbors = _discretize (neighbors [S ], bins )
265+ n_val = np .unique (disc_motifs - disc_neighbors ).shape [0 ]
266+ bit_size = n_bit * (T .shape [0 ] * m * 2 - k * m )
267+ bit_size = bit_size + k * m * np .log2 (n_val ) + n_val * n_bit
268+
217269 return S
218270
219271
0 commit comments