Skip to content

Commit b8688e1

Browse files
Added docstrings and meaningful error messages to estimate_components
1 parent 49020d2 commit b8688e1

1 file changed

Lines changed: 31 additions & 2 deletions

File tree

turftopic/models/cluster.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from sentence_transformers import SentenceTransformer
88
from sklearn.base import ClusterMixin, TransformerMixin
99
from sklearn.cluster import OPTICS, AgglomerativeClustering
10+
from sklearn.exceptions import NotFittedError
1011
from sklearn.feature_extraction.text import CountVectorizer
1112
from sklearn.manifold import TSNE
1213
from sklearn.metrics.pairwise import cosine_distances
@@ -300,7 +301,7 @@ def estimate_components(
300301
feature_importance: Literal[
301302
"centroid", "soft-c-tf-idf", "bayes", "c-tf-idf"
302303
],
303-
) -> np.array:
304+
) -> np.ndarray:
304305
"""Estimates feature importances based on a fitted clustering.
305306
306307
Parameters
@@ -319,6 +320,10 @@ def estimate_components(
319320
ndarray of shape (n_components, n_vocab)
320321
Topic-term matrix.
321322
"""
323+
if getattr(self, "labels_", None) is None:
324+
raise NotFittedError(
325+
"The model has not been fitted yet, please fit the model before estimating components."
326+
)
322327
clusters = np.unique(self.labels_)
323328
self.classes_ = np.sort(clusters)
324329
self.topic_sizes_ = np.array(
@@ -422,8 +427,31 @@ def estimate_temporal_components(
422427
feature_importance: Literal[
423428
"c-tf-idf", "soft-c-tf-idf", "centroid", "bayes"
424429
],
425-
):
430+
) -> np.ndarray:
431+
"""Estimates temporal components based on a fitted topic model.
432+
433+
Parameters
434+
----------
435+
feature_importance: {'soft-c-tf-idf', 'c-tf-idf', 'bayes', 'centroid'}
436+
Method for estimating term importances.
437+
'centroid' uses distances from cluster centroid similarly
438+
to Top2Vec.
439+
'c-tf-idf' uses BERTopic's c-tf-idf.
440+
'soft-c-tf-idf' uses Soft c-TF-IDF from GMM; the results should
441+
be very similar to 'c-tf-idf'.
442+
'bayes' uses Bayes' rule.
443+
444+
Returns
445+
-------
446+
ndarray of shape (n_time_bins, n_components, n_vocab)
447+
Temporal topic-term matrix.
448+
"""
449+
if getattr(self, "components_", None) is None:
450+
raise NotFittedError(
451+
"The model has not been fitted yet, please fit the model before estimating temporal components."
452+
)
426453
n_comp, n_vocab = self.components_.shape
454+
self.time_bin_edges = time_bin_edges
427455
n_bins = len(self.time_bin_edges) - 1
428456
self.temporal_components_ = np.full(
429457
(n_bins, n_comp, n_vocab),
@@ -464,6 +492,7 @@ def estimate_temporal_components(
464492
mask_terms = np.squeeze(np.asarray(mask_terms))
465493
components[:, mask_terms == 0] = np.nan
466494
self.temporal_components_[i_timebin] = components
495+
return self.temporal_components_
467496

468497
def fit_transform_dynamic(
469498
self,

0 commit comments

Comments
 (0)