Added docstrings to Peax and Topeax; consolidated scipy.ndimage imports and removed minmax

x-tabdeveloping · x-tabdeveloping · commit 1587e775d997 · 2025-10-30T14:09:30.000+01:00
diff --git a/turftopic/models/topeax.py b/turftopic/models/topeax.py
@@ -1,8 +1,11 @@
 from typing import Optional, Union
 
 import numpy as np
-from scipy.ndimage.filters import maximum_filter
-from scipy.ndimage.morphology import binary_erosion, generate_binary_structure
+from scipy.ndimage import (
+    binary_erosion,
+    generate_binary_structure,
+    maximum_filter,
+)
 from scipy.stats import gaussian_kde
 from sklearn.base import BaseEstimator, ClusterMixin
 from sklearn.feature_extraction.text import CountVectorizer
@@ -19,11 +22,6 @@
 from turftopic.models.gmm import GMM, LexicalWordImportance
 
 
-def minmax(a):
-    min_a = np.min(a)
-    return (a - min_a) / (np.max(a) - min_a)
-
-
 def detect_peaks(image):
     # define an 8-connected neighborhood
     neighborhood = generate_binary_structure(2, 25)
@@ -58,6 +56,14 @@ def _m_step(self, X, log_resp):
 
 
 class Peax(ClusterMixin, BaseEstimator):
+    """Clustering model based on density peaks.
+
+    Parameters
+    ----------
+    random_state: int, default None
+        Random seed used when fitting the Gaussian mixture to the peaks.
+    """
+
     def __init__(self, random_state: Optional[int] = None):
         self.random_state = random_state
 
@@ -120,6 +126,23 @@ def score(self, X):
 
 
 class Topeax(GMM):
+    """Topic model based on the Peax clustering algorithm.
+    The algorithm discovers the number of topics automatically, and is based on GMM.
+
+    Parameters
+    ----------
+    encoder: str or SentenceTransformer
+        Model used to encode documents/terms; all-MiniLM-L6-v2 is the default.
+    vectorizer: CountVectorizer, default None
+        Vectorizer used for term extraction.
+        Can be used to prune or filter the vocabulary.
+    perplexity: int, default 50
+        Number of neighbours to take into account when running TSNE.
+    random_state: int, default None
+        Random state to use so that results are exactly reproducible.
+
+    """
+
     def __init__(
         self,
         encoder: Union[