33import numpy as np
44from rich .console import Console
55from sentence_transformers import SentenceTransformer
6- from sklearn .decomposition import PCA , FastICA
6+ from sklearn .base import TransformerMixin
7+ from sklearn .decomposition import FastICA
78from sklearn .feature_extraction .text import CountVectorizer
89
910from turftopic .base import ContextualModel , Encoder
@@ -20,33 +21,40 @@ class SemanticSignalSeparation(ContextualModel):
2021
2122 corpus: list[str] = ["some text", "more text", ...]
2223
23- model = SemanticSignalSeparation(10, objective="independence" ).fit(corpus)
24+ model = SemanticSignalSeparation(10).fit(corpus)
2425 model.print_topics()
2526 ```
2627
2728 Parameters
2829 ----------
29- n_components: int
30+ n_components: int, default 10
3031 Number of topics.
3132 encoder: str or SentenceTransformer
3233 Model to encode documents/terms, all-MiniLM-L6-v2 is the default.
3334 vectorizer: CountVectorizer, default None
3435 Vectorizer used for term extraction.
3536 Can be used to prune or filter the vocabulary.
36- objective: 'orthogonality' or 'independence', default 'independence'
37- Indicates what the components should be optimized for.
38- When 'orthogonality', PCA is used to discover components,
39- when 'independence', ICA is used to discover components.
37+ decomposition: TransformerMixin, default None
38+ Custom decomposition method to use.
39+ Can be an instance of FastICA or PCA, or any other dimensionality
40+ reduction method that implements the `fit` and `fit_transform` methods.
41+ If not specified, FastICA is used.
42+ max_iter: int, default 200
43+ Maximum number of iterations for the default FastICA; only used when `decomposition` is not specified.
44+ random_state: int, default None
45+ Random state passed to the default FastICA so that results are exactly reproducible.
4046 """
4147
4248 def __init__ (
4349 self ,
44- n_components : int ,
50+ n_components : int = 10 ,
4551 encoder : Union [
4652 Encoder , str
4753 ] = "sentence-transformers/all-MiniLM-L6-v2" ,
4854 vectorizer : Optional [CountVectorizer ] = None ,
49- objective : Literal ["orthogonality" , "independence" ] = "independence" ,
55+ decomposition : Optional [TransformerMixin ] = None ,
56+ max_iter : int = 200 ,
57+ random_state : Optional [int ] = None ,
5058 ):
5159 self .n_components = n_components
5260 self .encoder = encoder
@@ -58,11 +66,14 @@ def __init__(
5866 self .vectorizer = default_vectorizer ()
5967 else :
6068 self .vectorizer = vectorizer
61- self .objective = objective
62- if objective == "independence" :
63- self .decomposition = FastICA (n_components )
69+ self .max_iter = max_iter
70+ self .random_state = random_state
71+ if decomposition is None :
72+ self .decomposition = FastICA (
73+ n_components , max_iter = max_iter , random_state = random_state
74+ )
6475 else :
65- self .decomposition = PCA ( n_components )
76+ self .decomposition = decomposition
6677
6778 def fit_transform (
6879 self , raw_documents , y = None , embeddings : Optional [np .ndarray ] = None
0 commit comments