Replace raw encoder_.encode calls with encode_documents; pass lists to encode in cvp.py

x-tabdeveloping · x-tabdeveloping · commit 83fd785464cc · 2026-04-13T12:42:46.000+02:00
diff --git a/turftopic/models/ctm.py b/turftopic/models/ctm.py
@@ -193,7 +193,7 @@ def transform(
             Document-topic matrix.
         """
         if embeddings is None:
-            embeddings = self.encoder_.encode(raw_documents)
+            embeddings = self.encode_documents(raw_documents)
         if self.combined:
             bow = self.vectorizer.fit_transform(raw_documents)
             contextual_embeddings = np.concatenate(
@@ -219,7 +219,7 @@ def fit_transform(
         with console.status("Fitting model") as status:
             if embeddings is None:
                 status.update("Encoding documents")
-                embeddings = self.encoder_.encode(raw_documents)
+                embeddings = self.encode_documents(raw_documents)
                 console.log("Documents encoded.")
             status.update("Extracting terms.")
             document_term_matrix = self.vectorizer.fit_transform(raw_documents)
diff --git a/turftopic/models/cvp.py b/turftopic/models/cvp.py
@@ -62,8 +62,8 @@ def __init__(
         self.classes_ = np.array([name for name in self._seeds])
         self.concept_matrix_ = []
         for _, (positive, negative) in self._seeds.items():
-            positive_emb = self.encoder_.encode(positive)
-            negative_emb = self.encoder_.encode(negative)
+            positive_emb = self.encoder_.encode(list(positive))
+            negative_emb = self.encoder_.encode(list(negative))
             cv = np.mean(positive_emb, axis=0) - np.mean(negative_emb, axis=0)
             self.concept_matrix_.append(cv / np.linalg.norm(cv))
         self.concept_matrix_ = np.stack(self.concept_matrix_)
@@ -92,7 +92,7 @@ def fit_transform(self, raw_documents=None, y=None, embeddings=None):
                 "Either embeddings or raw_documents has to be passed, both are None."
             )
         if embeddings is None:
-            embeddings = self.encoder_.encode(raw_documents)
+            embeddings = self.encoder_.encode(list(raw_documents))
         return embeddings @ self.concept_matrix_.T
 
     def transform(self, raw_documents=None, embeddings=None):
diff --git a/turftopic/models/decomp.py b/turftopic/models/decomp.py
@@ -140,7 +140,7 @@ def fit_transform(
         with console.status("Fitting model") as status:
             if self.embeddings is None:
                 status.update("Encoding documents")
-                self.embeddings = self.encoder_.encode(raw_documents)
+                self.embeddings = self.encode_documents(raw_documents)
                 console.log("Documents encoded.")
             status.update("Decomposing embeddings")
             if isinstance(self.decomposition, FastICA) and (y is not None):
@@ -153,7 +153,7 @@ def fit_transform(
             vocab = self.vectorizer.fit(raw_documents).get_feature_names_out()
             console.log("Term extraction done.")
             status.update("Encoding vocabulary")
-            self.vocab_embeddings = self.encoder_.encode(vocab)
+            self.vocab_embeddings = self.encode_documents(vocab)
             if self.vocab_embeddings.shape[1] != self.embeddings.shape[1]:
                 raise ValueError(
                     NOT_MATCHING_ERROR.format(
@@ -636,7 +636,7 @@ def transform(
             Document-topic matrix.
         """
         if embeddings is None:
-            embeddings = self.encoder_.encode(raw_documents)
+            embeddings = self.encode_documents(raw_documents)
         return self.decomposition.transform(embeddings)
 
     def print_topics(
diff --git a/turftopic/models/gmm.py b/turftopic/models/gmm.py
@@ -206,7 +206,7 @@ def fit_transform(
         with console.status("Fitting model") as status:
             if embeddings is None:
                 status.update("Encoding documents")
-                embeddings = self.encoder_.encode(raw_documents)
+                embeddings = self.encode_documents(raw_documents)
                 console.log("Documents encoded.")
             self.embeddings = embeddings
             status.update("Extracting terms.")
@@ -325,7 +325,7 @@ def transform(
             Document-topic matrix.
         """
         if embeddings is None:
-            embeddings = self.encoder_.encode(raw_documents)
+            embeddings = self.encode_documents(raw_documents)
         if self.dimensionality_reduction is not None:
             embeddings = self.dimensionality_reduction.transform(embeddings)
         return self.gmm_.predict_proba(embeddings)
diff --git a/turftopic/models/senstopic.py b/turftopic/models/senstopic.py
@@ -149,7 +149,7 @@ def fit_transform(
         with console.status("Fitting model") as status:
             if self.embeddings is None:
                 status.update("Encoding documents")
-                self.embeddings = self.encoder_.encode(raw_documents)
+                self.embeddings = self.encode_documents(raw_documents)
                 console.log("Documents encoded.")
             if self.n_components == "auto":
                 status.update("Finding the number of components.")
@@ -177,7 +177,7 @@ def fit_transform(
             console.log("Term extraction done.")
             if getattr(self, "vocab_embeddings", None) is None:
                 status.update("Encoding vocabulary")
-                self.vocab_embeddings = self.encoder_.encode(vocab)
+                self.vocab_embeddings = self.encode_documents(vocab)
             if self.vocab_embeddings.shape[1] != self.embeddings.shape[1]:
                 raise ValueError(
                     NOT_MATCHING_ERROR.format(