Skip to content

Commit 10d3cad

Browse files
Added docstrings, removed keyword serialization
1 parent 0469289 commit 10d3cad

1 file changed

Lines changed: 27 additions & 23 deletions

File tree

turftopic/models/keynmf.py

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
import json
21
from datetime import datetime
3-
from typing import Iterable, Optional, Union
2+
from typing import Optional, Union
43

54
import numpy as np
65
from rich.console import Console
@@ -14,27 +13,6 @@
1413
from turftopic.models._keynmf import KeywordExtractor, KeywordNMF
1514

1615

17-
def serialize_keywords(keywords: dict[str, float]) -> str:
18-
return json.dumps(
19-
{word: str(importance) for word, importance in keywords.items()}
20-
)
21-
22-
23-
def deserialize_keywords(s: str) -> dict[str, float]:
24-
obj = json.loads(s)
25-
return {word: float(importance) for word, importance in obj.items()}
26-
27-
28-
class KeywordIterator:
29-
def __init__(self, file: str):
30-
self.file = file
31-
32-
def __iter__(self) -> Iterable[dict[str, float]]:
33-
with open(self.file) as in_file:
34-
for line in in_file:
35-
yield deserialize_keywords(line.strip())
36-
37-
3816
class KeyNMF(ContextualModel, DynamicTopicModel):
3917
"""Extracts keywords from documents based on semantic similarity of
4018
term encodings to document encodings.
@@ -183,6 +161,17 @@ def partial_fit(
183161
embeddings: Optional[np.ndarray] = None,
184162
keywords: Optional[list[dict[str, float]]] = None,
185163
):
164+
"""Online fits KeyNMF on a batch of documents.
165+
166+
Parameters
167+
----------
168+
raw_documents: iterable of str, optional
169+
Documents to fit the model on. Either these or keywords must be passed.
170+
embeddings: ndarray of shape (n_documents, n_dimensions), optional
171+
Precomputed document encodings.
172+
keywords: list[dict[str, float]], optional
173+
Precomputed keyword dictionaries.
174+
"""
186175
if keywords is None and raw_documents is None:
187176
raise ValueError(
188177
"You have to pass either keywords or raw_documents."
@@ -279,6 +268,21 @@ def partial_fit_dynamic(
279268
keywords: Optional[list[dict[str, float]]] = None,
280269
bins: Union[int, list[datetime]] = 10,
281270
):
271+
"""Online fits Dynamic KeyNMF on a batch of documents.
272+
273+
Parameters
274+
----------
275+
raw_documents: iterable of str
276+
Documents to fit the model on.
277+
embeddings: ndarray of shape (n_documents, n_dimensions), optional
278+
Precomputed document encodings.
279+
keywords: list[dict[str, float]], optional
280+
Precomputed keyword dictionaries.
281+
timestamps: list[datetime], optional
282+
List of timestamps for the batch. Must be provided: passing None raises a TypeError.
283+
bins: list[datetime]
284+
Explicit time bin edges for the dynamic model.
285+
"""
282286
if timestamps is None:
283287
raise TypeError(
284288
"You have to pass timestamps when fitting a dynamic model."

0 commit comments

Comments
 (0)