|
1 | | -import json |
2 | 1 | from datetime import datetime |
3 | | -from typing import Iterable, Optional, Union |
| 2 | +from typing import Optional, Union |
4 | 3 |
|
5 | 4 | import numpy as np |
6 | 5 | from rich.console import Console |
|
14 | 13 | from turftopic.models._keynmf import KeywordExtractor, KeywordNMF |
15 | 14 |
|
16 | 15 |
|
17 | | -def serialize_keywords(keywords: dict[str, float]) -> str: |
18 | | - return json.dumps( |
19 | | - {word: str(importance) for word, importance in keywords.items()} |
20 | | - ) |
21 | | - |
22 | | - |
23 | | -def deserialize_keywords(s: str) -> dict[str, float]: |
24 | | - obj = json.loads(s) |
25 | | - return {word: float(importance) for word, importance in obj.items()} |
26 | | - |
27 | | - |
28 | | -class KeywordIterator: |
29 | | - def __init__(self, file: str): |
30 | | - self.file = file |
31 | | - |
32 | | - def __iter__(self) -> Iterable[dict[str, float]]: |
33 | | - with open(self.file) as in_file: |
34 | | - for line in in_file: |
35 | | - yield deserialize_keywords(line.strip()) |
36 | | - |
37 | | - |
38 | 16 | class KeyNMF(ContextualModel, DynamicTopicModel): |
39 | 17 | """Extracts keywords from documents based on semantic similarity of |
40 | 18 | term encodings to document encodings. |
@@ -183,6 +161,17 @@ def partial_fit( |
183 | 161 | embeddings: Optional[np.ndarray] = None, |
184 | 162 | keywords: Optional[list[dict[str, float]]] = None, |
185 | 163 | ): |
| 164 | + """Fits KeyNMF online on a batch of documents.
| 165 | +
|
| 166 | + Parameters |
| 167 | + ---------- |
| 168 | + raw_documents: iterable of str, optional |
| 169 | + Documents to fit the model on. Required when keywords is not passed. |
| 170 | + embeddings: ndarray of shape (n_documents, n_dimensions), optional |
| 171 | + Precomputed document encodings. |
| 172 | + keywords: list[dict[str, float]], optional |
| 173 | + Precomputed keyword dictionaries. |
| 174 | + """ |
186 | 175 | if keywords is None and raw_documents is None: |
187 | 176 | raise ValueError( |
188 | 177 | "You have to pass either keywords or raw_documents." |
@@ -279,6 +268,21 @@ def partial_fit_dynamic( |
279 | 268 | keywords: Optional[list[dict[str, float]]] = None, |
280 | 269 | bins: Union[int, list[datetime]] = 10, |
281 | 270 | ): |
| 271 | + """Fits Dynamic KeyNMF online on a batch of documents.
| 272 | +
|
| 273 | + Parameters |
| 274 | + ---------- |
| 275 | + raw_documents: iterable of str |
| 276 | + Documents to fit the model on. |
| 277 | + embeddings: ndarray of shape (n_documents, n_dimensions), optional |
| 278 | + Precomputed document encodings. |
| 279 | + keywords: list[dict[str, float]], optional |
| 280 | + Precomputed keyword dictionaries. |
| 281 | + timestamps: list[datetime], optional |
| 282 | + List of timestamps for the batch. Must be provided: a TypeError is raised when this is None. |
| 283 | + bins: int or list[datetime], default 10 |
| 284 | + Number of time bins (int), or explicit time bin edges (list of datetime) for the dynamic model. |
| 285 | + """ |
282 | 286 | if timestamps is None: |
283 | 287 | raise TypeError( |
284 | 288 | "You have to pass timestamps when fitting a dynamic model." |
|
0 commit comments