You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: paper.bib
+31-28Lines changed: 31 additions & 28 deletions
Original file line number
Diff line number
Diff line change
@@ -51,6 +51,8 @@ @inproceedings{topmost
51
51
abstract = "Topic models have a rich history with various applications and have recently been reinvigorated by neural topic modeling. However, these numerous topic models adopt totally distinct datasets, implementations, and evaluations. This impedes quick utilization and fair comparisons, and thereby hinders their research progress and applications. To tackle this challenge, we in this paper propose a Topic Modeling System Toolkit (TopMost). Compared to existing toolkits, TopMost stands out by supporting more extensive features. It covers a broader spectrum of topic modeling scenarios with their complete lifecycles, including datasets, preprocessing, models, training, and evaluations. Thanks to its highly cohesive and decoupled modular design, TopMost enables rapid utilization, fair comparisons, and flexible extensions of diverse cutting-edge topic models. Our code, tutorials, and documentation are available at https://github.com/bobxwu/topmost."
52
52
}
53
53
54
+
@article{quantitative_text_analysis,
  title   = {Quantitative Text Analysis},
  author  = {Nielbo, Kristoffer L. and Karsdorp, Folgert and Wevers, Melvin and Lassche, Alie and Baglini, Rebekah B. and Kestemont, Mike and Tahmasebi, Nina},
  journal = {Nature Reviews Methods Primers},
  volume  = {4},
  number  = {1},
  year    = {2024},
  month   = apr,
  doi     = {10.1038/s43586-024-00302-w},
  url     = {https://www.nature.com/articles/s43586-024-00302-w},
}
55
+
54
56
@inproceedings{stream,
55
57
title = "{STREAM}: Simplified Topic Retrieval, Exploration, and Analysis Module",
56
58
author = {Thielmann, Anton and
@@ -125,12 +127,12 @@ @misc{top2vec
125
127
primaryClass={cs.CL}
126
128
}
127
129
128
-
@inproceedings{prodlda,
129
-
title={Autoencoding Variational Inference For Topic Models},
130
-
author={Akash Srivastava and Charles Sutton},
131
-
booktitle={International Conference on Learning Representations},
title = "Sentence-{BERT}: Sentence Embeddings using {S}iamese {BERT}-Networks",
194
+
author = "Reimers, Nils and
195
+
Gurevych, Iryna",
196
+
editor = "Inui, Kentaro and
197
+
Jiang, Jing and
198
+
Ng, Vincent and
199
+
Wan, Xiaojun",
200
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)",
201
+
month = nov,
202
+
year = "2019",
203
+
address = "Hong Kong, China",
204
+
publisher = "Association for Computational Linguistics",
205
+
url = "https://aclanthology.org/D19-1410/",
206
+
doi = "10.18653/v1/D19-1410",
207
+
pages = "3982--3992",
208
+
abstract = "BERT (Devlin et al., 2018) and RoBERTa (Liu et al., 2019) has set a new state-of-the-art performance on sentence-pair regression tasks like semantic textual similarity (STS). However, it requires that both sentences are fed into the network, which causes a massive computational overhead: Finding the most similar pair in a collection of 10,000 sentences requires about 50 million inference computations ({\textasciitilde}65 hours) with BERT. The construction of BERT makes it unsuitable for semantic similarity search as well as for unsupervised tasks like clustering. In this publication, we present Sentence-BERT (SBERT), a modification of the pretrained BERT network that use siamese and triplet network structures to derive semantically meaningful sentence embeddings that can be compared using cosine-similarity. This reduces the effort for finding the most similar pair from 65 hours with BERT / RoBERTa to about 5 seconds with SBERT, while maintaining the accuracy from BERT. We evaluate SBERT and SRoBERTa on common STS tasks and transfer learning tasks, where it outperforms other state-of-the-art sentence embeddings methods."
0 commit comments