% Conference paper in a Lecture Notes in Computer Science proceedings volume.
% The DOI is stored bare (no LaTeX escapes) and author names use the
% unambiguous "Last, First" form; the abstract is kept verbatim.
@inproceedings{287cd3aa3aba445f88c5c2968fc05342,
  author    = {Wu, Yonghui and Ding, Yuxin and Wang, Xiaolong and Xu, Jun},
  title     = {Topic detection by topic model induced distance using biased initiation},
  booktitle = {Advances in Computer Science and Information Technology - AST/UCMA/ISA/ACN 2010 Conferences, Joint Proceedings},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  pages     = {310--323},
  year      = {2010},
  doi       = {10.1007/978-3-642-13577-4_27},
  isbn      = {3642135765},
  language  = {English},
  keywords  = {Topic detection, clustering, distance measure, topic model},
  abstract  = {Clustering is widely used in topic detection task. However, the vector space model based distance, such as cosine-like distance, will get a low precision and recall when the corpus contains many related topics. In this paper, we propose a new distance measure method: the Topic Model (TM) induced distance. Assuming that the distribution of word is different in each topic, the documents can be treated as a sample of the mixture of k topic models, which can be estimated using expectation maximization (EM). A biased initiation method is proposed in this paper for topic decomposition using EM, which will generate a converged matrix for the generation of TM induced distance. The collections of web news are clustered into classes using this TM distance. A series of experiments are described on a corpus containing 5033 web news from 30 topics. K-means clustering is processed on test set with different topic numbers. A comparison of clustering result using the TM induced distance and the traditional cosine-like distance are given. The experiment results show that the proposed topic decomposition method using biased initiation is effective than the topic decomposition using random values. The TM induced distance will generate more topical groups than the VS model based cosine-like distance. In the web news collections containing related topics, the TM induced distance can achieve a better precision and recall.},
  note      = {2nd International Conference on Advanced Science and Technology ; Conference date: 23-06-2010 Through 25-06-2010},
}