% Conference paper by Su and Liu, presented at ICMLC 2013 (see the note field
% for the conference name and dates). Record originates from a portal export;
% the citation key is the exporter's opaque identifier and is kept so that
% existing \cite commands continue to resolve.
% Given names are stored as plain text in "Last, First" form; words in the
% title that must keep their capitalisation are brace-protected.
@inproceedings{87c57ce7bb3e43ef9f5f2f71cf9bae55,
  title     = {{Chinese} new word extraction from {MicroBlog} data},
  abstract  = {Chinese new word extraction is an important task in Chinese natural language processing and MicroBlog has become a main place of new words' creation and dissemination. Although many effective methods have been proposed, there is a lack of research on Internet texts especially MicroBlog texts. In this paper, we study the MicroBlog-oriented method for new word extraction. Firstly we analyze the performance of classical statistical measures in extracting new words from MicroBlog texts. Secondly we base our work on Branch Entropy. For the shortcomings of statistical measures and the characteristics of MicroBlog texts, we propose a modified method. Experimental result demonstrates that our method is feasible and effective. Lastly, we show four types of new words extracted from MicroBlog.},
  keywords  = {Branch entropy, MicroBlog, Natural language processing, New word extraction, Statistical measure},
  author    = {Su, Qi Long and Liu, Bing Quan},
  note      = {Publisher Copyright: {\textcopyright} 2013 IEEE.; 12th International Conference on Machine Learning and Cybernetics, ICMLC 2013 ; Conference date: 14-07-2013 Through 17-07-2013},
  year      = {2013},
  doi       = {10.1109/ICMLC.2013.6890901},
  language  = {English},
  series    = {Proceedings - International Conference on Machine Learning and Cybernetics},
  publisher = {IEEE Computer Society},
  pages     = {1874--1879},
  booktitle = {Proceedings - International Conference on Machine Learning and Cybernetics},
  address   = {United States},
}