% Conference paper: "Research on automatic acquisition of domain terms",
% in the proceedings of the 7th ICMLC (2008), pages 3026--3031.
% The citation key looks like an export-generated identifier; it is kept
% unchanged so that existing citations of this entry keep resolving.
% Author names use the unambiguous "Last, First" form; multi-part given
% names ("Yuan Chao", "Xiao Long") are written plainly, without braces.
% The abstract is reproduced verbatim from the exported record.
@inproceedings{a511cda0a9e64ba8a1c16d89437cd112,
  author    = {Liu, Juan and Liu, Yuan Chao and Jiang, Wei and Wang, Xiao Long},
  title     = {Research on automatic acquisition of domain terms},
  booktitle = {Proceedings of the 7th International Conference on Machine Learning and Cybernetics, {ICMLC}},
  pages     = {3026--3031},
  year      = {2008},
  doi       = {10.1109/ICMLC.2008.4620926},
  isbn      = {9781424420964},
  language  = {English},
  keywords  = {Automatic term extraction, Domain Terms, Information entropy, Log-likelihood ratio, Natural language processing},
  abstract  = {In order to solve the various issues in natural language processing more precisely, it is important to construct a system for automatic acquisition of domain terms. A method for automatic acquisition of domain terms from raw materials that are not segmented is presented in this paper. The raw domain corpus is pre-processed firstly. Then by using the method of Information Entropy and Log-likelihood ratio, we can extract candidate words automatically, after this we use the open-domain lexicon to preserve domain terms by removing general words. At last, confidence is used to remove the non-meaningful words to improve term acquisition accuracy from domain candidate term set, and the special domain lexicon is constructed finally. The experimental results show that this simple method is efficient in extracting most of the domain terms. The domain terms we extracted have been effectively applied in personalized Chinese word segmentation system.},
  note      = {7th International Conference on Machine Learning and Cybernetics, ICMLC; conference date: 2008-07-12 through 2008-07-15},
}