% Conference paper from the ICMSE'07 proceedings (exported record, cleaned up).
% Cleanup applied to the exported data:
%   - author: removed backslash-escaped braces around the given names (they
%     were typeset as literal brace characters) and put every name in
%     "Last, First" form.
%   - language / address: the exporter emitted the Chinese words for
%     "English" and "United States"; replaced with ASCII equivalents so the
%     entry is safe for classic 8-bit BibTeX. Only a country was supplied for
%     address; the publisher city is not known from this record.
%   - series: dropped, it was an exact duplicate of booktitle.
%   - title: brace-protected the proper adjective against sentence casing.
%   - abstract: fixed the typo "tobe".
% NOTE(review): the surname "Eeng" looks like a transcription error
% (possibly "Feng"); left as exported, confirm against the DOI record.
@inproceedings{b0d55a5a6bf146b8b2b78d0ef98db4f8,
  author    = {Pang, Xiu Li and Eeng, Yu Qiang and Jiang, Wei},
  title     = {A {Chinese} anti-spam filter approach based on Support Vector Machine},
  booktitle = {Proceedings of 2007 International Conference on Management Science and Engineering, ICMSE'07 (14th)},
  pages     = {97--102},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2007},
  doi       = {10.1109/ICMSE.2007.4421831},
  isbn      = {9787883580805},
  language  = {english},
  keywords  = {Anti-spam filter, Maximum Entropy, Na{\"i}ve Bayes, Support Vector Machine},
  abstract  = {This paper presents an anti-spam filter approach based on Support Vector Machine (SVM). Firstly, we adopt the tri-gram language model to perform word segmentation in the Chinese Email. In order to overcome the sparse data problem, the Absolute Discount Smoothing algorithm is applied. Secondly, the different factoid words are identified by the Automaton Machine, so as to acquire the approximate syntactic and semantic usage of factoid words in the anti-spam filter task. Thirdly, we apply Support Vector Machine to filter the spam, where the Emails are permitted to be written by the cross language, including Chinese and English. The experiments in the large-scale corpora with the cross language show that the SVM can improve the generalization than the Na{\"i}ve Bayes (Smoothed by Lidstone algorithm) by 4.09\% precision, and 8.18\% higher precision than the Maximum Entropy Model.},
  note      = {2007 International Conference on Management Science and Engineering, ICMSE'07 ; Conference date: 20-08-2007 Through 22-08-2007},
}