% Conference paper in the ICPCSEE 2018 proceedings, part of the
% "Communications in Computer and Information Science" series.
% The abstract is kept verbatim from the exported record (not copy-edited).
% NOTE(review): address holds a country, not the publisher's city, and
%   publisher differs from the copyright holder named in note -- confirm
%   both against the publisher's record.
% NOTE(review): the series volume number is not recorded here -- add the
%   volume field once verified.
@inproceedings{b64b54f7bf1c495aa2a4cb197bcd1c16,
  author    = {Ma, Jingyi and Yang, Muyun and Wang, Haoyong and Zhu, Conghui and Xu, Bing},
  title     = {A study on corpus content display and {IP} protection},
  editor    = {Zhou, Qinglei and Wang, Hongzhi and Xie, Wei and Lu, Zeguang and Miao, Qiguang and Wang, Yan},
  booktitle = {Data Science - 4th International Conference of Pioneering Computer Scientists, Engineers and Educators, {ICPCSEE} 2018, Proceedings},
  series    = {Communications in Computer and Information Science},
  publisher = {Springer Verlag},
  address   = {Germany},
  year      = {2018},
  pages     = {108--119},
  doi       = {10.1007/978-981-13-2206-8_10},
  isbn      = {9789811322051},
  language  = {English},
  keywords  = {Corpus content display, Corpus security, Information content protection},
  abstract  = {Corpus has played an important role in most of research fields, especially in natural language processing. Some research demos provided detailed corpus content to highlight the contribution they have made, while overlook the security of corpus. In this paper, we explore content leakage resulted from the content display through a crawler. A website for displaying corpus is selected to be crawled by a simply crawler algorithm with some strategies we present. It is estimated that over 85\% of the corpus can be downloaded, which means a substantial threaten to its IP right. Finally, we discuss the protection measures for content display, and give some valid suggestions for information content protection in technology and law.},
  note      = {Publisher Copyright: {\textcopyright} Springer Nature Singapore Pte Ltd. 2018.; 4th International Conference of Pioneering Computer Scientists, Engineers and Educators, ICPCSEE 2018; Conference date: 21-09-2018 Through 23-09-2018},
}