@inproceedings{77061b6ae0e84ecb8abf8b379842458d,
title = "STARVQA: SPACE-TIME ATTENTION FOR VIDEO QUALITY ASSESSMENT",
abstract = "Transformer based on self-attention mechanism is blooming in computer vision nowadays. However, its application to video quality assessment (VQA) has not been reported. Evaluating the quality of in-the-wild videos is challenging due to the unknown of pristine reference and shooting distortion. This paper presents a novel space-time attention network for the VQA problem, named StarVQA. StarVQA builds a Transformer by alternately concatenating the divided space-time attention. To adapt the Transformer architecture for training, StarVQA designs a vectorized regression loss by encoding the mean opinion score (MOS) to the probability vector and embedding a special vectorized label token as the learnable variable. To capture the long-range spatiotemporal dependencies of a video sequence, StarVQA encodes the space-time position information of each patch to the input of the Transformer. Various experiments are conducted on the de-facto in-the-wild video datasets, including LIVE-VQC, KoNViD-1k, LSVQ, and LSVQ-1080p. Experimental results demonstrate the superiority of StarVQA over the state-of-the-art. The source code is available at https://github.com/GZHUDVL/StarVQA.",
keywords = "Transformer, attention, in-the-wild videos, synthetic distortion, video quality assessment",
author = "Fengchuang Xing and Wang, \{Yuan Gen\} and Hanpin Wang and Leida Li and Guopu Zhu",
note = "Publisher Copyright: {\textcopyright} 2022 IEEE.; 29th IEEE International Conference on Image Processing, ICIP 2022 ; Conference date: 16-10-2022 Through 19-10-2022",
year = "2022",
doi = "10.1109/ICIP46576.2022.9897881",
language = "英语",
series = "Proceedings - International Conference on Image Processing, ICIP",
publisher = "IEEE Computer Society",
pages = "2326--2330",
booktitle = "2022 IEEE International Conference on Image Processing, ICIP 2022 - Proceedings",
address = "美国",
}