@inproceedings{8f121d1e204c4910a1b075f138add4d4,
title = "Bootstrapped Representation Learning for Skeleton-Based Action Recognition",
abstract = "In this work, we study self-supervised representation learning for 3D skeleton-based action recognition. We extend Bootstrap Your Own Latent (BYOL) for representation learning on skeleton sequence data and propose a new data augmentation strategy comprising two asymmetric transformation pipelines. We also introduce a multi-viewpoint sampling method that leverages multiple viewing angles of the same action captured by different cameras. In the semi-supervised setting, we show that performance can be further improved by knowledge distillation from wider networks, once again leveraging the unlabeled samples. We conduct extensive experiments on the NTU-60, NTU-120, and PKU-MMD datasets to demonstrate the performance of our proposed method. Our method consistently outperforms the current state of the art on linear evaluation, semi-supervised, and transfer learning benchmarks.",
author = "Olivier Moliner and Sangxia Huang and Kalle Åström",
year = "2022",
doi = "10.1109/CVPRW56347.2022.00460",
language = "English",
series = "IEEE Computer Society Conference on Computer Vision and Pattern Recognition Workshops",
publisher = "IEEE Computer Society",
pages = "4153--4163",
booktitle = "Proceedings - 2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops, CVPRW 2022",
address = "United States",
note = "2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops, CVPRW 2022; Conference date: 19-06-2022 through 20-06-2022",
}