MasterBin-IIAU / VisionTransformer

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Awesome Transformer in Vision Awesome

A curated list of vision-transformer-related resources. Please feel free to open a pull request or an issue to add papers.

Table of Contents

Awesome Surveys

Title Venue BibTeX
A Survey on Visual Transformer ArXiv Bib

Transformer in Vision

Task Reg Det Seg Trk Other
Explanation Image Recognition Object Detection Image Segmentation Object Tracking Other types

You can add a tag for any domain that contains several transformer-based works.

2021

(Please list entries in reverse chronological order, newest first.)

Title Venue Task Code BibTeX
Tokens-to-Token ViT: Training Vision Transformers from Scratch on ImageNet Arxiv Reg GitHub
Bib


@article{yuan2021tokens,
  author  = {Yuan, Li and Chen, Yunpeng and Wang, Tao and Yu, Weihao and Shi, Yujun and Tay, Francis E. H. and Feng, Jiashi and Yan, Shuicheng},
  title   = {Tokens-to-Token {ViT}: Training Vision Transformers from Scratch on {ImageNet}},
  journal = {arXiv preprint arXiv:2101.11986},
  year    = {2021}
}


Bottleneck Transformers for Visual Recognition Arxiv Reg GitHub
Bib


@article{srinivas2021bottleneck,
  author  = {Srinivas, Aravind and Lin, Tsung-Yi and Parmar, Niki and Shlens, Jonathon and Abbeel, Pieter and Vaswani, Ashish},
  title   = {Bottleneck Transformers for Visual Recognition},
  journal = {arXiv preprint arXiv:2101.11605},
  year    = {2021},
}


SSTVOS: Sparse Spatiotemporal Transformers for Video Object Segmentation Arxiv Seg ---
Bib


@article{duke2021sstvos,
  author  = {Duke, Brendan and Ahmed, Abdalla and Wolf, Christian and Aarabi, Parham and Taylor, Graham W.},
  title   = {{SSTVOS}: Sparse Spatiotemporal Transformers for Video Object Segmentation},
  journal = {arXiv preprint arXiv:2101.08833},
  year    = {2021}
}


TrackFormer: Multi-Object Tracking with Transformers Arxiv Trk ---
Bib


@article{meinhardt2021trackformer,
  author  = {Meinhardt, Tim and Kirillov, Alexander and Leal-Taix{\'e}, Laura and Feichtenhofer, Christoph},
  title   = {{TrackFormer}: Multi-Object Tracking with Transformers},
  journal = {arXiv preprint arXiv:2101.02702},
  year    = {2021}
}


2020

Title Venue Task Code BibTeX
Training data-efficient image transformers & distillation through attention ArXiv Reg GitHub
Bib


@article{touvron2020training,
  author  = {Touvron, Hugo and Cord, Matthieu and Douze, Matthijs and Massa, Francisco and Sablayrolles, Alexandre and J{\'e}gou, Herv{\'e}},
  title   = {Training data-efficient image transformers \& distillation through attention},
  journal = {arXiv preprint arXiv:2012.12877},
  year    = {2020}
}


An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale ICLR Reg GitHub
Bib


@article{dosovitskiy2020image,
  author  = {Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and others},
  title   = {An image is worth 16x16 words: Transformers for image recognition at scale},
  journal = {arXiv preprint arXiv:2010.11929},
  year    = {2020},
}


Toward Transformer-Based Object Detection ArXiv Det ---
Bib


@article{beal2020toward,
  author  = {Beal, Josh and Kim, Eric and Tzeng, Eric and Park, Dong Huk and Zhai, Andrew and Kislyuk, Dmitry},
  title   = {Toward Transformer-Based Object Detection},
  journal = {arXiv preprint arXiv:2012.09958},
  year    = {2020},
}


Rethinking Transformer-based Set Prediction for Object Detection ArXiv Det ---
Bib


@article{sun2020rethinking,
  author  = {Sun, Zhiqing and Cao, Shengcao and Yang, Yiming and Kitani, Kris},
  title   = {Rethinking Transformer-based Set Prediction for Object Detection},
  journal = {arXiv preprint arXiv:2011.10881},
  year    = {2020},
}


UP-DETR: Unsupervised Pre-training for Object Detection with Transformers ArXiv Det ---
Bib


@article{dai2020up,
  author  = {Dai, Zhigang and Cai, Bolun and Lin, Yugeng and Chen, Junying},
  title   = {{UP-DETR}: Unsupervised Pre-training for Object Detection with Transformers},
  journal = {arXiv preprint arXiv:2011.09094},
  year    = {2020}
}


Deformable DETR: Deformable Transformers for End-to-End Object Detection ArXiv Det GitHub
Bib


@article{zhu2020deformable,
  author  = {Zhu, Xizhou and Su, Weijie and Lu, Lewei and Li, Bin and Wang, Xiaogang and Dai, Jifeng},
  title   = {Deformable {DETR}: Deformable Transformers for End-to-End Object Detection},
  journal = {arXiv preprint arXiv:2010.04159},
  year    = {2020}
}


End-to-End Object Detection with Transformers ECCV Det GitHub
Bib

@inproceedings{carion2020end,
  author    = {Carion, Nicolas and Massa, Francisco and Synnaeve, Gabriel and Usunier, Nicolas and Kirillov, Alexander and Zagoruyko, Sergey},
  title     = {End-to-End Object Detection with Transformers},
  booktitle = {European Conference on Computer Vision},
  pages     = {213--229},
  publisher = {Springer},
  year      = {2020}
}

Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective with Transformers Arxiv Seg GitHub
Bib

@article{zheng2020rethinking,
  author  = {Zheng, Sixiao and Lu, Jiachen and Zhao, Hengshuang and Zhu, Xiatian and Luo, Zekun and Wang, Yabiao and Fu, Yanwei and Feng, Jianfeng and Xiang, Tao and Torr, Philip H. S. and others},
  title   = {Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective with Transformers},
  journal = {arXiv preprint arXiv:2012.15840},
  year    = {2020}
}

MaX-DeepLab: End-to-End Panoptic Segmentation with Mask Transformers Arxiv Seg ---
Bib

@article{wang2020max,
  author  = {Wang, Huiyu and Zhu, Yukun and Adam, Hartwig and Yuille, Alan and Chen, Liang-Chieh},
  title   = {{MaX-DeepLab}: End-to-End Panoptic Segmentation with Mask Transformers},
  journal = {arXiv preprint arXiv:2012.00759},
  year    = {2020}
}

TransTrack: Multiple-Object Tracking with Transformer ArXiv Trk GitHub
Bib


@article{sun2020transtrack,
  author  = {Sun, Peize and Jiang, Yi and Zhang, Rufeng and Xie, Enze and Cao, Jinkun and Hu, Xinting and Kong, Tao and Yuan, Zehuan and Wang, Changhu and Luo, Ping},
  title   = {{TransTrack}: Multiple-Object Tracking with Transformer},
  journal = {arXiv preprint arXiv:2012.15460},
  year    = {2020}
}


2012-2019

Title Venue Task Code BibTeX
Attention Is All You Need NeurIPS'17 -- GitHub
Bib

@inproceedings{vaswani2017attention,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title     = {Attention Is All You Need},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {5998--6008},
  year      = {2017}
}

Awesome vTransformer Libraries

About