This repository shares the data used in the paper "Form2Seq : A Framework for Higher-Order Form Structure Extraction", accepted at EMNLP 2020: Paper link
A part of the dataset is now available here: Data link
If you use the data from the link above, please cite the following papers:
@inproceedings{aggarwal-etal-2020-form2seq,
  author    = {Aggarwal, Milan and Gupta, Hiresh and Sarkar, Mausoom and Krishnamurthy, Balaji},
  title     = {{Form2Seq} : A Framework for Higher-Order Form Structure Extraction},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  month     = nov,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {3830--3840},
  doi       = {10.18653/v1/2020.emnlp-main.314},
  url       = {https://www.aclweb.org/anthology/2020.emnlp-main.314},
}
@inproceedings{sarkar2020document,
  author    = {Sarkar, Mausoom and Aggarwal, Milan and Jain, Arneh and Gupta, Hiresh and Krishnamurthy, Balaji},
  title     = {Document Structure Extraction Using Prior Based High Resolution Hierarchical Semantic Segmentation},
  booktitle = {European Conference on Computer Vision},
  year      = {2020},
  publisher = {Springer},
  pages     = {649--666},
}
@inproceedings{aggarwal2020multi,
  author    = {Aggarwal, Milan and Sarkar, Mausoom and Gupta, Hiresh and Krishnamurthy, Balaji},
  title     = {Multi-Modal Association based Grouping for Form Structure Extraction},
  booktitle = {The IEEE Winter Conference on Applications of Computer Vision},
  year      = {2020},
  pages     = {2075--2084},
}
This dataset is licensed under a Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International License.