@inproceedings{golovneva-etal-2022-task,
title = "Task-driven augmented data evaluation",
author = "Golovneva, Olga and
Wei, Pan and
Abboud, Khadige and
Peris, Charith and
Tan, Lizhen and
Yu, Haiyang",
editor = "Bosselut, Antoine and
Chandu, Khyathi and
Dhole, Kaustubh and
Gangal, Varun and
Gehrmann, Sebastian and
Jernite, Yacine and
Novikova, Jekaterina and
Perez-Beltrachini, Laura",
booktitle = "Proceedings of the 2nd Workshop on Natural Language Generation, Evaluation, and Metrics (GEM)",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.gem-1.2/",
doi = "10.18653/v1/2022.gem-1.2",
pages = "18--25",
abstract = "In the area of data augmentation research, the main focus to date has been on the improvement of the generation models, while the examination and improvements to synthetic data evaluation methods remains less explored. In our work, we explore a number of sentence similarity measures in the context of data generation filtering, and evaluate their impact on the performance of the targeted Natural Language Understanding problem on the example of the intent classification and named entity recognition tasks. Our experiments on ATIS dataset show that the right choice of filtering technique can bring up to 33{\%} in sentence accuracy improvement for targeted underrepresented intents."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="golovneva-etal-2022-task">
<titleInfo>
<title>Task-driven augmented data evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Olga</namePart>
<namePart type="family">Golovneva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pan</namePart>
<namePart type="family">Wei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khadige</namePart>
<namePart type="family">Abboud</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Charith</namePart>
<namePart type="family">Peris</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lizhen</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haiyang</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Natural Language Generation, Evaluation, and Metrics (GEM)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Antoine</namePart>
<namePart type="family">Bosselut</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khyathi</namePart>
<namePart type="family">Chandu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kaustubh</namePart>
<namePart type="family">Dhole</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Varun</namePart>
<namePart type="family">Gangal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Gehrmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yacine</namePart>
<namePart type="family">Jernite</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jekaterina</namePart>
<namePart type="family">Novikova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="family">Perez-Beltrachini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In the area of data augmentation research, the main focus to date has been on the improvement of the generation models, while the examination and improvements to synthetic data evaluation methods remains less explored. In our work, we explore a number of sentence similarity measures in the context of data generation filtering, and evaluate their impact on the performance of the targeted Natural Language Understanding problem on the example of the intent classification and named entity recognition tasks. Our experiments on ATIS dataset show that the right choice of filtering technique can bring up to 33% in sentence accuracy improvement for targeted underrepresented intents.</abstract>
<identifier type="citekey">golovneva-etal-2022-task</identifier>
<identifier type="doi">10.18653/v1/2022.gem-1.2</identifier>
<location>
<url>https://aclanthology.org/2022.gem-1.2/</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>18</start>
<end>25</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Task-driven augmented data evaluation
%A Golovneva, Olga
%A Wei, Pan
%A Abboud, Khadige
%A Peris, Charith
%A Tan, Lizhen
%A Yu, Haiyang
%Y Bosselut, Antoine
%Y Chandu, Khyathi
%Y Dhole, Kaustubh
%Y Gangal, Varun
%Y Gehrmann, Sebastian
%Y Jernite, Yacine
%Y Novikova, Jekaterina
%Y Perez-Beltrachini, Laura
%S Proceedings of the 2nd Workshop on Natural Language Generation, Evaluation, and Metrics (GEM)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F golovneva-etal-2022-task
%X In the area of data augmentation research, the main focus to date has been on the improvement of the generation models, while the examination and improvements to synthetic data evaluation methods remains less explored. In our work, we explore a number of sentence similarity measures in the context of data generation filtering, and evaluate their impact on the performance of the targeted Natural Language Understanding problem on the example of the intent classification and named entity recognition tasks. Our experiments on ATIS dataset show that the right choice of filtering technique can bring up to 33% in sentence accuracy improvement for targeted underrepresented intents.
%R 10.18653/v1/2022.gem-1.2
%U https://aclanthology.org/2022.gem-1.2/
%U https://doi.org/10.18653/v1/2022.gem-1.2
%P 18-25
Markdown (Informal)
[Task-driven augmented data evaluation](https://aclanthology.org/2022.gem-1.2/) (Golovneva et al., GEM 2022)
ACL
- Olga Golovneva, Pan Wei, Khadige Abboud, Charith Peris, Lizhen Tan, and Haiyang Yu. 2022. Task-driven augmented data evaluation. In Proceedings of the 2nd Workshop on Natural Language Generation, Evaluation, and Metrics (GEM), pages 18–25, Abu Dhabi, United Arab Emirates (Hybrid). Association for Computational Linguistics.