@inproceedings{oka-etal-2020-incorporating,
title = "Incorporating Noisy Length Constraints into Transformer with Length-aware Positional Encodings",
author = "Oka, Yui and
Chousa, Katsuki and
Sudoh, Katsuhito and
Nakamura, Satoshi",
editor = "Scott, Donia and
Bel, Nuria and
Zong, Chengqing",
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2020.coling-main.319/",
doi = "10.18653/v1/2020.coling-main.319",
pages = "3580--3585",
abstract = "Neural Machine Translation often suffers from an under-translation problem due to its limited modeling of output sequence lengths. In this work, we propose a novel approach to training a Transformer model using length constraints based on length-aware positional encoding (PE). Since length constraints with exact target sentence lengths degrade translation performance, we add random noise within a certain window size to the length constraints in the PE during the training. In the inference step, we predict the output lengths using input sequences and a BERT-based length prediction model. Experimental results in an ASPEC English-to-Japanese translation showed the proposed method produced translations with lengths close to the reference ones and outperformed a vanilla Transformer (especially in short sentences) by 3.22 points in BLEU. The average translation results using our length prediction model were also better than another baseline method using input lengths for the length constraints. The proposed noise injection improved robustness for length prediction errors, especially within the window size."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="oka-etal-2020-incorporating">
<titleInfo>
<title>Incorporating Noisy Length Constraints into Transformer with Length-aware Positional Encodings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yui</namePart>
<namePart type="family">Oka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katsuki</namePart>
<namePart type="family">Chousa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katsuhito</namePart>
<namePart type="family">Sudoh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Satoshi</namePart>
<namePart type="family">Nakamura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 28th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Donia</namePart>
<namePart type="family">Scott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nuria</namePart>
<namePart type="family">Bel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chengqing</namePart>
<namePart type="family">Zong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Neural Machine Translation often suffers from an under-translation problem due to its limited modeling of output sequence lengths. In this work, we propose a novel approach to training a Transformer model using length constraints based on length-aware positional encoding (PE). Since length constraints with exact target sentence lengths degrade translation performance, we add random noise within a certain window size to the length constraints in the PE during training. In the inference step, we predict the output lengths using input sequences and a BERT-based length prediction model. Experimental results on an ASPEC English-to-Japanese translation task showed that the proposed method produced translations with lengths close to the reference ones and outperformed a vanilla Transformer (especially on short sentences) by 3.22 points in BLEU. The average translation results using our length prediction model were also better than another baseline method that uses input lengths as the length constraints. The proposed noise injection improved robustness to length prediction errors, especially within the window size.</abstract>
<identifier type="citekey">oka-etal-2020-incorporating</identifier>
<identifier type="doi">10.18653/v1/2020.coling-main.319</identifier>
<location>
<url>https://aclanthology.org/2020.coling-main.319/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>3580</start>
<end>3585</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Incorporating Noisy Length Constraints into Transformer with Length-aware Positional Encodings
%A Oka, Yui
%A Chousa, Katsuki
%A Sudoh, Katsuhito
%A Nakamura, Satoshi
%Y Scott, Donia
%Y Bel, Nuria
%Y Zong, Chengqing
%S Proceedings of the 28th International Conference on Computational Linguistics
%D 2020
%8 December
%I International Committee on Computational Linguistics
%C Barcelona, Spain (Online)
%F oka-etal-2020-incorporating
%X Neural Machine Translation often suffers from an under-translation problem due to its limited modeling of output sequence lengths. In this work, we propose a novel approach to training a Transformer model using length constraints based on length-aware positional encoding (PE). Since length constraints with exact target sentence lengths degrade translation performance, we add random noise within a certain window size to the length constraints in the PE during training. In the inference step, we predict the output lengths using input sequences and a BERT-based length prediction model. Experimental results on an ASPEC English-to-Japanese translation task showed that the proposed method produced translations with lengths close to the reference ones and outperformed a vanilla Transformer (especially on short sentences) by 3.22 points in BLEU. The average translation results using our length prediction model were also better than another baseline method that uses input lengths as the length constraints. The proposed noise injection improved robustness to length prediction errors, especially within the window size.
%R 10.18653/v1/2020.coling-main.319
%U https://aclanthology.org/2020.coling-main.319/
%U https://doi.org/10.18653/v1/2020.coling-main.319
%P 3580-3585
Markdown (Informal)
[Incorporating Noisy Length Constraints into Transformer with Length-aware Positional Encodings](https://aclanthology.org/2020.coling-main.319/) (Oka et al., COLING 2020)
ACL
Yui Oka, Katsuki Chousa, Katsuhito Sudoh, and Satoshi Nakamura. 2020. Incorporating Noisy Length Constraints into Transformer with Length-aware Positional Encodings. In Proceedings of the 28th International Conference on Computational Linguistics, pages 3580–3585, Barcelona, Spain (Online). International Committee on Computational Linguistics.
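
The abstract above describes two components: length-aware positional encodings trained under noised length constraints, and a BERT-based length predictor used at inference. As a rough, unofficial illustration of the first component only, the NumPy sketch below encodes each target position by its remaining length under a length constraint perturbed within a window (a length-difference-style sinusoidal PE). The function name, noise handling, and exact formulation are assumptions for illustration, not the authors' implementation.

```python
import numpy as np

def length_aware_positional_encoding(target_len, d_model, noise_window=0, rng=None):
    """Hypothetical sketch of a length-difference sinusoidal PE with noisy length constraints.

    Position i is indexed by (noisy_len - i), the number of tokens remaining until the
    constrained output length. During training, the length constraint is perturbed by a
    random offset within +/- noise_window (set noise_window=0 at inference). Assumes
    d_model is even.
    """
    rng = rng or np.random.default_rng()
    # Noise injection: perturb the length constraint within the window.
    offset = rng.integers(-noise_window, noise_window + 1) if noise_window else 0
    noisy_len = target_len + offset

    positions = np.arange(target_len)
    remaining = np.maximum(noisy_len - positions, 0)          # length-difference index
    div_term = np.exp(np.arange(0, d_model, 2) * (-np.log(10000.0) / d_model))

    pe = np.zeros((target_len, d_model))
    pe[:, 0::2] = np.sin(remaining[:, None] * div_term)
    pe[:, 1::2] = np.cos(remaining[:, None] * div_term)
    return pe

# Example: a 10-token target whose length constraint is noised within a window of 3.
pe = length_aware_positional_encoding(target_len=10, d_model=512, noise_window=3)
print(pe.shape)  # (10, 512)
```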