@inproceedings{yankovskaya-etal-2023-machine,
title = "Machine Translation for Low-resource {F}inno-{U}gric Languages",
author = {Yankovskaya, Lisa and
Tars, Maali and
T{\"a}ttar, Andre and
Fishel, Mark},
editor = {Alum{\"a}e, Tanel and
Fishel, Mark},
booktitle = "Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)",
month = may,
year = "2023",
address = "T{\'o}rshavn, Faroe Islands",
publisher = "University of Tartu Library",
url = "https://aclanthology.org/2023.nodalida-1.77/",
pages = "762--771",
abstract = "This paper focuses on neural machine translation (NMT) for low-resource Finno-Ugric languages. Our contributions are three-fold: (1) we extend existing and collect new parallel and monolingual corpora for 20 languages, (2) we expand the 200-language translation benchmark FLORES-200 with manual translations into nine new languages, and (3) we present experiments using the collected data to create NMT systems for the included languages and investigate the impact of back-translation data on the NMT performance for low-resource languages. Experimental results show that carefully selected limited amounts of back-translation directions yield the best results in terms of translation scores, for both high-resource and low-resource output languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yankovskaya-etal-2023-machine">
<titleInfo>
<title>Machine Translation for Low-resource Finno-Ugric Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lisa</namePart>
<namePart type="family">Yankovskaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maali</namePart>
<namePart type="family">Tars</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Tättar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Fishel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tanel</namePart>
<namePart type="family">Alumäe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Fishel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>University of Tartu Library</publisher>
<place>
<placeTerm type="text">Tórshavn, Faroe Islands</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper focuses on neural machine translation (NMT) for low-resource Finno-Ugric languages. Our contributions are three-fold: (1) we extend existing and collect new parallel and monolingual corpora for 20 languages, (2) we expand the 200-language translation benchmark FLORES-200 with manual translations into nine new languages, and (3) we present experiments using the collected data to create NMT systems for the included languages and investigate the impact of back-translation data on the NMT performance for low-resource languages. Experimental results show that carefully selected limited amounts of back-translation directions yield the best results in terms of translation scores, for both high-resource and low-resource output languages.</abstract>
<identifier type="citekey">yankovskaya-etal-2023-machine</identifier>
<location>
<url>https://aclanthology.org/2023.nodalida-1.77/</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>762</start>
<end>771</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Machine Translation for Low-resource Finno-Ugric Languages
%A Yankovskaya, Lisa
%A Tars, Maali
%A Tättar, Andre
%A Fishel, Mark
%Y Alumäe, Tanel
%Y Fishel, Mark
%S Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)
%D 2023
%8 May
%I University of Tartu Library
%C Tórshavn, Faroe Islands
%F yankovskaya-etal-2023-machine
%X This paper focuses on neural machine translation (NMT) for low-resource Finno-Ugric languages. Our contributions are three-fold: (1) we extend existing and collect new parallel and monolingual corpora for 20 languages, (2) we expand the 200-language translation benchmark FLORES-200 with manual translations into nine new languages, and (3) we present experiments using the collected data to create NMT systems for the included languages and investigate the impact of back-translation data on the NMT performance for low-resource languages. Experimental results show that carefully selected limited amounts of back-translation directions yield the best results in terms of translation scores, for both high-resource and low-resource output languages.
%U https://aclanthology.org/2023.nodalida-1.77/
%P 762-771
Markdown (Informal)
[Machine Translation for Low-resource Finno-Ugric Languages](https://aclanthology.org/2023.nodalida-1.77/) (Yankovskaya et al., NoDaLiDa 2023)
ACL