2020
|
Bizzoni, Yuri; Juzek, Tom S; España-Bonet, Cristina; Chowdhury, Koel Dutta; van Genabith, Josef; Teich, Elke How Human is Machine Translationese? Comparing Human and Machine Translations of Text and Speech Inproceedings The 17th International Workshop on Spoken Language Translation, Seattle, WA, United States, 2020. Abstract | Links | BibTeX
@inproceedings{Bizzoni2020,
  title = {How Human is Machine Translationese? Comparing Human and Machine Translations of Text and Speech},
  author = {Yuri Bizzoni and Tom S. Juzek and Cristina Espa{\~n}a-Bonet and Koel Dutta Chowdhury and Josef van Genabith and Elke Teich},
  url = {http://www.sfb1102.uni-saarland.de/wp/wp-content/uploads/2020/06/IWSLT-b1-B7-final2020.pdf
http://iwslt.org/doku.php},
  year = {2020},
  date = {2020-07},
  booktitle = {The 17th International Workshop on Spoken Language Translation},
  address = {Seattle, WA, United States},
  abstract = {Translationese is a phenomenon present in human translations, simultaneous interpreting, and even machine translations. Some translationese features tend to appear in simultaneous interpreting with higher frequency than in human text translation, but the reasons for this are unclear. This study analyzes translationese patterns in translation, interpreting, and machine translation outputs in order to explore possible reasons. In our analysis we (i) detail two non-invasive ways of detecting translationese and (ii) compare translationese across human and machine translations from text and speech. We find that machine translation shows traces of translationese, but does not reproduce the patterns found in human translation, offering support to the hypothesis that such patterns are due to the model (human vs. machine) rather than to the data (written vs. spoken).},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Translationese is a phenomenon present in human translations, simultaneous interpreting, and even machine translations. Some translationese features tend to appear in simultaneous interpreting with higher frequency than in human text translation, but the reasons for this are unclear. This study analyzes translationese patterns in translation, interpreting, and machine translation outputs in order to explore possible reasons. In our analysis we (i) detail two non-invasive ways of detecting translationese and (ii) compare translationese across human and machine translations from text and speech. We find that machine translation shows traces of translationese, but does not reproduce the patterns found in human translation, offering support to the hypothesis that such patterns are due to the model (human vs. machine) rather than to the data (written vs. spoken). |
2019
|
Lapshinova-Koltunski, Ekaterina; España-Bonet, Cristina; van Genabith, Josef Analysing Coreference in Transformer Outputs Inproceedings Proceedings of the Fourth Workshop on Discourse in Machine Translation (DiscoMT 2019), pp. 1-12, Association for Computational Linguistics, Hong Kong, 2019. Abstract | Links | BibTeX
@inproceedings{lapshinovaEtal:2019iscoMT,
  title = {Analysing Coreference in Transformer Outputs},
  author = {Ekaterina Lapshinova-Koltunski and Cristina Espa{\~n}a-Bonet and Josef van Genabith},
  url = {https://www.aclweb.org/anthology/D19-6501},
  doi = {10.18653/v1/D19-6501},
  year = {2019},
  date = {2019-11},
  booktitle = {Proceedings of the Fourth Workshop on Discourse in Machine Translation (DiscoMT 2019)},
  pages = {1--12},
  publisher = {Association for Computational Linguistics},
  address = {Hong Kong},
  abstract = {We analyse coreference phenomena in three neural machine translation systems trained with different data settings with or without access to explicit intra- and cross-sentential anaphoric information. We compare system performance on two different genres: news and TED talks. To do this, we manually annotate (the possibly incorrect) coreference chains in the MT outputs and evaluate the coreference chain translations. We define an error typology that aims to go further than pronoun translation adequacy and includes types such as incorrect word selection or missing words. The features of coreference chains in automatic translations are also compared to those of the source texts and human translations. The analysis shows stronger potential translationese effects in machine translated outputs than in human translations.},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
We analyse coreference phenomena in three neural machine translation systems trained with different data settings with or without access to explicit intra- and cross-sentential anaphoric information. We compare system performance on two different genres: news and TED talks. To do this, we manually annotate (the possibly incorrect) coreference chains in the MT outputs and evaluate the coreference chain translations. We define an error typology that aims to go further than pronoun translation adequacy and includes types such as incorrect word selection or missing words. The features of coreference chains in automatic translations are also compared to those of the source texts and human translations. The analysis shows stronger potential translationese effects in machine translated outputs than in human translations. |
2016
|
Bojar, Ondřej; Chatterjee, Rajen; Federmann, Christian; Graham, Yvette; Haddow, Barry; Huck, Matthias; Yepes, Antonio Jimeno; Koehn, Philipp; Logacheva, Varvara; Monz, Christof; Negri, Matteo; Neveol, Aurelie; Neves, Mariana; Popel, Martin; Post, Matt; Rubino, Raphael; Scarton, Carolina; Specia, Lucia; Turchi, Marco; Verspoor, Karin; Zampieri, Marcos Findings of the 2016 Conference on Machine Translation Inproceedings Proceedings of the First Conference on Machine Translation, pp. 131-198, Association for Computational Linguistics, Berlin, Germany, 2016. Links | BibTeX
@inproceedings{bojar-EtAl:2016:WMT1,
  title = {Findings of the 2016 Conference on Machine Translation},
  author = {Ond{\v{r}}ej Bojar and Rajen Chatterjee and Christian Federmann and Yvette Graham and Barry Haddow and Matthias Huck and Antonio Jimeno Yepes and Philipp Koehn and Varvara Logacheva and Christof Monz and Matteo Negri and Aurelie Neveol and Mariana Neves and Martin Popel and Matt Post and Raphael Rubino and Carolina Scarton and Lucia Specia and Marco Turchi and Karin Verspoor and Marcos Zampieri},
  url = {http://www.aclweb.org/anthology/W/W16/W16-2301},
  year = {2016},
  date = {2016-08-01},
  booktitle = {Proceedings of the First Conference on Machine Translation},
  pages = {131--198},
  publisher = {Association for Computational Linguistics},
  address = {Berlin, Germany},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Rubino, Raphael; Lapshinova-Koltunski, Ekaterina; van Genabith, Josef Information Density and Quality Estimation Features as Translationese Indicators for Human Translation Classification Inproceedings Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 960-970, Association for Computational Linguistics, 2016. Links | BibTeX
@inproceedings{N16-1110,
  title = {Information Density and Quality Estimation Features as Translationese Indicators for Human Translation Classification},
  author = {Raphael Rubino and Ekaterina Lapshinova-Koltunski and Josef van Genabith},
  url = {http://aclweb.org/anthology/N16-1110},
  doi = {10.18653/v1/N16-1110},
  year = {2016},
  date = {2016-01-01},
  booktitle = {Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  pages = {960--970},
  publisher = {Association for Computational Linguistics},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Rubino, Raphael; Degaetano-Ortlieb, Stefania; Teich, Elke; van Genabith, Josef Modeling Diachronic Change in Scientific Writing with Information Density Inproceedings Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers, pp. 750-761, The COLING 2016 Organizing Committee, 2016. Links | BibTeX
@inproceedings{C16-1072,
  title = {Modeling Diachronic Change in Scientific Writing with Information Density},
  author = {Raphael Rubino and Stefania Degaetano-Ortlieb and Elke Teich and Josef van Genabith},
  url = {http://aclweb.org/anthology/C16-1072},
  year = {2016},
  date = {2016-01-01},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  pages = {750--761},
  publisher = {The COLING 2016 Organizing Committee},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|