2020
|
Mosbach, Marius; Degaetano-Ortlieb, Stefania; Krielke, Marie-Pauline; Abdullah, Badr M; Klakow, Dietrich A Closer Look at Linguistic Knowledge in Masked Language Models: The Case of Relative Clauses in American English Inproceedings Proceedings of the 28th International Conference on Computational Linguistics, pp. 771-787, 2020. Abstract | Links | BibTeX @inproceedings{Mosbach2020,
  title = {A Closer Look at Linguistic Knowledge in Masked Language Models: The Case of Relative Clauses in {American} {English}},
  author = {Marius Mosbach and Stefania Degaetano-Ortlieb and Marie-Pauline Krielke and Badr M. Abdullah and Dietrich Klakow},
  url = {https://www.aclweb.org/anthology/2020.coling-main.67/},
  pdf = {https://www.aclweb.org/anthology/2020.coling-main.67.pdf},
  year = {2020},
  date = {2020-12},
  booktitle = {Proceedings of the 28th International Conference on Computational Linguistics},
  pages = {771--787},
  abstract = {Transformer-based language models achieve high performance on various tasks, but we still lack understanding of the kind of linguistic knowledge they learn and rely on. We evaluate three models (BERT, RoBERTa, and ALBERT), testing their grammatical and semantic knowledge by sentence-level probing, diagnostic cases, and masked prediction tasks. We focus on relative clauses (in American English) as a complex phenomenon needing contextual information and antecedent identification to be resolved. Based on a naturalistic dataset, probing shows that all three models indeed capture linguistic knowledge about grammaticality, achieving high performance. Evaluation on diagnostic cases and masked prediction tasks considering fine-grained linguistic knowledge, however, shows pronounced model-specific weaknesses especially on semantic knowledge, strongly impacting models' performance. Our results highlight the importance of (a) model comparison in evaluation task and (b) building up claims of model performance and the linguistic knowledge they capture beyond purely probing-based evaluations.},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Transformer-based language models achieve high performance on various tasks, but we still lack understanding of the kind of linguistic knowledge they learn and rely on. We evaluate three models (BERT, RoBERTa, and ALBERT), testing their grammatical and semantic knowledge by sentence-level probing, diagnostic cases, and masked prediction tasks. We focus on relative clauses (in American English) as a complex phenomenon needing contextual information and antecedent identification to be resolved. Based on a naturalistic dataset, probing shows that all three models indeed capture linguistic knowledge about grammaticality, achieving high performance. Evaluation on diagnostic cases and masked prediction tasks considering fine-grained linguistic knowledge, however, shows pronounced model-specific weaknesses especially on semantic knowledge, strongly impacting models’ performance. Our results highlight the importance of (a) model comparison in evaluation task and (b) building up claims of model performance and the linguistic knowledge they capture beyond purely probing-based evaluations.
|
Stenger, Irina; Jágrová, Klára; Fischer, Andrea; Avgustinova, Tania “Reading Polish with Czech Eyes” or “How Russian Can a Bulgarian Text Be?”: Orthographic Differences as an Experimental Variable in Slavic Intercomprehension Incollection Radeva-Bork, Teodora; Kosta, Peter (Ed.): Current Developments in Slavic Linguistics. Twenty Years After (based on selected papers from FDSL 11), pp. 483-500, Peter Lang, 2020. Links | BibTeX @incollection{Stenger2020,
  title = {``Reading {Polish} with {Czech} Eyes'' or ``How {Russian} Can a {Bulgarian} Text Be?'': Orthographic Differences as an Experimental Variable in {Slavic} Intercomprehension},
  author = {Irina Stenger and Kl{\'a}ra J{\'a}grov{\'a} and Andrea Fischer and Tania Avgustinova},
  editor = {Teodora Radeva-Bork and Peter Kosta},
  url = {https://www.peterlang.com/view/title/19540},
  doi = {10.3726/978-3-653-07147-4},
  year = {2020},
  date = {2020},
  booktitle = {Current Developments in Slavic Linguistics. Twenty Years After (based on selected papers from FDSL 11)},
  pages = {483--500},
  publisher = {Peter Lang},
  pubstate = {published},
  tppubtype = {incollection}
}
|
Stenger, Irina; Jágrová, Klára; Avgustinova, Tania The INCOMSLAV Platform: Experimental Website with Integrated Methods for Measuring Linguistic Distances and Asymmetries in Receptive Multilingualism Inproceedings Fiumara, James; Cieri, Christopher; Liberman, Mark; Callison-Burch, Chris (Ed.): LREC 2020 Workshop Language Resources and Evaluation Conference 11-16 May 2020, Citizen Linguistics in Language Resource Development (CLLRD 2020), pp. 40-48, 2020. Abstract | Links | BibTeX @inproceedings{Stenger2020b,
  title = {The {INCOMSLAV} Platform: Experimental Website with Integrated Methods for Measuring Linguistic Distances and Asymmetries in Receptive Multilingualism},
  author = {Irina Stenger and Kl{\'a}ra J{\'a}grov{\'a} and Tania Avgustinova},
  editor = {James Fiumara and Christopher Cieri and Mark Liberman and Chris Callison-Burch},
  url = {https://lrec2020.lrec-conf.org/media/proceedings/Workshops/Books/CLLRD%202020book.pdf},
  year = {2020},
  date = {2020},
  booktitle = {LREC 2020 Workshop Language Resources and Evaluation Conference 11--16 May 2020, Citizen Linguistics in Language Resource Development (CLLRD 2020)},
  pages = {40--48},
  abstract = {We report on a web-based resource for conducting intercomprehension experiments with native speakers of Slavic languages and present our methods for measuring linguistic distances and asymmetries in receptive multilingualism. Through a website which serves as a platform for online testing, a large number of participants with different linguistic backgrounds can be targeted. A statistical language model is used to measure information density and to gauge how language users master various degrees of (un)intelligibility. The key idea is that intercomprehension should be better when the model adapted for understanding the unknown language exhibits relatively low average distance and surprisal. All obtained intelligibility scores together with distance and asymmetry measures for the different language pairs and processing directions are made available as an integrated online resource in the form of a Slavic intercomprehension matrix (SlavMatrix).},
  pubstate = {published},
  tppubtype = {inproceedings}
}
We report on a web-based resource for conducting intercomprehension experiments with native speakers of Slavic languages and present our methods for measuring linguistic distances and asymmetries in receptive multilingualism. Through a website which serves as a platform for online testing, a large number of participants with different linguistic backgrounds can be targeted. A statistical language model is used to measure information density and to gauge how language users master various degrees of (un)intelligibility. The key idea is that intercomprehension should be better when the model adapted for understanding the unknown language exhibits relatively low average distance and surprisal. All obtained intelligibility scores together with distance and asymmetry measures for the different language pairs and processing directions are made available as an integrated online resource in the form of a Slavic intercomprehension matrix (SlavMatrix). |
Stenger, Irina; Avgustinova, Tania Visual vs. auditory perception of Bulgarian stimuli by Russian native speakers Inproceedings Vladimir P. Selegej et al., (Ed.): Computational Linguistics and Intellectual Technologies: Papers from the Annual International Conference ‘Dialogue’, pp. 684 - 695, 2020. Abstract | Links | BibTeX @inproceedings{Stenger2020c,
  title = {Visual vs. auditory perception of {Bulgarian} stimuli by {Russian} native speakers},
  author = {Irina Stenger and Tania Avgustinova},
  editor = {Selegej, Vladimir P. and others},
  url = {http://www.dialog-21.ru/media/4962/stengeriplusavgustinovat-045.pdf},
  year = {2020},
  date = {2020},
  booktitle = {Computational Linguistics and Intellectual Technologies: Papers from the Annual International Conference `Dialogue'},
  number = {19},
  pages = {684--695},
  abstract = {This study contributes to a better understanding of receptive multilingualism by determining similarities and differences in successful processing of written and spoken cognate words in an unknown but (closely) related language. We investigate two Slavic languages with regard to their mutual intelligibility. The current focus is on the recognition of isolated Bulgarian words by Russian native speakers in a cognate guessing task, considering both written and audio stimuli. The experimentally obtained intercomprehension scores show a generally high degree of intelligibility of Bulgarian cognates to Russian subjects, as well as processing difficulties in case of visual vs. auditory perception. In search of an explanation, we examine the linguistic factors that can contribute to various degrees of written and spoken word intelligibility. The intercomprehension scores obtained in the online word translation experiments are correlated with (i) the identical and mismatched correspondences on the orthographic and phonetic level, (ii) the word length of the stimuli, and (iii) the frequency of Russian cognates. Additionally we validate two measuring methods: the Levenshtein distance and the word adaptation surprisal as potential predictors of the word intelligibility in reading and oral intercomprehension.},
  pubstate = {published},
  tppubtype = {inproceedings}
}
This study contributes to a better understanding of receptive multilingualism by determining similarities and differences in successful processing of written and spoken cognate words in an unknown but (closely) related language. We investigate two Slavic languages with regard to their mutual intelligibility. The current focus is on the recognition of isolated Bulgarian words by Russian native speakers in a cognate guessing task, considering both written and audio stimuli. The experimentally obtained intercomprehension scores show a generally high degree of intelligibility of Bulgarian cognates to Russian subjects, as well as processing difficulties in case of visual vs. auditory perception. In search of an explanation, we examine the linguistic factors that can contribute to various degrees of written and spoken word intelligibility. The intercomprehension scores obtained in the online word translation experiments are correlated with (i) the identical and mismatched correspondences on the orthographic and phonetic level, (ii) the word length of the stimuli, and (iii) the frequency of Russian cognates. Additionally we validate two measuring methods: the Levenshtein distance and the word adaptation surprisal as potential predictors of the word intelligibility in reading and oral intercomprehension. |
Avgustinova, Tania; Stenger, Irina Russian-Bulgarian mutual intelligibility in light of linguistic and statistical models of Slavic receptive multilingualism [Russko-bolgarskaja vzaimoponjatnost’ v svete lingvističeskich i statističeskich modelej slavjanskoj receptivnoj mnogojazyčnosti] Book Chapter Marti, Roland; Pognan, Patrice; Schlamberger Brezar, Mojca (Ed.): Intercomprehensio Slavica, pp. 85-99, University Press, Faculty of Arts, Ljubljana, 2020. Abstract | Links | BibTeX @incollection{Avgustinova2020,
  title = {{Russian-Bulgarian} mutual intelligibility in light of linguistic and statistical models of {Slavic} receptive multilingualism [{Russko-bolgarskaja} vzaimoponjatnost' v svete lingvisti{\v{c}}eskich i statisti{\v{c}}eskich modelej slavjanskoj receptivnoj mnogojazy{\v{c}}nosti]},
  author = {Tania Avgustinova and Irina Stenger},
  editor = {Marti, Roland and Pognan, Patrice and Schlamberger Brezar, Mojca},
  url = {https://e-knjige.ff.uni-lj.si/znanstvena-zalozba/catalog/view/226/326/5284-1},
  year = {2020},
  date = {2020},
  booktitle = {Intercomprehensio Slavica},
  pages = {85--99},
  publisher = {University Press, Faculty of Arts},
  address = {Ljubljana},
  abstract = {Computational modelling of the observed mutual intelligibility of Slavic languages unavoidably requires systematic integration of classical Slavistics knowledge from comparative historical grammar and traditional contrastive description of language pairs. The phenomenon of intercomprehension is quite intuitive: speakers of a given language L1 understand another closely related language (variety) L2 without being able to use the latter productively, i.e. for speaking or writing. This specific mode of using the human linguistic competence manifests itself as receptive multilingualism. The degree of mutual understanding of genetically closely related languages, such as Bulgarian and Russian, corresponds to objectively measurable distances at different linguistic levels. The common Slavic basis and the comparative-synchronous perspective allow us to reveal Bulgarian-Russian linguistic affinity with regard to spelling, vocabulary and grammar.},
  pubstate = {published},
  tppubtype = {inbook}
}
Computational modelling of the observed mutual intelligibility of Slavic languages unavoidably requires systematic integration of classical Slavistics knowledge from comparative historical grammar and traditional contrastive description of language pairs. The phenomenon of intercomprehension is quite intuitive: speakers of a given language L1 understand another closely related language (variety) L2 without being able to use the latter productively, i.e. for speaking or writing. This specific mode of using the human linguistic competence manifests itself as receptive multilingualism. The degree of mutual understanding of genetically closely related languages, such as Bulgarian and Russian, corresponds to objectively measurable distances at different linguistic levels. The common Slavic basis and the comparative-synchronous perspective allow us to reveal Bulgarian-Russian linguistic affinity with regard to spelling, vocabulary and grammar. |
Stenger, Irina; Avgustinova, Tania How intelligible is spoken Bulgarian for Russian native speakers in an intercomprehension scenario? Inproceedings Micheva, Vanya; et al. (Ed.): Proceedings of the International Annual Conference of the Institute for Bulgarian Language, pp. 142-151, Sofia, 2020. Abstract | Links | BibTeX @inproceedings{Stenger2020d,
  title = {How intelligible is spoken {Bulgarian} for {Russian} native speakers in an intercomprehension scenario?},
  author = {Irina Stenger and Tania Avgustinova},
  editor = {Vanya Micheva and others},
  url = {https://ibl.bas.bg/wp-content/uploads/2020/07/PROCEEDINGS_IBLCONF2020_vol2.pdf},
  year = {2020},
  date = {2020},
  booktitle = {Proceedings of the International Annual Conference of the Institute for Bulgarian Language},
  volume = {2},
  pages = {142--151},
  address = {Sofia},
  abstract = {In a web-based experiment, Bulgarian audio stimuli in the form of recorded isolated words are presented to Russian native speakers who are required to write a suitable Russian translation. The degree of intelligibility, as revealed by the cognate guessing task, is relatively high for this pair of languages. We correlate the obtained intercomprehension scores with established linguistic factors in order to determine their influence on the cross-linguistic spoken word recognition. A detailed error analysis focuses on sound correspondences that cause translation problems in such an intercomprehension scenario.},
  pubstate = {published},
  tppubtype = {inproceedings}
}
In a web-based experiment, Bulgarian audio stimuli in the form of recorded isolated words are presented to Russian native speakers who are required to write a suitable Russian translation. The degree of intelligibility, as revealed by the cognate guessing task, is relatively high for this pair of languages. We correlate the obtained intercomprehension scores with established linguistic factors in order to determine their influence on the cross-linguistic spoken word recognition. A detailed error analysis focuses on sound correspondences that cause translation problems in such an intercomprehension scenario. |
2019
|
Mosbach, Marius; Stenger, Irina; Avgustinova, Tania; Klakow, Dietrich incom.py - A Toolbox for Calculating Linguistic Distances and Asymmetries between Related Languages Inproceedings Angelova, Galia; Mitkov, Ruslan; Nikolova, Ivelina; Temnikova, Irina (Ed.): Proceedings of Recent Advances in Natural Language Processing, RANLP 2019, Varna, Bulgaria, 2-4 September 2019, pp. 811-819, Varna, Bulgaria, 2019. Links | BibTeX @inproceedings{Mosbach2019,
  title = {{incom.py} -- A Toolbox for Calculating Linguistic Distances and Asymmetries between Related Languages},
  author = {Marius Mosbach and Irina Stenger and Tania Avgustinova and Dietrich Klakow},
  editor = {Galia Angelova and Ruslan Mitkov and Ivelina Nikolova and Irina Temnikova},
  url = {https://acl-bg.org/proceedings/2019/RANLP%202019/pdf/RANLP094.pdf},
  doi = {10.26615/978-954-452-056-4_094},
  year = {2019},
  date = {2019-10-01},
  booktitle = {Proceedings of Recent Advances in Natural Language Processing, RANLP 2019, Varna, Bulgaria, 2--4 September 2019},
  pages = {811--819},
  address = {Varna, Bulgaria},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Avgustinova, Tania; Iomdin, Leonid Towards a Typology of Microsyntactic Constructions Inproceedings Corpas-Pastor, Gloria; Mitkov, Ruslan (Ed.): Computational and Corpus-Based Phraseology, pp. 15-30, Springer, Cham, 2019. Links | BibTeX @inproceedings{Avgustinova2019,
  title = {Towards a Typology of Microsyntactic Constructions},
  author = {Tania Avgustinova and Leonid Iomdin},
  editor = {Gloria Corpas-Pastor and Ruslan Mitkov},
  url = {https://link.springer.com/chapter/10.1007/978-3-030-30135-4_2},
  doi = {10.1007/978-3-030-30135-4_2},
  year = {2019},
  date = {2019-09-18},
  booktitle = {Computational and Corpus-Based Phraseology},
  pages = {15--30},
  publisher = {Springer},
  address = {Cham},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Stenger, Irina; Avgustinova, Tania; Belousov, Konstantin I; Baranov, Dmitrij A; Erofeeva, Elena V Interaction of linguistic and socio-cognitive factors in receptive multilingualism [Vzaimodejstvie lingvističeskich i sociokognitivnych parametrov pri receptivnom mul’tilingvisme] Inproceedings 25th International Conference on Computational Linguistics and Intellectual Technologies (Dialogue 2019), Moscow, Russia, 2019. Links | BibTeX @inproceedings{Stenger2019,
  title = {Interaction of linguistic and socio-cognitive factors in receptive multilingualism [{Vzaimodejstvie} lingvisti{\v{c}}eskich i sociokognitivnych parametrov pri receptivnom mul'tilingvisme]},
  author = {Irina Stenger and Tania Avgustinova and Konstantin I. Belousov and Dmitrij A. Baranov and Elena V. Erofeeva},
  url = {http://www.dialog-21.ru/digest/2019/online/},
  year = {2019},
  date = {2019},
  booktitle = {25th International Conference on Computational Linguistics and Intellectual Technologies (Dialogue 2019)},
  address = {Moscow, Russia},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Stenger, Irina Zur Rolle der Orthographie in der slavischen Interkomprehension mit besonderem Fokus auf die kyrillische Schrift PhD Thesis Saarland University, 2019, ISBN: 978-3-86223-283-3. Abstract | Links | BibTeX @phdthesis{Stenger_diss_2019,
  title = {Zur {Rolle} der {Orthographie} in der slavischen {Interkomprehension} mit besonderem {Fokus} auf die kyrillische {Schrift}},
  author = {Irina Stenger},
  editor = {Roland Marti [Akademische Betreuung]},
  url = {http://universaar.uni-saarland.de/monographien/volltexte/2019/185/pdf/Zur_Rolle_der_Orthographie_in_der_slavischen_Interkomprehension.pdf},
  isbn = {978-3-86223-283-3},
  year = {2019},
  date = {2019},
  address = {Saarbr{\"u}cken},
  school = {Saarland University},
  abstract = {Die slavischen Sprachen stellen einen bedeutenden indogermanischen Sprachzweig dar. Es stellt sich die Frage, inwieweit sich Sprecher verschiedener slavischer Sprachen interkomprehensiv verst\"{a}ndigen k\"{o}nnen. Unter Interkomprehension wird die Kommunikationsf\"{a}higkeit von Sprechern verwandter Sprachen verstanden, wobei sich jeder Sprecher seiner Sprache bedient.
Die vorliegende Arbeit untersucht die orthographische Verst\"{a}ndlichkeit slavischer Sprachen mit kyrillischer Schrift im interkomprehensiven Lesen. Sechs ost- und s\"{u}dslavische Sprachen - Bulgarisch, Makedonisch, Russisch, Serbisch, Ukrainisch und Wei{\ss}russisch - werden im Hinblick auf orthographische \"{A}hnlichkeiten und Unterschiede miteinander verglichen und statistisch analysiert. Der Fokus der empirischen Untersuchung liegt auf der Erkennung einzelner Kognaten mit diachronisch motivierten orthographischen Korrespondenzen in ost- und s\"{u}dslavischen Sprachen, ausgehend vom Russischen.
Die in dieser Arbeit vorgestellten Methoden und erzielten Ergebnisse stellen einen empirischen Beitrag zur slavischen Interkomprehensionsforschung und Interkomprehensionsdidaktik dar.},
  pubstate = {published},
  tppubtype = {phdthesis}
}
Die slavischen Sprachen stellen einen bedeutenden indogermanischen Sprachzweig dar. Es stellt sich die Frage, inwieweit sich Sprecher verschiedener slavischer Sprachen interkomprehensiv verständigen können. Unter Interkomprehension wird die Kommunikationsfähigkeit von Sprechern verwandter Sprachen verstanden, wobei sich jeder Sprecher seiner Sprache bedient.
Die vorliegende Arbeit untersucht die orthographische Verständlichkeit slavischer Sprachen mit kyrillischer Schrift im interkomprehensiven Lesen. Sechs ost- und südslavische Sprachen - Bulgarisch, Makedonisch, Russisch, Serbisch, Ukrainisch und Weißrussisch - werden im Hinblick auf orthographische Ähnlichkeiten und Unterschiede miteinander verglichen und statistisch analysiert. Der Fokus der empirischen Untersuchung liegt auf der Erkennung einzelner Kognaten mit diachronisch motivierten orthographischen Korrespondenzen in ost- und südslavischen Sprachen, ausgehend vom Russischen.
Die in dieser Arbeit vorgestellten Methoden und erzielten Ergebnisse stellen einen empirischen Beitrag zur slavischen Interkomprehensionsforschung und Interkomprehensionsdidaktik dar. |
Jágrová, Klára; Stenger, Irina; Telus, Magdalena Slavische Interkomprehension im 5-Sprachen-Kurs – Dokumentation eines Semesters Journal Article Polnisch in Deutschland. Zeitschrift der Bundesvereinigung der Polnischlehrkräfte. Sondernummer: Emil Krebs und die Mehrsprachigkeit in Europa, pp. 122–133, 2019. Links | BibTeX @article{Jagrova2019,
  title = {Slavische {Interkomprehension} im {5-Sprachen-Kurs} -- {Dokumentation} eines {Semesters}},
  author = {Kl{\'a}ra J{\'a}grov{\'a} and Irina Stenger and Magdalena Telus},
  url = {http://polnischunterricht.de/wp-content/uploads/2020/01/popr_www_specjalna_2019.indd_.pdf},
  year = {2019},
  date = {2019},
  journal = {Polnisch in Deutschland. Zeitschrift der Bundesvereinigung der Polnischlehrkr{\"a}fte. Sondernummer: Emil Krebs und die Mehrsprachigkeit in Europa},
  pages = {122--133},
  pubstate = {published},
  tppubtype = {article}
}
|
2018
|
Jágrová, Klára; Stenger, Irina; Avgustinova, Tania Polski nadal nieskomplikowany? Interkomprehensionsexperimente mit Nominalphrasen Journal Article Polnisch in Deutschland. Zeitschrift der Bundesvereinigung der Polnischlehrkräfte, 5/2017 , pp. 20 - 37, 2018. Links | BibTeX @article{Jagrova2018,
  title = {Polski nadal nieskomplikowany? {Interkomprehensionsexperimente} mit {Nominalphrasen}},
  author = {Kl{\'a}ra J{\'a}grov{\'a} and Irina Stenger and Tania Avgustinova},
  url = {http://polnischunterricht.de/wp-content/uploads/2018/02/www_gazeta_2017.pdf},
  year = {2018},
  date = {2018},
  journal = {Polnisch in Deutschland. Zeitschrift der Bundesvereinigung der Polnischlehrkr{\"a}fte},
  volume = {5/2017},
  pages = {20--37},
  pubstate = {published},
  tppubtype = {article}
}
|
Jágrová, Klára; Avgustinova, Tania; Stenger, Irina; Fischer, Andrea Language models, surprisal and fantasy in Slavic intercomprehension Journal Article Computer Speech & Language, 2018, (Statistical language modelling, Surprisal, Receptive multilingualism, Slavic languages, Sentential context, Think-aloud protocols, Polish Czech Reading). Abstract | Links | BibTeX @article{Jagrova2018b,
  title = {Language models, surprisal and fantasy in {Slavic} intercomprehension},
  author = {Kl{\'a}ra J{\'a}grov{\'a} and Tania Avgustinova and Irina Stenger and Andrea Fischer},
  url = {https://www.sciencedirect.com/science/article/pii/S0885230817300451},
  year = {2018},
  date = {2018},
  journal = {Computer Speech \& Language},
  abstract = {In monolingual human language processing, the predictability of a word given its surrounding sentential context is crucial. With regard to receptive multilingualism, it is unclear to what extent predictability in context interplays with other linguistic factors in understanding a related but unknown language -- a process called intercomprehension. We distinguish two dimensions influencing processing effort during intercomprehension: surprisal in sentential context and linguistic distance. Based on this hypothesis, we formulate expectations regarding the difficulty of designed experimental stimuli and compare them to the results from think-aloud protocols of experiments in which Czech native speakers decode Polish sentences by agreeing on an appropriate translation. On the one hand, orthographic and lexical distances are reliable predictors of linguistic similarity. On the other hand, we obtain the predictability of words in a sentence with the help of trigram language models. We find that linguistic distance (encoding similarity) and in-context surprisal (predictability in context) appear to be complementary, with neither factor outweighing the other, and that our distinguishing of these two measurable dimensions is helpful in understanding certain unexpected effects in human behaviour.},
  keywords = {Statistical language modelling, Surprisal, Receptive multilingualism, Slavic languages, Sentential context, Think-aloud protocols, Polish Czech Reading},
  pubstate = {published},
  tppubtype = {article}
}
In monolingual human language processing, the predictability of a word given its surrounding sentential context is crucial. With regard to receptive multilingualism, it is unclear to what extent predictability in context interplays with other linguistic factors in understanding a related but unknown language – a process called intercomprehension. We distinguish two dimensions influencing processing effort during intercomprehension: surprisal in sentential context and linguistic distance. Based on this hypothesis, we formulate expectations regarding the difficulty of designed experimental stimuli and compare them to the results from think-aloud protocols of experiments in which Czech native speakers decode Polish sentences by agreeing on an appropriate translation. On the one hand, orthographic and lexical distances are reliable predictors of linguistic similarity. On the other hand, we obtain the predictability of words in a sentence with the help of trigram language models. We find that linguistic distance (encoding similarity) and in-context surprisal (predictability in context) appear to be complementary, with neither factor outweighing the other, and that our distinguishing of these two measurable dimensions is helpful in understanding certain unexpected effects in human behaviour. |
2017
|
Jágrová, Klára; Stenger, Irina; Avgustinova, Tania; Marti, Roland POLSKI TO JEZYK NIESKOMPLIKOWANY? Theoretische und praktische Interkomprehension der 100 häufigsten polnischen Substantive Journal Article Polnisch in Deutschland. Zeitschrift der Bundesvereinigung der Polnischlehrkräfte, 4/2016 , pp. 5-19, 2017. BibTeX @article{Jagrova2017,
  title = {{POLSKI TO JEZYK NIESKOMPLIKOWANY}? Theoretische und praktische {Interkomprehension} der 100 h{\"a}ufigsten polnischen {Substantive}},
  author = {Kl{\'a}ra J{\'a}grov{\'a} and Irina Stenger and Tania Avgustinova and Roland Marti},
  year = {2017},
  date = {2017},
  journal = {Polnisch in Deutschland. Zeitschrift der Bundesvereinigung der Polnischlehrkr{\"a}fte},
  volume = {4/2016},
  pages = {5--19},
  pubstate = {published},
  tppubtype = {article}
}
|
Stenger, Irina; Avgustinova, Tania; Marti, Roland Levenshtein distance and word adaptation surprisal as methods of measuring mutual intelligibility in reading comprehension of Slavic languages Inproceedings Computational Linguistics and Intellectual Technologies: International Conference "Dialogue 2017" , pp. 304-317, 2017. Links | BibTeX @inproceedings{Stenger2017,
  title = {{Levenshtein} distance and word adaptation surprisal as methods of measuring mutual intelligibility in reading comprehension of {Slavic} languages},
  author = {Irina Stenger and Tania Avgustinova and Roland Marti},
  url = {http://www.dialog-21.ru/media/3982/dialogue2017_v1.pdf},
  year = {2017},
  date = {2017},
  booktitle = {Computational Linguistics and Intellectual Technologies: International Conference ``Dialogue 2017''},
  volume = {1},
  number = {16 (23)},
  pages = {304--317},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Stenger, Irina; Jágrová, Klára; Fischer, Andrea; Avgustinova, Tania; Klakow, Dietrich; Marti, Roland Modeling the Impact of Orthographic Coding on Czech-Polish and Bulgarian-Russian Reading Intercomprehension Journal Article Nordic Journal of Linguistics, 40 (2), pp. 175-199, 2017. Links | BibTeX @article{Stenger2017b,
  title = {Modeling the Impact of Orthographic Coding on {Czech-Polish} and {Bulgarian-Russian} Reading Intercomprehension},
  author = {Irina Stenger and Kl{\'a}ra J{\'a}grov{\'a} and Andrea Fischer and Tania Avgustinova and Dietrich Klakow and Roland Marti},
  url = {https://www.cambridge.org/core/journals/nordic-journal-of-linguistics/article/modeling-the-impact-of-orthographic-coding-on-czechpolish-and-bulgarianrussian-reading-intercomprehension/363BEB5C556DFBDAC7FEED0AE06B06AA},
  year = {2017},
  date = {2017},
  journal = {Nordic Journal of Linguistics},
  volume = {40},
  number = {2},
  pages = {175--199},
  pubstate = {published},
  tppubtype = {article}
}
|
Jágrová, Klára; Stenger, Irina; Marti, Roland; Avgustinova, Tania Lexical and orthographic distances between Bulgarian, Czech, Polish, and Russian: A comparative analysis of the most frequent nouns Inproceedings Joseph Emonds & Markéta Janebová (eds.), Language Use and Linguistic Structure. Proceedings of the Olomouc Linguistics Colloquium 2016, pp. 401–416, Olomouc: Palacký University, 2017. Links | BibTeX @inproceedings{Jagrova2017b,
  title = {Lexical and orthographic distances between {Bulgarian}, {Czech}, {Polish}, and {Russian}: A comparative analysis of the most frequent nouns},
  author = {Kl{\'a}ra J{\'a}grov{\'a} and Irina Stenger and Roland Marti and Tania Avgustinova},
  editor = {Joseph Emonds and Mark{\'e}ta Janebov{\'a}},
  url = {http://olinco.upol.cz/wp-content/uploads/2017/06/olinco-2016-proceedings.pdf},
  year = {2017},
  date = {2017},
  booktitle = {Language Use and Linguistic Structure. Proceedings of the Olomouc Linguistics Colloquium 2016},
  pages = {401--416},
  publisher = {Palack{\'y} University},
  address = {Olomouc},
  pubstate = {published},
  tppubtype = {inproceedings}
}
|
Fischer, Andrea K; Vreeken, Jilles; Klakow, Dietrich Beyond Pairwise Similarity: Quantifying and Characterizing Linguistic Similarity between Groups of Languages by MDL Journal Article Computación y Sistemas, 21 (4), pp. 829-839, 2017. Links | BibTeX @article{Fischer2017,
  title = {Beyond Pairwise Similarity: Quantifying and Characterizing Linguistic Similarity between Groups of Languages by {MDL}},
  author = {Andrea K. Fischer and Jilles Vreeken and Dietrich Klakow},
  url = {http://www.cys.cic.ipn.mx/ojs/index.php/CyS/article/view/2865},
  year = {2017},
  date = {2017},
  journal = {Computaci{\'o}n y Sistemas},
  volume = {21},
  number = {4},
  pages = {829--839},
  pubstate = {published},
  tppubtype = {article}
}
|
2016
|
Stenger, Irina How reading intercomprehension works among Slavic languages with Cyrillic script Inproceedings Köllner, Marisa; Ziai, Ramon (Ed.): ESSLLI 2016, pp. 30-42, 2016. BibTeX @inproceedings{Stenger2016,
title = {How reading intercomprehension works among Slavic languages with Cyrillic script},
author = {Irina Stenger},
editor = {Marisa K\"{o}llner and Ramon Ziai},
year = {2016},
date = {2016-00-00},
booktitle = {ESSLLI 2016},
pages = {30--42},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
2015
|
Fischer, Andrea; Demberg, Vera; Klakow, Dietrich Towards Flexible, Small-Domain Surface Generation: Combining Data-Driven and Grammatical Approaches Inproceedings Proceedings of the 15th European Workshop on Natural Language Generation (ENLG), pp. 105-108, Association for Computational Linguistics Brighton, 2015. Links | BibTeX @inproceedings{fischer:demberg:klakow,
title = {Towards Flexible, Small-Domain Surface Generation: Combining Data-Driven and Grammatical Approaches},
author = {Andrea Fischer and Vera Demberg and Dietrich Klakow},
url = {https://www.aclweb.org/anthology/W15-4718/},
year = {2015},
date = {2015-09-00},
booktitle = {Proceedings of the 15th European Workshop on Natural Language Generation (ENLG)},
pages = {105--108},
publisher = {Association for Computational Linguistics},
address = {Brighton},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Avgustinova, Tania; Fischer, Andrea; Jágrová, Klára; Stenger, Irina The Empirical Basis of Slavic Intercomprehension Inproceedings REMU, Joensuu, Finland, 2015. Links | BibTeX @inproceedings{tania2015empirical,
  author    = {Tania Avgustinova and Andrea Fischer and Kl\'{a}ra J\'{a}grov\'{a} and Irina Stenger},
  title     = {The Empirical Basis of Slavic Intercomprehension},
  booktitle = {REMU},
  address   = {Joensuu, Finland},
  year      = {2015},
  date      = {2015-01-01},
  url       = {http://www.coli.uni-saarland.de/~tania/ta-pub/REMU-slides-presentation_2015-05-28.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Stenger, Irina "Reading Polish with Czech Eyes" or "How Russian Can a Bulgarian Text Be?": Orthographic Differences as an Experimental Variable in Reading Comprehension Inproceedings 11th European Conference on Formal Description of Slavic Languages (FDSL-11), Potsdam, Germany, 2015. Links | BibTeX @inproceedings{irina2015reading,
title = {``Reading Polish with Czech Eyes'' or ``How Russian Can a Bulgarian Text Be?'': Orthographic Differences as an Experimental Variable in Reading Comprehension},
author = {Irina Stenger},
url = {https://www.lsv.uni-saarland.de/wp-content/publications/2015/Reading_Polish_with_Czech_Eyes_FDSL11_abstract.pdf},
year = {2015},
date = {2015-01-01},
booktitle = {11th European Conference on Formal Description of Slavic Languages (FDSL-11)},
address = {Potsdam, Germany},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Fischer, Andrea; Jágrová, Klára; Stenger, Irina; Avgustinova, Tania; Klakow, Dietrich; Marti, Roland An Orthography Transformation Experiment with Czech-Polish and Bulgarian-Russian Parallel Word Sets Inproceedings Sharp, Bernadette; Lubaszewski, Wiesław; Delmonte, Rodolfo (Ed.): Natural Language Processing and Cognitive Science, pp. 115-126, Ca Foscarina Editrice, Venezia, 2015. Links | BibTeX @inproceedings{klara2015orthography,
title = {An Orthography Transformation Experiment with Czech-Polish and Bulgarian-Russian Parallel Word Sets},
author = {Andrea Fischer and Kl\'{a}ra J\'{a}grov\'{a} and Irina Stenger and Tania Avgustinova and Dietrich Klakow and Roland Marti},
editor = {Bernadette Sharp and Wies{\l}aw Lubaszewski and Rodolfo Delmonte},
url = {http://www.sfb1102.uni-saarland.de/wp/wp-content/uploads/2016/02/Avgustinova1.pdf},
year = {2015},
date = {2015-01-01},
booktitle = {Natural Language Processing and Cognitive Science},
pages = {115--126},
publisher = {Ca Foscarina Editrice, Venezia},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Fischer, Andrea; Jágrová, Klára; Stenger, Irina; Avgustinova, Tania; Klakow, Dietrich; Marti, Roland Models for Mutual Intelligibility Inproceedings Data Mining and its Use and Usability for Linguistic Analysis, Universität des Saarlandes Saarbrücken, 2015. Links | BibTeX @inproceedings{andrea2015models,
  author       = {Andrea Fischer and Kl\'{a}ra J\'{a}grov\'{a} and Irina Stenger and Tania Avgustinova and Dietrich Klakow and Roland Marti},
  title        = {Models for Mutual Intelligibility},
  booktitle    = {Data Mining and its Use and Usability for Linguistic Analysis},
  organization = {Universit\"{a}t des Saarlandes},
  address      = {Saarbr\"{u}cken},
  year         = {2015},
  date         = {2015-01-01},
  url          = {http://www.sfb1102.uni-saarland.de/wp/wp-content/uploads/2015/03/sfb-b1-coll-C4-March2015.pdf},
  keywords     = {},
  pubstate     = {published},
  tppubtype    = {inproceedings}
}
|
2014
|
Klakow, Dietrich; Avgustinova, Tania; Stenger, Irina; Fischer, Andrea; Jágrová, Klára The INCOMSLAV Project Inproceedings Seminar in formal linguistics at ÚFAL, Charles University Prague, 2014. Links | BibTeX @inproceedings{dietrich2014incomslav,
  author       = {Dietrich Klakow and Tania Avgustinova and Irina Stenger and Andrea Fischer and Kl\'{a}ra J\'{a}grov\'{a}},
  title        = {The INCOMSLAV Project},
  booktitle    = {Seminar in formal linguistics at \'{U}FAL},
  organization = {Charles University},
  address      = {Prague},
  year         = {2014},
  date         = {2014-01-01},
  url          = {https://ufal.mff.cuni.cz/events/incomslav-project},
  keywords     = {},
  pubstate     = {published},
  tppubtype    = {inproceedings}
}
|