2015
Arnaoudova, Venera; Haiduc, Sonia; Marcus, Andrian; Antoniol, Giuliano
The Use of Text Retrieval and Natural Language Processing in Software Engineering Proceedings Article
In: Proceedings of the International Conference on Software Engineering (ICSE) - Technical Briefings, pp. 949–950, 2015.
BibTeX | Tags: information retrieval, natural language processing
% ICSE 2015 Technical Briefings paper on text retrieval and NLP in software engineering.
% `year` is the only date field: this record carries no real month/day for the paper.
% `pubstate` and `tppubtype` are kept as-is from the exporting tool.
@inproceedings{Arnaoudova-icseTB15-NLPinSE,
  author    = {Arnaoudova, Venera and Haiduc, Sonia and Marcus, Andrian and Antoniol, Giuliano},
  title     = {The Use of Text Retrieval and Natural Language Processing in Software Engineering},
  booktitle = {Proceedings of the International Conference on Software Engineering ({ICSE}) - Technical Briefings},
  year      = {2015},
  pages     = {949--950},
  keywords  = {information retrieval, natural language processing},
  pubstate  = {published},
  tppubtype = {inproceedings},
}
Arnaoudova, Venera; Di Penta, Massimiliano; Antoniol, Giuliano
Linguistic Antipatterns: What They are and How Developers Perceive Them Journal Article
In: Empirical Software Engineering (EMSE), vol. 21, no. 1, pp. 104–158, 2015.
Abstract | Links | BibTeX | Tags: developers' perception, empirical study, linguistic antipatterns, natural language processing, source code identifiers
% Journal article (Empirical Software Engineering 21(1)) on linguistic antipatterns.
% `year` is the only date field: this record carries no real month/day for the article.
% Percent signs in the abstract are escaped so the field survives being typeset by LaTeX.
% NOTE(review): `url` is site-relative (no scheme/host) -- confirm the base domain before citing.
% `pubstate` and `tppubtype` are kept as-is from the exporting tool.
@article{LAsPerception-15,
  author    = {Arnaoudova, Venera and Di Penta, Massimiliano and Antoniol, Giuliano},
  title     = {Linguistic Antipatterns: What They are and How Developers Perceive Them},
  journal   = {Empirical Software Engineering ({EMSE})},
  volume    = {21},
  number    = {1},
  pages     = {104--158},
  year      = {2015},
  url       = {/wp-content/uploads/2014/10/2014-EMSE-Arnaodova-et-al-Perception-LAs.pdf},
  abstract  = {Antipatterns are known as poor solutions to recurring problems. For example, Brown et al. and Fowler define practices concerning poor design or implementation solutions. However, we know that the source code lexicon is part of the factors that affect the psychological complexity of a program, i.e., factors that make a program difficult to understand and maintain by humans. The aim of this work is to identify recurring poor practices related to inconsistencies among the naming, documentation, and implementation of an entity—called Linguistic Antipatterns (LAs)—that may impair program understanding. To this end, we first mine examples of such inconsistencies in real open-source projects and abstract them into a catalog of 17 recurring LAs related to methods and attributes. Then, to understand the relevancy of LAs, we perform two empirical studies with developers—30 external (i.e., not familiar with the code) and 14 internal (i.e., people developing or maintaining the code). Results indicate that the majority of the participants perceive LAs as poor practices and therefore must be avoided—69\% and 51\% of the external and internal developers, respectively. As further evidence of LAs’ validity, open source developers that were made aware of LAs reacted to the issue by making code changes in 10\% of the cases. Finally, in order to facilitate the use of LAs in practice, we identified a sub-set of LAs which were universally agreed upon as being problematic; those which had a clear dissonance between code behavior and lexicon.},
  keywords  = {developers' perception, empirical study, linguistic antipatterns, natural language processing, source code identifiers},
  pubstate  = {published},
  tppubtype = {article},
}
Antipatterns are known as poor solutions to recurring problems. For example, Brown et al. and Fowler define practices concerning poor design or implementation solutions. However, we know that the source code lexicon is part of the factors that affect the psychological complexity of a program, i.e., factors that make a program difficult to understand and maintain by humans. The aim of this work is to identify recurring poor practices related to inconsistencies among the naming, documentation, and implementation of an entity—called Linguistic Antipatterns (LAs)—that may impair program understanding. To this end, we first mine examples of such inconsistencies in real open-source projects and abstract them into a catalog of 17 recurring LAs related to methods and attributes. Then, to understand the relevancy of LAs, we perform two empirical studies with developers—30 external (i.e., not familiar with the code) and 14 internal (i.e., people developing or maintaining the code). Results indicate that the majority of the participants perceive LAs as poor practices and therefore must be avoided—69% and 51% of the external and internal developers, respectively. As further evidence of LAs’ validity, open source developers that were made aware of LAs reacted to the issue by making code changes in 10% of the cases. Finally, in order to facilitate the use of LAs in practice, we identified a sub-set of LAs which were universally agreed upon as being problematic; those which had a clear dissonance between code behavior and lexicon.