% master-thesis/bibliography.bib


@inproceedings{alshangiti2019whydevelopingmachine,
  author     = {Alshangiti, Moayad and Sapkota, Hitesh and Murukannaiah, Pradeep K. and Liu, Xumin and Yu, Qi},
  title      = {Why Is {{Developing Machine Learning Applications Challenging}}? {{A Study}} on {{Stack Overflow Posts}}},
  shorttitle = {Why Is {{Developing Machine Learning Applications Challenging}}?},
  booktitle  = {2019 {{ACM}}/{{IEEE International Symposium}} on {{Empirical Software Engineering}} and {{Measurement}} ({{ESEM}})},
  eventtitle = {2019 {{ACM}}/{{IEEE International Symposium}} on {{Empirical Software Engineering}} and {{Measurement}} ({{ESEM}})},
  date       = {2019-09},
  pages      = {1--11},
  publisher  = {{IEEE}},
  location   = {{Porto de Galinhas, Recife, Brazil}},
  isbn       = {978-1-72812-968-6},
  doi        = {10.1109/ESEM.2019.8870187},
  langid     = {english},
  file       = {/home/norangebit/Documenti/10-personal/12-organizzation/07-zotero-attachments/IEEE/Alshangiti_2019_Why is Developing Machine Learning Applications Challenging.pdf},
  abstract   = {Method: We conduct an empirical study of ML-related developer posts on Stack Overflow. We perform in-depth quantitative and qualitative analyses focusing on a series of research questions related to the challenges of developing ML applications and the directions to address them. Results: Our findings include: (1) ML questions suffer from a much higher percentage of unanswered questions on Stack Overflow than other domains; (2) there is a lack of ML experts in the Stack Overflow QA community; (3) the data preprocessing and model deployment phases are where most of the challenges lay; and (4) addressing most of these challenges require more ML implementation knowledge than ML conceptual knowledge. Conclusions: Our findings suggest that most challenges are under the data preparation and model deployment phases, i.e., early and late stages. Also, the implementation aspect of ML shows much higher difficulty level among developers than the conceptual aspect.}
}

@inproceedings{amershi-2019-softwareengineeringmachine,
  author     = {Amershi, Saleema and Begel, Andrew and Bird, Christian and DeLine, Robert and Gall, Harald and Kamar, Ece and Nagappan, Nachiappan and Nushi, Besmira and Zimmermann, Thomas},
  title      = {Software {{Engineering}} for {{Machine Learning}}: {{A Case Study}}},
  shorttitle = {Software {{Engineering}} for {{Machine Learning}}},
  booktitle  = {2019 {{IEEE}}/{{ACM}} 41st {{International Conference}} on {{Software Engineering}}: {{Software Engineering}} in {{Practice}} ({{ICSE}}-{{SEIP}})},
  eventtitle = {2019 {{IEEE}}/{{ACM}} 41st {{International Conference}} on {{Software Engineering}}: {{Software Engineering}} in {{Practice}} ({{ICSE}}-{{SEIP}})},
  date       = {2019-05},
  pages      = {291--300},
  publisher  = {{IEEE}},
  location   = {{Montreal, QC, Canada}},
  isbn       = {978-1-72811-760-7},
  doi        = {10.1109/ICSE-SEIP.2019.00042},
  langid     = {english},
  file       = {/home/norangebit/Documenti/10-personal/12-organizzation/07-zotero-attachments/IEEE/Amershi_2019_Software Engineering for Machine Learning.pdf},
  abstract   = {Recent advances in machine learning have stimulated widespread interest within the Information Technology sector on integrating AI capabilities into software and services. This goal has forced organizations to evolve their development processes. We report on a study that we conducted on observing software teams at Microsoft as they develop AI-based applications. We consider a nine-stage workflow process informed by prior experiences developing AI applications (e.g., search and NLP) and data science tools (e.g. application diagnostics and bug reporting). We found that various Microsoft teams have united this workflow into preexisting, well-evolved, Agile-like software engineering processes, providing insights about several essential engineering challenges that organizations may face in creating large-scale AI solutions for the marketplace. We collected some best practices from Microsoft teams to address these challenges. In addition, we have identified three aspects of the AI domain that make it fundamentally different from prior software application domains: 1) discovering, managing, and versioning the data needed for machine learning applications is much more complex and difficult than other types of software engineering, 2) model customization and model reuse require very different skills than are typically found in software teams, and 3) AI components are more difficult to handle as distinct modules than traditional software components \textemdash{} models may be ``entangled'' in complex ways and experience non-monotonic error behavior. We believe that the lessons learned by Microsoft teams will be valuable to other organizations.}
}

@inproceedings{bangash2019whatdevelopersknow,
  author     = {Bangash, Abdul Ali and Sahar, Hareem and Chowdhury, Shaiful and Wong, Alexander William and Hindle, Abram and Ali, Karim},
  title      = {What Do {{Developers Know About Machine Learning}}: {{A Study}} of {{ML Discussions}} on {{StackOverflow}}},
  shorttitle = {What Do {{Developers Know About Machine Learning}}},
  booktitle  = {2019 {{IEEE}}/{{ACM}} 16th {{International Conference}} on {{Mining Software Repositories}} ({{MSR}})},
  eventtitle = {2019 {{IEEE}}/{{ACM}} 16th {{International Conference}} on {{Mining Software Repositories}} ({{MSR}})},
  date       = {2019-05},
  pages      = {260--264},
  publisher  = {{IEEE}},
  location   = {{Montreal, QC, Canada}},
  isbn       = {978-1-72813-412-3},
  doi        = {10.1109/MSR.2019.00052},
  langid     = {english},
  file       = {/home/norangebit/Documenti/10-personal/12-organizzation/07-zotero-attachments/IEEE/Bangash_2019_What do Developers Know About Machine Learning.pdf},
  abstract   = {Machine learning, a branch of Artificial Intelligence, is now popular in software engineering community and is successfully used for problems like bug prediction, and software development effort estimation. Developers' understanding of machine learning, however, is not clear, and we require investigation to understand what educators should focus on, and how different online programming discussion communities can be more helpful. We conduct a study on Stack Overflow (SO) machine learning related posts using the SOTorrent dataset. We found that some machine learning topics are significantly more discussed than others, and others need more attention. We also found that topic generation with Latent Dirichlet Allocation (LDA) can suggest more appropriate tags that can make a machine learning post more visible and thus can help in receiving immediate feedback from sites like SO.}
}

@inproceedings{borges2016understandingfactorsthat,
  author        = {Borges, Hudson and Hora, Andre and Valente, Marco Tulio},
  title         = {Understanding the {{Factors}} That {{Impact}} the {{Popularity}} of {{GitHub Repositories}}},
  booktitle     = {2016 {{IEEE International Conference}} on {{Software Maintenance}} and {{Evolution}} ({{ICSME}})},
  date          = {2016-10},
  pages         = {334--344},
  doi           = {10.1109/ICSME.2016.31},
  eprinttype    = {arxiv},
  eprint        = {1606.04984},
  archiveprefix = {arXiv},
  langid        = {english},
  keywords      = {Computer Science - Social and Information Networks,Computer Science - Software Engineering},
  file          = {/home/norangebit/Documenti/10-personal/12-organizzation/07-zotero-attachments/undefined/Borges_2016_Understanding the Factors that Impact the Popularity of GitHub Repositories.pdf},
  abstract      = {Software popularity is a valuable information to modern open source developers, who constantly want to know if their systems are attracting new users, if new releases are gaining acceptance, or if they are meeting user's expectations. In this paper, we describe a study on the popularity of software systems hosted at GitHub, which is the world's largest collection of open source software. GitHub provides an explicit way for users to manifest their satisfaction with a hosted repository: the stargazers button. In our study, we reveal the main factors that impact the number of stars of GitHub projects, including programming language and application domain. We also study the impact of new features on project popularity. Finally, we identify four main patterns of popularity growth, which are derived after clustering the time series representing the number of stars of 2,279 popular GitHub repositories. We hope our results provide valuable insights to developers and maintainers, which could help them on building and evolving systems in a competitive software market.}
}

@online{bujokas2020textclassificationusing,
  author       = {Bujokas, Eligijus},
  title        = {Text Classification Using Word Embeddings and Deep Learning in Python \textemdash{} Classifying Tweets From\ldots},
  date         = {2020-03-16T04:26:03},
  organization = {{Medium}},
  url          = {https://medium.com/analytics-vidhya/text-classification-using-word-embeddings-and-deep-learning-in-python-classifying-tweets-from-6fe644fcfc81},
  urldate      = {2021-05-21},
  langid       = {english},
  file         = {/home/norangebit/Documenti/10-personal/12-organizzation/06-zotero/storage/BDS956UP/text-classification-using-word-embeddings-and-deep-learning-in-python-classifying-tweets-from-6.html},
  abstract     = {The purpose of this article is to help a reader understand how to leverage word embeddings and deep learning when creating a text\ldots}
}

@inproceedings{chaparro2017detectingmissinginformation,
  author     = {Chaparro, Oscar and Lu, Jing and Zampetti, Fiorella and Moreno, Laura and Di Penta, Massimiliano and Marcus, Andrian and Bavota, Gabriele and Ng, Vincent},
  title      = {Detecting Missing Information in Bug Descriptions},
  booktitle  = {Proceedings of the 2017 11th {{Joint Meeting}} on {{Foundations}} of {{Software Engineering}}},
  eventtitle = {{{ESEC}}/{{FSE}}'17: {{Joint Meeting}} of the {{European Software Engineering Conference}} and the {{ACM SIGSOFT Symposium}} on the {{Foundations}} of {{Software Engineering}}},
  date       = {2017-08-21},
  pages      = {396--407},
  publisher  = {{ACM}},
  location   = {{Paderborn, Germany}},
  isbn       = {978-1-4503-5105-8},
  doi        = {10.1145/3106237.3106285},
  langid     = {english},
  file       = {/home/norangebit/Documenti/10-personal/12-organizzation/07-zotero-attachments/ACM/Chaparro_2017_Detecting missing information in bug descriptions.pdf},
  abstract   = {Bug reports document unexpected software behaviors experienced by users. To be effective, they should allow bug triagers to easily understand and reproduce the potential reported bugs, by clearly describing the Observed Behavior (OB), the Steps to Reproduce (S2R), and the Expected Behavior (EB). Unfortunately, while considered extremely useful, reporters often miss such pieces of information in bug reports and, to date, there is no effective way to automatically check and enforce their presence. We manually analyzed nearly 3k bug reports to understand to what extent OB, EB, and S2R are reported in bug reports and what discourse patterns reporters use to describe such information. We found that (i) while most reports contain OB (i.e., 93.5\%), only 35.2\% and 51.4\% explicitly describe EB and S2R, respectively; and (ii) reporters recurrently use 154 discourse patterns to describe such content. Based on these findings, we designed and evaluated an automated approach to detect the absence (or presence) of EB and S2R in bug descriptions. With its best setting, our approach is able to detect missing EB (S2R) with 85.9\% (69.2\%) average precision and 93.2\% (83\%) average recall. Our approach intends to improve bug descriptions quality by alerting reporters about missing EB and S2R at reporting time.}
}

@article{deboom2016representationlearningvery,
  author        = {De Boom, Cedric and Van Canneyt, Steven and Demeester, Thomas and Dhoedt, Bart},
  title         = {Representation Learning for Very Short Texts Using Weighted Word Embedding Aggregation},
  journaltitle  = {Pattern Recognition Letters},
  shortjournal  = {Pattern Recognition Letters},
  date          = {2016-09},
  volume        = {80},
  pages         = {150--156},
  issn          = {0167-8655},
  doi           = {10.1016/j.patrec.2016.06.012},
  eprinttype    = {arxiv},
  eprint        = {1607.00570},
  archiveprefix = {arXiv},
  langid        = {english},
  keywords      = {Computer Science - Computation and Language,Computer Science - Information Retrieval},
  file          = {/home/norangebit/Documenti/10-personal/12-organizzation/07-zotero-attachments/undefined/De Boom_2016_Representation learning for very short texts using weighted word embedding.pdf},
  abstract      = {Short text messages such as tweets are very noisy and sparse in their use of vocabulary. Traditional textual representations, such as tf-idf, have difficulty grasping the semantic meaning of such texts, which is important in applications such as event detection, opinion mining, news recommendation, etc. We constructed a method based on semantic word embeddings and frequency information to arrive at low-dimensional representations for short texts designed to capture semantic similarity. For this purpose we designed a weight-based model and a learning procedure based on a novel median-based loss function. This paper discusses the details of our model and the optimization methods, together with the experimental results on both Wikipedia and Twitter data. We find that our method outperforms the baseline approaches in the experiments, and that it generalizes well on different word embeddings without retraining. Our method is therefore capable of retaining most of the semantic information in the text, and is applicable out-of-the-box.}
}

@article{fan2021whatmakespopular,
  author       = {Fan, Yuanrui and Xia, Xin and Lo, David and Hassan, Ahmed E. and Li, Shanping},
  title        = {What Makes a Popular Academic {{AI}} Repository?},
  journaltitle = {Empirical Software Engineering},
  shortjournal = {Empir Software Eng},
  date         = {2021-01},
  volume       = {26},
  number       = {1},
  pages        = {2},
  issn         = {1382-3256, 1573-7616},
  doi          = {10.1007/s10664-020-09916-6},
  langid       = {english},
  file         = {/home/norangebit/Documenti/10-personal/12-organizzation/07-zotero-attachments/undefined/Fan_2021_What makes a popular academic AI repository.pdf},
  abstract     = {Many AI researchers are publishing code, data and other resources that accompany their papers in GitHub repositories. In this paper, we refer to these repositories as academic AI repositories. Our preliminary study shows that highly cited papers are more likely to have popular academic AI repositories (and vice versa). Hence, in this study, we perform an empirical study on academic AI repositories to highlight good software engineering practices of popular academic AI repositories for AI researchers. We collect 1,149 academic AI repositories, in which we label the top 20\% repositories that have the most number of stars as popular, and we label the bottom 70\% repositories as unpopular. The remaining 10\% repositories are set as a gap between popular and unpopular academic AI repositories. We propose 21 features to characterize the software engineering practices of academic AI repositories. Our experimental results show that popular and unpopular academic AI repositories are statistically significantly different in 11 of the studied features\textemdash indicating that the two groups of repositories have significantly different software engineering practices. Furthermore, we find that the number of links to other GitHub repositories in the README file, the number of images in the README file and the inclusion of a license are the most important features for differentiating the two groups of academic AI repositories. Our dataset and code are made publicly available to share with the community.}
}

@book{geron2019handsonmachinelearning,
  author     = {G{\'e}ron, Aur{\'e}lien},
  title      = {Hands-{{On Machine Learning}} with {{Scikit}}-{{Learn}}, {{Keras}}, and {{TensorFlow}}: {{Concepts}}, {{Tools}}, and {{Techniques}} to {{Build Intelligent Systems}}},
  shorttitle = {Hands-{{On Machine Learning}} with {{Scikit}}-{{Learn}}, {{Keras}}, and {{TensorFlow}}},
  date       = {2019-09-05},
  publisher  = {{O'Reilly Media, Inc.}},
  isbn       = {978-1-4920-3259-5},
  pagetotal  = {921},
  langid     = {english},
  keywords   = {Computers / Computer Vision \& Pattern Recognition,Computers / Data Processing,Computers / Intelligence (AI) \& Semantics,Computers / Natural Language Processing,Computers / Neural Networks,Computers / Programming Languages / Python},
  file       = {/home/norangebit/Documenti/10-personal/12-organizzation/07-zotero-attachments/O'Reilly Media, Inc./Geron_2019_Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow.pdf},
  abstract   = {Through a series of recent breakthroughs, deep learning has boosted the entire field of machine learning. Now, even programmers who know close to nothing about this technology can use simple, efficient tools to implement programs capable of learning from data. This practical book shows you how. By using concrete examples, minimal theory, and two production-ready Python frameworks\textemdash Scikit-Learn and TensorFlow\textemdash author Aur\'elien G\'eron helps you gain an intuitive understanding of the concepts and tools for building intelligent systems. You'll learn a range of techniques, starting with simple linear regression and progressing to deep neural networks. With exercises in each chapter to help you apply what you've learned, all you need is programming experience to get started. Explore the machine learning landscape, particularly neural nets. Use Scikit-Learn to track an example machine-learning project end-to-end. Explore several training models, including support vector machines, decision trees, random forests, and ensemble methods. Use the TensorFlow library to build and train neural nets. Dive into neural net architectures, including convolutional nets, recurrent nets, and deep reinforcement learning. Learn techniques for training and scaling deep neural nets.}
}

@inproceedings{gonzalez2020statemluniverse10,
  author     = {Gonzalez, Danielle and Zimmermann, Thomas and Nagappan, Nachiappan},
  title      = {The {{State}} of the {{ML}}-Universe: 10 {{Years}} of {{Artificial Intelligence}} \& {{Machine Learning Software Development}} on {{GitHub}}},
  shorttitle = {The {{State}} of the {{ML}}-Universe},
  booktitle  = {Proceedings of the 17th {{International Conference}} on {{Mining Software Repositories}}},
  eventtitle = {{{MSR}} '20: 17th {{International Conference}} on {{Mining Software Repositories}}},
  date       = {2020-06-29},
  pages      = {431--442},
  publisher  = {{ACM}},
  location   = {{Seoul, Republic of Korea}},
  isbn       = {978-1-4503-7517-7},
  doi        = {10.1145/3379597.3387473},
  langid     = {english},
  file       = {/home/norangebit/Documenti/10-personal/12-organizzation/07-zotero-attachments/ACM/Gonzalez_2020_The State of the ML-universe.pdf},
  abstract   = {In the last few years, artificial intelligence (AI) and machine learning (ML) have become ubiquitous terms. These powerful techniques have escaped obscurity in academic communities with the recent onslaught of AI \& ML tools, frameworks, and libraries that make these techniques accessible to a wider audience of developers. As a result, applying AI \& ML to solve existing and emergent problems is an increasingly popular practice. However, little is known about this domain from the software engineering perspective. Many AI \& ML tools and applications are open source, hosted on platforms such as GitHub that provide rich tools for large-scale distributed software development. Despite widespread use and popularity, these repositories have never been examined as a community to identify unique properties, development patterns, and trends.}
}

@inproceedings{grichi2020impactmultilanguagedevelopment,
  author     = {Grichi, Manel and Eghan, Ellis E. and Adams, Bram},
  title      = {On the {{Impact}} of {{Multi}}-Language {{Development}} in {{Machine Learning Frameworks}}},
  booktitle  = {2020 {{IEEE International Conference}} on {{Software Maintenance}} and {{Evolution}} ({{ICSME}})},
  eventtitle = {2020 {{IEEE International Conference}} on {{Software Maintenance}} and {{Evolution}} ({{ICSME}})},
  date       = {2020-09},
  pages      = {546--556},
  publisher  = {{IEEE}},
  location   = {{Adelaide, Australia}},
  isbn       = {978-1-72815-619-4},
  doi        = {10.1109/ICSME46990.2020.00058},
  langid     = {english},
  file       = {/home/norangebit/Documenti/10-personal/12-organizzation/07-zotero-attachments/IEEE/Grichi_2020_On the Impact of Multi-language Development in Machine Learning Frameworks.pdf},
  abstract   = {The role of machine learning frameworks in software applications has exploded in recent years. Similar to non-machine learning frameworks, those frameworks need to evolve to incorporate new features, optimizations, etc., yet their evolution is impacted by the interdisciplinary development teams needed to develop them: scientists and developers. One concrete way in which this shows is through the use of multiple programming languages in their code base, enabling the scientists to write optimized low-level code while developers can integrate the latter into a robust framework. Since multi-language code bases have been shown to impact the development process, this paper empirically compares ten large open-source multi-language machine learning frameworks and ten large open-source multi-language traditional systems in terms of the volume of pull requests, their acceptance ratio i.e., the percentage of accepted pull requests among all the received pull requests, review process duration i.e., period taken to accept or reject a pull request, and bug-proneness. We find that multi-language pull request contributions present a challenge for both machine learning and traditional systems. Our main findings show that in both machine learning and traditional systems, multi-language pull requests are likely to be less accepted than mono-language pull requests; it also takes longer for both multi- and mono-language pull requests to be rejected than accepted. Machine learning frameworks take longer to accept/reject a multi-language pull request than traditional systems. Finally, we find that mono-language pull requests in machine learning frameworks are more bug-prone than traditional systems.}
}

% The same paper used to be listed twice, under this key and under the same key
% with an "a" suffix. The suffixed key is kept as a citation-key alias through
% the biblatex `ids` field (resolved by Biber), so existing citations of either
% key still work and the work appears only once in the bibliography.
@inproceedings{han2020empiricalstudydependency,
  ids        = {han2020empiricalstudydependencya},
  author     = {Han, Junxiao and Deng, Shuiguang and Lo, David and Zhi, Chen and Yin, Jianwei and Xia, Xin},
  title      = {An {{Empirical Study}} of the {{Dependency Networks}} of {{Deep Learning Libraries}}},
  booktitle  = {2020 {{IEEE International Conference}} on {{Software Maintenance}} and {{Evolution}} ({{ICSME}})},
  eventtitle = {2020 {{IEEE International Conference}} on {{Software Maintenance}} and {{Evolution}} ({{ICSME}})},
  date       = {2020-09},
  pages      = {868--878},
  publisher  = {{IEEE}},
  location   = {{Adelaide, Australia}},
  isbn       = {978-1-72815-619-4},
  doi        = {10.1109/ICSME46990.2020.00116},
  langid     = {english},
  file       = {/home/norangebit/Documenti/10-personal/12-organizzation/07-zotero-attachments/IEEE/Han_2020_An Empirical Study of the Dependency Networks of Deep Learning Libraries.pdf;/home/norangebit/Documenti/10-personal/12-organizzation/07-zotero-attachments/IEEE/Han_2020_An Empirical Study of the Dependency Networks of Deep Learning Libraries2.pdf},
  abstract   = {Deep Learning techniques have been prevalent in various domains, and more and more open source projects in GitHub rely on deep learning libraries to implement their algorithms. To that end, they should always keep pace with the latest versions of deep learning libraries to make the best use of deep learning libraries. Aptly managing the versions of deep learning libraries can help projects avoid crashes or security issues caused by deep learning libraries. Unfortunately, very few studies have been done on the dependency networks of deep learning libraries. In this paper, we take the first step to perform an exploratory study on the dependency networks of deep learning libraries, namely, Tensorflow, PyTorch, and Theano. We study the project purposes, application domains, dependency degrees, update behaviors and reasons as well as version distributions of deep learning projects that depend on Tensorflow, PyTorch, and Theano. Our study unveils some commonalities in various aspects (e.g., purposes, application domains, dependency degrees) of deep learning libraries and reveals some discrepancies as for the update behaviors, update reasons, and the version distributions. Our findings highlight some directions for researchers and also provide suggestions for deep learning developers and users.}
}

@article{han2020whatprogrammersdiscuss,
  author       = {Han, Junxiao and Shihab, Emad and Wan, Zhiyuan and Deng, Shuiguang and Xia, Xin},
  title        = {What Do {{Programmers Discuss}} about {{Deep Learning Frameworks}}},
  journaltitle = {Empirical Software Engineering},
  shortjournal = {Empir Software Eng},
  date         = {2020-07},
  volume       = {25},
  number       = {4},
  pages        = {2694--2747},
  issn         = {1382-3256, 1573-7616},
  doi          = {10.1007/s10664-020-09819-6},
  langid       = {english},
  file         = {/home/norangebit/Documenti/10-personal/12-organizzation/07-zotero-attachments/undefined/Han_2020_What do Programmers Discuss about Deep Learning Frameworks.pdf}
}

@inproceedings{hassan2009predictingfaultsusing,
  author     = {Hassan, Ahmed E.},
  title      = {Predicting Faults Using the Complexity of Code Changes},
  booktitle  = {2009 {{IEEE}} 31st {{International Conference}} on {{Software Engineering}}},
  eventtitle = {2009 {{IEEE}} 31st {{International Conference}} on {{Software Engineering}}},
  date       = {2009},
  pages      = {78--88},
  publisher  = {{IEEE}},
  location   = {{Vancouver, BC, Canada}},
  isbn       = {978-1-4244-3453-4},
  doi        = {10.1109/ICSE.2009.5070510},
  langid     = {english},
  file       = {/home/norangebit/Documenti/10-personal/12-organizzation/07-zotero-attachments/IEEE/Hassan_2009_Predicting faults using the complexity of code changes.pdf},
  abstract   = {Predicting the incidence of faults in code has been commonly associated with measuring complexity. In this paper, we propose complexity metrics that are based on the code change process instead of on the code. We conjecture that a complex code change process negatively affects its product, i.e., the software system. We validate our hypothesis empirically through a case study using data derived from the change history for six large open source projects. Our case study shows that our change complexity metrics are better predictors of fault potential in comparison to other well-known historical predictors of faults, i.e., prior modifications and prior faults.}
}

@online{humbatova-2019-taxonomyrealfaults,
  author        = {Humbatova, Nargiz and Jahangirova, Gunel and Bavota, Gabriele and Riccio, Vincenzo and Stocco, Andrea and Tonella, Paolo},
  title         = {Taxonomy of {{Real Faults}} in {{Deep Learning Systems}}},
  date          = {2019-11-07},
  eprinttype    = {arxiv},
  eprint        = {1910.11015},
  archiveprefix = {arXiv},
  primaryclass  = {cs},
  url           = {http://arxiv.org/abs/1910.11015},
  urldate       = {2021-03-17},
  langid        = {english},
  keywords      = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning,Computer Science - Software Engineering},
  file          = {/home/norangebit/Documenti/10-personal/12-organizzation/07-zotero-attachments/undefined/Humbatova_2019_Taxonomy of Real Faults in Deep Learning Systems.pdf},
  abstract      = {The growing application of deep neural networks in safety-critical domains makes the analysis of faults that occur in such systems of enormous importance. In this paper we introduce a large taxonomy of faults in deep learning (DL) systems. We have manually analysed 1059 artefacts gathered from GitHub commits and issues of projects that use the most popular DL frameworks (TensorFlow, Keras and PyTorch) and from related Stack Overflow posts. Structured interviews with 20 researchers and practitioners describing the problems they have encountered in their experience have enriched our taxonomy with a variety of additional faults that did not emerge from the other two sources. Our final taxonomy was validated with a survey involving an additional set of 21 developers, confirming that almost all fault categories (13/15) were experienced by at least 50\% of the survey participants.}
}

@article{liu2021exploratorystudyintroduction,
  author        = {Liu, Jiakun and Huang, Qiao and Xia, Xin and Shihab, Emad and Lo, David and Li, Shanping},
  title         = {An {{Exploratory Study}} on the {{Introduction}} and {{Removal}} of {{Different Types}} of {{Technical Debt}}},
  journaltitle  = {Empirical Software Engineering},
  shortjournal  = {Empir Software Eng},
  date          = {2021-03},
  volume        = {26},
  number        = {2},
  pages         = {16},
  issn          = {1382-3256, 1573-7616},
  doi           = {10.1007/s10664-020-09917-5},
  eprinttype    = {arxiv},
  eprint        = {2101.03730},
  archiveprefix = {arXiv},
  langid        = {english},
  keywords      = {Computer Science - Software Engineering},
  file          = {/home/norangebit/Documenti/10-personal/12-organizzation/06-zotero/storage/FB78NEAM/2101.03730.pdf},
  abstract      = {To complete tasks faster, developers often have to sacrifice the quality of the software. Such compromised practice results in the increasing burden to developers in future development. The metaphor, technical debt, describes such practice. Prior research has illustrated the negative impact of technical debt, and many researchers investigated how developers deal with a certain type of technical debt. However, few studies focused on the removal of different types of technical debt in practice.}
}

@online{naturallanguagetoolkit,
  title   = {Natural {{Language Toolkit}} \textemdash{} {{NLTK}} 3.5 Documentation},
  url     = {https://www.nltk.org/},
  urldate = {2021-03-30},
  file    = {/home/norangebit/Documenti/10-personal/12-organizzation/06-zotero/storage/VKI2452L/www.nltk.org.html}
}

@online{navlanilatentsemanticindexing,
  author   = {Navlani, Avinash},
  title    = {Latent {{Semantic Indexing}} Using {{Scikit}}-{{Learn}}},
  url      = {https://machinelearninggeek.com/latent-semantic-indexing-using-scikit-learn/},
  urldate  = {2021-05-17},
  langid   = {american},
  file     = {/home/norangebit/Documenti/10-personal/12-organizzation/06-zotero/storage/MB9PJVXP/latent-semantic-indexing-using-scikit-learn.html},
  abstract = {In this tutorial, we will focus on Latent Semantic Indexing or Latent Semantic Analysis and perform topic modeling using Scikit-learn.}
}

@article{scalabrino2019listeningcrowdrelease,
  author       = {Scalabrino, Simone and Russo, Barbara and Oliveto, Rocco},
  title        = {Listening to the {{Crowd}} for the {{Release Planning}} of {{Mobile Apps}}},
  journaltitle = {IEEE Transactions on Software Engineering},
  date         = {2019},
  volume       = {45},
  number       = {1},
  pages        = {19},
  langid       = {english},
  file         = {/home/norangebit/Documenti/10-personal/12-organizzation/07-zotero-attachments/undefined/Scalabrino_2019_Listening to the Crowd for the Release Planning of Mobile Apps.pdf},
  abstract     = {The market for mobile apps is getting bigger and bigger, and it is expected to be worth over 100 Billion dollars in 2020. To have a chance to succeed in such a competitive environment, developers need to build and maintain high-quality apps, continuously astonishing their users with the coolest new features. Mobile app marketplaces allow users to release reviews. Despite reviews are aimed at recommending apps among users, they also contain precious information for developers, reporting bugs and suggesting new features. To exploit such a source of information, developers are supposed to manually read user reviews, something not doable when hundreds of them are collected per day. To help developers dealing with such a task, we developed CLAP (Crowd Listener for releAse Planning), a web application able to (i) categorize user reviews based on the information they carry out, (ii) cluster together related reviews, and (iii) prioritize the clusters of reviews to be implemented when planning the subsequent app release. We evaluated all the steps behind CLAP, showing its high accuracy in categorizing and clustering reviews and the meaningfulness of the recommended prioritizations. Also, given the availability of CLAP as a working tool, we assessed its applicability in industrial environments.}
}