by Hannes Thaller, Lukas Linsbauer, Alexander Egyed
Abstract:
Semantic clones are program components with similar behavior, but different textual representation. Semantic similarity is hard to detect, and semantic clone detection is still an open issue. We present semantic clone detection via Probabilistic Software Modeling (PSM) as a robust method for detecting semantically equivalent methods. PSM inspects the structure and runtime behavior of a program and synthesizes a network of Probabilistic Models (PMs). Each PM in the network represents a method in the program and is capable of generating and evaluating runtime events. We leverage these capabilities to accurately find semantic clones. Results show that the approach can detect semantic clones in the complete absence of syntactic similarity with high precision and low error rates. Index Terms—clone detection, semantic clone detection, probabilistic modeling, multivariate testing, software modeling, static code analysis, dynamic code analysis, runtime monitoring, inference, simulation, deep learning
Reference:
Towards Semantic Clone Detection via Probabilistic Software Modeling (Hannes Thaller, Lukas Linsbauer, Alexander Egyed), IEEE, 2020.
Bibtex Entry:
@InProceedings{DBLP:conf/iwsc/ThallerLE20,
  author    = {Hannes Thaller and Lukas Linsbauer and Alexander Egyed},
  booktitle = {14th International Workshop on Software Clones, IWSC 2020, London, ON, Canada},
  title     = {Towards Semantic Clone Detection via Probabilistic Software Modeling},
  year      = {2020},
  pages     = {64--69},
  publisher = {{IEEE}},
  abstract  = {Semantic clones are program components with similar behavior, but different textual representation. Semantic similarity is hard to detect, and semantic clone detection is still an open issue. We present semantic clone detection via Probabilistic Software Modeling (PSM) as a robust method for detecting semantically equivalent methods. PSM inspects the structure and runtime behavior of a program and synthesizes a network of Probabilistic Models (PMs). Each PM in the network represents a method in the program and is capable of generating and evaluating runtime events. We leverage these capabilities to accurately find semantic clones. Results show that the approach can detect semantic clones in the complete absence of syntactic similarity with high precision and low error rates. Index Terms—clone detection, semantic clone detection, probabilistic modeling, multivariate testing, software modeling, static code analysis, dynamic code analysis, runtime monitoring, inference, simulation, deep learning},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/iwsc/ThallerLE20.bib},
  doi       = {10.1109/IWSC50091.2020.9047635},
  file      = {:Conferences/IWSC 2020 - Towards Semantic Clone Detection via Probabilistic Software Modeling/Towards Semantic Clone Detectionvia Probabilistic Software Modeling-preprint.pdf:PDF},
  keywords  = {FWF P25513, SCCH},
  timestamp = {Mon, 30 Mar 2020 12:30:48 +0200},
  url       = {https://doi.org/10.1109/IWSC50091.2020.9047635},
}