aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Litteken <andrew.litteken@gmail.com>2020-09-17 15:43:40 -0500
committerAndrew Litteken <andrew.litteken@gmail.com>2021-03-20 16:47:50 -0500
commit5155dff2784a47583d432d796b7cf47a0bed9f20 (patch)
treed99e6c88a2bb2b1360dbb54485c056f3b22ab7a7
parent14696baaf4c43fe53f738bc292bbe169eed93d5d (diff)
downloadllvm-5155dff2784a47583d432d796b7cf47a0bed9f20.zip
llvm-5155dff2784a47583d432d796b7cf47a0bed9f20.tar.gz
llvm-5155dff2784a47583d432d796b7cf47a0bed9f20.tar.bz2
[IRSim] Adding basic implementation of llvm-sim.
This is a similarity visualization tool that accepts a Module and passes it to the IRSimilarityIdentifier. The resulting SimilarityGroups are output in a JSON file. Tests are found in test/tools/llvm-sim and check for the file not found, a bad module, and that the JSON is created correctly. Reviewers: paquette, jroelofs, MaskRay Recommit of: 15645d044bcfe2a0f63156048b302f997a717688 to fix linking errors. Differential Revision: https://reviews.llvm.org/D86974
-rw-r--r--llvm/test/CMakeLists.txt1
-rw-r--r--llvm/test/lit.cfg.py2
-rw-r--r--llvm/test/tools/llvm-sim/Inputs/sim1.ll27
-rw-r--r--llvm/test/tools/llvm-sim/fail-cases.test8
-rw-r--r--llvm/test/tools/llvm-sim/single-sim-file.test57
-rw-r--r--llvm/test/tools/llvm-sim/single-sim.test56
-rw-r--r--llvm/tools/llvm-sim/CMakeLists.txt9
-rw-r--r--llvm/tools/llvm-sim/llvm-sim.cpp149
8 files changed, 308 insertions, 1 deletions
diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index 7c4fa2e..0c72adc 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -109,6 +109,7 @@ set(LLVM_TEST_DEPENDS
llvm-readelf
llvm-reduce
llvm-rtdyld
+ llvm-sim
llvm-size
llvm-split
llvm-strings
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 2a1ccc2..244d69e 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -162,7 +162,7 @@ tools.extend([
'llvm-link', 'llvm-lto', 'llvm-lto2', 'llvm-mc', 'llvm-mca',
'llvm-modextract', 'llvm-nm', 'llvm-objcopy', 'llvm-objdump',
'llvm-pdbutil', 'llvm-profdata', 'llvm-ranlib', 'llvm-rc', 'llvm-readelf',
- 'llvm-readobj', 'llvm-rtdyld', 'llvm-size', 'llvm-split', 'llvm-strings',
+ 'llvm-readobj', 'llvm-rtdyld', 'llvm-sim', 'llvm-size', 'llvm-split', 'llvm-strings',
'llvm-strip', 'llvm-tblgen', 'llvm-undname', 'llvm-c-test', 'llvm-cxxfilt',
'llvm-xray', 'yaml2obj', 'obj2yaml', 'yaml-bench', 'verify-uselistorder',
'bugpoint', 'llc', 'llvm-symbolizer', 'opt', 'sancov', 'sanstats'])
diff --git a/llvm/test/tools/llvm-sim/Inputs/sim1.ll b/llvm/test/tools/llvm-sim/Inputs/sim1.ll
new file mode 100644
index 0000000..facc27d
--- /dev/null
+++ b/llvm/test/tools/llvm-sim/Inputs/sim1.ll
@@ -0,0 +1,27 @@
+; First of two functions with identical bodies: three allocas, three stores,
+; three loads and a return.
+define void @similar_func1() {
+entry:
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ store i32 2, i32* %a, align 4
+ store i32 3, i32* %b, align 4
+ store i32 4, i32* %c, align 4
+ %al = load i32, i32* %a
+ %bl = load i32, i32* %b
+ %cl = load i32, i32* %c
+ ret void
+}
+
+; Instruction-for-instruction copy of @similar_func1, so the two functions
+; contain matching instruction sequences.
+define void @similar_func2() {
+entry:
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ store i32 2, i32* %a, align 4
+ store i32 3, i32* %b, align 4
+ store i32 4, i32* %c, align 4
+ %al = load i32, i32* %a
+ %bl = load i32, i32* %b
+ %cl = load i32, i32* %c
+ ret void
+}
diff --git a/llvm/test/tools/llvm-sim/fail-cases.test b/llvm/test/tools/llvm-sim/fail-cases.test
new file mode 100644
index 0000000..41e3a56
--- /dev/null
+++ b/llvm/test/tools/llvm-sim/fail-cases.test
@@ -0,0 +1,8 @@
+# RUN: not llvm-sim %s 2>&1 | FileCheck %s
+# RUN: not llvm-sim %s.2 2>&1 | FileCheck %s --check-prefix=EXIST
+
+# File reading error messaging tests.
+
+# This test file is not valid IR, so parsing it must fail.
+# CHECK: error: expected top-level entity
+
+# The path %s.2 does not exist. The message text comes from the host system
+# and its capitalization is not the same everywhere, so the first letter is
+# matched in either case.
+# EXIST: error: Could not open input file: {{[Nn]}}o such file or directory
diff --git a/llvm/test/tools/llvm-sim/single-sim-file.test b/llvm/test/tools/llvm-sim/single-sim-file.test
new file mode 100644
index 0000000..5e45edf
--- /dev/null
+++ b/llvm/test/tools/llvm-sim/single-sim-file.test
@@ -0,0 +1,57 @@
+# RUN: llvm-sim -o %t %S/Inputs/sim1.ll
+# RUN: FileCheck %s < %t
+
+# Check the JSON that llvm-sim writes to the file given with -o for a single module.
+
+# CHECK: {
+# CHECK-NEXT: "1": [
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 8,
+# CHECK-NEXT: "end": 9
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 18,
+# CHECK-NEXT: "end": 19
+# CHECK-NEXT: }
+# CHECK-NEXT: ],
+# CHECK-NEXT: "2": [
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 7,
+# CHECK-NEXT: "end": 9
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 17,
+# CHECK-NEXT: "end": 19
+# CHECK-NEXT: }
+# CHECK-NEXT: ],
+# CHECK-NEXT: "3": [
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 6,
+# CHECK-NEXT: "end": 9
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 16,
+# CHECK-NEXT: "end": 19
+# CHECK-NEXT: }
+# CHECK-NEXT: ],
+# CHECK-NEXT: "4": [
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 5,
+# CHECK-NEXT: "end": 9
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 15,
+# CHECK-NEXT: "end": 19
+# CHECK-NEXT: }
+# CHECK-NEXT: ],
+# CHECK-NEXT: "5": [
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 4,
+# CHECK-NEXT: "end": 9
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 14,
+# CHECK-NEXT: "end": 19
+# CHECK-NEXT: }
+# CHECK-NEXT: ]
+# CHECK-NEXT: }
diff --git a/llvm/test/tools/llvm-sim/single-sim.test b/llvm/test/tools/llvm-sim/single-sim.test
new file mode 100644
index 0000000..4e04682
--- /dev/null
+++ b/llvm/test/tools/llvm-sim/single-sim.test
@@ -0,0 +1,56 @@
+# RUN: llvm-sim -o - %S/Inputs/sim1.ll | FileCheck %s
+
+# Check the JSON that llvm-sim writes to stdout (-o -) for a single module.
+
+# CHECK: {
+# CHECK-NEXT: "1": [
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 8,
+# CHECK-NEXT: "end": 9
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 18,
+# CHECK-NEXT: "end": 19
+# CHECK-NEXT: }
+# CHECK-NEXT: ],
+# CHECK-NEXT: "2": [
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 7,
+# CHECK-NEXT: "end": 9
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 17,
+# CHECK-NEXT: "end": 19
+# CHECK-NEXT: }
+# CHECK-NEXT: ],
+# CHECK-NEXT: "3": [
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 6,
+# CHECK-NEXT: "end": 9
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 16,
+# CHECK-NEXT: "end": 19
+# CHECK-NEXT: }
+# CHECK-NEXT: ],
+# CHECK-NEXT: "4": [
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 5,
+# CHECK-NEXT: "end": 9
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 15,
+# CHECK-NEXT: "end": 19
+# CHECK-NEXT: }
+# CHECK-NEXT: ],
+# CHECK-NEXT: "5": [
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 4,
+# CHECK-NEXT: "end": 9
+# CHECK-NEXT: },
+# CHECK-NEXT: {
+# CHECK-NEXT: "start": 14,
+# CHECK-NEXT: "end": 19
+# CHECK-NEXT: }
+# CHECK-NEXT: ]
+# CHECK-NEXT: }
diff --git a/llvm/tools/llvm-sim/CMakeLists.txt b/llvm/tools/llvm-sim/CMakeLists.txt
new file mode 100644
index 0000000..7629905
--- /dev/null
+++ b/llvm/tools/llvm-sim/CMakeLists.txt
@@ -0,0 +1,9 @@
+# LLVM components that llvm-sim links against, in alphabetical order.
+set(LLVM_LINK_COMPONENTS
+  Analysis
+  Core
+  IRReader
+  Support
+  )
+
+# The tool is built from a single source file.
+add_llvm_tool(llvm-sim
+  llvm-sim.cpp
+  )
diff --git a/llvm/tools/llvm-sim/llvm-sim.cpp b/llvm/tools/llvm-sim/llvm-sim.cpp
new file mode 100644
index 0000000..26e370f
--- /dev/null
+++ b/llvm/tools/llvm-sim/llvm-sim.cpp
@@ -0,0 +1,149 @@
+//===-- llvm-sim.cpp - Find similar sections of programs -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This program finds similar sections of a Module, and exports them as a JSON
+// file.
+//
+// To find similarities contained across multiple modules, please use llvm-link
+// first to merge the modules.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IRSimilarityIdentifier.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/ToolOutputFile.h"
+
+using namespace llvm;
+using namespace IRSimilarity;
+
+/// Destination of the JSON output, selected with -o. Defaults to "-".
+static cl::opt<std::string> OutputFilename("o", cl::init("-"),
+                                           cl::value_desc("filename"),
+                                           cl::desc("Output Filename"));
+
+/// Positional argument naming the IR file to analyze. Defaults to "-".
+static cl::opt<std::string> InputSourceFile(cl::Positional, cl::init("-"),
+                                            cl::value_desc("filename"),
+                                            cl::desc("<Source file>"));
+
+/// Look up the number assigned to \p I in \p LLVMInstNum.
+///
+/// \param I - The Instruction to find the instruction number for. Must not be
+/// null.
+/// \param LLVMInstNum - The mapping of Instructions to their location in the
+/// module represented by an unsigned integer.
+/// \returns The number mapped to \p I, or None if \p I has no entry.
+Optional<unsigned>
+getPositionInModule(const Instruction *I,
+                    const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
+  assert(I && "Instruction is nullptr!");
+  const auto Entry = LLVMInstNum.find(I);
+  if (Entry != LLVMInstNum.end())
+    return Entry->second;
+  return None;
+}
+
+/// Writes the given SimilarityGroups as JSON to \p FilePath.
+///
+/// The output is one JSON object. Each group becomes an attribute keyed by a
+/// running counter that starts at 1, and its value is an array holding one
+/// {"start", "end"} object per candidate in the group.
+///
+/// \param FilePath - The path to the output location.
+/// \param SimSections - The similarity groups to process.
+/// \param LLVMInstNum - The mapping of Instructions to their location in the
+/// module represented by an unsigned integer.
+/// \returns A nonzero error code if there was a failure creating the file.
+std::error_code
+exportToFile(const StringRef FilePath,
+             const SimilarityGroupList &SimSections,
+             const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
+  std::error_code EC;
+  auto Out = std::make_unique<ToolOutputFile>(FilePath, EC, sys::fs::OF_None);
+  if (EC)
+    return EC;
+
+  json::OStream J(Out->os(), 1);
+  // The scoped object/attributeArray helpers close what they open, so the
+  // begin/end calls cannot get out of balance.
+  J.object([&] {
+    unsigned GroupNumber = 1;
+    for (const SimilarityGroup &Group : SimSections) {
+      const std::string GroupKey = std::to_string(GroupNumber);
+      J.attributeArray(GroupKey, [&] {
+        // Each candidate is reported as the numbers of its first and last
+        // instruction.
+        for (const IRSimilarityCandidate &Candidate : Group) {
+          Optional<unsigned> Start =
+              getPositionInModule((*Candidate.front()).Inst, LLVMInstNum);
+          Optional<unsigned> End =
+              getPositionInModule((*Candidate.back()).Inst, LLVMInstNum);
+
+          assert(Start.hasValue() &&
+                 "Could not find instruction number for first instruction");
+          assert(End.hasValue() &&
+                 "Could not find instruction number for last instruction");
+
+          J.object([&] {
+            J.attribute("start", Start.getValue());
+            J.attribute("end", End.getValue());
+          });
+        }
+      });
+      ++GroupNumber;
+    }
+  });
+
+  // Mark the output as one to keep now that everything has been written.
+  Out->keep();
+
+  return EC;
+}
+
+/// Parses the input IR file, numbers its instructions, runs the
+/// IRSimilarityIdentifier over the module and exports the resulting groups.
+///
+/// \returns 0 on success, 1 if the input could not be parsed, and 2 if the
+/// export reported an error.
+int main(int argc, const char *argv[]) {
+  InitLLVM X(argc, argv);
+
+  cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n");
+
+  LLVMContext CurrContext;
+  SMDiagnostic Err;
+  std::unique_ptr<Module> ModuleToAnalyze =
+      parseIRFile(InputSourceFile, Err, CurrContext);
+
+  if (!ModuleToAnalyze) {
+    Err.print(argv[0], errs());
+    return 1;
+  }
+
+  // Mapping from an Instruction pointer to its occurrence in a sequential
+  // list of all the Instructions in a Module.
+  DenseMap<Instruction *, unsigned> LLVMInstNum;
+
+  // We give each instruction a number, which gives us a start and end value
+  // for the beginning and end of each IRSimilarityCandidate. Numbering starts
+  // at 1 and runs across function boundaries.
+  unsigned InstructionNumber = 1;
+  for (Function &F : *ModuleToAnalyze)
+    for (BasicBlock &BB : F)
+      for (Instruction &I : BB.instructionsWithoutDebug())
+        LLVMInstNum[&I] = InstructionNumber++;
+
+  // The similarity identifier we will use to find the similar sections.
+  IRSimilarityIdentifier SimIdent;
+  // Binding to a const reference avoids copying the whole list of groups when
+  // findSimilarity hands back a reference, and remains valid if it returns by
+  // value. SimIdent outlives every use of the reference.
+  const SimilarityGroupList &SimilaritySections =
+      SimIdent.findSimilarity(*ModuleToAnalyze);
+
+  std::error_code E =
+      exportToFile(OutputFilename, SimilaritySections, LLVMInstNum);
+  if (E) {
+    errs() << argv[0] << ": " << E.message() << '\n';
+    return 2;
+  }
+
+  return 0;
+}