Compare commits

..

3 Commits

Author SHA1 Message Date
83ca2deb0e Better branch detection model
For loops are now broken up into 2 branches: 1 (init) and 2 (cond + inc).
2021-03-07 13:02:14 +02:00
a23e5c66ab Add subscript operation recognition 2021-03-07 11:49:16 +02:00
7a852f8f94 Add function call recognition and pretty printing 2021-03-07 01:25:48 +02:00
10 changed files with 367 additions and 123 deletions

View File

@ -25,7 +25,7 @@ add_executable(op-finder
OperationFinderAstVisitor.cpp
OperationFinderAstConsumer.cpp
OperationFinderAstAction.cpp
)
OperationLog.cpp)
target_include_directories(op-finder
PRIVATE

View File

@ -11,6 +11,35 @@
using namespace clang;
using namespace clang::ast_matchers;
namespace
{
std::string ResolveTypeName(const QualType& t)
{
if (t->isTypedefNameType())
{
const TypedefType* tdt = cast<TypedefType>(t);
assert(tdt);
llvm::outs() << "Typedef type: " << t.getAsString()
<< ", underlying: " << tdt->desugar().getAsString()
<< ".";
return tdt->desugar().getAsString();
}
else if (t->isBuiltinType())
{
llvm::outs() << "Builtin type: " << t.getAsString() << ".";
return t.getAsString();
}
else
{
llvm::outs() << "Other type: " << t.getAsString() << ".";
return t.getAsString();
}
};
}
OperationFinder::OperationFinder(IOperationOutput* storage)
: _storage(storage)
{
@ -19,113 +48,123 @@ OperationFinder::OperationFinder(IOperationOutput* storage)
// Records one binary operator: prints its opcode to llvm::outs() and pushes an
// OperationLog describing it to _storage under the file name it was found in.
// NOTE(review): this span comes from a rendered diff. Lines of the removed and the
// added implementation are interleaved without +/- markers (file_name and log are
// each declared twice, pushOperation is called twice), so it is not compilable as shown.
void OperationFinder::processArithmetic(const clang::BinaryOperator* op, const clang::SourceManager& source_manager)
{
const auto [file_name, line_number, column_number] = resolveLocations(op, source_manager);
auto [file_name, log] = _createBaseOperationLog(op, source_manager);
const std::string op_code = getOpcode(op).str();
llvm::outs() << file_name << ":"
<< line_number << ":"
<< column_number << ":"
<< "Binary arithmetic: Type: " << getOpcode(op) << " LHS: ";
llvm::outs() << "\n\tBinary arithmetic: Type: " << op_code;
OperationLog log;
log.line = line_number;
log.operation = getOpcode(op).str();
log.current_for_loops = _for_loop_stack;
log.is_fallthrough = _in_fallthrough;
// The entry carries the opcode plus the result/LHS/RHS type names.
log.entry_type = OperationLog::BasicOperation::TYPE_NAME;
log.entry = _createBasicOperationLogEntry(op_code, op, op->getLHS(), op->getRHS());
_processExpressionTypes(log, op, op->getLHS(), op->getRHS());
_storage->pushOperation(file_name, log);
// log is handed over by rvalue; it must not be used after this call.
_storage->pushOperation(file_name, std::move(log));
llvm::outs() << "\n";
}
// Records one unary operator: prints its opcode to llvm::outs() and pushes an
// OperationLog describing it to _storage. Only the first operand slot is filled;
// nullptr is passed for the second operand.
// NOTE(review): this span comes from a rendered diff. Removed and added lines are
// interleaved without +/- markers (file_name and log are declared twice), so it is
// not compilable as shown.
void OperationFinder::processUnaryArithmetic(const clang::UnaryOperator* op, const clang::SourceManager& source_manager)
{
const Expr* lhs = op->getExprStmt();
auto [file_name, log] = _createBaseOperationLog(op, source_manager);
const std::string op_code = getOpcode(op).str();
// NOTE(review): op has already been dereferenced above by the time this assert runs.
assert(op);
assert(lhs);
llvm::outs() << "\n\tUnary arithmetic: Type: " << op_code;
const auto [file_name, line_number, column_number] = resolveLocations(op, source_manager);
log.entry_type = OperationLog::BasicOperation::TYPE_NAME;
log.entry = _createBasicOperationLogEntry(op_code, op, op->getExprStmt(), nullptr);
llvm::outs() << file_name << ":"
<< line_number << ":"
<< column_number << ":"
<< "Unary arithmetic: Type: " << getOpcode(op) << " LHS: ";
OperationLog log;
log.line = line_number;
log.operation = getOpcode(op).str();
log.current_for_loops = _for_loop_stack;
log.is_fallthrough = _in_fallthrough;
_processExpressionTypes(log, op, lhs, nullptr);
_storage->pushOperation(file_name, log);
// log is handed over by rvalue; it must not be used after this call.
_storage->pushOperation(file_name, std::move(log));
llvm::outs() << "\n";
}
// Records one function call: prints the callee name to llvm::outs() and pushes an
// OperationLog with a FunctionCall entry (callee name + resolved type name) to _storage.
// NOTE(review): this span comes from a rendered diff. The signature and body of the
// removed fallthroughBranchEntered() are interleaved with the added
// processFunctionCall(), so it is not compilable as shown.
void OperationFinder::fallthroughBranchEntered()
void OperationFinder::processFunctionCall(const clang::CallExpr* call, const SourceManager& source_manager)
{
_in_fallthrough = true;
// NOTE(review): getDirectCallee() presumably returns nullptr for calls made through a
// function pointer; the assert would then abort debug builds and be skipped in
// release builds before func is dereferenced. Confirm and handle that case explicitly.
const FunctionDecl* func = call->getDirectCallee();
assert(func);
auto [file_name, log] = _createBaseOperationLog(call, source_manager);
const std::string func_name = func->getNameAsString();
llvm::outs() << "\n\tFunction call: func name: " << func_name << "\n\tResult eval type: ";
// NOTE(review): func->getType() is the type of the declaration itself, yet the value is
// stored as call_result_type. Verify whether the return type (or call->getType()) was intended.
const std::string res_type = ResolveTypeName(func->getType());
auto func_call = std::make_unique<OperationLog::FunctionCall>();
func_call->function_name = func_name;
func_call->call_result_type = res_type;
log.entry_type = OperationLog::FunctionCall::TYPE_NAME;
log.entry = std::move(func_call);
// log is handed over by rvalue; it must not be used after this call.
_storage->pushOperation(file_name, std::move(log));
llvm::outs() << "\n";
}
// Records one array subscript expression as a BasicOperation named "subscript",
// with the base expression as the first operand and the index as the second.
// NOTE(review): this span comes from a rendered diff. The removed
// fallthroughBranchExited() is interleaved with the added processArraySubscript(),
// so it is not compilable as shown.
void OperationFinder::fallthroughBranchExited()
void OperationFinder::processArraySubscript(const clang::ArraySubscriptExpr* subscript, const clang::SourceManager& source_manager)
{
_in_fallthrough = false;
auto [file_name, log] = _createBaseOperationLog(subscript, source_manager);
llvm::outs() << "\n\tSubscript:";
log.entry_type = OperationLog::BasicOperation::TYPE_NAME;
log.entry = _createBasicOperationLogEntry("subscript", subscript, subscript->getBase(), subscript->getIdx());
// log is handed over by rvalue; it must not be used after this call.
_storage->pushOperation(file_name, std::move(log));
llvm::outs() << "\n";
}
// Increments _current_branch and prints the new value to llvm::outs().
// NOTE(review): this span comes from a rendered diff. The removed forLoopEntered()
// is interleaved with the added branchEntered(), so it is not compilable as shown.
void OperationFinder::forLoopEntered()
void OperationFinder::branchEntered()
{
_for_loop_stack.push_back(_next_for_loop_id);
_next_for_loop_id++;
_current_branch++;
llvm::outs() << "Branch entered: " << _current_branch << "\n";
}
// Prints the branch being left to llvm::outs(), then decrements _current_branch.
// NOTE(review): this span comes from a rendered diff. The removed forLoopExited()
// is interleaved with the added branchExited(), so it is not compilable as shown.
void OperationFinder::forLoopExited()
void OperationFinder::branchExited()
{
_for_loop_stack.pop_back();
llvm::outs() << "Branch exited: " << _current_branch << "\n";
_current_branch--;
// Guards against more exits than entries; only checked when asserts are enabled.
assert(_current_branch > -1);
}
// Builds a BasicOperation entry: stores the opcode and the resolved type names of
// the whole expression (source) and of each operand that is non-null, printing
// each type to llvm::outs() along the way. Returns the entry.
// NOTE(review): this span comes from a rendered diff. The removed
// _processExpressionTypes() (including its printTypeName lambda) is interleaved
// with the added _createBasicOperationLogEntry(), so it is not compilable as shown.
void OperationFinder::_processExpressionTypes(OperationLog& log, const Expr* source, const Expr* op1, const Expr* op2)
std::unique_ptr<OperationLog::BasicOperation>
OperationFinder::_createBasicOperationLogEntry(const std::string& opcode, const Expr* source, const Expr* op1, const Expr* op2)
{
auto log = std::make_unique<OperationLog::BasicOperation>();
auto printTypeName = [](const QualType& t) -> std::string
{
if (t->isTypedefNameType())
{
const TypedefType* tdt = cast<TypedefType>(t);
assert(tdt);
llvm::outs() << "Typedef type: " << t.getAsString()
<< ", underlying: " << tdt->desugar().getAsString()
<< ".";
log->operation_name = opcode;
return tdt->desugar().getAsString();
}
else if (t->isBuiltinType())
{
llvm::outs() << "Builtin type: " << t.getAsString() << ".";
return t.getAsString();
}
else
{
llvm::outs() << "Other type: " << t.getAsString() << ".";
return t.getAsString();
}
};
llvm::outs() << "Expression types:\n\tExpression eval type: ";
log.operand_result = printTypeName(source->getType());
llvm::outs() << "\n\tExpression eval type: ";
log->type_result = ResolveTypeName(source->getType());
// Operands are optional: callers pass nullptr when there is no such operand.
if (op1)
{
llvm::outs() << "\n\tLHS eval type: ";
log.operand_lhs = printTypeName(op1->getType());
log->type_lhs = ResolveTypeName(op1->getType());
}
if (op2)
{
llvm::outs() << "\n\tRHS eval type: ";
log.operand_rhs = printTypeName(op2->getType());
log->type_rhs = ResolveTypeName(op2->getType());
}
return log;
}
/// Starts a log record for `stmt`.
///
/// Prints the "file:line:column:" prefix to llvm::outs() and returns the file
/// name together with an OperationLog whose line and branch number are already
/// filled in; the caller supplies entry_type and entry.
std::pair<std::string, OperationLog> OperationFinder::_createBaseOperationLog(const clang::Stmt* stmt, const clang::SourceManager& source_manager)
{
    const auto [file_name, line_number, column_number] = resolveLocations(stmt, source_manager);

    llvm::outs() << file_name << ":" << line_number << ":" << column_number << ":";

    OperationLog base;
    base.branch_number = _current_branch;
    base.line = line_number;
    return { file_name, std::move(base) };
}

View File

@ -16,19 +16,19 @@ public:
void processArithmetic(const clang::BinaryOperator* op, const clang::SourceManager& source_manager);
void processUnaryArithmetic(const clang::UnaryOperator* op, const clang::SourceManager& source_manager);
void processFunctionCall(const clang::CallExpr* call, const clang::SourceManager& source_manager);
void processArraySubscript(const clang::ArraySubscriptExpr* subscript, const clang::SourceManager& source_manager);
void fallthroughBranchEntered();
void fallthroughBranchExited();
void forLoopEntered();
void forLoopExited();
void branchEntered();
void branchExited();
private:
void _processExpressionTypes(OperationLog& log, const clang::Expr* source, const clang::Expr* op1, const clang::Expr* op2);
std::unique_ptr<OperationLog::BasicOperation>
_createBasicOperationLogEntry(const std::string& opcode, const clang::Expr* source, const clang::Expr* op1, const clang::Expr* op2);
int _next_for_loop_id = 0;
bool _in_fallthrough = false;
std::vector<int> _for_loop_stack;
std::pair<std::string, OperationLog> _createBaseOperationLog(const clang::Stmt* stmt, const clang::SourceManager& source_manager);
int _current_branch = 0;
IOperationOutput* _storage;
};

View File

@ -43,21 +43,32 @@ bool OperationFinderAstVisitor::VisitUnaryOperator(clang::UnaryOperator* op)
return true;
}
/// Forwards every visited call expression to the operation finder.
/// Always returns true.
bool OperationFinderAstVisitor::VisitCallExpr(clang::CallExpr* call)
{
    assert(_context);

    const clang::SourceManager& sources = _context->getSourceManager();
    _op_finder->processFunctionCall(call, sources);
    return true;
}
/// Forwards every visited array subscript expression to the operation finder.
/// Always returns true.
bool OperationFinderAstVisitor::VisitArraySubscriptExpr(clang::ArraySubscriptExpr* subscript)
{
    assert(_context);

    const clang::SourceManager& sources = _context->getSourceManager();
    _op_finder->processArraySubscript(subscript, sources);
    return true;
}
// Pre-traversal hook: when _isBranchEntry() reports that stmt opens a branch, the
// returned statement is pushed onto _branch_stack and the operation finder is told
// a branch was entered. Always returns true.
// NOTE(review): this span comes from a rendered diff. The removed ForStmt/_loop_init
// handling is interleaved with the added _isBranchEntry() logic, so it is not
// compilable as shown.
bool OperationFinderAstVisitor::dataTraverseStmtPre(clang::Stmt* stmt)
{
assert(_context);
if (auto* loop = clang::dyn_cast<clang::ForStmt>(stmt))
if (clang::Stmt* branch_entry = _isBranchEntry(stmt))
{
if (loop->getInit())
{
_loop_init = loop->getInit();
_op_finder->fallthroughBranchEntered();
}
else
{
_op_finder->forLoopEntered();
}
// The pushed statement is the one whose post-visit is expected to close this branch.
_branch_stack.push_back(branch_entry);
_op_finder->branchEntered();
}
return true;
@ -67,16 +78,77 @@ bool OperationFinderAstVisitor::dataTraverseStmtPost(clang::Stmt* stmt)
{
// Post-traversal hook (the signature sits in the diff hunk header above this span).
// Closes branches whose closing statement has just been traversed: loop-header
// statements are handled through _loop_header, every other branch through the
// top of _branch_stack. Always returns true.
// NOTE(review): this span comes from a rendered diff. The removed _loop_init
// handling is interleaved with the added _loop_header logic, so it is not
// compilable as shown.
assert(_context);
if (_loop_init && _loop_init == stmt)
if (_loop_header.in_loop_header && (_loop_header.init == stmt || _loop_header.header_end == stmt))
{
_op_finder->forLoopEntered();
_op_finder->fallthroughBranchExited();
_loop_init = nullptr;
// End of the loop header: close the header branch, then the init branch if there was one.
if (_loop_header.header_end == stmt)
{
assert(_branch_stack.back() == stmt);
_branch_stack.pop_back();
_op_finder->branchExited();
if (_loop_header.init)
{
assert(_branch_stack.back() == _loop_header.init);
_branch_stack.pop_back();
_op_finder->branchExited();
}
_loop_header = {};
}
// Loop with an init statement but no condition/increment: init alone ends the header.
else if (_loop_header.init == stmt && !_loop_header.header_end)
{
assert(_branch_stack.back() == stmt);
_branch_stack.pop_back();
_op_finder->branchExited();
_loop_header = {};
}
}
else if (clang::dyn_cast<clang::ForStmt>(stmt))
else if (!_branch_stack.empty() && _branch_stack.back() == stmt)
{
_op_finder->forLoopExited();
_branch_stack.pop_back();
_op_finder->branchExited();
}
else
{
// A statement that is on the stack but not at its top would mean unbalanced enter/exit.
assert(std::find(_branch_stack.cbegin(), _branch_stack.cend(), stmt)
== _branch_stack.cend());
}
return true;
}
/// Decides whether visiting `stmt` opens a new branch.
///
/// Despite the name this is not a pure query: when `stmt` is a ForStmt it
/// (re)initialises `_loop_header` with that loop's init statement and the
/// first/last statement of its "condition + increment" header.
///
/// A for loop contributes up to two branches:
///   1. the init statement, entered when the ForStmt itself is visited;
///   2. the condition/increment header, entered when its first statement is visited.
///
/// @param stmt  Statement about to be traversed.
/// @return      The statement whose post-visit closes the newly opened branch, or
///              nullptr when `stmt` does not open a branch.
clang::Stmt* OperationFinderAstVisitor::_isBranchEntry(clang::Stmt* stmt)
{
    if (auto* loop = clang::dyn_cast<clang::ForStmt>(stmt))
    {
        clang::Stmt* const cond = loop->getCond();
        clang::Stmt* const inc = loop->getInc();

        // Assign every field so nothing from a previously seen loop survives,
        // even when this loop has neither a condition nor an increment.
        _loop_header.init = loop->getInit();
        _loop_header.header_start = cond ? cond : inc;
        _loop_header.header_end = inc ? inc : cond;
        _loop_header.in_loop_header = _loop_header.init || _loop_header.header_end;

        // Only the init branch opens here. The header branch opens below, when
        // header_start itself is visited; opening it here as well would push it
        // twice for loops without an init statement while it is popped only once.
        return _loop_header.init;
    }

    if (_loop_header.in_loop_header && _loop_header.header_start == stmt)
        return _loop_header.header_end;

    return nullptr;
}

View File

@ -20,6 +20,8 @@ public:
bool VisitForStmt(clang::ForStmt* stmt);
bool VisitBinaryOperator(clang::BinaryOperator* op);
bool VisitUnaryOperator(clang::UnaryOperator* op);
bool VisitCallExpr(clang::CallExpr* call);
bool VisitArraySubscriptExpr(clang::ArraySubscriptExpr* subscript);
bool dataTraverseStmtPre(clang::Stmt* stmt);
bool dataTraverseStmtPost(clang::Stmt* stmt);
@ -28,7 +30,19 @@ private:
clang::ASTContext* _context;
OperationFinder* _op_finder;
clang::Stmt* _loop_init = nullptr;
/// Tracks the header of the for loop currently being traversed.
/// A value-initialised instance means "not inside any loop header".
struct _LoopHeaderStateMachine
{
    /// True while a loop header (init and/or condition/increment) is pending.
    bool in_loop_header{false};
    /// The loop's init statement, or nullptr if it has none.
    clang::Stmt* init{nullptr};
    /// First statement of the condition/increment part, or nullptr.
    clang::Stmt* header_start{nullptr};
    /// Last statement of the condition/increment part, or nullptr.
    clang::Stmt* header_end{nullptr};
};
_LoopHeaderStateMachine _loop_header;
std::vector<clang::Stmt*> _branch_stack;
clang::Stmt* _isBranchEntry(clang::Stmt* stmt);
};
#endif //C_ANALYZER_OPERATIONFINDERASTVISITOR_HPP

View File

@ -0,0 +1,44 @@
//
// Created by erki on 06.03.21.
//
#include "OperationLog.hpp"
namespace
{
/// Decodes `j` into a freshly allocated T when `entry_type` names T.
///
/// @return The decoded object, or nullptr when `entry_type` differs from T::TYPE_NAME.
template<typename T>
std::unique_ptr<T> DecodeType(const nlohmann::json& j, const std::string& entry_type)
{
    if (!(entry_type == T::TYPE_NAME))
        return nullptr;

    auto decoded = std::make_unique<T>();
    j.get_to(*decoded);
    return decoded;
}
}
/// Serialises this entry through the to_json() overload for FunctionCall.
nlohmann::json OperationLog::FunctionCall::toJson() const
{
    return nlohmann::json(*this);
}
/// Serialises this entry through the to_json() overload for BasicOperation.
nlohmann::json OperationLog::BasicOperation::toJson() const
{
    return nlohmann::json(*this);
}
/// Rebuilds `entry` from `j` according to the already-populated `entry_type`.
///
/// The known entry kinds are tried in order. If `entry_type` matches none of
/// them, `entry` is left null.
void OperationLog::DecodeEntry(const nlohmann::json& j)
{
    entry = DecodeType<BasicOperation>(j, entry_type);
    if (!entry)
        entry = DecodeType<FunctionCall>(j, entry_type);
}

View File

@ -6,16 +6,50 @@
#define C_ANALYZER_OPERATIONLOG_HPP
#include <vector>
#include <variant>
#include <nlohmann/json.hpp>
struct OperationLog
{
std::string operation;
unsigned int line;
std::string operand_lhs;
std::string operand_rhs;
std::string operand_result;
bool is_fallthrough;
std::vector<int> current_for_loops;
/// Polymorphic payload of an OperationLog.
struct IEntry
{
    /// Entries are owned and destroyed through std::unique_ptr<IEntry>, so the
    /// destructor must be virtual for the derived object to be destroyed correctly.
    virtual ~IEntry() = default;

    /// Serialises the concrete entry to JSON.
    [[nodiscard]] virtual nlohmann::json toJson() const = 0;
};
// Log entry describing one function call.
struct FunctionCall : IEntry
{
// Tag compared against OperationLog::entry_type when decoding from JSON.
static constexpr char TYPE_NAME[] = "function_call";
// Name of the called function.
std::string function_name;
// Type name recorded for the call (serialised as "call_result_type").
std::string call_result_type;
[[nodiscard]] nlohmann::json toJson() const override;
};
// Log entry describing an operator-like operation (binary/unary operator, subscript).
struct BasicOperation : IEntry
{
// Tag compared against OperationLog::entry_type when decoding from JSON.
static constexpr char TYPE_NAME[] = "basic_operation";
// Opcode string, or a fixed label such as "subscript".
std::string operation_name;
// Type name of the first operand; left empty when there is none.
std::string type_lhs;
// Type name of the second operand; left empty when there is none.
std::string type_rhs;
// Type name of the whole expression.
std::string type_result;
[[nodiscard]] nlohmann::json toJson() const override;
};
unsigned int line = 0;
int branch_number = 0;
std::string entry_type;
std::unique_ptr<IEntry> entry;
OperationLog() = default;
OperationLog(const OperationLog&) = delete;
OperationLog(OperationLog&&) = default;
void DecodeEntry(const nlohmann::json& j);
};
class IOperationOutput
@ -23,33 +57,58 @@ class IOperationOutput
public:
virtual ~IOperationOutput() = default;
virtual void pushOperation(const std::string& filename, const OperationLog& op) = 0;
virtual void pushOperation(const std::string& filename, OperationLog&& op) = 0;
};
#include <nlohmann/json.hpp>
// Serialises an OperationLog to a JSON object.
// NOTE(review): this span comes from a rendered diff. The keys of the removed
// layout ("operation" ... "current_for_loops") are interleaved with the added
// ones ("entry_type", "entry", "branch_number"), so it is not compilable as shown.
inline void to_json(nlohmann::json& j, const OperationLog& l)
{
j = nlohmann::json{
{"operation", l.operation},
{"line", l.line},
{"operand_lhs", l.operand_lhs},
{"operand_rhs", l.operand_rhs},
{"operand_result", l.operand_result},
{"is_fallthrough", l.is_fallthrough},
{"current_for_loops", l.current_for_loops}
{"entry_type", l.entry_type},
// NOTE(review): entry is dereferenced without a null check. A default-constructed log,
// or one whose entry_type DecodeEntry() did not recognise, has a null entry.
{"entry", l.entry->toJson()},
{"branch_number", l.branch_number}
};
}
// Deserialises an OperationLog from a JSON object.
// NOTE(review): this span comes from a rendered diff. Reads of the removed fields
// are interleaved with reads of the added ones, so it is not compilable as shown.
inline void from_json(const nlohmann::json& j, OperationLog& l)
{
j.at("operation").get_to(l.operation);
j.at("line").get_to(l.line);
j.at("operand_lhs").get_to(l.operand_lhs);
j.at("operand_rhs").get_to(l.operand_rhs);
j.at("operand_result").get_to(l.operand_result);
j.at("is_fallthrough").get_to(l.is_fallthrough);
j.at("current_for_loops").get_to(l.current_for_loops);
// entry_type must be read first: DecodeEntry() uses it to pick the concrete entry type.
j.at("entry_type").get_to(l.entry_type);
// NOTE(review): every other key is read with at(); operator[] on a const json presumably
// does not throw when "entry" is missing. Confirm and consider at() for consistency.
l.DecodeEntry(j["entry"]);
j.at("branch_number").get_to(l.branch_number);
}
/// Serialises a BasicOperation entry as a JSON object with one key per field.
inline void to_json(nlohmann::json& j, const OperationLog::BasicOperation& bo)
{
    j = nlohmann::json::object();
    j["operation_name"] = bo.operation_name;
    j["type_lhs"] = bo.type_lhs;
    j["type_rhs"] = bo.type_rhs;
    j["type_result"] = bo.type_result;
}
/// Fills a BasicOperation entry from a JSON object; every key is required.
inline void from_json(const nlohmann::json& j, OperationLog::BasicOperation& bo)
{
    bo.operation_name = j.at("operation_name").get<std::string>();
    bo.type_lhs = j.at("type_lhs").get<std::string>();
    bo.type_rhs = j.at("type_rhs").get<std::string>();
    bo.type_result = j.at("type_result").get<std::string>();
}
/// Serialises a FunctionCall entry as a JSON object with one key per field.
inline void to_json(nlohmann::json& j, const OperationLog::FunctionCall& fcall)
{
    j = nlohmann::json::object();
    j["function_name"] = fcall.function_name;
    j["call_result_type"] = fcall.call_result_type;
}
/// Fills a FunctionCall entry from a JSON object; every key is required.
inline void from_json(const nlohmann::json& j, OperationLog::FunctionCall& fcall)
{
    fcall.function_name = j.at("function_name").get<std::string>();
    fcall.call_result_type = j.at("call_result_type").get<std::string>();
}
#endif //C_ANALYZER_OPERATIONLOG_HPP

View File

@ -18,14 +18,19 @@ OperationStorage::~OperationStorage()
_dumpToFile();
}
void OperationStorage::pushOperation(const std::string& filename, const OperationLog& op)
// Switches the JSON dump to pretty-printed output by setting _pretty_print.
void OperationStorage::enablePrettyPrint()
{
_pretty_print = true;
}
// Appends op to the list of operations recorded for filename, creating that list
// on first use. op is moved into the container.
// NOTE(review): this span comes from a rendered diff. The removed insert/push_back
// lines are interleaved with the added emplace/emplace_back lines, so it is not
// compilable as shown.
void OperationStorage::pushOperation(const std::string& filename, OperationLog&& op)
{
auto it = _operations.find(filename);
if (it == _operations.end())
it = _operations.insert({ filename, {} }).first;
it = _operations.emplace(filename, std::vector<OperationLog>()).first;
it->second.push_back(op);
it->second.emplace_back(std::move(op));
}
const std::unordered_map<std::string, std::vector<OperationLog>>& OperationStorage::getOperations() const
@ -39,5 +44,8 @@ void OperationStorage::_dumpToFile()
std::ofstream file(_output_filename);
file << json;
if (_pretty_print)
file << std::setw(4) << json;
else
file << json;
}

View File

@ -20,12 +20,15 @@ public:
explicit OperationStorage(const std::string& output_filename);
~OperationStorage() override;
void pushOperation(const std::string& filename, const OperationLog& op) override;
void enablePrettyPrint();
void pushOperation(const std::string& filename, OperationLog&& op) override;
[[nodiscard]] const std::unordered_map<std::string, std::vector<OperationLog>>& getOperations() const;
private:
std::unordered_map<std::string, std::vector<OperationLog>> _operations;
std::string _output_filename;
bool _pretty_print = false;
void _dumpToFile();
};

View File

@ -24,6 +24,8 @@ static cl::opt<std::string> OutputFile("o", cl::desc("File to output the JSON to
static cl::opt<std::string> RootDirectory("r", cl::desc("The root directory of the source files."),
cl::cat(MyToolCategory));
// Opt-in switch for indented JSON output. Registered under MyToolCategory like the
// tool's other options, so it is grouped with them in --help.
// NOTE(review): CommonOptionsParser presumably hides options outside the tool's category.
static cl::opt<bool> PrettyPrint("pretty", cl::desc("Pretty-print the output JSON."),
                                 cl::cat(MyToolCategory));
// CommonOptionsParser declares HelpMessage with a description of the common
// command-line options related to the compilation database and input files.
// It's nice to have this help message in all tools.
@ -50,6 +52,9 @@ int main(int argc, const char** argv)
OperationStorage storage(OutputFile.getValue());
if (PrettyPrint.getValue())
storage.enablePrettyPrint();
OperationFinder op_finder(&storage);
#if 0