Compare commits

...

3 Commits

Author SHA1 Message Date
83ca2deb0e Better branch detection model
For loops are now broken up into 2 branches: 1 (init) and 2 (cond + inc).
2021-03-07 13:02:14 +02:00
a23e5c66ab Add subscript operation recognition 2021-03-07 11:49:16 +02:00
7a852f8f94 Add function call recognition and pretty printing 2021-03-07 01:25:48 +02:00
10 changed files with 367 additions and 123 deletions

View File

@ -25,7 +25,7 @@ add_executable(op-finder
OperationFinderAstVisitor.cpp OperationFinderAstVisitor.cpp
OperationFinderAstConsumer.cpp OperationFinderAstConsumer.cpp
OperationFinderAstAction.cpp OperationFinderAstAction.cpp
) OperationLog.cpp)
target_include_directories(op-finder target_include_directories(op-finder
PRIVATE PRIVATE

View File

@ -11,6 +11,35 @@
using namespace clang; using namespace clang;
using namespace clang::ast_matchers; using namespace clang::ast_matchers;
namespace
{
std::string ResolveTypeName(const QualType& t)
{
if (t->isTypedefNameType())
{
const TypedefType* tdt = cast<TypedefType>(t);
assert(tdt);
llvm::outs() << "Typedef type: " << t.getAsString()
<< ", underlying: " << tdt->desugar().getAsString()
<< ".";
return tdt->desugar().getAsString();
}
else if (t->isBuiltinType())
{
llvm::outs() << "Builtin type: " << t.getAsString() << ".";
return t.getAsString();
}
else
{
llvm::outs() << "Other type: " << t.getAsString() << ".";
return t.getAsString();
}
};
}
OperationFinder::OperationFinder(IOperationOutput* storage) OperationFinder::OperationFinder(IOperationOutput* storage)
: _storage(storage) : _storage(storage)
{ {
@ -19,113 +48,123 @@ OperationFinder::OperationFinder(IOperationOutput* storage)
void OperationFinder::processArithmetic(const clang::BinaryOperator* op, const clang::SourceManager& source_manager) void OperationFinder::processArithmetic(const clang::BinaryOperator* op, const clang::SourceManager& source_manager)
{ {
const auto [file_name, line_number, column_number] = resolveLocations(op, source_manager); auto [file_name, log] = _createBaseOperationLog(op, source_manager);
const std::string op_code = getOpcode(op).str();
llvm::outs() << file_name << ":" llvm::outs() << "\n\tBinary arithmetic: Type: " << op_code;
<< line_number << ":"
<< column_number << ":"
<< "Binary arithmetic: Type: " << getOpcode(op) << " LHS: ";
OperationLog log; log.entry_type = OperationLog::BasicOperation::TYPE_NAME;
log.line = line_number; log.entry = _createBasicOperationLogEntry(op_code, op, op->getLHS(), op->getRHS());
log.operation = getOpcode(op).str();
log.current_for_loops = _for_loop_stack;
log.is_fallthrough = _in_fallthrough;
_processExpressionTypes(log, op, op->getLHS(), op->getRHS()); _storage->pushOperation(file_name, std::move(log));
_storage->pushOperation(file_name, log);
llvm::outs() << "\n"; llvm::outs() << "\n";
} }
void OperationFinder::processUnaryArithmetic(const clang::UnaryOperator* op, const clang::SourceManager& source_manager) void OperationFinder::processUnaryArithmetic(const clang::UnaryOperator* op, const clang::SourceManager& source_manager)
{ {
const Expr* lhs = op->getExprStmt(); auto [file_name, log] = _createBaseOperationLog(op, source_manager);
const std::string op_code = getOpcode(op).str();
assert(op); llvm::outs() << "\n\tUnary arithmetic: Type: " << op_code;
assert(lhs);
const auto [file_name, line_number, column_number] = resolveLocations(op, source_manager); log.entry_type = OperationLog::BasicOperation::TYPE_NAME;
log.entry = _createBasicOperationLogEntry(op_code, op, op->getExprStmt(), nullptr);
llvm::outs() << file_name << ":" _storage->pushOperation(file_name, std::move(log));
<< line_number << ":"
<< column_number << ":"
<< "Unary arithmetic: Type: " << getOpcode(op) << " LHS: ";
OperationLog log;
log.line = line_number;
log.operation = getOpcode(op).str();
log.current_for_loops = _for_loop_stack;
log.is_fallthrough = _in_fallthrough;
_processExpressionTypes(log, op, lhs, nullptr);
_storage->pushOperation(file_name, log);
llvm::outs() << "\n"; llvm::outs() << "\n";
} }
void OperationFinder::fallthroughBranchEntered() void OperationFinder::processFunctionCall(const clang::CallExpr* call, const SourceManager& source_manager)
{ {
_in_fallthrough = true; const FunctionDecl* func = call->getDirectCallee();
assert(func);
auto [file_name, log] = _createBaseOperationLog(call, source_manager);
const std::string func_name = func->getNameAsString();
llvm::outs() << "\n\tFunction call: func name: " << func_name << "\n\tResult eval type: ";
const std::string res_type = ResolveTypeName(func->getType());
auto func_call = std::make_unique<OperationLog::FunctionCall>();
func_call->function_name = func_name;
func_call->call_result_type = res_type;
log.entry_type = OperationLog::FunctionCall::TYPE_NAME;
log.entry = std::move(func_call);
_storage->pushOperation(file_name, std::move(log));
llvm::outs() << "\n";
} }
void OperationFinder::fallthroughBranchExited() void OperationFinder::processArraySubscript(const clang::ArraySubscriptExpr* subscript, const clang::SourceManager& source_manager)
{ {
_in_fallthrough = false; auto [file_name, log] = _createBaseOperationLog(subscript, source_manager);
llvm::outs() << "\n\tSubscript:";
log.entry_type = OperationLog::BasicOperation::TYPE_NAME;
log.entry = _createBasicOperationLogEntry("subscript", subscript, subscript->getBase(), subscript->getIdx());
_storage->pushOperation(file_name, std::move(log));
llvm::outs() << "\n";
} }
void OperationFinder::forLoopEntered() void OperationFinder::branchEntered()
{ {
_for_loop_stack.push_back(_next_for_loop_id); _current_branch++;
llvm::outs() << "Branch entered: " << _current_branch << "\n";
_next_for_loop_id++;
} }
void OperationFinder::forLoopExited() void OperationFinder::branchExited()
{ {
_for_loop_stack.pop_back(); llvm::outs() << "Branch exited: " << _current_branch << "\n";
_current_branch--;
assert(_current_branch > -1);
} }
void OperationFinder::_processExpressionTypes(OperationLog& log, const Expr* source, const Expr* op1, const Expr* op2) std::unique_ptr<OperationLog::BasicOperation>
OperationFinder::_createBasicOperationLogEntry(const std::string& opcode, const Expr* source, const Expr* op1, const Expr* op2)
{ {
auto log = std::make_unique<OperationLog::BasicOperation>();
auto printTypeName = [](const QualType& t) -> std::string log->operation_name = opcode;
{
if (t->isTypedefNameType())
{
const TypedefType* tdt = cast<TypedefType>(t);
assert(tdt);
llvm::outs() << "Typedef type: " << t.getAsString()
<< ", underlying: " << tdt->desugar().getAsString()
<< ".";
return tdt->desugar().getAsString(); llvm::outs() << "\n\tExpression eval type: ";
} log->type_result = ResolveTypeName(source->getType());
else if (t->isBuiltinType())
{
llvm::outs() << "Builtin type: " << t.getAsString() << ".";
return t.getAsString();
}
else
{
llvm::outs() << "Other type: " << t.getAsString() << ".";
return t.getAsString();
}
};
llvm::outs() << "Expression types:\n\tExpression eval type: ";
log.operand_result = printTypeName(source->getType());
if (op1) if (op1)
{ {
llvm::outs() << "\n\tLHS eval type: "; llvm::outs() << "\n\tLHS eval type: ";
log.operand_lhs = printTypeName(op1->getType()); log->type_lhs = ResolveTypeName(op1->getType());
} }
if (op2) if (op2)
{ {
llvm::outs() << "\n\tRHS eval type: "; llvm::outs() << "\n\tRHS eval type: ";
log.operand_rhs = printTypeName(op2->getType()); log->type_rhs = ResolveTypeName(op2->getType());
} }
return log;
}
// Builds the part of an OperationLog that is common to every operation kind.
//
// Resolves the location of `stmt` through resolveLocations(), stores the line
// number and the current branch counter in a fresh OperationLog, and prints a
// "file:line:column:" prefix to llvm::outs().
//
// Returns the resolved file name paired with the partially filled log; the
// caller is expected to set entry_type and entry afterwards.
std::pair<std::string, OperationLog> OperationFinder::_createBaseOperationLog(const clang::Stmt* stmt, const clang::SourceManager& source_manager)
{
const auto [file_name, line_number, column_number] = resolveLocations(stmt, source_manager);
OperationLog log;
log.line = line_number;
// Snapshot of the branch depth at the time the operation is seen.
log.branch_number = _current_branch;
llvm::outs() << file_name << ":"
<< line_number << ":"
<< column_number << ":";
// OperationLog is move-only (copy constructor deleted), hence the explicit move.
return { file_name, std::move(log) };
} }

View File

@ -16,19 +16,19 @@ public:
void processArithmetic(const clang::BinaryOperator* op, const clang::SourceManager& source_manager); void processArithmetic(const clang::BinaryOperator* op, const clang::SourceManager& source_manager);
void processUnaryArithmetic(const clang::UnaryOperator* op, const clang::SourceManager& source_manager); void processUnaryArithmetic(const clang::UnaryOperator* op, const clang::SourceManager& source_manager);
void processFunctionCall(const clang::CallExpr* call, const clang::SourceManager& source_manager);
void processArraySubscript(const clang::ArraySubscriptExpr* subscript, const clang::SourceManager& source_manager);
void fallthroughBranchEntered(); void branchEntered();
void fallthroughBranchExited(); void branchExited();
void forLoopEntered();
void forLoopExited();
private: private:
void _processExpressionTypes(OperationLog& log, const clang::Expr* source, const clang::Expr* op1, const clang::Expr* op2); std::unique_ptr<OperationLog::BasicOperation>
_createBasicOperationLogEntry(const std::string& opcode, const clang::Expr* source, const clang::Expr* op1, const clang::Expr* op2);
int _next_for_loop_id = 0; std::pair<std::string, OperationLog> _createBaseOperationLog(const clang::Stmt* stmt, const clang::SourceManager& source_manager);
bool _in_fallthrough = false;
std::vector<int> _for_loop_stack; int _current_branch = 0;
IOperationOutput* _storage; IOperationOutput* _storage;
}; };

View File

@ -43,21 +43,32 @@ bool OperationFinderAstVisitor::VisitUnaryOperator(clang::UnaryOperator* op)
return true; return true;
} }
/// Visitor hook for call expressions.
///
/// Forwards the call to the operation finder when it resolves to a function
/// declaration. Calls without a direct callee (e.g. calls through a function
/// pointer) are skipped, because OperationFinder::processFunctionCall()
/// requires a non-null direct callee.
///
/// @param call Call expression being visited.
/// @return Always true.
bool OperationFinderAstVisitor::VisitCallExpr(clang::CallExpr* call)
{
    assert(_context);

    // getDirectCallee() is null for indirect calls; passing such a call on
    // would trip the assert (or dereference null) in processFunctionCall().
    if (!call->getDirectCallee())
        return true;

    _op_finder->processFunctionCall(call, _context->getSourceManager());

    return true;
}
/// Visitor hook for array subscript expressions: hands the expression and
/// the context's source manager to the operation finder.
///
/// @param subscript Subscript expression being visited.
/// @return Always true.
bool OperationFinderAstVisitor::VisitArraySubscriptExpr(clang::ArraySubscriptExpr* subscript)
{
    assert(_context);

    const clang::SourceManager& source_manager = _context->getSourceManager();
    _op_finder->processArraySubscript(subscript, source_manager);

    return true;
}
bool OperationFinderAstVisitor::dataTraverseStmtPre(clang::Stmt* stmt) bool OperationFinderAstVisitor::dataTraverseStmtPre(clang::Stmt* stmt)
{ {
assert(_context); assert(_context);
if (auto* loop = clang::dyn_cast<clang::ForStmt>(stmt)) if (clang::Stmt* branch_entry = _isBranchEntry(stmt))
{ {
if (loop->getInit()) _branch_stack.push_back(branch_entry);
{ _op_finder->branchEntered();
_loop_init = loop->getInit();
_op_finder->fallthroughBranchEntered();
}
else
{
_op_finder->forLoopEntered();
}
} }
return true; return true;
@ -67,16 +78,77 @@ bool OperationFinderAstVisitor::dataTraverseStmtPost(clang::Stmt* stmt)
{ {
assert(_context); assert(_context);
if (_loop_init && _loop_init == stmt) if (_loop_header.in_loop_header && (_loop_header.init == stmt || _loop_header.header_end == stmt))
{ {
_op_finder->forLoopEntered(); if (_loop_header.header_end == stmt)
_op_finder->fallthroughBranchExited(); {
_loop_init = nullptr; assert(_branch_stack.back() == stmt);
_branch_stack.pop_back();
_op_finder->branchExited();
if (_loop_header.init)
{
assert(_branch_stack.back() == _loop_header.init);
_branch_stack.pop_back();
_op_finder->branchExited();
}
_loop_header = {};
}
else if (_loop_header.init == stmt && !_loop_header.header_end)
{
assert(_branch_stack.back() == stmt);
_branch_stack.pop_back();
_op_finder->branchExited();
_loop_header = {};
}
} }
else if (clang::dyn_cast<clang::ForStmt>(stmt)) else if (!_branch_stack.empty() && _branch_stack.back() == stmt)
{ {
_op_finder->forLoopExited(); _branch_stack.pop_back();
_op_finder->branchExited();
}
else
{
assert(std::find(_branch_stack.cbegin(), _branch_stack.cend(), stmt)
== _branch_stack.cend());
} }
return true; return true;
} }
/// Decides whether visiting `stmt` opens a new branch.
///
/// A for loop is split into two branches: one identified by its init
/// statement and one covering the condition and increment. When a ForStmt is
/// seen, the loop-header state is recorded (init, first header statement,
/// last header statement). The second branch opens when the first header
/// statement (condition, or increment if there is no condition) is reached.
///
/// Assumes _loop_header is in its default state when a ForStmt is entered;
/// header_start/header_end are only assigned for parts the loop actually has.
///
/// @param stmt Statement about to be traversed.
/// @return The statement to push on the branch stack, or nullptr when `stmt`
///         does not open a branch.
clang::Stmt* OperationFinderAstVisitor::_isBranchEntry(clang::Stmt* stmt)
{
    if (auto* loop = clang::dyn_cast<clang::ForStmt>(stmt))
    {
        _loop_header.init = loop->getInit();

        if (loop->getCond())
        {
            _loop_header.header_start = loop->getCond();
            _loop_header.header_end = loop->getCond();
        }

        if (loop->getInc())
        {
            if (!_loop_header.header_start)
                _loop_header.header_start = loop->getInc();

            _loop_header.header_end = loop->getInc();
        }

        _loop_header.in_loop_header = _loop_header.init || _loop_header.header_end;

        // Only the init statement opens a branch at loop entry. The cond/inc
        // branch is opened below once header_start is reached; opening it
        // here as well pushed header_end twice for loops without an init,
        // while dataTraverseStmtPost pops it only once, leaving the branch
        // counter permanently one level too deep.
        return _loop_header.init;
    }

    if (_loop_header.in_loop_header && _loop_header.header_start == stmt)
        return _loop_header.header_end;

    return nullptr;
}

View File

@ -20,6 +20,8 @@ public:
bool VisitForStmt(clang::ForStmt* stmt); bool VisitForStmt(clang::ForStmt* stmt);
bool VisitBinaryOperator(clang::BinaryOperator* op); bool VisitBinaryOperator(clang::BinaryOperator* op);
bool VisitUnaryOperator(clang::UnaryOperator* op); bool VisitUnaryOperator(clang::UnaryOperator* op);
bool VisitCallExpr(clang::CallExpr* call);
bool VisitArraySubscriptExpr(clang::ArraySubscriptExpr* subscript);
bool dataTraverseStmtPre(clang::Stmt* stmt); bool dataTraverseStmtPre(clang::Stmt* stmt);
bool dataTraverseStmtPost(clang::Stmt* stmt); bool dataTraverseStmtPost(clang::Stmt* stmt);
@ -28,7 +30,19 @@ private:
clang::ASTContext* _context; clang::ASTContext* _context;
OperationFinder* _op_finder; OperationFinder* _op_finder;
clang::Stmt* _loop_init = nullptr; struct _LoopHeaderStateMachine
{
bool in_loop_header = false;
clang::Stmt* init = nullptr;
clang::Stmt* header_start = nullptr;
clang::Stmt* header_end = nullptr;
};
_LoopHeaderStateMachine _loop_header;
std::vector<clang::Stmt*> _branch_stack;
clang::Stmt* _isBranchEntry(clang::Stmt* stmt);
}; };
#endif //C_ANALYZER_OPERATIONFINDERASTVISITOR_HPP #endif //C_ANALYZER_OPERATIONFINDERASTVISITOR_HPP

View File

@ -0,0 +1,44 @@
//
// Created by erki on 06.03.21.
//
#include "OperationLog.hpp"
namespace
{

/// Attempts to decode `j` as an entry of type T.
///
/// @tparam T Entry type exposing a static TYPE_NAME and a from_json overload.
/// @param j JSON value holding the entry payload.
/// @param entry_type Type tag stored alongside the payload.
/// @return A decoded T when `entry_type` equals T::TYPE_NAME, nullptr otherwise.
template<typename T>
std::unique_ptr<T> DecodeType(const nlohmann::json& j, const std::string& entry_type)
{
    // Tag mismatch: this decoder is not responsible for the payload.
    if (entry_type != T::TYPE_NAME)
        return nullptr;

    auto decoded = std::make_unique<T>();
    j.get_to(*decoded);
    return decoded;
}

}
/// Serialises this function-call entry to JSON through the to_json overload
/// for OperationLog::FunctionCall.
nlohmann::json OperationLog::FunctionCall::toJson() const
{
    nlohmann::json serialized = *this;
    return serialized;
}
/// Serialises this basic-operation entry to JSON through the to_json overload
/// for OperationLog::BasicOperation.
nlohmann::json OperationLog::BasicOperation::toJson() const
{
    nlohmann::json serialized = *this;
    return serialized;
}
/// Rebuilds `entry` from its JSON payload according to `entry_type`.
///
/// The known entry types are tried in order (BasicOperation, then
/// FunctionCall). If `entry_type` matches neither, `entry` ends up null.
///
/// @param j JSON value holding the entry payload.
void OperationLog::DecodeEntry(const nlohmann::json& j)
{
    entry = DecodeType<BasicOperation>(j, entry_type);

    if (!entry)
        entry = DecodeType<FunctionCall>(j, entry_type);
}

View File

@ -6,16 +6,50 @@
#define C_ANALYZER_OPERATIONLOG_HPP #define C_ANALYZER_OPERATIONLOG_HPP
#include <vector> #include <vector>
#include <variant>
#include <nlohmann/json.hpp>
struct OperationLog struct OperationLog
{ {
std::string operation; struct IEntry
unsigned int line; {
std::string operand_lhs; [[nodiscard]] virtual nlohmann::json toJson() const = 0;
std::string operand_rhs; };
std::string operand_result;
bool is_fallthrough; struct FunctionCall : IEntry
std::vector<int> current_for_loops; {
static constexpr char TYPE_NAME[] = "function_call";
std::string function_name;
std::string call_result_type;
[[nodiscard]] nlohmann::json toJson() const override;
};
struct BasicOperation : IEntry
{
static constexpr char TYPE_NAME[] = "basic_operation";
std::string operation_name;
std::string type_lhs;
std::string type_rhs;
std::string type_result;
[[nodiscard]] nlohmann::json toJson() const override;
};
unsigned int line = 0;
int branch_number = 0;
std::string entry_type;
std::unique_ptr<IEntry> entry;
OperationLog() = default;
OperationLog(const OperationLog&) = delete;
OperationLog(OperationLog&&) = default;
void DecodeEntry(const nlohmann::json& j);
}; };
class IOperationOutput class IOperationOutput
@ -23,33 +57,58 @@ class IOperationOutput
public: public:
virtual ~IOperationOutput() = default; virtual ~IOperationOutput() = default;
virtual void pushOperation(const std::string& filename, const OperationLog& op) = 0; virtual void pushOperation(const std::string& filename, OperationLog&& op) = 0;
}; };
#include <nlohmann/json.hpp>
inline void to_json(nlohmann::json& j, const OperationLog& l) inline void to_json(nlohmann::json& j, const OperationLog& l)
{ {
j = nlohmann::json{ j = nlohmann::json{
{"operation", l.operation},
{"line", l.line}, {"line", l.line},
{"operand_lhs", l.operand_lhs}, {"entry_type", l.entry_type},
{"operand_rhs", l.operand_rhs}, {"entry", l.entry->toJson()},
{"operand_result", l.operand_result}, {"branch_number", l.branch_number}
{"is_fallthrough", l.is_fallthrough},
{"current_for_loops", l.current_for_loops}
}; };
} }
inline void from_json(const nlohmann::json& j, OperationLog& l) inline void from_json(const nlohmann::json& j, OperationLog& l)
{ {
j.at("operation").get_to(l.operation);
j.at("line").get_to(l.line); j.at("line").get_to(l.line);
j.at("operand_lhs").get_to(l.operand_lhs); j.at("entry_type").get_to(l.entry_type);
j.at("operand_rhs").get_to(l.operand_rhs); l.DecodeEntry(j["entry"]);
j.at("operand_result").get_to(l.operand_result);
j.at("is_fallthrough").get_to(l.is_fallthrough); j.at("branch_number").get_to(l.branch_number);
j.at("current_for_loops").get_to(l.current_for_loops); }
/// Writes a BasicOperation into `j` as a JSON object with the keys
/// "operation_name", "type_lhs", "type_rhs" and "type_result".
inline void to_json(nlohmann::json& j, const OperationLog::BasicOperation& bo)
{
    j = nlohmann::json::object();
    j["operation_name"] = bo.operation_name;
    j["type_lhs"] = bo.type_lhs;
    j["type_rhs"] = bo.type_rhs;
    j["type_result"] = bo.type_result;
}
/// Fills a BasicOperation from the JSON object `j`, reading the keys
/// "operation_name", "type_lhs", "type_rhs" and "type_result" in that order.
inline void from_json(const nlohmann::json& j, OperationLog::BasicOperation& bo)
{
    bo.operation_name = j.at("operation_name").get<std::string>();
    bo.type_lhs = j.at("type_lhs").get<std::string>();
    bo.type_rhs = j.at("type_rhs").get<std::string>();
    bo.type_result = j.at("type_result").get<std::string>();
}
/// Writes a FunctionCall into `j` as a JSON object with the keys
/// "function_name" and "call_result_type".
inline void to_json(nlohmann::json& j, const OperationLog::FunctionCall& fcall)
{
    j = nlohmann::json::object();
    j["function_name"] = fcall.function_name;
    j["call_result_type"] = fcall.call_result_type;
}
// Fills a FunctionCall from the JSON object `j`, reading the keys
// "function_name" and "call_result_type" (the same keys the matching
// to_json overload writes).
inline void from_json(const nlohmann::json& j, OperationLog::FunctionCall& fcall)
{
j.at("function_name").get_to(fcall.function_name);
j.at("call_result_type").get_to(fcall.call_result_type);
} }
#endif //C_ANALYZER_OPERATIONLOG_HPP #endif //C_ANALYZER_OPERATIONLOG_HPP

View File

@ -18,14 +18,19 @@ OperationStorage::~OperationStorage()
_dumpToFile(); _dumpToFile();
} }
void OperationStorage::pushOperation(const std::string& filename, const OperationLog& op) void OperationStorage::enablePrettyPrint()
{
_pretty_print = true;
}
void OperationStorage::pushOperation(const std::string& filename, OperationLog&& op)
{ {
auto it = _operations.find(filename); auto it = _operations.find(filename);
if (it == _operations.end()) if (it == _operations.end())
it = _operations.insert({ filename, {} }).first; it = _operations.emplace(filename, std::vector<OperationLog>()).first;
it->second.push_back(op); it->second.emplace_back(std::move(op));
} }
const std::unordered_map<std::string, std::vector<OperationLog>>& OperationStorage::getOperations() const const std::unordered_map<std::string, std::vector<OperationLog>>& OperationStorage::getOperations() const
@ -39,5 +44,8 @@ void OperationStorage::_dumpToFile()
std::ofstream file(_output_filename); std::ofstream file(_output_filename);
file << json; if (_pretty_print)
file << std::setw(4) << json;
else
file << json;
} }

View File

@ -20,12 +20,15 @@ public:
explicit OperationStorage(const std::string& output_filename); explicit OperationStorage(const std::string& output_filename);
~OperationStorage() override; ~OperationStorage() override;
void pushOperation(const std::string& filename, const OperationLog& op) override; void enablePrettyPrint();
void pushOperation(const std::string& filename, OperationLog&& op) override;
[[nodiscard]] const std::unordered_map<std::string, std::vector<OperationLog>>& getOperations() const; [[nodiscard]] const std::unordered_map<std::string, std::vector<OperationLog>>& getOperations() const;
private: private:
std::unordered_map<std::string, std::vector<OperationLog>> _operations; std::unordered_map<std::string, std::vector<OperationLog>> _operations;
std::string _output_filename; std::string _output_filename;
bool _pretty_print = false;
void _dumpToFile(); void _dumpToFile();
}; };

View File

@ -24,6 +24,8 @@ static cl::opt<std::string> OutputFile("o", cl::desc("File to output the JSON to
static cl::opt<std::string> RootDirectory("r", cl::desc("The root directory of the source files."), static cl::opt<std::string> RootDirectory("r", cl::desc("The root directory of the source files."),
cl::cat(MyToolCategory)); cl::cat(MyToolCategory));
static cl::opt<bool> PrettyPrint("pretty", cl::desc("Pretty-print the output JSON."));
// CommonOptionsParser declares HelpMessage with a description of the common // CommonOptionsParser declares HelpMessage with a description of the common
// command-line options related to the compilation database and input files. // command-line options related to the compilation database and input files.
// It's nice to have this help message in all tools. // It's nice to have this help message in all tools.
@ -50,6 +52,9 @@ int main(int argc, const char** argv)
OperationStorage storage(OutputFile.getValue()); OperationStorage storage(OutputFile.getValue());
if (PrettyPrint.getValue())
storage.enablePrettyPrint();
OperationFinder op_finder(&storage); OperationFinder op_finder(&storage);
#if 0 #if 0