Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ set(RUNTIME_SOURCE_FILES
runtime/PyType.cpp
runtime/PyZip.cpp
runtime/RuntimeError.cpp
runtime/SourceManager.cpp
runtime/StopIteration.cpp
runtime/SyntaxError.cpp
runtime/TypeError.cpp
Expand All @@ -242,6 +243,7 @@ set(UNITTEST_SOURCES
runtime/PyNumber_tests.cpp
runtime/PyString_tests.cpp
runtime/PyType_tests.cpp
runtime/SourceManager_tests.cpp
testing/main.cpp)

set(PYTHON_LIB_PATH ${cpython_SOURCE_DIR}/Lib)
Expand Down
9 changes: 9 additions & 0 deletions src/executable/FunctionBlock.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,21 @@

#include "Program.hpp"
#include "forward.hpp"
#include <cstdint>
#include <list>
#include <memory>
#include <string>
#include <vector>

using InstructionVector = std::vector<std::unique_ptr<Instruction>>;

// One entry of a function's instruction -> source position table.
//
// All members default to zero so that a default-constructed entry never
// carries indeterminate values; the type remains an aggregate, so
// `InstructionSourceLocation{ index, line, column }` keeps working.
struct InstructionSourceLocation
{
    // Index of the instruction (within its function's instruction vector) this entry refers to.
    uint32_t instruction_index{ 0 };
    // Source line associated with the instruction.
    uint32_t line{ 0 };
    // Source column associated with the instruction.
    uint32_t column{ 0 };
};

struct FunctionMetaData
{
std::string function_name;
Expand All @@ -33,6 +41,7 @@ struct FunctionBlock
{
FunctionMetaData metadata;
InstructionVector blocks;
std::vector<InstructionSourceLocation> instruction_locations;
std::string to_string() const;
};

Expand Down
29 changes: 26 additions & 3 deletions src/executable/bytecode/Bytecode.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#include "Bytecode.hpp"
#include "ast/AST.hpp"
#include "executable/FunctionBlock.hpp"
#include "instructions/Instructions.hpp"
#include "interpreter/Interpreter.hpp"
#include "runtime/BaseException.hpp"
Expand All @@ -8,23 +10,42 @@
#include "serialization/deserialize.hpp"
#include "serialization/serialize.hpp"

#include <algorithm>
#include <optional>

using namespace py;

// Constructs a bytecode-backed function.
//
// register_count, locals_count, stack_size, function_name and program are
// forwarded to the Function base together with FunctionExecutionBackend::BYTECODE.
// `instructions` and `instruction_locations` are moved into the corresponding
// members; `instruction_locations` is the table consulted by location_for().
//
// NOTE(review): function_name is taken by value but passed on to Function
// without std::move — confirm whether the extra copy is intended.
Bytecode::Bytecode(size_t register_count,
size_t locals_count,
size_t stack_size,
std::string function_name,
InstructionVector instructions,
std::vector<InstructionSourceLocation> instruction_locations,
std::shared_ptr<Program> program)
: Function(register_count,
locals_count,
stack_size,
function_name,
FunctionExecutionBackend::BYTECODE,
std::move(program)),
m_instructions(std::move(instructions))
m_instructions(std::move(instructions)),
m_instruction_locations(std::move(instruction_locations))
{}

// Looks up the source location that applies to `instruction_index`.
//
// The applicable entry is the last one whose instruction_index is <= the
// query. Returns std::nullopt when there is no such entry, i.e. the table is
// empty or the query lies before the first recorded entry.
//
// The binary search requires the table to be ordered by ascending
// instruction_index.
std::optional<InstructionSourceLocation> Bytecode::location_for(size_t instruction_index) const
{
    const auto first = m_instruction_locations.begin();
    // Entries at or before the query form a prefix of the table; locate its end.
    const auto prefix_end = std::partition_point(
        first, m_instruction_locations.end(), [instruction_index](const InstructionSourceLocation &entry) {
            return entry.instruction_index <= instruction_index;
        });
    // An empty prefix covers both the empty table and a query before the first entry.
    if (prefix_end == first) { return std::nullopt; }
    return *std::prev(prefix_end);
}

std::string Bytecode::to_string() const
{
std::ostringstream os;
Expand Down Expand Up @@ -83,6 +104,7 @@ std::unique_ptr<Bytecode> Bytecode::deserialize(std::span<const uint8_t> &buffer
stack_size,
function_name,
std::move(instructions),
std::vector<InstructionSourceLocation>{},
std::move(program));
}

Expand Down Expand Up @@ -144,8 +166,9 @@ py::PyResult<py::Value> Bytecode::eval_loop(VirtualMachine &vm, Interpreter &int
// vm.dump();
if (result.is_err()) {
auto *exception = result.unwrap_err();
size_t tb_lineno = 0;
size_t tb_lasti = std::distance(initial_ip, current_ip);
const size_t tb_lasti = std::distance(initial_ip, current_ip);
const size_t tb_lineno =
location_for(tb_lasti).value_or(InstructionSourceLocation{ 0, 0, 0 }).line;
PyTraceback *tb_next = exception->traceback();
auto traceback =
PyTraceback::create(interpreter.execution_frame(), tb_lasti, tb_lineno, tb_next);
Expand Down
4 changes: 4 additions & 0 deletions src/executable/bytecode/Bytecode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,22 @@
class Bytecode : public Function
{
const InstructionVector m_instructions;
const std::vector<InstructionSourceLocation> m_instruction_locations;

public:
Bytecode(size_t register_count,
size_t locals_count,
size_t stack_size,
std::string function_name,
InstructionVector instructions,
std::vector<InstructionSourceLocation> instruction_locations,
std::shared_ptr<Program> program);

auto begin() const { return m_instructions.begin(); }
auto end() const { return m_instructions.end(); }

std::optional<InstructionSourceLocation> location_for(size_t instruction_index) const;

std::string to_string() const override;

std::vector<uint8_t> serialize() const override;
Expand Down
2 changes: 2 additions & 0 deletions src/executable/bytecode/BytecodeProgram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ std::shared_ptr<BytecodeProgram> BytecodeProgram::create(FunctionBlocks &&func_b
main_func.metadata.stack_size,
main_func.metadata.function_name,
std::move(main_func.blocks),
std::move(main_func.instruction_locations),
program);
auto consts = PyTuple::create(main_func.metadata.consts);
if (consts.is_err()) { TODO(); }
Expand Down Expand Up @@ -69,6 +70,7 @@ std::shared_ptr<BytecodeProgram> BytecodeProgram::create(FunctionBlocks &&func_b
func.metadata.stack_size,
func.metadata.function_name,
std::move(func.blocks),
std::move(func.instruction_locations),
program);
consts = PyTuple::create(func.metadata.consts);
if (consts.is_err()) { TODO(); }
Expand Down
67 changes: 67 additions & 0 deletions src/executable/bytecode/Bytecode_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,73 @@

#include "gtest/gtest.h"

namespace {
// Test helper: builds a Bytecode whose only meaningful content is the given
// instruction -> source location table. It has no instructions, no program and
// zero-sized register/locals/stack counts.
Bytecode make_bytecode_with_locations(std::vector<InstructionSourceLocation> locations)
{
    constexpr size_t register_count = 0;
    constexpr size_t locals_count = 0;
    constexpr size_t stack_size = 0;
    // Returned as a prvalue on purpose: no named local, so no copy/move of Bytecode is needed.
    return Bytecode(register_count,
        locals_count,
        stack_size,
        "<test>",
        InstructionVector{},
        std::move(locations),
        nullptr);
}
}// namespace

// With no recorded locations, a lookup must come back empty.
TEST(BytecodeLocationFor, ReturnsNulloptWhenTableIsEmpty)
{
    const auto bc = make_bytecode_with_locations({});
    const auto loc = bc.location_for(0);
    EXPECT_FALSE(loc.has_value());
}

// The single entry starts at instruction 5, so instruction 0 has no location.
TEST(BytecodeLocationFor, ReturnsNulloptWhenQueryPrecedesFirstEntry)
{
    std::vector<InstructionSourceLocation> table;
    table.push_back(InstructionSourceLocation{ /*instruction_index=*/5, /*line=*/10, /*column=*/2 });
    const auto bc = make_bytecode_with_locations(std::move(table));

    const auto loc = bc.location_for(0);
    EXPECT_FALSE(loc.has_value());
}

// Querying an index that has its own entry returns exactly that entry.
TEST(BytecodeLocationFor, ReturnsExactMatchEntry)
{
    const InstructionSourceLocation middle{ 3, 7, 4 };
    const auto bc = make_bytecode_with_locations({
        InstructionSourceLocation{ 0, 1, 0 },
        middle,
        InstructionSourceLocation{ 10, 12, 0 },
    });

    const auto found = bc.location_for(middle.instruction_index);
    ASSERT_TRUE(found.has_value());
    EXPECT_EQ(found->line, 7u);
    EXPECT_EQ(found->column, 4u);
}

// An entry stays in effect for every instruction index up to (but not
// including) the next entry; the last entry applies to everything after it.
TEST(BytecodeLocationFor, ExtendsEntryUntilNextOne)
{
    const auto bc = make_bytecode_with_locations({
        InstructionSourceLocation{ 0, 1, 0 },
        InstructionSourceLocation{ 3, 7, 4 },
        InstructionSourceLocation{ 10, 12, 0 },
    });

    // Query index paired with the line/column of the most recent preceding entry.
    struct Expectation
    {
        uint32_t idx;
        uint32_t line;
        uint32_t column;
    };
    const Expectation expectations[] = {
        // Covered by the entry at instruction 0.
        { 0u, 1u, 0u },
        { 1u, 1u, 0u },
        { 2u, 1u, 0u },
        // Covered by the entry at instruction 3.
        { 3u, 7u, 4u },
        { 4u, 7u, 4u },
        { 9u, 7u, 4u },
        // Covered by the final entry at instruction 10.
        { 10u, 12u, 0u },
        { 100u, 12u, 0u },
        { 9999u, 12u, 0u },
    };

    for (const auto &expected : expectations) {
        const auto loc = bc.location_for(expected.idx);
        ASSERT_TRUE(loc.has_value()) << "idx=" << expected.idx;
        EXPECT_EQ(loc->line, expected.line) << "idx=" << expected.idx;
        EXPECT_EQ(loc->column, expected.column) << "idx=" << expected.idx;
    }
}

// FIXME: think about what should be tested here
// namespace {
Expand Down
15 changes: 15 additions & 0 deletions src/executable/bytecode/codegen/BytecodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3119,6 +3119,7 @@ std::shared_ptr<Program> BytecodeGenerator::generate_executable(std::string file
ASSERT(m_frame_stack_value_count.size() == 2);
ASSERT(m_frame_free_var_count.size() == 2);
relocate_labels(m_functions);
for (auto &func : m_functions.functions) { func.metadata.filename = filename; }
return BytecodeProgram::create(std::move(m_functions), filename, argv);
}

Expand All @@ -3130,6 +3131,20 @@ InstructionVector *BytecodeGenerator::allocate_block(size_t function_id)
return &function->blocks;
}

void BytecodeGenerator::record_location_for_next_instruction()
{
ASSERT(m_function_id < m_functions.functions.size());
auto &func = *std::next(m_functions.functions.begin(), m_function_id);
const auto line = static_cast<uint32_t>(m_current_source_location.start.row + 1);
const auto column = static_cast<uint32_t>(m_current_source_location.start.column);
auto &locations = func.instruction_locations;
if (!locations.empty() && locations.back().line == line && locations.back().column == column) {
return;
}
const auto next_instruction_index = static_cast<uint32_t>(func.blocks.size());
locations.emplace_back(next_instruction_index, line, column);
}

std::shared_ptr<Program> BytecodeGenerator::compile(std::shared_ptr<ast::Module> node,
std::vector<std::string> argv,
compiler::OptimizationLevel lvl)
Expand Down
8 changes: 8 additions & 0 deletions src/executable/bytecode/codegen/BytecodeGenerator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,8 @@ class BytecodeGenerator : public ast::CodeGenerator
ASTContext m_ctx;
std::stack<Scope> m_stack;

SourceLocation m_current_source_location{};

std::set<size_t> m_clear_exception_before_return_functions;
std::unordered_map<size_t, std::vector<std::function<void(bool)>>> m_return_transform;
std::unordered_map<size_t, size_t> m_current_exception_depth;
Expand All @@ -264,9 +266,12 @@ class BytecodeGenerator : public ast::CodeGenerator
// Constructs an OpType instruction from `args` and appends it to the current
// block. The source location is recorded before the instruction is appended.
template<typename OpType, typename... Args> void emit(Args &&...args)
{
    ASSERT(m_current_block);
    record_location_for_next_instruction();
    auto instruction = std::make_unique<OpType>(std::forward<Args>(args)...);
    m_current_block->push_back(std::move(instruction));
}

void record_location_for_next_instruction();

friend std::ostream &operator<<(std::ostream &os, BytecodeGenerator &generator);

std::string to_string() const;
Expand Down Expand Up @@ -374,8 +379,11 @@ class BytecodeGenerator : public ast::CodeGenerator
{
m_ctx.push_node(node);
const auto old_function_id = m_function_id;
const auto old_source_location = m_current_source_location;
m_function_id = function_id;
m_current_source_location = node->source_location();
auto *value = node->codegen(this);
m_current_source_location = old_source_location;
m_function_id = old_function_id;
m_ctx.pop_node();
return static_cast<BytecodeValue *>(value);
Expand Down
28 changes: 28 additions & 0 deletions src/executable/bytecode/codegen/BytecodeGenerator_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
#include "../Bytecode.hpp"
#include "../BytecodeProgram.hpp"
#include "BytecodeGenerator.hpp"
#include "executable/common.hpp"
#include "lexer/Lexer.hpp"
#include "parser/Parser.hpp"
#include "runtime/PyCode.hpp"

#include "gtest/gtest.h"

Expand Down Expand Up @@ -44,3 +46,29 @@ TEST(BytecodeGenerator, EmitsProgramWithFunctionDefinitions)
ASSERT_EQ(bytecode_generator->functions().size(), 2);
ASSERT_TRUE(bytecode_generator->main_function());
}

// Checks that the instructions of the main function carry source line
// information for a three-statement program.
TEST(BytecodeGenerator, AttachesSourceLineToMainInstructions)
{
    constexpr std::string_view program =
        "x = 1\n"
        "y = 2\n"
        "z = 3\n";

    auto bytecode_generator = generate_bytecode(program);
    auto *code = static_cast<py::PyCode *>(bytecode_generator->main_function());
    ASSERT_TRUE(code);
    const auto *bytecode = static_cast<const Bytecode *>(code->function().get());
    ASSERT_TRUE(bytecode);

    // We don't pin down which instruction maps to which line — that's an
    // implementation detail of the codegen. We do require that at least one
    // instruction reports each of the three source lines (1, 2, 3) and that
    // no instruction reports a bogus line.
    const auto instruction_count =
        static_cast<size_t>(std::distance(bytecode->begin(), bytecode->end()));
    std::set<uint32_t> observed_lines;
    for (size_t i = 0; i < instruction_count; ++i) {
        if (const auto loc = bytecode->location_for(i)) { observed_lines.insert(loc->line); }
    }

    EXPECT_TRUE(observed_lines.contains(1u));
    EXPECT_TRUE(observed_lines.contains(2u));
    EXPECT_TRUE(observed_lines.contains(3u));

    // The program has exactly three lines, so anything outside [1, 3] is bogus.
    for (const auto line : observed_lines) {
        EXPECT_GE(line, 1u) << "unexpected line " << line;
        EXPECT_LE(line, 3u) << "unexpected line " << line;
    }
}
Loading
Loading