diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 90b4ff04..86698ec4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -217,6 +217,7 @@ set(RUNTIME_SOURCE_FILES runtime/PyType.cpp runtime/PyZip.cpp runtime/RuntimeError.cpp + runtime/SourceManager.cpp runtime/StopIteration.cpp runtime/SyntaxError.cpp runtime/TypeError.cpp @@ -242,6 +243,7 @@ set(UNITTEST_SOURCES runtime/PyNumber_tests.cpp runtime/PyString_tests.cpp runtime/PyType_tests.cpp + runtime/SourceManager_tests.cpp testing/main.cpp) set(PYTHON_LIB_PATH ${cpython_SOURCE_DIR}/Lib) diff --git a/src/executable/FunctionBlock.hpp b/src/executable/FunctionBlock.hpp index 197622b5..5e089960 100644 --- a/src/executable/FunctionBlock.hpp +++ b/src/executable/FunctionBlock.hpp @@ -2,6 +2,7 @@ #include "Program.hpp" #include "forward.hpp" +#include #include #include #include @@ -9,6 +10,13 @@ using InstructionVector = std::vector>; +struct InstructionSourceLocation +{ + uint32_t instruction_index; + uint32_t line; + uint32_t column; +}; + struct FunctionMetaData { std::string function_name; @@ -33,6 +41,7 @@ struct FunctionBlock { FunctionMetaData metadata; InstructionVector blocks; + std::vector instruction_locations; std::string to_string() const; }; diff --git a/src/executable/bytecode/Bytecode.cpp b/src/executable/bytecode/Bytecode.cpp index d1f09aa1..099a304d 100644 --- a/src/executable/bytecode/Bytecode.cpp +++ b/src/executable/bytecode/Bytecode.cpp @@ -1,4 +1,6 @@ #include "Bytecode.hpp" +#include "ast/AST.hpp" +#include "executable/FunctionBlock.hpp" #include "instructions/Instructions.hpp" #include "interpreter/Interpreter.hpp" #include "runtime/BaseException.hpp" @@ -8,6 +10,9 @@ #include "serialization/deserialize.hpp" #include "serialization/serialize.hpp" +#include +#include + using namespace py; Bytecode::Bytecode(size_t register_count, @@ -15,6 +20,7 @@ Bytecode::Bytecode(size_t register_count, size_t stack_size, std::string function_name, InstructionVector instructions, + std::vector instruction_locations, std::shared_ptr program) : Function(register_count, locals_count, @@ -22,9 +28,24 @@ Bytecode::Bytecode(size_t register_count, function_name, FunctionExecutionBackend::BYTECODE, std::move(program)), - m_instructions(std::move(instructions)) + m_instructions(std::move(instructions)), + m_instruction_locations(std::move(instruction_locations)) {} +std::optional Bytecode::location_for(size_t instruction_index) const +{ + if (m_instruction_locations.empty()) { return std::nullopt; } + // Find the last entry whose instruction_index is <= the query. + const auto it = std::upper_bound(m_instruction_locations.begin(), + m_instruction_locations.end(), + instruction_index, + [](size_t idx, const InstructionSourceLocation &entry) { + return idx < entry.instruction_index; + }); + if (it == m_instruction_locations.begin()) { return std::nullopt; } + return *std::prev(it); +} + std::string Bytecode::to_string() const { std::ostringstream os; @@ -83,6 +104,7 @@ std::unique_ptr Bytecode::deserialize(std::span &buffer stack_size, function_name, std::move(instructions), + std::vector{}, std::move(program)); } @@ -144,8 +166,9 @@ py::PyResult Bytecode::eval_loop(VirtualMachine &vm, Interpreter &int // vm.dump(); if (result.is_err()) { auto *exception = result.unwrap_err(); - size_t tb_lineno = 0; - size_t tb_lasti = std::distance(initial_ip, current_ip); + const size_t tb_lasti = std::distance(initial_ip, current_ip); + const size_t tb_lineno = + location_for(tb_lasti).value_or(InstructionSourceLocation{ 0, 0, 0 }).line; PyTraceback *tb_next = exception->traceback(); auto traceback = PyTraceback::create(interpreter.execution_frame(), tb_lasti, tb_lineno, tb_next); diff --git a/src/executable/bytecode/Bytecode.hpp b/src/executable/bytecode/Bytecode.hpp index 2ac8218b..06c6f6db 100644 --- a/src/executable/bytecode/Bytecode.hpp +++ b/src/executable/bytecode/Bytecode.hpp @@ -15,6 +15,7 @@ class Bytecode : public Function { const InstructionVector m_instructions; + const std::vector m_instruction_locations; public: Bytecode(size_t register_count, @@ -22,11 +23,14 @@ class Bytecode : public Function size_t stack_size, std::string function_name, InstructionVector instructions, + std::vector instruction_locations, std::shared_ptr program); auto begin() const { return m_instructions.begin(); } auto end() const { return m_instructions.end(); } + std::optional location_for(size_t instruction_index) const; + std::string to_string() const override; std::vector serialize() const override; diff --git a/src/executable/bytecode/BytecodeProgram.cpp b/src/executable/bytecode/BytecodeProgram.cpp index 8850caff..687c696e 100644 --- a/src/executable/bytecode/BytecodeProgram.cpp +++ b/src/executable/bytecode/BytecodeProgram.cpp @@ -37,6 +37,7 @@ std::shared_ptr BytecodeProgram::create(FunctionBlocks &&func_b main_func.metadata.stack_size, main_func.metadata.function_name, std::move(main_func.blocks), + std::move(main_func.instruction_locations), program); auto consts = PyTuple::create(main_func.metadata.consts); if (consts.is_err()) { TODO(); } @@ -69,6 +70,7 @@ std::shared_ptr BytecodeProgram::create(FunctionBlocks &&func_b func.metadata.stack_size, func.metadata.function_name, std::move(func.blocks), + std::move(func.instruction_locations), program); consts = PyTuple::create(func.metadata.consts); if (consts.is_err()) { TODO(); } diff --git a/src/executable/bytecode/Bytecode_tests.cpp b/src/executable/bytecode/Bytecode_tests.cpp index ee0b05c1..98b8b127 100644 --- a/src/executable/bytecode/Bytecode_tests.cpp +++ b/src/executable/bytecode/Bytecode_tests.cpp @@ -7,6 +7,73 @@ #include "gtest/gtest.h" +namespace { +Bytecode make_bytecode_with_locations(std::vector locations) +{ + return Bytecode{ /*register_count=*/0, + /*locals_count=*/0, + /*stack_size=*/0, + /*function_name=*/"", + InstructionVector{}, + std::move(locations), + /*program=*/nullptr }; +} +}// namespace + +TEST(BytecodeLocationFor, ReturnsNulloptWhenTableIsEmpty) +{ + auto bc = make_bytecode_with_locations({}); + EXPECT_FALSE(bc.location_for(0).has_value()); +} + +TEST(BytecodeLocationFor, ReturnsNulloptWhenQueryPrecedesFirstEntry) +{ + auto bc = make_bytecode_with_locations({ + InstructionSourceLocation{ /*instruction_index=*/5, /*line=*/10, /*column=*/2 }, + }); + EXPECT_FALSE(bc.location_for(0).has_value()); +} + +TEST(BytecodeLocationFor, ReturnsExactMatchEntry) +{ + auto bc = make_bytecode_with_locations({ + InstructionSourceLocation{ 0, 1, 0 }, + InstructionSourceLocation{ 3, 7, 4 }, + InstructionSourceLocation{ 10, 12, 0 }, + }); + const auto loc = bc.location_for(3); + ASSERT_TRUE(loc.has_value()); + EXPECT_EQ(loc->line, 7u); + EXPECT_EQ(loc->column, 4u); +} + +TEST(BytecodeLocationFor, ExtendsEntryUntilNextOne) +{ + auto bc = make_bytecode_with_locations({ + InstructionSourceLocation{ 0, 1, 0 }, + InstructionSourceLocation{ 3, 7, 4 }, + InstructionSourceLocation{ 10, 12, 0 }, + }); + // Query between entries should return the most recent preceding entry. + for (uint32_t idx : { 0u, 1u, 2u }) { + const auto loc = bc.location_for(idx); + ASSERT_TRUE(loc.has_value()) << "idx=" << idx; + EXPECT_EQ(loc->line, 1u) << "idx=" << idx; + EXPECT_EQ(loc->column, 0u) << "idx=" << idx; + } + for (uint32_t idx : { 3u, 4u, 9u }) { + const auto loc = bc.location_for(idx); + ASSERT_TRUE(loc.has_value()) << "idx=" << idx; + EXPECT_EQ(loc->line, 7u) << "idx=" << idx; + EXPECT_EQ(loc->column, 4u) << "idx=" << idx; + } + for (uint32_t idx : { 10u, 100u, 9999u }) { + const auto loc = bc.location_for(idx); + ASSERT_TRUE(loc.has_value()) << "idx=" << idx; + EXPECT_EQ(loc->line, 12u) << "idx=" << idx; + EXPECT_EQ(loc->column, 0u) << "idx=" << idx; + } +} // FIXME: think about what should be tested here // namespace { diff --git a/src/executable/bytecode/codegen/BytecodeGenerator.cpp b/src/executable/bytecode/codegen/BytecodeGenerator.cpp index 61fb2986..82405188 100644 --- a/src/executable/bytecode/codegen/BytecodeGenerator.cpp +++ b/src/executable/bytecode/codegen/BytecodeGenerator.cpp @@ -3119,6 +3119,7 @@ std::shared_ptr BytecodeGenerator::generate_executable(std::string file ASSERT(m_frame_stack_value_count.size() == 2); ASSERT(m_frame_free_var_count.size() == 2); relocate_labels(m_functions); + for (auto &func : m_functions.functions) { func.metadata.filename = filename; } return BytecodeProgram::create(std::move(m_functions), filename, argv); } @@ -3130,6 +3131,20 @@ InstructionVector *BytecodeGenerator::allocate_block(size_t function_id) return &function->blocks; } +void BytecodeGenerator::record_location_for_next_instruction() +{ + ASSERT(m_function_id < m_functions.functions.size()); + auto &func = *std::next(m_functions.functions.begin(), m_function_id); + const auto line = static_cast(m_current_source_location.start.row + 1); + const auto column = static_cast(m_current_source_location.start.column); + auto &locations = func.instruction_locations; + if (!locations.empty() && locations.back().line == line && locations.back().column == column) { + return; + } + const auto next_instruction_index = static_cast(func.blocks.size()); + locations.emplace_back(next_instruction_index, line, column); +} + std::shared_ptr BytecodeGenerator::compile(std::shared_ptr node, std::vector argv, compiler::OptimizationLevel lvl) diff --git a/src/executable/bytecode/codegen/BytecodeGenerator.hpp b/src/executable/bytecode/codegen/BytecodeGenerator.hpp index 4bd2caac..39cbc719 100644 --- a/src/executable/bytecode/codegen/BytecodeGenerator.hpp +++ b/src/executable/bytecode/codegen/BytecodeGenerator.hpp @@ -248,6 +248,8 @@ class BytecodeGenerator : public ast::CodeGenerator ASTContext m_ctx; std::stack m_stack; + SourceLocation m_current_source_location{}; + std::set m_clear_exception_before_return_functions; std::unordered_map>> m_return_transform; std::unordered_map m_current_exception_depth; @@ -264,9 +266,12 @@ class BytecodeGenerator : public ast::CodeGenerator template void emit(Args &&...args) { ASSERT(m_current_block); + record_location_for_next_instruction(); m_current_block->push_back(std::make_unique(std::forward(args)...)); } + void record_location_for_next_instruction(); + friend std::ostream &operator<<(std::ostream &os, BytecodeGenerator &generator); std::string to_string() const; @@ -374,8 +379,11 @@ class BytecodeGenerator : public ast::CodeGenerator { m_ctx.push_node(node); const auto old_function_id = m_function_id; + const auto old_source_location = m_current_source_location; m_function_id = function_id; + m_current_source_location = node->source_location(); auto *value = node->codegen(this); + m_current_source_location = old_source_location; m_function_id = old_function_id; m_ctx.pop_node(); return static_cast(value); diff --git a/src/executable/bytecode/codegen/BytecodeGenerator_tests.cpp b/src/executable/bytecode/codegen/BytecodeGenerator_tests.cpp index 29874088..ffc1be9e 100644 --- a/src/executable/bytecode/codegen/BytecodeGenerator_tests.cpp +++ b/src/executable/bytecode/codegen/BytecodeGenerator_tests.cpp @@ -1,8 +1,10 @@ +#include "../Bytecode.hpp" #include "../BytecodeProgram.hpp" #include "BytecodeGenerator.hpp" #include "executable/common.hpp" #include "lexer/Lexer.hpp" #include "parser/Parser.hpp" +#include "runtime/PyCode.hpp" #include "gtest/gtest.h" @@ -44,3 +46,29 @@ TEST(BytecodeGenerator, EmitsProgramWithFunctionDefinitions) ASSERT_EQ(bytecode_generator->functions().size(), 2); ASSERT_TRUE(bytecode_generator->main_function()); } + +TEST(BytecodeGenerator, AttachesSourceLineToMainInstructions) +{ + constexpr std::string_view program = + "x = 1\n" + "y = 2\n" + "z = 3\n"; + + auto bytecode_generator = generate_bytecode(program); + auto *code = static_cast(bytecode_generator->main_function()); + ASSERT_TRUE(code); + const auto *bytecode = static_cast(code->function().get()); + + // We don't pin down which instruction maps to which line — that's an + // implementation detail of the codegen. We do require that at least one + // instruction reports each of the three source lines (1, 2, 3) and that + // no instruction reports a bogus line. + std::set observed_lines; + for (size_t i = 0; i < static_cast(std::distance(bytecode->begin(), bytecode->end())); + ++i) { + if (const auto loc = bytecode->location_for(i)) { observed_lines.insert(loc->line); } + } + EXPECT_TRUE(observed_lines.contains(1u)); + EXPECT_TRUE(observed_lines.contains(2u)); + EXPECT_TRUE(observed_lines.contains(3u)); +} diff --git a/src/executable/mlir/Target/PythonBytecode/TranslateToPythonBytecode.cpp b/src/executable/mlir/Target/PythonBytecode/TranslateToPythonBytecode.cpp index 93be4222..2b2e5d85 100644 --- a/src/executable/mlir/Target/PythonBytecode/TranslateToPythonBytecode.cpp +++ b/src/executable/mlir/Target/PythonBytecode/TranslateToPythonBytecode.cpp @@ -111,6 +111,7 @@ struct PythonBytecodeEmitter struct FunctionInfo { std::vector> instructions; + std::vector instruction_locations; std::vector<::py::Value> m_consts; std::vector m_varnames; std::vector m_names; @@ -191,6 +192,7 @@ struct PythonBytecodeEmitter std::stack m_current_operation_index; std::stack> m_sorted_blocks; mlir::func::FuncOp m_parent_fn; + InstructionSourceLocation m_current_op_location{}; bool m_writing_to_module{ true }; @@ -222,9 +224,31 @@ struct PythonBytecodeEmitter template void emit(Args &&...args) { auto instruction = std::make_unique(std::forward(args)...); + record_location_for_next_instruction(); current_function().instructions.push_back(std::move(instruction)); } + void record_location_for_next_instruction() + { + auto &locations = current_function().instruction_locations; + if (!locations.empty() && locations.back().line == m_current_op_location.line + && locations.back().column == m_current_op_location.column) { + return; + } + const auto next_instruction_index = + static_cast(current_function().instructions.size()); + locations.emplace_back( + next_instruction_index, m_current_op_location.line, m_current_op_location.column); + } + + void capture_op_location(mlir::Operation &op) + { + if (auto loc = mlir::dyn_cast(op.getLoc())) { + m_current_op_location.line = loc.getLine() + 1; + m_current_op_location.column = loc.getColumn(); + } + } + size_t add_name(std::string_view str) { return current_function().add_name(str); } size_t get_cell_index(std::string_view str) { return current_function().get_cell_index(str); } @@ -472,6 +496,7 @@ struct PythonBytecodeEmitter template<> LogicalResult PythonBytecodeEmitter::emitOperation(Operation &op) { + capture_op_location(op); return llvm::TypeSwitch(&op) // Builtin ops. .Case([this](auto op) { @@ -1445,9 +1470,11 @@ std::shared_ptr translateToPythonBytecode(Operation *op) .register_count = codegen::kNumRegisters, .stack_size = emitter.m_module.m_stack_size, .names = std::move(emitter.m_module.m_names), + .filename = emitter.m_filename, .consts = std::move(emitter.m_module.m_consts), }, .blocks = std::move(instructions), + .instruction_locations = std::move(emitter.m_module.instruction_locations), }; func_blocks.functions.push_back(std::move(fb_module)); } @@ -1463,6 +1490,7 @@ std::shared_ptr translateToPythonBytecode(Operation *op) .varnames = std::move(fn.m_varnames), .freevars = std::move(fn.m_freevars), .names = std::move(fn.m_names), + .filename = emitter.m_filename, .arg_count = fn.m_arg_count, .kwonly_arg_count = fn.m_kwonly_arg_count, .cell2arg = std::move(fn.m_cell2arg), @@ -1470,10 +1498,11 @@ std::shared_ptr translateToPythonBytecode(Operation *op) .flags = std::move(fn.m_flags), }, .blocks = std::move(fn.instructions), + .instruction_locations = std::move(fn.instruction_locations), }; func_blocks.functions.push_back(std::move(fb)); } return BytecodeProgram::create(std::move(func_blocks), emitter.filename(), emitter.argv()); } -}// namespace codegen \ No newline at end of file +}// namespace codegen diff --git a/src/runtime/BaseException.cpp b/src/runtime/BaseException.cpp index 889b9aaa..cc700504 100644 --- a/src/runtime/BaseException.cpp +++ b/src/runtime/BaseException.cpp @@ -4,6 +4,7 @@ #include "PyFrame.hpp" #include "PyTraceback.hpp" #include "PyType.hpp" +#include "SourceManager.hpp" #include "types/api.hpp" #include "types/builtin.hpp" #include "vm/VM.hpp" @@ -108,11 +109,14 @@ std::string BaseException::format_traceback() const out << "Traceback (most recent call last):\n"; auto *tb = m_traceback; while (tb) { + const auto &filename = tb->m_tb_frame->code()->m_filename; out << fmt::format(" File \"{}\", line {}, in {}\n", - "TODO", + filename, tb->m_tb_lineno, tb->m_tb_frame->code()->name()); - out << " TODO -> print line\n"; + const auto source = SourceManager::the().line(filename, tb->m_tb_lineno); + const auto trimmed = SourceManager::strip_leading_whitespace(source); + if (!trimmed.empty()) { out << " " << trimmed << "\n"; } tb = tb->m_tb_next; } out << type()->name() << ": " << what() << "\n"; diff --git a/src/runtime/SourceManager.cpp b/src/runtime/SourceManager.cpp new file mode 100644 index 00000000..84b09b71 --- /dev/null +++ b/src/runtime/SourceManager.cpp @@ -0,0 +1,37 @@ +#include "SourceManager.hpp" + +#include + +namespace py { + +SourceManager &SourceManager::the() +{ + static SourceManager instance; + return instance; +} + +std::string_view SourceManager::line(const std::string &filename, size_t lineno) +{ + const auto &lines = load(filename); + if (lineno == 0 || lineno > lines.size()) { return {}; } + return lines[lineno - 1]; +} + +std::string_view SourceManager::strip_leading_whitespace(std::string_view s) +{ + const auto first = s.find_first_not_of(" \t"); + if (first == std::string_view::npos) { return {}; } + return s.substr(first); +} + +const std::vector &SourceManager::load(const std::string &filename) +{ + if (auto it = m_files.find(filename); it != m_files.end()) { return it->second; } + std::vector lines; + if (std::ifstream f{ filename }) { + for (std::string l; std::getline(f, l);) { lines.push_back(std::move(l)); } + } + return m_files.emplace(filename, std::move(lines)).first->second; +} + +}// namespace py diff --git a/src/runtime/SourceManager.hpp b/src/runtime/SourceManager.hpp new file mode 100644 index 00000000..d5c5936e --- /dev/null +++ b/src/runtime/SourceManager.hpp @@ -0,0 +1,38 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace py { + +// Caches source files on disk so traceback rendering can show the line +// that raised, without re-reading the file once per frame. Lookups are +// 1-indexed (matching tb_lineno). Failed opens are negatively cached so +// repeated requests for an unreadable file don't keep hitting disk. +class SourceManager +{ + public: + static SourceManager &the(); + + // Returns the line at `lineno` (1-indexed) in `filename`, or an empty + // view if the file can't be opened, the line is out of range, or + // `lineno == 0`. + std::string_view line(const std::string &filename, size_t lineno); + + // Returns `s` with leading spaces and tabs removed. Provided here so + // traceback formatters (the main consumer of source lines) don't grow + // their own copy. + static std::string_view strip_leading_whitespace(std::string_view s); + + private: + SourceManager() = default; + + const std::vector &load(const std::string &filename); + + std::unordered_map> m_files; +}; + +}// namespace py diff --git a/src/runtime/SourceManager_tests.cpp b/src/runtime/SourceManager_tests.cpp new file mode 100644 index 00000000..6ba80432 --- /dev/null +++ b/src/runtime/SourceManager_tests.cpp @@ -0,0 +1,53 @@ +#include "SourceManager.hpp" + +#include "gtest/gtest.h" + +#include +#include +#include + +namespace { +std::filesystem::path write_temp(std::string_view name, std::string_view contents) +{ + auto path = std::filesystem::temp_directory_path() / name; + std::ofstream{ path } << contents; + return path; +} +}// namespace + +TEST(SourceManager, ReturnsRequestedLineOneIndexed) +{ + const auto path = write_temp("source_manager_basic.py", + "alpha\n" + "beta\n" + "gamma\n"); + auto &sm = py::SourceManager::the(); + EXPECT_EQ(sm.line(path.string(), 1), "alpha"); + EXPECT_EQ(sm.line(path.string(), 2), "beta"); + EXPECT_EQ(sm.line(path.string(), 3), "gamma"); +} + +TEST(SourceManager, ReturnsEmptyForZeroOrOutOfRangeLine) +{ + const auto path = write_temp("source_manager_oob.py", "only\n"); + auto &sm = py::SourceManager::the(); + EXPECT_TRUE(sm.line(path.string(), 0).empty()); + EXPECT_TRUE(sm.line(path.string(), 2).empty()); + EXPECT_TRUE(sm.line(path.string(), 999).empty()); +} + +TEST(SourceManager, ReturnsEmptyForUnreadableFile) +{ + auto &sm = py::SourceManager::the(); + EXPECT_TRUE(sm.line("/nonexistent/path/source_manager_missing.py", 1).empty()); +} + +TEST(SourceManager, StripLeadingWhitespaceRemovesSpacesAndTabs) +{ + using py::SourceManager; + EXPECT_EQ(SourceManager::strip_leading_whitespace(" x = 1"), "x = 1"); + EXPECT_EQ(SourceManager::strip_leading_whitespace("\t\tcall()"), "call()"); + EXPECT_EQ(SourceManager::strip_leading_whitespace("no_indent"), "no_indent"); + EXPECT_TRUE(SourceManager::strip_leading_whitespace(" \t").empty()); + EXPECT_TRUE(SourceManager::strip_leading_whitespace("").empty()); +}