Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ include(CheckCXXSourceCompiles)

project(python++)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD 23)

include(cmake/CPM.cmake)

Expand Down
2 changes: 1 addition & 1 deletion integration/run_python_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"

PYTHON_EXECUTABLE=$1
GC_FREQUENCY=100000
GC_FREQUENCY=${GC_FREQUENCY:-100000}

# start by calling the tests that we need to work in order to trust the result of the other python tests
if timeout 10s $PYTHON_EXECUTABLE $SCRIPT_DIR/tests/lemmas/assert_false.py --gc-frequency $GC_FREQUENCY &> /dev/null; then
Expand Down
23 changes: 23 additions & 0 deletions integration/tests/dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,26 @@ def dict_setdefault():
assert a["b"] == 10

dict_setdefault()

def dict_pop_missing_key_with_failing_repr():
# Regression: PyDict::pop formatted KeyError via key.__repr__() and
# unconditionally unwrapped the result, aborting if __repr__ raised.
class Bad:
def __hash__(self):
return 0
def __eq__(self, other):
return False
def __repr__(self):
raise ValueError("bad repr")

d = {}
raised = None
try:
d.pop(Bad())
except KeyError:
raised = "KeyError"
except ValueError:
raised = "ValueError"
assert raised is not None, "dict.pop on missing key with failing __repr__ must not abort"

dict_pop_missing_key_with_failing_repr()
10 changes: 10 additions & 0 deletions integration/tests/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,13 @@
exception_raised = True
finally:
assert exception_raised, "list.pop with empty list should raise an IndexError"

def list_recursive_repr():
a = []
a.append(a)
assert repr(a) == "[[...]]", "recursive list repr should use [...] sentinel"
# Calling repr again must drain the visited set; otherwise nested
# repr() would still see `a` as visited.
assert repr(a) == "[[...]]", "recursive list repr should be idempotent"

list_recursive_repr()
64 changes: 64 additions & 0 deletions integration/tests/weakref.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import _weakref
import gc

# A class that supports weakrefs (built-ins like int/list typically don't).
class Foo:
pass

def weakref_registers_and_counts():
f = Foo()
r = _weakref.ref(f)
# Holding a weakref does not bump the strong-ref count, but the runtime
# must record the registration so weakref_count is observable.
assert _weakref.getweakrefcount(f) == 1
# The weakref still resolves to the target while the target is alive.
assert r() is f

weakref_registers_and_counts()

def weakref_resolves_through_gc_collect():
# gc.collect() forces a full mark/sweep regardless of cadence/pause
# state, so this scope can deterministically check that the weakref
# keeps tracking the target across collection cycles.
f = Foo()
r = _weakref.ref(f)
gc.collect()
# `f` is still on the stack here, so it must survive the collection.
assert r() is f
assert _weakref.getweakrefcount(f) == 1

weakref_resolves_through_gc_collect()

def gc_collect_does_not_crash_when_disabled():
# Disabling the GC must not prevent gc.collect() from running; this
# mirrors CPython's gc.collect() semantics.
gc.disable()
try:
assert gc.isenabled() is False
gc.collect()
finally:
gc.enable()
assert gc.isenabled() is True

gc_collect_does_not_crash_when_disabled()

def weakref_wrapper_unregisters_on_its_own_collection():
# Regression test for B8. Whenever a weakref wrapper is collected,
# the runtime must scrub the heap's m_weakrefs table — otherwise
# the per-target vector accumulates dangling pointers and
# getweakrefcount lies. Create N immediately-unreachable wrappers
# and confirm the count is zero after gc.collect(). Pre-fix this
# interpreter reported 100/100 survived; post-fix and on CPython
# (which refcounts wrappers away on the spot) it reports 0/100.
N = 100
def churn(target):
for _ in range(N):
_weakref.ref(target) # result discarded
t = Foo()
churn(t)
gc.collect()
remaining = _weakref.getweakrefcount(t)
assert remaining == 0, \
f"expected all wrappers collected, {remaining}/{N} survived"

weakref_wrapper_unregisters_on_its_own_collection()
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ set(RUNTIME_SOURCE_FILES
runtime/modules/struct/module.cpp
runtime/modules/SysModule.cpp
runtime/modules/WarningsModule.cpp
runtime/modules/GcModule.cpp
runtime/types/builtin.cpp
runtime/warnings/DeprecationWarning.cpp
runtime/warnings/ImportWarning.cpp
Expand Down
6 changes: 3 additions & 3 deletions src/executable/bytecode/Bytecode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,14 @@ PyResult<Value> Bytecode::call_without_setup(VirtualMachine &vm, Interpreter &in
// create main stack frame
ASSERT(!vm.stack().empty());

constexpr auto sentinel = decltype(vm.stack().top().get().last_instruction_pointer)();
if (vm.stack().top().get().last_instruction_pointer == sentinel) {
constexpr auto sentinel = decltype(vm.stack().back().get().last_instruction_pointer)();
if (vm.stack().back().get().last_instruction_pointer == sentinel) {
// first time calling with the stack frame, so we don't have a last instruction pointer yet
vm.set_instruction_pointer(begin());
} else {
// otherwise resume execution, by starting execution from the instruction after the last run
// instruction
vm.set_instruction_pointer(vm.stack().top().get().last_instruction_pointer + 1);
vm.set_instruction_pointer(vm.stack().back().get().last_instruction_pointer + 1);
}

return eval_loop(vm, interpreter);
Expand Down
2 changes: 1 addition & 1 deletion src/interpreter/Interpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ PyModule *Interpreter::get_imported_module(PyString *name) const
ScopedStack::~ScopedStack()
{
auto &vm = VirtualMachine::the();
if (!vm.stack().empty() && top_frame && &vm.stack().top().get() == top_frame.get()) {
if (!vm.stack().empty() && top_frame && &vm.stack().back().get() == top_frame.get()) {
vm.pop_frame(true);
}
}
Expand Down
118 changes: 66 additions & 52 deletions src/memory/GarbageCollector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,27 +59,38 @@ __attribute__((no_sanitize_address)) std::stack<Cell *> collect_roots_on_the_sta
{
std::stack<Cell *> roots;

// push register values onto the stack
// Spill all callee-saved registers into `jump_buffer`. setjmp captures
// them as part of saving the calling environment.
std::jmp_buf jump_buffer;
setjmp(jump_buffer);

// traverse the stack from the stack pointer to the stack origin/bottom
// uintptr_t *rsp_;
// asm volatile("movq %%rsp, %0" : "=r"(rsp_));
auto scan_range = [&](uint8_t *begin, uint8_t *end) {
for (uint8_t *p = begin; p < end; p += sizeof(uintptr_t)) {
uint8_t *address =
bit_cast_without_sanitizer<uint8_t *>(*bit_cast_without_sanitizer<uintptr_t *>(p))
- sizeof(GarbageCollected);
spdlog::trace("checking address {}, pointer address={}", (void *)address, (void *)p);
if (heap.slab().has_address(address)) {
spdlog::trace("valid address {}", (void *)address);
auto *obj_header = bit_cast<GarbageCollected *>(address);
add_root(obj_header, roots);
}
}
};

// uint8_t *rsp = bit_cast<uint8_t *>(rsp_);
// jump_buffer is a local variable, so it sits below the current frame
// pointer on stacks that grow downward. The frame-pointer-to-stack-bottom
// scan below therefore skips it, which would lose any GC root whose only
// live reference is in a callee-saved register at this point. Scan the
// buffer's bytes explicitly to recover those spilled values.
auto *jb_begin = bit_cast_without_sanitizer<uint8_t *>(&jump_buffer);
scan_range(jb_begin, jb_begin + sizeof(jump_buffer));

// Traverse the stack from the current frame pointer up to the recorded
// stack bottom; this covers everything in our callers' frames.
uint8_t *rsp = bit_cast_without_sanitizer<uint8_t *>(__builtin_frame_address(0));
for (; rsp < stack_bottom; rsp += sizeof(uintptr_t)) {
uint8_t *address =
bit_cast_without_sanitizer<uint8_t *>(*bit_cast_without_sanitizer<uintptr_t *>(rsp))
- sizeof(GarbageCollected);
spdlog::trace("checking address {}, pointer address={}", (void *)address, (void *)rsp);
if (heap.slab().has_address(address)) {
spdlog::trace("valid address {}", (void *)address);
auto *obj_header = bit_cast<GarbageCollected *>(address);
add_root(obj_header, roots);
}
}
scan_range(rsp, stack_bottom);

spdlog::debug("Done collecting roots from the stack, found {} roots", roots.size());
return roots;
}
Expand Down Expand Up @@ -149,20 +160,27 @@ struct MarkGCVisitor : Cell::Visitor
Heap &m_heap;
std::stack<Cell *> &m_to_visit;

// Collects the 1-hop neighbours of a given cell. visit_graph(visitor) on
// the root invokes Visitor::visit() for each thing the root references
// (which, by convention, may include the root itself for leaf cells); the
// `m_collecting` gate isolates those nested calls so only direct
// neighbours land in the vector and we do not recurse further.
struct NeighbourVisitor : Cell::Visitor
{
std::vector<Cell *> m_neighbours;
size_t m_depth{ 0 };
bool m_collecting{ false };

void visit(Cell &cell)
void collect_neighbours_of(Cell &root)
{
m_depth++;
if (m_depth == 1) {
cell.visit_graph(*this);
} else if (m_depth == 2) {
m_neighbours.push_back(&cell);
}
m_depth--;
m_neighbours.clear();
m_collecting = true;
root.visit_graph(*this);
m_collecting = false;
}

void visit(Cell &cell) override
{
if (m_collecting) { m_neighbours.push_back(&cell); }
}
};

Expand All @@ -174,7 +192,7 @@ struct MarkGCVisitor : Cell::Visitor

if (!is_static_memory(cell_start, m_heap)) {
NeighbourVisitor nv{};
nv.visit(cell);
nv.collect_neighbours_of(cell);
auto &neighbours = nv.m_neighbours;
spdlog::trace("node: {}", static_cast<void *>(&cell));
for (auto *neighbour : neighbours) {
Expand Down Expand Up @@ -212,8 +230,15 @@ struct MarkGCVisitor : Cell::Visitor

void MarkSweepGC::mark_all_cell_unreachable(Heap &heap) const
{
// TODO: once the ideal block sizes are fixed there should be an iterator
// returning a list of all blocks
// NOTE: this site intentionally does NOT use Slab::for_each_block. Doing
// so produces a stack layout (lambda captures across inlining) that
// keeps a Block pointer alive in a slot the next collect_roots scan
// reads as a heap root, regressing
// TestHeap.GarbageCollectorDeallocatesGCPointersWhenStackFrameIsPopped.
// The conservative-GC fragility this exposes is the underlying issue;
// until it is replaced by a precise RootSet, keep the explicit array
// here so the test stays green. The other sweep/has_address sites are
// fine because they run after the scan, not before.
std::array blocks = {
std::reference_wrapper{ heap.slab().block_16() },
std::reference_wrapper{ heap.slab().block_32() },
Expand Down Expand Up @@ -262,21 +287,8 @@ void MarkSweepGC::sweep(Heap &heap) const
{
spdlog::trace("MarkSweepGC::sweep start");

// TODO: once the ideal block sizes are fixed there should be an iterator
// returning a list of all blocks
std::array blocks = {
std::reference_wrapper{ heap.slab().block_16() },
std::reference_wrapper{ heap.slab().block_32() },
std::reference_wrapper{ heap.slab().block_64() },
std::reference_wrapper{ heap.slab().block_128() },
std::reference_wrapper{ heap.slab().block_256() },
std::reference_wrapper{ heap.slab().block_512() },
std::reference_wrapper{ heap.slab().block_1024() },
std::reference_wrapper{ heap.slab().block_2048() },
};
// sweep all the dead objects
for (const auto &block : blocks) {
for (auto &chunk : block.get()->chunks()) {
heap.slab().for_each_block([this, &heap](Block &block) {
for (auto &chunk : block.chunks()) {
chunk.for_each_cell_alive([this, &chunk, &heap](uint8_t *memory) {
auto *header = bit_cast<GarbageCollected *>(memory);
if (header->white()) {
Expand All @@ -293,27 +305,29 @@ void MarkSweepGC::sweep(Heap &heap) const
}
});
}
}
});
spdlog::trace("MarkSweepGC::sweep done");
}


void MarkSweepGC::run(Heap &heap) const
void MarkSweepGC::mark_and_sweep(Heap &heap)
{
if (m_pause) { return; }
if (++m_iterations_since_last_sweep < m_frequency) { return; }

mark_all_cell_unreachable(heap);

auto roots = collect_roots(heap);

mark_all_live_objects(heap, std::move(roots));

sweep(heap);

m_iterations_since_last_sweep = 0;
}

void MarkSweepGC::run(Heap &heap)
{
if (m_pause) { return; }
if (++m_iterations_since_last_sweep < m_frequency) { return; }
mark_and_sweep(heap);
}

void MarkSweepGC::force_run(Heap &heap) { mark_and_sweep(heap); }

void MarkSweepGC::resume()
{
ASSERT(!is_active());
Expand Down
19 changes: 16 additions & 3 deletions src/memory/GarbageCollector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,15 @@ class GarbageCollector
{
public:
virtual ~GarbageCollector() = default;
virtual void run(Heap &) const = 0;
// Not const: a GC pass advances the collector's per-call cadence
// counter, flips every cell's mark colour, and may destroy a large
// chunk of the heap. Pretending the GC instance is immutable across
// the call is a const-correctness lie that this signature avoids.
virtual void run(Heap &) = 0;
// Unconditional collection. Bypasses both the pause flag and the
// cadence counter; used by gc.collect() so user code can force a
// full sweep even while the GC has been disabled.
virtual void force_run(Heap &) = 0;
virtual void resume() = 0;
virtual void pause() = 0;
virtual bool is_active() const = 0;
Expand All @@ -81,7 +89,8 @@ class MarkSweepGC : public GarbageCollector
{
public:
MarkSweepGC();
void run(Heap &) const override;
void run(Heap &) override;
void force_run(Heap &) override;
void resume() override;
void pause() override;
bool is_active() const override;
Expand All @@ -92,7 +101,11 @@ class MarkSweepGC : public GarbageCollector
void sweep(Heap &heap) const;

private:
void mark_and_sweep(Heap &);

// Lazily initialised once on first collect_roots; afterwards immutable.
// `mutable` is the textbook one-time-cache use case.
mutable uint8_t *m_stack_bottom{ nullptr };
mutable size_t m_iterations_since_last_sweep{ 0 };
size_t m_iterations_since_last_sweep{ 0 };
bool m_pause{ false };
};
Loading
Loading