Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Include/internal/pycore_opcode_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

118 changes: 118 additions & 0 deletions Lib/test/test_capi/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -1518,6 +1518,29 @@ class Foo:
Foo.attr = 0
self.assertFalse(ex.is_valid())

def test_guard_type_version_locked_removed(self):
Comment thread
Fidget-Spinner marked this conversation as resolved.
"""
Verify that redundant _GUARD_TYPE_VERSION_LOCKED guards are
Comment thread
Fidget-Spinner marked this conversation as resolved.
eliminated for sequential STORE_ATTR_INSTANCE_VALUE in __init__.
"""

class Foo:
def __init__(self):
self.a = 1
self.b = 2
self.c = 3

def thing(n):
for _ in range(n):
Foo()

res, ex = self._run_with_optimizer(thing, TIER2_THRESHOLD)
self.assertIsNotNone(ex)
opnames = list(iter_opnames(ex))
guard_locked_count = opnames.count("_GUARD_TYPE_VERSION_LOCKED")
Comment thread
Fidget-Spinner marked this conversation as resolved.
# Only the first store needs the guard; the rest should be NOPed.
self.assertEqual(guard_locked_count, 1)

def test_type_version_doesnt_segfault(self):
"""
Tests that setting a type version doesn't cause a segfault when later looking at the stack.
Expand All @@ -1539,6 +1562,101 @@ def fn(a):

fn(A())

def test_init_resolves_callable(self):
"""
_CHECK_AND_ALLOCATE_OBJECT should resolve __init__ to a constant,
enabling the optimizer to propagate type information through the frame
and eliminate redundant function version and arg count checks.
"""
class MyPoint:
def __init__(self, x, y):
self.x = x
self.y = y

def testfunc(n):
total = 0.0
for _ in range(n):
p = MyPoint(1.0, 2.0)
total += p.x
return total

res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
self.assertAlmostEqual(res, TIER2_THRESHOLD * 1.0)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# The __init__ call should be traced through via _PUSH_FRAME
self.assertIn("_PUSH_FRAME", uops)
# __init__ resolution eliminates function version and arg checks
self.assertNotIn("_CHECK_FUNCTION_VERSION", uops)
self.assertNotIn("_CHECK_FUNCTION_EXACT_ARGS", uops)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@eendebakpt I think the LLM generated this, but this is wrong, there is no function being called in the __init__, so this won't show up.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in latest comimt.


def test_guard_type_version_locked_propagates(self):
"""
_GUARD_TYPE_VERSION_LOCKED should set the type version on the
symbol so repeated accesses to the same type can benefit.
"""
class Item:
def __init__(self, val):
self.val = val

def get(self):
return self.val

def get2(self):
return self.val + 1

def testfunc(n):
item = Item(42)
total = 0
for _ in range(n):
# Two method calls on the same object — the second
# should benefit from type info set by the first.
total += item.get() + item.get2()
return total

res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
self.assertEqual(res, TIER2_THRESHOLD * (42 + 43))
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# Both methods should be traced through
self.assertEqual(uops.count("_PUSH_FRAME"), 2)
# Type version propagation: one guard covers both method lookups
self.assertEqual(uops.count("_GUARD_TYPE_VERSION"), 1)
# Function checks eliminated (type info resolves the callable)
self.assertNotIn("_CHECK_FUNCTION_VERSION", uops)
self.assertNotIn("_CHECK_FUNCTION_EXACT_ARGS", uops)

def test_method_chain_guard_elimination(self):
"""
Calling two methods on the same object should share the outer
type guard — only one _GUARD_TYPE_VERSION for the two lookups.
"""
class Calc:
def __init__(self, val):
self.val = val

def add(self, x):
self.val += x
return self

def testfunc(n):
c = Calc(0)
for _ in range(n):
c.add(1).add(2)
return c.val

res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
self.assertEqual(res, TIER2_THRESHOLD * 3)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# Both add() calls should be inlined
push_count = uops.count("_PUSH_FRAME")
self.assertEqual(push_count, 2)
# Only one outer type version guard for the two method lookups
# on the same object c (the second lookup reuses type info)
guard_version_count = uops.count("_GUARD_TYPE_VERSION")
self.assertEqual(guard_version_count, 1)

def test_func_guards_removed_or_reduced(self):
def testfunc(n):
for i in range(n):
Expand Down
1 change: 1 addition & 0 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -2994,6 +2994,7 @@ dummy_func(

macro(STORE_ATTR_INSTANCE_VALUE) =
unused/1 +
_RECORD_TOS_TYPE +
_LOCK_OBJECT +
_GUARD_TYPE_VERSION_LOCKED +
_GUARD_DORV_NO_DICT +
Expand Down
36 changes: 34 additions & 2 deletions Python/optimizer_bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,20 @@ dummy_func(void) {
assert(!PyJitRef_IsUnique(value));
}

op(_GUARD_TYPE_VERSION_LOCKED, (type_version/2, owner -- owner)) {
Comment thread
Fidget-Spinner marked this conversation as resolved.
assert(type_version);
if (sym_matches_type_version(owner, type_version)) {
ADD_OP(_NOP, 0, 0);
Comment thread
Fidget-Spinner marked this conversation as resolved.
} else {
PyTypeObject *probable_type = sym_get_probable_type(owner);
if (probable_type->tp_version_tag == type_version && sym_set_type_version(owner, type_version)) {
// Promote the probable type version to a known one.
PyType_Watch(TYPE_WATCHER_ID, (PyObject *)probable_type);
_Py_BloomFilter_Add(dependencies, probable_type);
}
}
}

op(_STORE_ATTR_INSTANCE_VALUE, (offset/1, value, owner -- o)) {
(void)offset;
(void)value;
Expand Down Expand Up @@ -1027,9 +1041,27 @@ dummy_func(void) {
}

op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) {
(void)type_version;
(void)args;
callable = sym_new_not_null(ctx);
PyObject *probable_callable = sym_get_probable_value(callable);
assert(probable_callable != NULL);
assert(PyType_Check(probable_callable));
PyTypeObject *tp = (PyTypeObject *)probable_callable;
if (tp->tp_version_tag == type_version) {
// If the type version has not changed since we last saw it,
// then we know this __init__ is definitely the same one as in the cache.
// We can promote callable to a known constant. This does not need a
// type watcher, as we do not remove this _CHECK_AND_ALLOCATE_OBJECT guard.
// TODO: split up _CHECK_AND_ALLOCATE_OBJECT to the check then alloate, so we can
// eliminate the check.
PyHeapTypeObject *cls = (PyHeapTypeObject *)probable_callable;
PyObject *init = cls->_spec_cache.init;
assert(init != NULL);
assert(PyFunction_Check(init));
callable = sym_new_const(ctx, init);
}
else {
callable = sym_new_not_null(ctx);
}
self_or_null = sym_new_not_null(ctx);
}

Expand Down
29 changes: 27 additions & 2 deletions Python/optimizer_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Python/record_functions.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading