Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Include/internal/pycore_opcode_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

115 changes: 115 additions & 0 deletions Lib/test/test_capi/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -1518,6 +1518,29 @@ class Foo:
Foo.attr = 0
self.assertFalse(ex.is_valid())

def test_guard_type_version_locked_removed(self):
"""
Verify that redundant _GUARD_TYPE_VERSION_LOCKED guards are
eliminated for sequential STORE_ATTR_INSTANCE_VALUE in __init__.
"""

class Foo:
def __init__(self):
self.a = 1
self.b = 2
self.c = 3

def thing(n):
for _ in range(n):
Foo()

res, ex = self._run_with_optimizer(thing, TIER2_THRESHOLD)
self.assertIsNotNone(ex)
opnames = list(iter_opnames(ex))
guard_locked_count = opnames.count("_GUARD_TYPE_VERSION_LOCKED")
# Only the first store needs the guard; the rest should be NOPed.
self.assertEqual(guard_locked_count, 1)

def test_type_version_doesnt_segfault(self):
"""
Tests that setting a type version doesn't cause a segfault when later looking at the stack.
Expand All @@ -1539,6 +1562,98 @@ def fn(a):

fn(A())

def test_init_resolves_callable(self):
"""
_CHECK_AND_ALLOCATE_OBJECT should resolve __init__ to a constant,
enabling the optimizer to propagate type information through the frame
and eliminate redundant function version and arg count checks.
"""
class MyPoint:
def __init__(self, x, y):
# If __init__ callable is propagated through, then
# These will get promoted from globals to constants.
self.x = range(1)
self.y = range(1)

def testfunc(n):
for _ in range(n):
p = MyPoint(1.0, 2.0)

_, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# The __init__ call should be traced through via _PUSH_FRAME
self.assertIn("_PUSH_FRAME", uops)
# __init__ resolution allows promotion of range to constant
self.assertNotIn("_LOAD_GLOBAL_BUILTINS", uops)

def test_guard_type_version_locked_propagates(self):
"""
_GUARD_TYPE_VERSION_LOCKED should set the type version on the
symbol so repeated accesses to the same type can benefit.
"""
class Item:
def __init__(self, val):
self.val = val

def get(self):
return self.val

def get2(self):
return self.val + 1

def testfunc(n):
item = Item(42)
total = 0
for _ in range(n):
# Two method calls on the same object — the second
# should benefit from type info set by the first.
total += item.get() + item.get2()
return total

res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
self.assertEqual(res, TIER2_THRESHOLD * (42 + 43))
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# Both methods should be traced through
self.assertEqual(uops.count("_PUSH_FRAME"), 2)
# Type version propagation: one guard covers both method lookups
self.assertEqual(uops.count("_GUARD_TYPE_VERSION"), 1)
# Function checks eliminated (type info resolves the callable)
self.assertNotIn("_CHECK_FUNCTION_VERSION", uops)
self.assertNotIn("_CHECK_FUNCTION_EXACT_ARGS", uops)

def test_method_chain_guard_elimination(self):
"""
Calling two methods on the same object should share the outer
type guard — only one _GUARD_TYPE_VERSION for the two lookups.
"""
class Calc:
def __init__(self, val):
self.val = val

def add(self, x):
self.val += x
return self

def testfunc(n):
c = Calc(0)
for _ in range(n):
c.add(1).add(2)
return c.val

res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
self.assertEqual(res, TIER2_THRESHOLD * 3)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# Both add() calls should be inlined
push_count = uops.count("_PUSH_FRAME")
self.assertEqual(push_count, 2)
# Only one outer type version guard for the two method lookups
# on the same object c (the second lookup reuses type info)
guard_version_count = uops.count("_GUARD_TYPE_VERSION")
self.assertEqual(guard_version_count, 1)

def test_func_guards_removed_or_reduced(self):
def testfunc(n):
for i in range(n):
Expand Down
1 change: 1 addition & 0 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -2994,6 +2994,7 @@ dummy_func(

macro(STORE_ATTR_INSTANCE_VALUE) =
unused/1 +
_RECORD_TOS_TYPE +
_LOCK_OBJECT +
_GUARD_TYPE_VERSION_LOCKED +
_GUARD_DORV_NO_DICT +
Expand Down
36 changes: 34 additions & 2 deletions Python/optimizer_bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,20 @@ dummy_func(void) {
assert(!PyJitRef_IsUnique(value));
}

op(_GUARD_TYPE_VERSION_LOCKED, (type_version/2, owner -- owner)) {
assert(type_version);
if (sym_matches_type_version(owner, type_version)) {
ADD_OP(_NOP, 0, 0);
} else {
PyTypeObject *probable_type = sym_get_probable_type(owner);
if (probable_type->tp_version_tag == type_version && sym_set_type_version(owner, type_version)) {
// Promote the probable type version to a known one.
PyType_Watch(TYPE_WATCHER_ID, (PyObject *)probable_type);
_Py_BloomFilter_Add(dependencies, probable_type);
}
}
}

op(_STORE_ATTR_INSTANCE_VALUE, (offset/1, value, owner -- o)) {
(void)offset;
(void)value;
Expand Down Expand Up @@ -1027,9 +1041,27 @@ dummy_func(void) {
}

op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) {
(void)type_version;
(void)args;
callable = sym_new_not_null(ctx);
PyObject *probable_callable = sym_get_probable_value(callable);
assert(probable_callable != NULL);
assert(PyType_Check(probable_callable));
PyTypeObject *tp = (PyTypeObject *)probable_callable;
if (tp->tp_version_tag == type_version) {
// If the type version has not changed since we last saw it,
// then we know this __init__ is definitely the same one as in the cache.
// We can promote callable to a known constant. This does not need a
// type watcher, as we do not remove this _CHECK_AND_ALLOCATE_OBJECT guard.
// TODO: split up _CHECK_AND_ALLOCATE_OBJECT to the check then alloate, so we can
// eliminate the check.
PyHeapTypeObject *cls = (PyHeapTypeObject *)probable_callable;
PyObject *init = cls->_spec_cache.init;
assert(init != NULL);
assert(PyFunction_Check(init));
callable = sym_new_const(ctx, init);
}
else {
callable = sym_new_not_null(ctx);
}
self_or_null = sym_new_not_null(ctx);
}

Expand Down
29 changes: 27 additions & 2 deletions Python/optimizer_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Python/record_functions.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading