From c1b9a836dbd5181ef57514139a5109d554ba30f1 Mon Sep 17 00:00:00 2001 From: Neko Asakura Date: Mon, 30 Mar 2026 13:29:07 -0400 Subject: [PATCH 1/6] GH-131798: Narrow the return type of `_FORMAT_SIMPLE` and `_FORMAT_WITH_SPEC` to `str` --- Lib/test/test_capi/test_opt.py | 38 +++++++++++++++++++ ...-03-30-17-01-34.gh-issue-131798.WSefcr.rst | 2 + Python/optimizer_bytecodes.c | 8 ++++ Python/optimizer_cases.c.h | 4 +- 4 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 2005dd9b0866bd..bda26bc7464c5a 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2263,6 +2263,44 @@ def testfunc(n): self.assertNotIn("_GUARD_TOS_UNICODE", uops) self.assertIn("_BINARY_OP_ADD_UNICODE", uops) + def test_format_simple_narrows_to_str(self): + def testfunc(n): + x = [] + for _ in range(n): + v = 42 + s = f"{v}" + t = "hello" + s + x.append(t) + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, ["hello42"] * TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_FORMAT_SIMPLE", uops) + self.assertNotIn("_GUARD_TOS_UNICODE", uops) + self.assertIn("_BINARY_OP_ADD_UNICODE", uops) + + def test_format_with_spec_narrows_to_str(self): + def testfunc(n): + x = [] + for _ in range(n): + v = 3.14 + s = f"{v:.2f}" + t = "pi=" + s + x.append(t) + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, ["pi=3.14"] * TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_FORMAT_WITH_SPEC", uops) + self.assertNotIn("_GUARD_TOS_UNICODE", uops) + self.assertIn("_BINARY_OP_ADD_UNICODE", uops) + def test_binary_op_subscr_str_int(self): def testfunc(n): x = 0 diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst new file mode 100644 index 00000000000000..3f7e7fa0c37a5b --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst @@ -0,0 +1,2 @@ +Allow the JIT to remove unicode guards after ``_FORMAT_SIMPLE`` and +``_FORMAT_WITH_SPEC`` by setting the return type to string. diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 792f83cdbd2d3a..814f4a632c5fcc 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -1551,6 +1551,14 @@ dummy_func(void) { set = sym_new_type(ctx, &PySet_Type); } + op(_FORMAT_SIMPLE, (value -- res)) { + res = sym_new_type(ctx, &PyUnicode_Type); + } + + op(_FORMAT_WITH_SPEC, (value, fmt_spec -- res)) { + res = sym_new_type(ctx, &PyUnicode_Type); + } + op(_SET_UPDATE, (set, unused[oparg-1], iterable -- set, unused[oparg-1], i)) { (void)set; i = iterable; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 7539133fb92096..9be588726f2760 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -4280,14 +4280,14 @@ case _FORMAT_SIMPLE: { JitOptRef res; - res = sym_new_not_null(ctx); + res = sym_new_type(ctx, &PyUnicode_Type); stack_pointer[-1] = res; break; } case _FORMAT_WITH_SPEC: { JitOptRef res; - res = sym_new_not_null(ctx); + res = sym_new_type(ctx, &PyUnicode_Type); CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; From ed1204e11f4e25863cc7d12a792c39c1a96dbc3d Mon Sep 17 00:00:00 2001 From: Neko Asakura Date: Wed, 1 Apr 2026 12:03:26 -0400 Subject: [PATCH 2/6] gh-131798: Narrow the return type of `_FORMAT_SIMPLE` and `_FORMAT_WITH_SPEC` to str for built-in types --- Include/internal/pycore_optimizer.h | 1 + ...-03-30-17-01-34.gh-issue-131798.WSefcr.rst | 2 +- Python/optimizer_analysis.c | 1 + Python/optimizer_bytecodes.c | 12 ++++++++++-- Python/optimizer_cases.c.h | 16 ++++++++++++++-- Python/optimizer_symbols.c | 19 +++++++++++++++++++ 6 files changed, 46 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 2986afb142b5d1..101909d11baccb 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -393,6 +393,7 @@ extern JitOptRef _Py_uop_sym_new_type( extern JitOptRef _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val); extern JitOptRef _Py_uop_sym_new_const_steal(JitOptContext *ctx, PyObject *const_val); +extern bool _Py_uop_sym_is_safe_type(JitOptRef sym); bool _Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym); _PyStackRef _Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym); extern JitOptRef _Py_uop_sym_new_null(JitOptContext *ctx); diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst index 3f7e7fa0c37a5b..b587598be65b7d 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst @@ -1,2 +1,2 @@ Allow the JIT to remove unicode guards after ``_FORMAT_SIMPLE`` and -``_FORMAT_WITH_SPEC`` by setting the return type to string. +``_FORMAT_WITH_SPEC`` when the input type is a known built-in type. diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 4672a272fc9203..3e4942c483b7ec 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -250,6 +250,7 @@ add_op(JitOptContext *ctx, _PyUOpInstruction *this_instr, /* Shortened forms for convenience, used in optimizer_bytecodes.c */ #define sym_is_not_null _Py_uop_sym_is_not_null #define sym_is_const _Py_uop_sym_is_const +#define sym_is_safe_type _Py_uop_sym_is_safe_type #define sym_is_safe_const _Py_uop_sym_is_safe_const #define sym_get_const _Py_uop_sym_get_const #define sym_new_const_steal _Py_uop_sym_new_const_steal diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 814f4a632c5fcc..ec449ab0dccd46 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -1552,11 +1552,19 @@ dummy_func(void) { } op(_FORMAT_SIMPLE, (value -- res)) { - res = sym_new_type(ctx, &PyUnicode_Type); + if (sym_is_safe_type(value)) { + res = sym_new_type(ctx, &PyUnicode_Type); + } else { + res = sym_new_not_null(ctx); + } } op(_FORMAT_WITH_SPEC, (value, fmt_spec -- res)) { - res = sym_new_type(ctx, &PyUnicode_Type); + if (sym_is_safe_type(value)) { + res = sym_new_type(ctx, &PyUnicode_Type); + } else { + res = sym_new_not_null(ctx); + } } op(_SET_UPDATE, (set, unused[oparg-1], iterable -- set, unused[oparg-1], i)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 9be588726f2760..86d6fa6a77872c 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -4279,15 +4279,27 @@ } case _FORMAT_SIMPLE: { + JitOptRef value; JitOptRef res; - res = sym_new_type(ctx, &PyUnicode_Type); + value = stack_pointer[-1]; + if (sym_is_safe_type(value)) { + res = sym_new_type(ctx, &PyUnicode_Type); + } else { + res = sym_new_not_null(ctx); + } stack_pointer[-1] = res; break; } case _FORMAT_WITH_SPEC: { + JitOptRef value; JitOptRef res; - res = sym_new_type(ctx, &PyUnicode_Type); + value = stack_pointer[-2]; + if (sym_is_safe_type(value)) { + res = sym_new_type(ctx, &PyUnicode_Type); + } else { + res = sym_new_not_null(ctx); + } CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index d6f1c09490aac9..f4072326fb4d2d 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -264,6 +264,25 @@ _Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym) return PyStackRef_FromPyObjectBorrow(const_val); } +/* + Indicates whether the type is a known built-in type + that is safe to narrow. + */ +bool +_Py_uop_sym_is_safe_type(JitOptRef sym) +{ + PyTypeObject *typ = _Py_uop_sym_get_type(sym); + if (typ == NULL) { + return false; + } + return (typ == &PyLong_Type) || + (typ == &PyUnicode_Type) || + (typ == &PyFloat_Type) || + (typ == &_PyNone_Type) || + (typ == &PyBool_Type) || + (typ == &PyFrozenDict_Type); +} + /* Indicates whether the constant is safe to constant evaluate (without side effects). From d09a7ccc9afa331ed9ecdecdad3b32eed761a070 Mon Sep 17 00:00:00 2001 From: Neko Asakura Date: Wed, 1 Apr 2026 12:34:26 -0400 Subject: [PATCH 3/6] gh-131798: Narrow the return type of `_FORMAT_SIMPLE` and `_FORMAT_WITH_SPEC` to str for built-in types --- Python/optimizer_symbols.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index f4072326fb4d2d..d6c014a838cc5d 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -264,6 +264,16 @@ _Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym) return PyStackRef_FromPyObjectBorrow(const_val); } +static bool +is_safe_builtin_type(PyTypeObject *typ) +{ + return (typ == &PyUnicode_Type) || + (typ == &PyFloat_Type) || + (typ == &_PyNone_Type) || + (typ == &PyBool_Type) || + (typ == &PyFrozenDict_Type); +} + /* Indicates whether the type is a known built-in type that is safe to narrow. @@ -275,12 +285,7 @@ _Py_uop_sym_is_safe_type(JitOptRef sym) if (typ == NULL) { return false; } - return (typ == &PyLong_Type) || - (typ == &PyUnicode_Type) || - (typ == &PyFloat_Type) || - (typ == &_PyNone_Type) || - (typ == &PyBool_Type) || - (typ == &PyFrozenDict_Type); + return (typ == &PyLong_Type) || is_safe_builtin_type(typ); } /* @@ -298,11 +303,7 @@ _Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym) return true; } PyTypeObject *typ = Py_TYPE(const_val); - return (typ == &PyUnicode_Type) || - (typ == &PyFloat_Type) || - (typ == &_PyNone_Type) || - (typ == &PyBool_Type) || - (typ == &PyFrozenDict_Type); + return is_safe_builtin_type(typ); } void From e1c041751c80a41eeac7d5d2f31f411083c79b41 Mon Sep 17 00:00:00 2001 From: Neko Asakura Date: Fri, 1 May 2026 10:06:05 -0400 Subject: [PATCH 4/6] address feedback --- Python/optimizer_symbols.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index be7dbf28e8d943..6e26056c31046e 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -264,15 +264,18 @@ _Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym) return PyStackRef_FromPyObjectBorrow(const_val); } +/* + An atomic built-in type is not a container, so its values evaluate + without side effects and common functions on them are pure. + */ static bool -is_safe_builtin_type(PyTypeObject *typ) +is_atomic_builtin_type(PyTypeObject *typ) { return (typ == &PyUnicode_Type) || (typ == &PyFloat_Type) || (typ == &_PyNone_Type) || (typ == &PyBool_Type) || - (typ == &PyFrozenDict_Type) || - (typ == &PyFrozenSet_Type); + (typ == &PyBytes_Type); } /* @@ -286,7 +289,7 @@ _Py_uop_sym_is_safe_type(JitOptRef sym) if (typ == NULL) { return false; } - return (typ == &PyLong_Type) || is_safe_builtin_type(typ); + return is_atomic_builtin_type(typ) || typ == &PyLong_Type; } /* @@ -304,7 +307,9 @@ _Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym) return true; } PyTypeObject *typ = Py_TYPE(const_val); - return is_safe_builtin_type(typ); + return is_atomic_builtin_type(typ) || + typ == &PyFrozenDict_Type || + typ == &PyFrozenSet_Type; } bool From 3752820fe8a489429668728776632fc5c119380c Mon Sep 17 00:00:00 2001 From: Neko Asakura Date: Fri, 1 May 2026 12:11:27 -0400 Subject: [PATCH 5/6] add constant folding test --- Lib/test/test_capi/test_opt.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 2b9e7483fabdbf..8c0be5f5814c78 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -26,6 +26,12 @@ # For frozenset JIT tests FROZEN_SET_CONST = frozenset({1, 2, 3}) +class _UnsafeEq: + def __hash__(self): return hash(1) + def __eq__(self, other): return False + +UNSAFE_FROZEN_SET = frozenset({_UnsafeEq()}) + class _GenericKey: pass @@ -5126,6 +5132,18 @@ def testfunc(n): self.assertGreaterEqual(count_ops(ex, "_LOAD_CONST_INLINE_BORROW"), 3) self.assertNotIn("_CONTAINS_OP_SET", uops) + def test_frozenset_unsafe_eq_not_const_folded(self): + def testfunc(n): + x = 0 + for _ in range(n): + if 1 in UNSAFE_FROZEN_SET: + x += 1 + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, 0) + self.assertIn("_CONTAINS_OP_SET", get_opnames(ex)) + def test_contains_op_frozendict_const_fold(self): def testfunc(n): x = 0 From 02c37ad7dd7b953da0b685b9062c04ce884b6ada Mon Sep 17 00:00:00 2001 From: Neko Asakura Date: Sat, 2 May 2026 22:50:47 -0400 Subject: [PATCH 6/6] Revert "add constant folding test" This reverts commit 3752820fe8a489429668728776632fc5c119380c. --- Lib/test/test_capi/test_opt.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 8c0be5f5814c78..2b9e7483fabdbf 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -26,12 +26,6 @@ # For frozenset JIT tests FROZEN_SET_CONST = frozenset({1, 2, 3}) -class _UnsafeEq: - def __hash__(self): return hash(1) - def __eq__(self, other): return False - -UNSAFE_FROZEN_SET = frozenset({_UnsafeEq()}) - class _GenericKey: pass @@ -5132,18 +5126,6 @@ def testfunc(n): self.assertGreaterEqual(count_ops(ex, "_LOAD_CONST_INLINE_BORROW"), 3) self.assertNotIn("_CONTAINS_OP_SET", uops) - def test_frozenset_unsafe_eq_not_const_folded(self): - def testfunc(n): - x = 0 - for _ in range(n): - if 1 in UNSAFE_FROZEN_SET: - x += 1 - return x - - res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) - self.assertEqual(res, 0) - self.assertIn("_CONTAINS_OP_SET", get_opnames(ex)) - def test_contains_op_frozendict_const_fold(self): def testfunc(n): x = 0