diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index f356d60ae5c7a7..d33a2b2461fdf3 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -440,6 +440,7 @@ extern JitOptRef _Py_uop_sym_new_type( extern JitOptRef _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val); extern JitOptRef _Py_uop_sym_new_const_steal(JitOptContext *ctx, PyObject *const_val); +extern bool _Py_uop_sym_is_safe_type(JitOptRef sym); bool _Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym); bool _Py_uop_sym_is_not_container(JitOptRef sym); _PyStackRef _Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym); diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 7118dfeed9faee..2b9e7483fabdbf 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2526,6 +2526,44 @@ def testfunc(n): self.assertNotIn("_GUARD_TOS_UNICODE", uops) self.assertIn("_BINARY_OP_ADD_UNICODE", uops) + def test_format_simple_narrows_to_str(self): + def testfunc(n): + x = [] + for _ in range(n): + v = 42 + s = f"{v}" + t = "hello" + s + x.append(t) + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, ["hello42"] * TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_FORMAT_SIMPLE", uops) + self.assertNotIn("_GUARD_TOS_UNICODE", uops) + self.assertIn("_BINARY_OP_ADD_UNICODE", uops) + + def test_format_with_spec_narrows_to_str(self): + def testfunc(n): + x = [] + for _ in range(n): + v = 3.14 + s = f"{v:.2f}" + t = "pi=" + s + x.append(t) + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, ["pi=3.14"] * TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_FORMAT_WITH_SPEC", uops) + self.assertNotIn("_GUARD_TOS_UNICODE", uops) + self.assertIn("_BINARY_OP_ADD_UNICODE", uops) + def test_binary_op_subscr_str_int(self): def testfunc(n): x = 0 diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst new file mode 100644 index 00000000000000..b587598be65b7d --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst @@ -0,0 +1,2 @@ +Allow the JIT to remove unicode guards after ``_FORMAT_SIMPLE`` and +``_FORMAT_WITH_SPEC`` when the input type is a known built-in type. diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 9f6ce206ef4722..8ddffe67353da7 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -244,6 +244,7 @@ add_op(JitOptContext *ctx, _PyUOpInstruction *this_instr, /* Shortened forms for convenience, used in optimizer_bytecodes.c */ #define sym_is_not_null _Py_uop_sym_is_not_null #define sym_is_const _Py_uop_sym_is_const +#define sym_is_safe_type _Py_uop_sym_is_safe_type #define sym_is_safe_const _Py_uop_sym_is_safe_const #define sym_is_not_container _Py_uop_sym_is_not_container #define sym_get_const _Py_uop_sym_get_const diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 33b5257fd58281..7ef2665c6137ed 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -2084,6 +2084,22 @@ dummy_func(void) { set = sym_new_type(ctx, &PySet_Type); } + op(_FORMAT_SIMPLE, (value -- res)) { + if (sym_is_safe_type(value)) { + res = sym_new_type(ctx, &PyUnicode_Type); + } else { + res = sym_new_not_null(ctx); + } + } + + op(_FORMAT_WITH_SPEC, (value, fmt_spec -- res)) { + if (sym_is_safe_type(value)) { + res = sym_new_type(ctx, &PyUnicode_Type); + } else { + res = sym_new_not_null(ctx); + } + } + op(_SET_UPDATE, (set, unused[oparg-1], iterable -- set, unused[oparg-1], i)) { (void)set; i = iterable; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 8f208beb86476b..3c363c6d80c037 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -5140,15 +5140,27 @@ } case _FORMAT_SIMPLE: { + JitOptRef value; JitOptRef res; - res = sym_new_not_null(ctx); + value = stack_pointer[-1]; + if (sym_is_safe_type(value)) { + res = sym_new_type(ctx, &PyUnicode_Type); + } else { + res = sym_new_not_null(ctx); + } stack_pointer[-1] = res; break; } case _FORMAT_WITH_SPEC: { + JitOptRef value; JitOptRef res; - res = sym_new_not_null(ctx); + value = stack_pointer[-2]; + if (sym_is_safe_type(value)) { + res = sym_new_type(ctx, &PyUnicode_Type); + } else { + res = sym_new_not_null(ctx); + } CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 79f81482d247e3..a1d4d47860d5ad 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -261,6 +261,34 @@ _Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym) return PyStackRef_FromPyObjectBorrow(const_val); } +/* + An atomic built-in type is not a container, so its values evaluate + without side effects and common functions on them are pure. + */ +static bool +is_atomic_builtin_type(PyTypeObject *typ) +{ + return (typ == &PyUnicode_Type) || + (typ == &PyFloat_Type) || + (typ == &_PyNone_Type) || + (typ == &PyBool_Type) || + (typ == &PyBytes_Type); +} + +/* + Indicates whether the type is a known built-in type + that is safe to narrow. + */ +bool +_Py_uop_sym_is_safe_type(JitOptRef sym) +{ + PyTypeObject *typ = _Py_uop_sym_get_type(sym); + if (typ == NULL) { + return false; + } + return is_atomic_builtin_type(typ) || typ == &PyLong_Type; +} + /* Indicates whether the constant is safe to constant evaluate (without side effects). @@ -276,12 +304,9 @@ _Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym) return true; } PyTypeObject *typ = Py_TYPE(const_val); - return (typ == &PyUnicode_Type) || - (typ == &PyFloat_Type) || - (typ == &_PyNone_Type) || - (typ == &PyBool_Type) || - (typ == &PyFrozenDict_Type) || - (typ == &PyFrozenSet_Type); + return is_atomic_builtin_type(typ) || + typ == &PyFrozenDict_Type || + typ == &PyFrozenSet_Type; } bool