#0 __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0)
at ./nptl/pthread_kill.c:44
#1 0x00007ffff7d6389f in __pthread_kill_internal (signo=6, threadid=<optimized out>)
at ./nptl/pthread_kill.c:78
#2 0x00007ffff7d17a52 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
#3 0x00007ffff7d02469 in __GI_abort () at ./stdlib/abort.c:79
#4 0x00007ffff7d02395 in __assert_fail_base (fmt=0x7ffff7e8fc30 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n",
assertion=0x5555558a5a10 "p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF",
file=0x5555558a5898 "Parser/pegen_errors.c", line=335, function=<optimized out>) at ./assert/assert.c:92
#5 0x00007ffff7d10b02 in __GI___assert_fail (
assertion=assertion@entry=0x5555558a5a10 "p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF", file=file@entry=0x5555558a5898 "Parser/pegen_errors.c", line=line@entry=335,
function=function@entry=0x5555558a5c40 <__PRETTY_FUNCTION__.1> "_PyPegen_raise_error_known_location")
at ./assert/assert.c:101
#6 0x000055555564c7be in _PyPegen_raise_error_known_location (p=0x7ffff7a8c880,
errtype=0x555555a1cb20 <_PyExc_SyntaxError>, lineno=4, col_offset=8, end_lineno=4, end_col_offset=8,
errmsg=0x5555558a5946 "(%s) %U", va=0x7fffffffd058) at Parser/pegen_errors.c:335
#7 0x000055555564cc08 in _PyPegen_raise_error (p=p@entry=0x7ffff7a8c880, errtype=<optimized out>,
errmsg=errmsg@entry=0x5555558a5946 "(%s) %U") at Parser/pegen_errors.c:235
#8 0x000055555564cedb in _Pypegen_raise_decode_error (p=p@entry=0x7ffff7a8c880) at Parser/pegen_errors.c:133
#9 0x000055555564eb1f in _PyPegen_concatenate_strings (p=p@entry=0x7ffff7a8c880,
strings=strings@entry=0x555555c13290) at Parser/action_helpers.c:957
#10 0x00005555556598cf in strings_rule (p=p@entry=0x7ffff7a8c880) at Parser/parser.c:15481
#11 0x000055555566362f in atom_rule (p=p@entry=0x7ffff7a8c880) at Parser/parser.c:14292
#12 0x000055555567c149 in t_primary_raw (p=p@entry=0x7ffff7a8c880) at Parser/parser.c:18126
#13 0x000055555567c4ac in t_primary_rule (p=p@entry=0x7ffff7a8c880) at Parser/parser.c:17916
#14 0x0000555555680978 in single_subscript_attribute_target_rule (p=p@entry=0x7ffff7a8c880)
at Parser/parser.c:17805
#15 0x00005555556813e4 in _tmp_12_rule (p=p@entry=0x7ffff7a8c880) at Parser/parser.c:24220
#16 0x00005555556a5bb0 in assignment_rule (p=p@entry=0x7ffff7a8c880) at Parser/parser.c:2245
#17 0x00005555556a9f37 in simple_stmt_rule (p=p@entry=0x7ffff7a8c880) at Parser/parser.c:1652
#18 0x00005555556ab747 in simple_stmts_rule (p=p@entry=0x7ffff7a8c880) at Parser/parser.c:1547
#19 0x00005555556ac6f6 in statement_rule (p=p@entry=0x7ffff7a8c880) at Parser/parser.c:1370
#20 0x00005555556ac969 in _loop1_3_rule (p=p@entry=0x7ffff7a8c880) at Parser/parser.c:23651
#21 0x00005555556acb51 in statements_rule (p=p@entry=0x7ffff7a8c880) at Parser/parser.c:1302
#22 0x00005555556b4ea0 in file_rule (p=0x7ffff7a8c880) at Parser/parser.c:1061
#23 0x00005555556b6953 in _PyPegen_parse (p=p@entry=0x7ffff7a8c880) at Parser/parser.c:38772
#24 0x000055555564bfc6 in _PyPegen_run_parser (p=p@entry=0x7ffff7a8c880) at Parser/pegen.c:811
#25 0x000055555564c1b2 in _PyPegen_run_parser_from_file_pointer (fp=fp@entry=0x555555be4310,
start_rule=start_rule@entry=257, filename_ob=filename_ob@entry=0x7ffff7aa9150, enc=enc@entry=0x0,
ps1=ps1@entry=0x0, ps2=ps2@entry=0x0, flags=0x7fffffffd8a8, errcode=0x0, arena=0x7ffff7a939a0)
at Parser/pegen.c:884
#26 0x00005555556b8c2f in _PyParser_ASTFromFile (fp=fp@entry=0x555555be4310,
filename_ob=filename_ob@entry=0x7ffff7aa9150, enc=enc@entry=0x0, mode=mode@entry=257, ps1=ps1@entry=0x0,
ps2=ps2@entry=0x0, flags=0x7fffffffd8a8, errcode=0x0, arena=0x7ffff7a939a0) at Parser/peg_api.c:26
#27 0x0000555555813ca1 in pyrun_file (fp=fp@entry=0x555555be4310, filename=filename@entry=0x7ffff7aa9150,
start=start@entry=257, globals=globals@entry=0x7ffff7a45010, locals=locals@entry=0x7ffff7a45010,
closeit=closeit@entry=1, flags=0x7fffffffd8a8) at Python/pythonrun.c:1620
#28 0x0000555555816941 in _PyRun_SimpleFileObject (fp=fp@entry=0x555555be4310,
filename=filename@entry=0x7ffff7aa9150, closeit=closeit@entry=1, flags=flags@entry=0x7fffffffd8a8)
at Python/pythonrun.c:439
#29 0x0000555555816af5 in _PyRun_AnyFileObject (fp=fp@entry=0x555555be4310,
filename=filename@entry=0x7ffff7aa9150, closeit=closeit@entry=1, flags=flags@entry=0x7fffffffd8a8)
at Python/pythonrun.c:78
#30 0x00005555558332bf in pymain_run_file_obj (program_name=program_name@entry=0x7ffff7aa91c0,
filename=filename@entry=0x7ffff7aa9150, skip_source_first_line=0) at Modules/main.c:360
#31 0x00005555558333c1 in pymain_run_file (config=config@entry=0x555555b29490 <_PyRuntime+69264>)
at Modules/main.c:379
#32 0x0000555555833a99 in pymain_run_python (exitcode=exitcode@entry=0x7fffffffd9fc) at Modules/main.c:610
#33 0x0000555555833cea in Py_RunMain () at Modules/main.c:689
#34 0x0000555555833d3f in pymain_main (args=args@entry=0x7fffffffda40) at Modules/main.c:719
#35 0x0000555555833dc4 in Py_BytesMain (argc=<optimized out>, argv=<optimized out>) at Modules/main.c:743
#36 0x000055555564a742 in main (argc=<optimized out>, argv=<optimized out>) at ./Programs/python.c:15
Bug report
When loading a file containing invalid UTF-8, without a source encoding marker, an assertion fails. If the assert is removed, the error message is close, but not quite correct, and it has incorrect position information.
To reproduce, create a file with an invalid UTF-8 start byte followed by a valid continuation byte, for example, the sort of contrived and grammatically incorrect:
results in:
Backtrace
In the above assert, p->tok->done is E_OK. Bypassing the assert, you get an incorrect position (it should be 7, not 3):
Your environment
Linux, Python 3.11
This bug was found because the coverage-related test added in #94856 failed on some buildbots. For the test to fail, a buildbot needs to both (1) build with --with-pydebug and (2) have coredumps enabled in the OS. That test should probably be fixed so it fails consistently across all buildbots, even the ones that aren't set up that way.