diff --git a/Modules/_remote_debugging/binary_io.h b/Modules/_remote_debugging/binary_io.h index 18f989f672e103..87a54371c774f1 100644 --- a/Modules/_remote_debugging/binary_io.h +++ b/Modules/_remote_debugging/binary_io.h @@ -61,11 +61,36 @@ extern "C" { #define HDR_SIZE_COMPRESSION 4 #define FILE_HEADER_SIZE (HDR_OFF_COMPRESSION + HDR_SIZE_COMPRESSION) #define FILE_HEADER_PLACEHOLDER_SIZE 64 -#define SAMPLE_HEADER_FIXED_SIZE (sizeof(uint64_t) + sizeof(uint32_t) + 1) static_assert(FILE_HEADER_SIZE <= FILE_HEADER_PLACEHOLDER_SIZE, "FILE_HEADER_SIZE exceeds FILE_HEADER_PLACEHOLDER_SIZE"); +/* Sample header field offsets and sizes */ +#define SMP_OFF_THREAD_ID 0 +#define SMP_SIZE_THREAD_ID sizeof(uint64_t) +#define SMP_OFF_INTERPRETER_ID (SMP_OFF_THREAD_ID + SMP_SIZE_THREAD_ID) +#define SMP_SIZE_INTERPRETER_ID sizeof(uint32_t) +#define SMP_OFF_ENCODING (SMP_OFF_INTERPRETER_ID + SMP_SIZE_INTERPRETER_ID) +#define SMP_SIZE_ENCODING sizeof(uint8_t) +#define SAMPLE_HEADER_FIXED_SIZE (SMP_OFF_ENCODING + SMP_SIZE_ENCODING) + +static_assert(SAMPLE_HEADER_FIXED_SIZE == 13, + "SAMPLE_HEADER_FIXED_SIZE must remain 13"); + +/* Footer field offsets and sizes */ +#define FTR_OFF_STRINGS 0 +#define FTR_SIZE_STRINGS sizeof(uint32_t) +#define FTR_OFF_FRAMES (FTR_OFF_STRINGS + FTR_SIZE_STRINGS) +#define FTR_SIZE_FRAMES sizeof(uint32_t) +#define FTR_OFF_FILE_SIZE (FTR_OFF_FRAMES + FTR_SIZE_FRAMES) +#define FTR_SIZE_FILE_SIZE sizeof(uint64_t) +#define FTR_OFF_CHECKSUM (FTR_OFF_FILE_SIZE + FTR_SIZE_FILE_SIZE) +#define FTR_SIZE_CHECKSUM (2 * sizeof(uint64_t)) +#define FILE_FOOTER_SIZE (FTR_OFF_CHECKSUM + FTR_SIZE_CHECKSUM) + +static_assert(FILE_FOOTER_SIZE == 32, + "FILE_FOOTER_SIZE must remain 32"); + /* Buffer sizes: 512KB balances syscall amortization against memory use, * and aligns well with filesystem block sizes and zstd dictionary windows */ #define WRITE_BUFFER_SIZE (512 * 1024) diff --git a/Modules/_remote_debugging/binary_io_reader.c b/Modules/_remote_debugging/binary_io_reader.c index 6c32ef70ac3f65..3ec4e0c77964c8 100644 --- a/Modules/_remote_debugging/binary_io_reader.c +++ b/Modules/_remote_debugging/binary_io_reader.c @@ -23,15 +23,11 @@ * ============================================================================ */ /* File structure sizes */ -#define FILE_FOOTER_SIZE 32 #define MIN_DECOMPRESS_BUFFER_SIZE (64 * 1024) /* Minimum decompression buffer */ /* Progress callback frequency */ #define PROGRESS_CALLBACK_INTERVAL 1000 -/* Maximum decompression size limit (1GB) */ -#define MAX_DECOMPRESS_SIZE (1ULL << 30) - /* ============================================================================ * BINARY READER IMPLEMENTATION * ============================================================================ */ @@ -47,8 +43,8 @@ reader_parse_header(BinaryReader *reader, const uint8_t *data, size_t file_size) /* Use memcpy to avoid strict aliasing violations and unaligned access */ uint32_t magic; uint32_t version; - memcpy(&magic, &data[0], sizeof(magic)); - memcpy(&version, &data[4], sizeof(version)); + memcpy(&magic, &data[HDR_OFF_MAGIC], HDR_SIZE_MAGIC); + memcpy(&version, &data[HDR_OFF_VERSION], HDR_SIZE_VERSION); /* Detect endianness from magic number */ if (magic == BINARY_FORMAT_MAGIC) { @@ -119,8 +115,8 @@ reader_parse_footer(BinaryReader *reader, const uint8_t *data, size_t file_size) const uint8_t *footer = data + file_size - FILE_FOOTER_SIZE; /* Use memcpy to avoid strict aliasing violations */ uint32_t strings_count, frames_count; - memcpy(&strings_count, &footer[0], sizeof(strings_count)); - memcpy(&frames_count, &footer[4], sizeof(frames_count)); + memcpy(&strings_count, &footer[FTR_OFF_STRINGS], FTR_SIZE_STRINGS); + memcpy(&frames_count, &footer[FTR_OFF_FRAMES], FTR_SIZE_FRAMES); reader->strings_count = SWAP32_IF(reader->needs_swap, strings_count); reader->frames_count = SWAP32_IF(reader->needs_swap, frames_count); @@ -984,11 +980,11 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre /* Use memcpy to avoid strict aliasing violations, then byte-swap if needed */ uint64_t thread_id_raw; uint32_t interpreter_id_raw; - memcpy(&thread_id_raw, &reader->sample_data[offset], sizeof(thread_id_raw)); - offset += 8; + memcpy(&thread_id_raw, &reader->sample_data[offset], SMP_SIZE_THREAD_ID); + offset += SMP_SIZE_THREAD_ID; - memcpy(&interpreter_id_raw, &reader->sample_data[offset], sizeof(interpreter_id_raw)); - offset += 4; + memcpy(&interpreter_id_raw, &reader->sample_data[offset], SMP_SIZE_INTERPRETER_ID); + offset += SMP_SIZE_INTERPRETER_ID; uint64_t thread_id = SWAP64_IF(reader->needs_swap, thread_id_raw); uint32_t interpreter_id = SWAP32_IF(reader->needs_swap, interpreter_id_raw); diff --git a/Modules/_remote_debugging/binary_io_writer.c b/Modules/_remote_debugging/binary_io_writer.c index 0ac6c88d0373a7..4e29c3142e2d4c 100644 --- a/Modules/_remote_debugging/binary_io_writer.c +++ b/Modules/_remote_debugging/binary_io_writer.c @@ -29,9 +29,6 @@ /* Frame buffer: depth varint (max 2 bytes for 256) + 256 frames * 5 bytes/varint + margin */ #define MAX_FRAME_BUFFER_SIZE ((MAX_STACK_DEPTH * MAX_VARINT_SIZE_U32) + MAX_VARINT_SIZE_U32 + 16) -/* File structure sizes */ -#define FILE_FOOTER_SIZE 32 - /* Helper macro: convert PyLong to int32, using default_val if conversion fails */ #define PYLONG_TO_INT32_OR_DEFAULT(obj, var, default_val) \ do { \ @@ -588,9 +585,9 @@ static inline int write_sample_header(BinaryWriter *writer, ThreadEntry *entry, uint8_t encoding) { uint8_t header[SAMPLE_HEADER_FIXED_SIZE]; - memcpy(header, &entry->thread_id, 8); - memcpy(header + 8, &entry->interpreter_id, 4); - header[12] = encoding; + memcpy(header + SMP_OFF_THREAD_ID, &entry->thread_id, SMP_SIZE_THREAD_ID); + memcpy(header + SMP_OFF_INTERPRETER_ID, &entry->interpreter_id, SMP_SIZE_INTERPRETER_ID); + header[SMP_OFF_ENCODING] = encoding; return writer_write_bytes(writer, header, SAMPLE_HEADER_FIXED_SIZE); } @@ -649,9 +646,9 @@ write_sample_with_encoding(BinaryWriter *writer, ThreadEntry *entry, { /* Header: thread_id(8) + interpreter_id(4) + encoding(1) + delta(varint) + status(1) */ uint8_t header_buf[SAMPLE_HEADER_MAX_SIZE]; - memcpy(header_buf, &entry->thread_id, 8); - memcpy(header_buf + 8, &entry->interpreter_id, 4); - header_buf[12] = (uint8_t)encoding_type; + memcpy(header_buf + SMP_OFF_THREAD_ID, &entry->thread_id, SMP_SIZE_THREAD_ID); + memcpy(header_buf + SMP_OFF_INTERPRETER_ID, &entry->interpreter_id, SMP_SIZE_INTERPRETER_ID); + header_buf[SMP_OFF_ENCODING] = (uint8_t)encoding_type; size_t varint_len = encode_varint_u64( header_buf + SAMPLE_HEADER_FIXED_SIZE, timestamp_delta); @@ -1145,17 +1142,17 @@ binary_writer_finalize(BinaryWriter *writer) PyErr_SetFromErrno(PyExc_IOError); return -1; } - uint64_t file_size = (uint64_t)footer_offset + 32; - uint8_t footer[32] = {0}; + uint64_t file_size = (uint64_t)footer_offset + FILE_FOOTER_SIZE; + uint8_t footer[FILE_FOOTER_SIZE] = {0}; /* Cast size_t to uint32_t before memcpy to ensure correct bytes are copied * on both little-endian and big-endian systems (size_t is 8 bytes on 64-bit) */ uint32_t string_count_u32 = (uint32_t)writer->string_count; uint32_t frame_count_u32 = (uint32_t)writer->frame_count; - memcpy(footer + 0, &string_count_u32, 4); - memcpy(footer + 4, &frame_count_u32, 4); - memcpy(footer + 8, &file_size, 8); - /* bytes 16-31: checksum placeholder (zeros) */ - if (fwrite_checked_allow_threads(footer, 32, writer->fp) < 0) { + memcpy(footer + FTR_OFF_STRINGS, &string_count_u32, FTR_SIZE_STRINGS); + memcpy(footer + FTR_OFF_FRAMES, &frame_count_u32, FTR_SIZE_FRAMES); + memcpy(footer + FTR_OFF_FILE_SIZE, &file_size, FTR_SIZE_FILE_SIZE); + /* checksum (FTR_OFF_CHECKSUM..FILE_FOOTER_SIZE-1): placeholder zeros */ + if (fwrite_checked_allow_threads(footer, FILE_FOOTER_SIZE, writer->fp) < 0) { return -1; }