-
Notifications
You must be signed in to change notification settings - Fork 185
Handle cloning of objects larger than 4GB on Windows #2102
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
dc66010
92f4327
3a53906
3274cba
afa74a3
a301988
859e93e
29b9a74
8e6e720
5b44410
1eaaa7f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -37,7 +37,7 @@ static const char index_pack_usage[] = | |
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Torsten Bögershausen wrote on the Git mailing list (how to reply to this email): On Mon, May 04, 2026 at 05:08:18PM +0000, Johannes Schindelin via GitGitGadget wrote:
> From: Johannes Schindelin <johannes.schindelin@gmx.de>
>
> When unpacking objects from a packfile, the object size is decoded
> from a variable-length encoding. On platforms where unsigned long is
> 32-bit (such as Windows, even in 64-bit builds), the shift operation
> overflows when decoding sizes larger than 4GB. The result is a
> truncated size value, causing the unpacked object to be corrupted or
> rejected.
>
> Fix this by changing the size variable to size_t, which is 64-bit on
> 64-bit platforms, and ensuring the shift arithmetic occurs in 64-bit
> space.
>
> This was originally authored by LordKiRon <https://github.com/LordKiRon>,
> who preferred not to reveal their real name and therefore agreed that I
> take over authorship.
>
> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> ---
> builtin/index-pack.c | 9 +++++----
> builtin/unpack-objects.c | 5 +++--
> 2 files changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/builtin/index-pack.c b/builtin/index-pack.c
> index ca7784dc2c..cc660582e9 100644
> --- a/builtin/index-pack.c
> +++ b/builtin/index-pack.c
> @@ -37,7 +37,7 @@ static const char index_pack_usage[] =
>
> struct object_entry {
> struct pack_idx_entry idx;
> - unsigned long size;
> + size_t size;
> unsigned char hdr_size;
> signed char type;
> signed char real_type;
> @@ -469,7 +469,7 @@ static int is_delta_type(enum object_type type)
> return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA);
> }
>
> -static void *unpack_entry_data(off_t offset, unsigned long size,
> +static void *unpack_entry_data(off_t offset, size_t size,
> enum object_type type, struct object_id *oid)
> {
> static char fixed_buf[8192];
> @@ -524,7 +524,8 @@ static void *unpack_raw_entry(struct object_entry *obj,
> struct object_id *oid)
> {
> unsigned char *p;
> - unsigned long size, c;
> + size_t size;
> + unsigned long c;
Does this look a little bit strange ?
p points to an unsigned char (better would be *uint8_t)
then it is dereferenced into an "unsigned long".
Then it is masked with 0x7f
In short: should "c" be declared as uint8_t ?
> off_t base_offset;
> unsigned shift;
> void *data;
> @@ -542,7 +543,7 @@ static void *unpack_raw_entry(struct object_entry *obj,
> p = fill(1);
> c = *p;
> use(1);
> - size += (c & 0x7f) << shift;
> + size += ((size_t)c & 0x7f) << shift;
> shift += 7;
> }
> obj->size = size;
> diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
> index e01cf6e360..59a36c2481 100644
> --- a/builtin/unpack-objects.c
> +++ b/builtin/unpack-objects.c
> @@ -533,7 +533,8 @@ static void unpack_one(unsigned nr)
> {
> unsigned shift;
> unsigned char *pack;
> - unsigned long size, c;
> + size_t size;
> + unsigned long c;
> enum object_type type;
>
> obj_list[nr].offset = consumed_bytes;
> @@ -548,7 +549,7 @@ static void unpack_one(unsigned nr)
> pack = fill(1);
> c = *pack;
> use(1);
> - size += (c & 0x7f) << shift;
> + size += ((size_t)c & 0x7f) << shift;
> shift += 7;
> }
>
> --
> gitgitgadget
>
> |
||
| struct object_entry { | ||
| struct pack_idx_entry idx; | ||
| unsigned long size; | ||
| size_t size; | ||
| unsigned char hdr_size; | ||
| signed char type; | ||
| signed char real_type; | ||
|
|
@@ -469,7 +469,7 @@ static int is_delta_type(enum object_type type) | |
| return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA); | ||
| } | ||
|
|
||
| static void *unpack_entry_data(off_t offset, unsigned long size, | ||
| static void *unpack_entry_data(off_t offset, size_t size, | ||
| enum object_type type, struct object_id *oid) | ||
| { | ||
| static char fixed_buf[8192]; | ||
|
|
@@ -524,7 +524,8 @@ static void *unpack_raw_entry(struct object_entry *obj, | |
| struct object_id *oid) | ||
| { | ||
| unsigned char *p; | ||
| unsigned long size, c; | ||
| size_t size; | ||
| unsigned long c; | ||
| off_t base_offset; | ||
| unsigned shift; | ||
| void *data; | ||
|
|
@@ -542,7 +543,7 @@ static void *unpack_raw_entry(struct object_entry *obj, | |
| p = fill(1); | ||
| c = *p; | ||
| use(1); | ||
| size += (c & 0x7f) << shift; | ||
| size += ((size_t)c & 0x7f) << shift; | ||
| shift += 7; | ||
| } | ||
| obj->size = size; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -629,14 +629,21 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry, | |
| struct packed_git *p = IN_PACK(entry); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Torsten Bögershausen wrote on the Git mailing list (how to reply to this email): On Mon, May 04, 2026 at 05:08:20PM +0000, Johannes Schindelin via GitGitGadget wrote:
> From: Johannes Schindelin <johannes.schindelin@gmx.de>
>
> The odb_read_stream structure uses unsigned long for the size field,
> which is 32-bit on Windows even in 64-bit builds. When streaming
> objects larger than 4GB, the size would be truncated to zero or an
> incorrect value, resulting in empty files being written to disk.
>
> Change the size field in odb_read_stream to size_t and introduce
> unpack_object_header_sz() to return sizes via size_t pointer. Since
> object_info.sizep remains unsigned long for API compatibility, use
> temporary variables where the types differ, with comments noting the
> truncation limitation for code paths that still use unsigned long.
>
> This was originally authored by LordKiRon <https://github.com/LordKiRon>,
> who preferred not to reveal their real name and therefore agreed that I
> take over authorship.
>
> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> ---
> builtin/pack-objects.c | 23 ++++++++++++++++-------
> object-file.c | 10 +++++++++-
> odb/streaming.c | 13 ++++++++++++-
> odb/streaming.h | 2 +-
> oss-fuzz/fuzz-pack-headers.c | 2 +-
> pack-bitmap.c | 2 +-
> pack-check.c | 6 ++++--
> packfile.c | 24 +++++++++++++++---------
> packfile.h | 4 ++--
> 9 files changed, 61 insertions(+), 25 deletions(-)
>
> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> index dd2480a73d..aa4b1cb9b8 100644
> --- a/builtin/pack-objects.c
> +++ b/builtin/pack-objects.c
I haven't been able to follow all changes, so this may be false alarm.
Do we need a cast_size_t_to_ulong() somewhere ?
> @@ -629,14 +629,21 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
> struct packed_git *p = IN_PACK(entry);
> struct pack_window *w_curs = NULL;
> uint32_t pos;
> - off_t offset;
> + off_t offset, cur;
> enum object_type type = oe_type(entry);
> + enum object_type in_pack_type;
> off_t datalen;
> unsigned char header[MAX_PACK_OBJECT_HEADER],
> dheader[MAX_PACK_OBJECT_HEADER];
> unsigned hdrlen;
> const unsigned hashsz = the_hash_algo->rawsz;
> - unsigned long entry_size = SIZE(entry);
> + size_t entry_size;
> +
> + cur = entry->in_pack_offset;
> + in_pack_type = unpack_object_header(p, &w_curs, &cur, &entry_size);
> + if (in_pack_type < 0)
> + die(_("write_reuse_object: unable to parse object header of %s"),
> + oid_to_hex(&entry->idx.oid));
>
> if (DELTA(entry))
> type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
> @@ -1087,7 +1094,7 @@ static void write_reused_pack_one(struct packed_git *reuse_packfile,
> {
> off_t offset, next, cur;
> enum object_type type;
> - unsigned long size;
> + size_t size;
>
> offset = pack_pos_to_offset(reuse_packfile, pos);
> next = pack_pos_to_offset(reuse_packfile, pos + 1);
> @@ -2243,7 +2250,7 @@ static void check_object(struct object_entry *entry, uint32_t object_index)
> off_t ofs;
> unsigned char *buf, c;
> enum object_type type;
> - unsigned long in_pack_size;
> + size_t in_pack_size;
>
> buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
>
> @@ -2734,16 +2741,18 @@ unsigned long oe_get_size_slow(struct packing_data *pack,
> struct pack_window *w_curs;
> unsigned char *buf;
> enum object_type type;
> - unsigned long used, avail, size;
> + unsigned long used, avail;
> + size_t size;
>
> if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
> + unsigned long sz;
> packing_data_lock(&to_pack);
> if (odb_read_object_info(the_repository->objects,
> - &e->idx.oid, &size) < 0)
> + &e->idx.oid, &sz) < 0)
> die(_("unable to get size of %s"),
> oid_to_hex(&e->idx.oid));
> packing_data_unlock(&to_pack);
> - return size;
> + return sz;
> }
>
> p = oe_in_pack(pack, e);
> diff --git a/object-file.c b/object-file.c
> index 086b2b65ff..0be2981c7a 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -2326,6 +2326,7 @@ int odb_source_loose_read_object_stream(struct odb_read_stream **out,
> struct object_info oi = OBJECT_INFO_INIT;
> struct odb_loose_read_stream *st;
> unsigned long mapsize;
> + unsigned long size_ul;
> void *mapped;
>
> mapped = odb_source_loose_map_object(source, oid, &mapsize);
> @@ -2349,11 +2350,18 @@ int odb_source_loose_read_object_stream(struct odb_read_stream **out,
> goto error;
> }
>
> - oi.sizep = &st->base.size;
> + /*
> + * object_info.sizep is unsigned long* (32-bit on Windows), but
> + * st->base.size is size_t (64-bit). Use temporary variable.
> + * Note: loose objects >4GB would still truncate here, but such
> + * large loose objects are uncommon (they'd normally be packed).
> + */
> + oi.sizep = &size_ul;
> oi.typep = &st->base.type;
>
> if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
> goto error;
> + st->base.size = size_ul;
>
> st->mapped = mapped;
> st->mapsize = mapsize;
> diff --git a/odb/streaming.c b/odb/streaming.c
> index 5927a12954..af2adf5ce7 100644
> --- a/odb/streaming.c
> +++ b/odb/streaming.c
> @@ -157,15 +157,26 @@ static int open_istream_incore(struct odb_read_stream **out,
> .base.read = read_istream_incore,
> };
> struct odb_incore_read_stream *st;
> + unsigned long size_ul;
> int ret;
>
> oi.typep = &stream.base.type;
> - oi.sizep = &stream.base.size;
> + /*
> + * object_info.sizep is unsigned long* (32-bit on Windows), but
> + * stream.base.size is size_t (64-bit). We use a temporary variable
> + * because the types are incompatible. Note: this path still truncates
> + * for >4GB objects, but large objects should use pack streaming
> + * (packfile_store_read_object_stream) which handles size_t properly.
> + * This incore fallback is only used for small objects or when pack
> + * streaming is unavailable.
> + */
> + oi.sizep = &size_ul;
> oi.contentp = (void **)&stream.buf;
> ret = odb_read_object_info_extended(odb, oid, &oi,
> OBJECT_INFO_DIE_IF_CORRUPT);
> if (ret)
> return ret;
> + stream.base.size = size_ul;
>
> CALLOC_ARRAY(st, 1);
> *st = stream;
> diff --git a/odb/streaming.h b/odb/streaming.h
> index c7861f7e13..517e2ea2d3 100644
> --- a/odb/streaming.h
> +++ b/odb/streaming.h
> @@ -21,7 +21,7 @@ struct odb_read_stream {
> odb_read_stream_close_fn close;
> odb_read_stream_read_fn read;
> enum object_type type;
> - unsigned long size; /* inflated size of full object */
> + size_t size; /* inflated size of full object */
> };
>
> /*
> diff --git a/oss-fuzz/fuzz-pack-headers.c b/oss-fuzz/fuzz-pack-headers.c
> index 150c0f5fa2..ef61ab577c 100644
> --- a/oss-fuzz/fuzz-pack-headers.c
> +++ b/oss-fuzz/fuzz-pack-headers.c
> @@ -6,7 +6,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
> int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
> {
> enum object_type type;
> - unsigned long len;
> + size_t len;
>
> unpack_object_header_buffer((const unsigned char *)data,
> (unsigned long)size, &type, &len);
> diff --git a/pack-bitmap.c b/pack-bitmap.c
> index f6ec18d83a..f9af8a96bd 100644
> --- a/pack-bitmap.c
> +++ b/pack-bitmap.c
> @@ -2270,7 +2270,7 @@ static int try_partial_reuse(struct bitmap_index *bitmap_git,
> {
> off_t delta_obj_offset;
> enum object_type type;
> - unsigned long size;
> + size_t size;
>
> if (pack_pos >= pack->p->num_objects)
> return -1; /* not actually in the pack */
> diff --git a/pack-check.c b/pack-check.c
> index 79992bb509..2792f34d25 100644
> --- a/pack-check.c
> +++ b/pack-check.c
> @@ -110,7 +110,7 @@ static int verify_packfile(struct repository *r,
> void *data;
> struct object_id oid;
> enum object_type type;
> - unsigned long size;
> + size_t size;
> off_t curpos;
> int data_valid;
>
> @@ -143,7 +143,9 @@ static int verify_packfile(struct repository *r,
> data = NULL;
> data_valid = 0;
> } else {
> - data = unpack_entry(r, p, entries[i].offset, &type, &size);
> + unsigned long sz;
> + data = unpack_entry(r, p, entries[i].offset, &type, &sz);
> + size = sz;
> data_valid = 1;
> }
>
> diff --git a/packfile.c b/packfile.c
> index b012d648ad..fdae91dd11 100644
> --- a/packfile.c
> +++ b/packfile.c
> @@ -1133,7 +1133,7 @@ out:
> }
>
> unsigned long unpack_object_header_buffer(const unsigned char *buf,
> - unsigned long len, enum object_type *type, unsigned long *sizep)
> + unsigned long len, enum object_type *type, size_t *sizep)
> {
> unsigned shift;
> size_t size, c;
> @@ -1144,7 +1144,11 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
> size = c & 15;
> shift = 4;
> while (c & 0x80) {
> - if (len <= used || (bitsizeof(long) - 7) < shift) {
> + /*
> + * Each continuation byte adds 7 bits. Ensure shift won't
> + * overflow size_t (use size_t not long for 64-bit on Windows).
> + */
> + if (len <= used || (bitsizeof(size_t) - 7) < shift) {
> error("bad object header");
> size = used = 0;
> break;
> @@ -1153,7 +1157,7 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
> size = st_add(size, st_left_shift(c & 0x7f, shift));
> shift += 7;
> }
> - *sizep = cast_size_t_to_ulong(size);
> + *sizep = size;
> return used;
> }
>
> @@ -1215,7 +1219,7 @@ unsigned long get_size_from_delta(struct packed_git *p,
> int unpack_object_header(struct packed_git *p,
> struct pack_window **w_curs,
> off_t *curpos,
> - unsigned long *sizep)
> + size_t *sizep)
> {
> unsigned char *base;
> unsigned long left;
> @@ -1367,7 +1371,7 @@ static enum object_type packed_to_object_type(struct repository *r,
>
> while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
> off_t base_offset;
> - unsigned long size;
> + size_t size;
> /* Push the object we're going to leave behind */
> if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
> poi_stack_alloc = alloc_nr(poi_stack_nr);
> @@ -1586,7 +1590,7 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off
> uint32_t *maybe_index_pos, struct object_info *oi)
> {
> struct pack_window *w_curs = NULL;
> - unsigned long size;
> + size_t size;
> off_t curpos = obj_offset;
> enum object_type type = OBJ_NONE;
> uint32_t pack_pos;
> @@ -1778,7 +1782,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
> struct pack_window *w_curs = NULL;
> off_t curpos = obj_offset;
> void *data = NULL;
> - unsigned long size;
> + size_t size;
> enum object_type type;
> struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];
> struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
> @@ -1943,8 +1947,10 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
> (uintmax_t)curpos, p->pack_name);
> data = NULL;
> } else {
> + unsigned long sz;
> data = patch_delta(base, base_size, delta_data,
> - delta_size, &size);
> + delta_size, &sz);
> + size = sz;
>
> /*
> * We could not apply the delta; warn the user, but
> @@ -2929,7 +2935,7 @@ int packfile_read_object_stream(struct odb_read_stream **out,
> struct odb_packed_read_stream *stream;
> struct pack_window *window = NULL;
> enum object_type in_pack_type;
> - unsigned long size;
> + size_t size;
>
> in_pack_type = unpack_object_header(pack, &window, &offset, &size);
> unuse_pack(&window);
> diff --git a/packfile.h b/packfile.h
> index 9b647da7dd..49d6bdecf6 100644
> --- a/packfile.h
> +++ b/packfile.h
> @@ -456,9 +456,9 @@ off_t find_pack_entry_one(const struct object_id *oid, struct packed_git *);
>
> int is_pack_valid(struct packed_git *);
> void *unpack_entry(struct repository *r, struct packed_git *, off_t, enum object_type *, unsigned long *);
> -unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
> +unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, size_t *sizep);
> unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
> -int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *);
> +int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, size_t *);
> off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs,
> off_t *curpos, enum object_type type,
> off_t delta_obj_offset);
> --
> gitgitgadget
>
> |
||
| struct pack_window *w_curs = NULL; | ||
| uint32_t pos; | ||
| off_t offset; | ||
| off_t offset, cur; | ||
| enum object_type type = oe_type(entry); | ||
| enum object_type in_pack_type; | ||
| off_t datalen; | ||
| unsigned char header[MAX_PACK_OBJECT_HEADER], | ||
| dheader[MAX_PACK_OBJECT_HEADER]; | ||
| unsigned hdrlen; | ||
| const unsigned hashsz = the_hash_algo->rawsz; | ||
| unsigned long entry_size = SIZE(entry); | ||
| size_t entry_size; | ||
|
|
||
| cur = entry->in_pack_offset; | ||
| in_pack_type = unpack_object_header(p, &w_curs, &cur, &entry_size); | ||
| if (in_pack_type < 0) | ||
| die(_("write_reuse_object: unable to parse object header of %s"), | ||
| oid_to_hex(&entry->idx.oid)); | ||
|
|
||
| if (DELTA(entry)) | ||
| type = (allow_ofs_delta && DELTA(entry)->idx.offset) ? | ||
|
|
@@ -1087,7 +1094,7 @@ static void write_reused_pack_one(struct packed_git *reuse_packfile, | |
| { | ||
| off_t offset, next, cur; | ||
| enum object_type type; | ||
| unsigned long size; | ||
| size_t size; | ||
|
|
||
| offset = pack_pos_to_offset(reuse_packfile, pos); | ||
| next = pack_pos_to_offset(reuse_packfile, pos + 1); | ||
|
|
@@ -2243,7 +2250,7 @@ static void check_object(struct object_entry *entry, uint32_t object_index) | |
| off_t ofs; | ||
| unsigned char *buf, c; | ||
| enum object_type type; | ||
| unsigned long in_pack_size; | ||
| size_t in_pack_size; | ||
|
|
||
| buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail); | ||
|
|
||
|
|
@@ -2734,16 +2741,18 @@ unsigned long oe_get_size_slow(struct packing_data *pack, | |
| struct pack_window *w_curs; | ||
| unsigned char *buf; | ||
| enum object_type type; | ||
| unsigned long used, avail, size; | ||
| unsigned long used, avail; | ||
| size_t size; | ||
|
|
||
| if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) { | ||
| unsigned long sz; | ||
| packing_data_lock(&to_pack); | ||
| if (odb_read_object_info(the_repository->objects, | ||
| &e->idx.oid, &size) < 0) | ||
| &e->idx.oid, &sz) < 0) | ||
| die(_("unable to get size of %s"), | ||
| oid_to_hex(&e->idx.oid)); | ||
| packing_data_unlock(&to_pack); | ||
| return size; | ||
| return sz; | ||
| } | ||
|
|
||
| p = oe_in_pack(pack, e); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -314,6 +314,15 @@ export DEFAULT_TEST_TARGET=prove | |
| export GIT_TEST_CLONE_2GB=true | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Derrick Stolee wrote on the Git mailing list (how to reply to this email): On 5/4/2026 1:08 PM, Johannes Schindelin via GitGitGadget wrote:
> From: Johannes Schindelin <johannes.schindelin@gmx.de>
>
> Derrick Stolee suggested [1] that expensive tests should be run at a
> regular cadence rather than on every PR iteration. Gate GIT_TEST_LONG
> on push builds to the integration branches (next, master, main, maint)
> so that the EXPENSIVE prereq is satisfied there but not during PR
> validation, where the extra minutes of wall-clock time do not justify
> themselves.
I like that this will be run as part of regular updates to the
important branches. The important bit after that is whether or
not a human pays attention to the signal of these builds.
Junio: Do you pay attention to CI breaks when you push to
'master'?
One way to help this procedure could be to have GitHub CI
failures trigger new issues, which could then be more easily
viewed and noticed by the community watching the repo. This
is of course out-of-scope for this patch series, but could be
considered in the future.
Thanks,
-Stolee
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Junio C Hamano wrote on the Git mailing list (how to reply to this email): Derrick Stolee <stolee@gmail.com> writes:
> On 5/4/2026 1:08 PM, Johannes Schindelin via GitGitGadget wrote:
>> From: Johannes Schindelin <johannes.schindelin@gmx.de>
>>
>> Derrick Stolee suggested [1] that expensive tests should be run at a
>> regular cadence rather than on every PR iteration. Gate GIT_TEST_LONG
>> on push builds to the integration branches (next, master, main, maint)
>> so that the EXPENSIVE prereq is satisfied there but not during PR
>> validation, where the extra minutes of wall-clock time do not justify
>> themselves.
> I like that this will be run as part of regular updates to the
> important branches. The important bit after that is whether or
> not a human pays attention to the signal of these builds.
>
> Junio: Do you pay attention to CI breaks when you push to
> 'master'?
Well, it is way too late to notice breakage when the faulty update
hits 'master'. CI failures should be noticed before breakage hits
'next'.
I often notice and complain when I see failures on 'seen', and
sometimes I help original submitter by bisecting, but I do not
necessarily have enough time and bandwidth to help everybody.
Quite honestly, the best place to give widest test coverage is much
closer to the source of the problems than in my tree and mixed with
other topics, i.e., at individual contributor's CI. That way, I
presume that GitGitGadget can also help submitters avoid sending a
faulty series, reducing the load on the list and the maintainer.
Ideally the CI tests by the integrator should only be catching any
mismerges and unexpected inter-topic interactions, as they cannot be
caught by contributor's standalone tests, so I do not mind widening
coverage of CI tests when I push the integration results out. But
so far, the majority of what I have seen and reported back to the
list have been something that the authors should be equipped to spot
in their topic without getting mixed with other topics into any
integration branches.
> One way to help this procedure could be to have GitHub CI
> failures trigger new issues, which could then be more easily
> viewed and noticed by the community watching the repo. This
> is of course out-of-scope for this patch series, but could be
> considered in the future.
I think a better way to help would be to arrange the workflow so
that we do not even have to trigger an issue, and stop before the
patches leave the original authors' hand. They can of course ask
for help saying "here is my topic in my fork of the repository and
failing in this way for macOS that I do not have access to. Could
anybody help me figuring out what macOS peculiarity my changes are
tickling?", or something like that.
It would be best to find problems early, and make it easier for
individual contributors to help each other by having a concrete CI
failure reports in their forks that they can point at when they ask
for help. And CI run when I push 'seen' or 'master' out would not
help as much as CI run when they publish their forked branches would.
By the way, please expect slow responses as I am (officially) still
mostly offline for the rest of the week.
Thanks.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Junio C Hamano wrote on the Git mailing list (how to reply to this email): (in GMail web interface, excuse typos)
https://github.com/git/git/actions/runs/25366120610/job/74377320625
We seem to be hitting the same _Generic error in various (but not all) jobs
/usr/include/x86_64-linux-gnu/sys/cdefs.h:838:3: note: expanded from
macro '__glibc_const_generic'
838 | _Generic (0 ? (PTR) : (void *) 1, \
| ^
Error: list-objects-filter-options.c:222:10: '_Generic' is a C11
extension [-Werror,-Wc11-extensions]
I thought we updated the codebase to avoid stripping away constness
with strchr() and friends, but the error seems to be more like one
hand in the system passing -Wc11-extensions to stick to older version
of C and the other hand in the system that uses _Generic to implement
the const/non-const variants of strchr() in the system header not
knowing that the other tells C11 const-preserving strchr() should not
be used?
2026年5月5日(火) 21:56 Junio C Hamano <gitster@pobox.com>:
>
> Derrick Stolee <stolee@gmail.com> writes:
>
> > On 5/4/2026 1:08 PM, Johannes Schindelin via GitGitGadget wrote:
> >> From: Johannes Schindelin <johannes.schindelin@gmx.de>
> >>
> >> Derrick Stolee suggested [1] that expensive tests should be run at a
> >> regular cadence rather than on every PR iteration. Gate GIT_TEST_LONG
> >> on push builds to the integration branches (next, master, main, maint)
> >> so that the EXPENSIVE prereq is satisfied there but not during PR
> >> validation, where the extra minutes of wall-clock time do not justify
> >> themselves.
> > I like that this will be run as part of regular updates to the
> > important branches. The important bit after that is whether or
> > not a human pays attention to the signal of these builds.
> >
> > Junio: Do you pay attention to CI breaks when you push to
> > 'master'?
>
> Well, it is way too late to notice breakage when the faulty update
> hits 'master'. CI failures should be noticed before breakage hits
> 'next'.
>
> I often notice and complain when I see failures on 'seen', and
> sometimes I help original submitter by bisecting, but I do not
> necessarily have enough time and bandwidth to help everybody.
>
> Quite honestly, the best place to give widest test coverage is much
> closer to the source of the problems than in my tree and mixed with
> other topics, i.e., at individual contributor's CI. That way, I
> presume that GitGitGadget can also help submitters avoid sending a
> faulty series, reducing the load on the list and the maintainer.
>
> Ideally the CI tests by the integrator should only be catching any
> mismerges and unexpected inter-topic interactions, as they cannot be
> caught by contributor's standalone tests, so I do not mind widening
> coverage of CI tests when I push the integration results out. But
> so far, the majority of what I have seen and reported back to the
> list have been something that the authors should be equipped to spot
> in their topic without getting mixed with other topics into any
> integration branches.
>
> > One way to help this procedure could be to have GitHub CI
> > failures trigger new issues, which could then be more easily
> > viewed and noticed by the community watching the repo. This
> > is of course out-of-scope for this patch series, but could be
> > considered in the future.
>
> I think a better way to help would be to arrange the workflow so
> that we do not even have to trigger an issue, and stop before the
> patches leave the original authors' hand. They can of course ask
> for help saying "here is my topic in my fork of the repository and
> failing in this way for macOS that I do not have access to. Could
> anybody help me figuring out what macOS peculiarity my changes are
> tickling?", or something like that.
>
> It would be best to find problems early, and make it easier for
> individual contributors to help each other by having a concrete CI
> failure reports in their forks that they can point at when they ask
> for help. And CI run when I push 'seen' or 'master' out would not
> help as much as CI run when they publish their forked branches would.
>
> By the way, please expect slow responses as I am (officially) still
> mostly offline for the rest of the week.
>
> Thanks.
> |
||
| export SKIP_DASHED_BUILT_INS=YesPlease | ||
|
|
||
| # Enable expensive tests on push builds to integration branches, but | ||
| # not on PR builds where the extra time is not justified for every | ||
| # iteration. | ||
| case "$GITHUB_EVENT_NAME,$CI_BRANCH" in | ||
| push,*next*|push,*master*|push,*main*|push,*maint*) | ||
| export GIT_TEST_LONG=YesPlease | ||
| ;; | ||
| esac | ||
|
|
||
| case "$distro" in | ||
| ubuntu-*) | ||
| # Python 2 is end of life, and Ubuntu 23.04 and newer don't actually | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -86,8 +86,11 @@ void *patch_delta(const void *src_buf, unsigned long src_size, | |
| * This must be called twice on the delta data buffer, first to get the | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Derrick Stolee wrote on the Git mailing list (how to reply to this email): On 4/28/26 12:26 PM, Johannes Schindelin via GitGitGadget wrote:
> From: Johannes Schindelin <johannes.schindelin@gmx.de>
> > The delta header decoding functions return unsigned long, which
> truncates on Windows for objects larger than 4GB. Introduce size_t
> variants get_delta_hdr_size_sz() and get_size_from_delta_sz() that
> preserve the full 64-bit size, and use them in packed_object_info()
> where the size is needed for streaming decisions.
> + * Size_t variant that doesn't truncate - use for >4GB objects on Windows.
> + */
> +static inline size_t get_delta_hdr_size_sz(const unsigned char **datap,
> + const unsigned char *top)
...
> +static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
> + const unsigned char *top)
> +{
> + size_t size = get_delta_hdr_size_sz(datap, top);
> return cast_size_t_to_ulong(size);
> }
I like this trick to use the 64-bit implementation and only to
down-cast for API compatibility. This allows a more gradual
transition than if we replaced ulongs with size_ts everywhere
at once.
Thanks,
-StoleeThere was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Johannes Schindelin wrote on the Git mailing list (how to reply to this email): Hi Stolee,
On Sun, 3 May 2026, Derrick Stolee wrote:
> On 4/28/26 12:26 PM, Johannes Schindelin via GitGitGadget wrote:
> > From: Johannes Schindelin <johannes.schindelin@gmx.de>
> >
> > The delta header decoding functions return unsigned long, which
> > truncates on Windows for objects larger than 4GB. Introduce size_t
> > variants get_delta_hdr_size_sz() and get_size_from_delta_sz() that
> > preserve the full 64-bit size, and use them in packed_object_info()
> > where the size is needed for streaming decisions.
>
> > + * Size_t variant that doesn't truncate - use for >4GB objects on Windows.
> > + */
> > +static inline size_t get_delta_hdr_size_sz(const unsigned char **datap,
> > + const unsigned char *top)
> ...
> > +static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
> > + const unsigned char *top)
> > +{
> > + size_t size = get_delta_hdr_size_sz(datap, top);
> > return cast_size_t_to_ulong(size);
> > }
>
> I like this trick to use the 64-bit implementation and only to
> down-cast for API compatibility. This allows a more gradual
> transition than if we replaced ulongs with size_ts everywhere
> at once.
Thank you! That was indeed the exact thing I wanted to achieve: To allow
for incremental, easy-to-review patch series.
Ciao,
Johannes |
||
| * expected source buffer size, and again to get the target buffer size. | ||
| */ | ||
| static inline unsigned long get_delta_hdr_size(const unsigned char **datap, | ||
| const unsigned char *top) | ||
| /* | ||
| * Size_t variant that doesn't truncate - use for >4GB objects on Windows. | ||
| */ | ||
| static inline size_t get_delta_hdr_size_sz(const unsigned char **datap, | ||
| const unsigned char *top) | ||
| { | ||
| const unsigned char *data = *datap; | ||
| size_t cmd, size = 0; | ||
|
|
@@ -98,6 +101,13 @@ static inline unsigned long get_delta_hdr_size(const unsigned char **datap, | |
| i += 7; | ||
| } while (cmd & 0x80 && data < top); | ||
| *datap = data; | ||
| return size; | ||
| } | ||
|
|
||
| static inline unsigned long get_delta_hdr_size(const unsigned char **datap, | ||
| const unsigned char *top) | ||
| { | ||
| size_t size = get_delta_hdr_size_sz(datap, top); | ||
| return cast_size_t_to_ulong(size); | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Torsten Bögershausen wrote on the Git mailing list (how to reply to this email):
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Johannes Schindelin wrote on the Git mailing list (how to reply to this email):