Skip to content

Commit a87bbaa

Browse files
committed
cache-tree: fix verify_cache() to catch non-adjacent D/F conflicts
verify_cache() checks that the index does not contain both "path" and "path/file" before writing a tree. It does this by comparing only adjacent entries, relying on the assumption that "path/file" would immediately follow "path" in sorted order. Unfortunately, this assumption does not always hold. For example: docs <-- submodule entry docs-internal/README.md <-- intervening entry docs/requirements.txt <-- D/F conflict, NOT adjacent to "docs" When this happens, verify_cache() silently misses the D/F conflict and write-tree produces a corrupt tree object containing duplicate entries (one for the submodule "docs" and one for the tree "docs"). I could not find any caller in current git that both allows the index to get into this state and then tries to write it out without doing other checks beyond the verify_cache() call in cache_tree_update(), but verify_cache() is documented as a safety net for preventing corrupt trees and should actually provide that guarantee. A downstream consumer that relied solely on cache_tree_update()'s internal checking via verify_cache() to prevent duplicate tree entries was bitten by the gap. Add a test that constructs a corrupt index directly (bypassing the D/F checks in add_index_entry) and verifies that write-tree now rejects it. Signed-off-by: Elijah Newren <newren@gmail.com>
1 parent 0d81c02 commit a87bbaa

File tree

4 files changed

+141
-3
lines changed

4 files changed

+141
-3
lines changed

cache-tree.c

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -192,22 +192,62 @@ static int verify_cache(struct index_state *istate, int flags)
192192
for (i = 0; i + 1 < istate->cache_nr; i++) {
193193
/* path/file always comes after path because of the way
194194
* the cache is sorted. Also path can appear only once,
195-
* which means conflicting one would immediately follow.
195+
* so path/file is likely the immediately following path
196+
* but might be separated if there is e.g. a
197+
* path-internal/... file.
196198
*/
197199
const struct cache_entry *this_ce = istate->cache[i];
198200
const struct cache_entry *next_ce = istate->cache[i + 1];
199201
const char *this_name = this_ce->name;
200202
const char *next_name = next_ce->name;
201203
int this_len = ce_namelen(this_ce);
204+
const char *conflict_name = NULL;
205+
202206
if (this_len < ce_namelen(next_ce) &&
203-
next_name[this_len] == '/' &&
207+
next_name[this_len] <= '/' &&
204208
strncmp(this_name, next_name, this_len) == 0) {
209+
if (next_name[this_len] == '/') {
210+
conflict_name = next_name;
211+
} else if (next_name[this_len] < '/') {
212+
/*
213+
* The immediately next entry shares our
214+
* prefix but sorts before "path/" (e.g.,
215+
* "path-internal" between "path" and
216+
* "path/file", since '-' (0x2D) < '/'
217+
* (0x2F)). Binary search to find where
218+
* "path/" would be and check for a D/F
219+
* conflict there.
220+
*/
221+
struct cache_entry *other;
222+
struct strbuf probe = STRBUF_INIT;
223+
int pos;
224+
225+
strbuf_add(&probe, this_name, this_len);
226+
strbuf_addch(&probe, '/');
227+
pos = index_name_pos_sparse(istate,
228+
probe.buf,
229+
probe.len);
230+
strbuf_release(&probe);
231+
232+
if (pos < 0)
233+
pos = -pos - 1;
234+
if (pos >= (int)istate->cache_nr)
235+
continue;
236+
other = istate->cache[pos];
237+
if (ce_namelen(other) > this_len &&
238+
other->name[this_len] == '/' &&
239+
!strncmp(this_name, other->name, this_len))
240+
conflict_name = other->name;
241+
}
242+
}
243+
244+
if (conflict_name) {
205245
if (10 < ++funny) {
206246
fprintf(stderr, "...\n");
207247
break;
208248
}
209249
fprintf(stderr, "You have both %s and %s\n",
210-
this_name, next_name);
250+
this_name, conflict_name);
211251
}
212252
}
213253
if (funny)

t/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ integration_tests = [
124124
't0090-cache-tree.sh',
125125
't0091-bugreport.sh',
126126
't0092-diagnose.sh',
127+
't0093-verify-cache-df-gap.sh',
127128
't0095-bloom.sh',
128129
't0100-previous.sh',
129130
't0101-at-syntax.sh',

t/t0093-direct-index-write.pl

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#!/usr/bin/perl
2+
#
3+
# Build a v2 index file from entries listed on stdin.
4+
# Each line: "octalmode hex-oid name"
5+
# Output: binary index written to stdout.
6+
#
7+
# This bypasses all D/F safety checks in add_index_entry(), simulating
8+
# what happens when code uses ADD_CACHE_JUST_APPEND to bulk-load entries.
9+
use strict;
10+
use warnings;
11+
use Digest::SHA qw(sha1 sha256);
12+
13+
my $hash_algo = $ENV{'GIT_DEFAULT_HASH'} || 'sha1';
14+
my $hash_func = $hash_algo eq 'sha256' ? \&sha256 : \&sha1;
15+
16+
my @entries;
17+
while (my $line = <STDIN>) {
18+
chomp $line;
19+
my ($mode, $oid_hex, $name) = split(/ /, $line, 3);
20+
push @entries, [$mode, $oid_hex, $name];
21+
}
22+
23+
my $body = "DIRC" . pack("NN", 2, scalar @entries);
24+
25+
for my $ent (@entries) {
26+
my ($mode, $oid_hex, $name) = @{$ent};
27+
# 10 x 32-bit stat fields (zeroed), with mode in position 7
28+
my $stat = pack("N10", 0, 0, 0, 0, 0, 0, oct($mode), 0, 0, 0);
29+
my $oid = pack("H*", $oid_hex);
30+
my $flags = pack("n", length($name) & 0xFFF);
31+
my $entry = $stat . $oid . $flags . $name . "\0";
32+
# Pad to 8-byte boundary
33+
while (length($entry) % 8) { $entry .= "\0"; }
34+
$body .= $entry;
35+
}
36+
37+
binmode STDOUT;
38+
print $body . $hash_func->($body);

t/t0093-verify-cache-df-gap.sh

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/bin/sh
2+
3+
test_description='verify_cache() must catch non-adjacent D/F conflicts
4+
5+
Ensure that verify_cache() can complain about bad entries like:
6+
7+
docs <-- submodule
8+
docs-internal/... <-- sorts here because "-" < "/"
9+
docs/... <-- D/F conflict with "docs" above, not adjacent
10+
11+
In order to test verify_cache, we directly construct a corrupt index
12+
(bypassing the D/F safety checks in add_index_entry) and verify that
13+
write-tree rejects it.
14+
'
15+
16+
. ./test-lib.sh
17+
18+
if ! test_have_prereq PERL
19+
then
20+
skip_all='skipping verify_cache D/F tests; Perl not available'
21+
test_done
22+
fi
23+
24+
# Build a v2 index from entries on stdin, bypassing D/F checks.
25+
# Each line: "octalmode hex-oid name" (entries must be pre-sorted).
26+
build_corrupt_index () {
27+
perl "$TEST_DIRECTORY/t0093-direct-index-write.pl" >"$1"
28+
}
29+
30+
test_expect_success 'setup objects' '
31+
test_commit base &&
32+
BLOB=$(git rev-parse HEAD:base.t) &&
33+
SUB_COMMIT=$(git rev-parse HEAD)
34+
'
35+
36+
test_expect_success 'adjacent D/F conflict is caught by verify_cache' '
37+
cat >index-entries <<-EOF &&
38+
0160000 $SUB_COMMIT docs
39+
0100644 $BLOB docs/requirements.txt
40+
EOF
41+
build_corrupt_index .git/index <index-entries &&
42+
43+
test_must_fail git write-tree 2>err &&
44+
test_grep "You have both docs and docs/requirements.txt" err
45+
'
46+
47+
test_expect_success 'non-adjacent D/F conflict is caught by verify_cache' '
48+
cat >index-entries <<-EOF &&
49+
0160000 $SUB_COMMIT docs
50+
0100644 $BLOB docs-internal/README.md
51+
0100644 $BLOB docs/requirements.txt
52+
EOF
53+
build_corrupt_index .git/index <index-entries &&
54+
55+
test_must_fail git write-tree 2>err &&
56+
test_grep "You have both docs and docs/requirements.txt" err
57+
'
58+
59+
test_done

0 commit comments

Comments
 (0)