Skip to content

Commit 7aa5615

Browse files
mjcheetham authored and dscho committed
maintenance: add cache-local-objects maintenance task
Introduce a new maintenance task, `cache-local-objects`, that operates on Scalar or VFS for Git repositories with a per-volume, shared object cache (specified by `gvfs.sharedCache`) to migrate packfiles and loose objects from the repository object directory to the shared cache. Older versions of `microsoft/git` incorrectly placed packfiles in the repository object directory instead of the shared cache; this task will help clean up existing clones impacted by that issue. Migration of packfiles involves the following steps for each pack: 1. Hardlink (or copy): a. the .pack file b. the .keep file c. the .rev file 2. Move (or copy + delete) the .idx file 3. Delete/unlink: a. the .pack file b. the .keep file c. the .rev file Moving the index file after the others ensures the pack is not read from the new cache directory until all associated files (rev, keep) exist in the cache directory also. Moving loose objects operates as a move, or copy + delete. Signed-off-by: Matthew John Cheetham <mjcheetham@outlook.com>
1 parent c8ed750 commit 7aa5615

File tree

3 files changed

+326
-0
lines changed

3 files changed

+326
-0
lines changed

Documentation/git-maintenance.adoc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ task:
6969
* `prefetch`: hourly.
7070
* `loose-objects`: daily.
7171
* `incremental-repack`: daily.
72+
* `cache-local-objects`: weekly.
7273
--
7374
+
7475
`git maintenance register` will also disable foreground maintenance by
@@ -174,6 +175,13 @@ worktree-prune::
174175
The `worktree-prune` task deletes stale or broken worktrees. See
175176
linkgit:git-worktree[1] for more information.
176177

178+
cache-local-objects::
179+
The `cache-local-objects` task only operates on Scalar or VFS for Git
180+
repositories (cloned with either `scalar clone` or `gvfs clone`) that
181+
have the `gvfs.sharedCache` configuration setting present. This task
182+
migrates pack files and loose objects from the repository's object
183+
directory into the shared volume cache.
184+
177185
OPTIONS
178186
-------
179187
--auto::

builtin/gc.c

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@
1313
#define USE_THE_REPOSITORY_VARIABLE
1414
#define DISABLE_SIGN_COMPARE_WARNINGS
1515

16+
#include "git-compat-util.h"
1617
#include "builtin.h"
1718
#include "abspath.h"
19+
#include "copy.h"
1820
#include "date.h"
1921
#include "dir.h"
2022
#include "environment.h"
@@ -1524,6 +1526,186 @@ static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts
15241526
return 0;
15251527
}
15261528

1529+
/*
 * Make `dst` a second name for the file at `src`.
 *
 * A hard link is attempted first. If link() fails for any reason, the
 * content is copied instead by calling copy_file() with mode 0444. The
 * fallback is silent when the failure was EXDEV (source and destination
 * are on different file systems); any other link() failure is reported
 * with a warning before the copy is attempted. A failed copy is fatal
 * (die_errno()).
 */
static void link_or_copy_or_die(const char *src, const char *dst)
{
	if (link(src, dst)) {
		/* Crossing file systems is the expected, quiet fallback. */
		if (errno != EXDEV)
			warning_errno(_("failed to link '%s' to '%s'"), src, dst);

		if (copy_file(dst, src, 0444))
			die_errno(_("failed to copy '%s' to '%s'"), src, dst);
	}
}
1541+
1542+
/*
 * Move the file at `src` to `dst`.
 *
 * rename() is attempted first. If it fails for any reason, the move is
 * emulated by calling copy_file() with mode 0444 and then unlinking
 * `src`. The fallback is silent when the failure was EXDEV (source and
 * destination are on different file systems); any other rename() failure
 * is reported with a warning first. A failure of either the copy or the
 * unlink is fatal (die_errno()).
 */
static void rename_or_copy_or_die(const char *src, const char *dst)
{
	if (rename(src, dst) == 0)
		return;

	/* Crossing file systems is the expected, quiet fallback. */
	if (errno != EXDEV)
		warning_errno(_("failed to move '%s' to '%s'"), src, dst);

	if (copy_file(dst, src, 0444) != 0)
		die_errno(_("failed to copy '%s' to '%s'"), src, dst);

	if (unlink(src) != 0)
		die_errno(_("failed to delete '%s'"), src);
}
1557+
1558+
static void migrate_pack(const char *srcdir, const char *dstdir,
1559+
const char *pack_filename)
1560+
{
1561+
size_t basenamelen, srclen, dstlen;
1562+
struct strbuf src = STRBUF_INIT, dst = STRBUF_INIT;
1563+
struct {
1564+
const char *ext;
1565+
unsigned move:1;
1566+
} files[] = {
1567+
{".pack", 0},
1568+
{".keep", 0},
1569+
{".rev", 0},
1570+
{".idx", 1}, /* The index file must be atomically moved last. */
1571+
};
1572+
1573+
trace2_region_enter("maintenance", "migrate_pack", the_repository);
1574+
1575+
basenamelen = strlen(pack_filename) - 5; /* .pack */
1576+
strbuf_addstr(&src, srcdir);
1577+
strbuf_addch(&src, '/');
1578+
strbuf_add(&src, pack_filename, basenamelen);
1579+
strbuf_addstr(&src, ".idx");
1580+
1581+
/* A pack without an index file is not yet ready to be migrated. */
1582+
if (!file_exists(src.buf))
1583+
goto cleanup;
1584+
1585+
strbuf_setlen(&src, src.len - 4 /* .idx */);
1586+
strbuf_addstr(&dst, dstdir);
1587+
strbuf_addch(&dst, '/');
1588+
strbuf_add(&dst, pack_filename, basenamelen);
1589+
1590+
srclen = src.len;
1591+
dstlen = dst.len;
1592+
1593+
/* Move or copy files from the source directory to the destination. */
1594+
for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
1595+
strbuf_setlen(&src, srclen);
1596+
strbuf_addstr(&src, files[i].ext);
1597+
1598+
if (!file_exists(src.buf))
1599+
continue;
1600+
1601+
strbuf_setlen(&dst, dstlen);
1602+
strbuf_addstr(&dst, files[i].ext);
1603+
1604+
if (files[i].move)
1605+
rename_or_copy_or_die(src.buf, dst.buf);
1606+
else
1607+
link_or_copy_or_die(src.buf, dst.buf);
1608+
}
1609+
1610+
/*
1611+
* Now the pack and all associated files exist at the destination we can
1612+
* now clean up the files in the source directory.
1613+
*/
1614+
for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
1615+
/* Files that were moved rather than copied have no clean up. */
1616+
if (files[i].move)
1617+
continue;
1618+
1619+
strbuf_setlen(&src, srclen);
1620+
strbuf_addstr(&src, files[i].ext);
1621+
1622+
/* Files that never existed in originally have no clean up.*/
1623+
if (!file_exists(src.buf))
1624+
continue;
1625+
1626+
if (unlink(src.buf))
1627+
warning_errno(_("failed to delete '%s'"), src.buf);
1628+
}
1629+
1630+
cleanup:
1631+
strbuf_release(&src);
1632+
strbuf_release(&dst);
1633+
1634+
trace2_region_leave("maintenance", "migrate_pack", the_repository);
1635+
}
1636+
1637+
/*
 * Per-file callback handed to for_each_file_in_pack_dir(). `data` carries
 * the destination pack directory as a C string.
 *
 * We only care about the actual pack files here. The associated .idx,
 * .keep and .rev files are copied in tandem with the pack file by
 * migrate_pack(), with the index file being moved last. The original
 * locations of the non-index files will only be deleted once all other
 * files have been copied/moved.
 */
static void move_pack_to_shared_cache(const char *full_path, size_t full_path_len,
				      const char *file_name, void *data)
{
	const char *dstdir = data;
	size_t srcdir_len;
	char *srcdir;

	if (!ends_with(file_name, ".pack"))
		return;

	/*
	 * Drop the file name and the one character in front of it
	 * (presumably the directory separator) to get the source directory.
	 */
	srcdir_len = full_path_len - strlen(file_name) - 1;
	srcdir = xstrndup(full_path, srcdir_len);

	migrate_pack(srcdir, dstdir, file_name);

	free(srcdir);
}
1658+
1659+
static int move_loose_object_to_shared_cache(const struct object_id *oid,
1660+
const char *path,
1661+
UNUSED void *data)
1662+
{
1663+
struct stat st;
1664+
struct strbuf dst = STRBUF_INIT;
1665+
char *hex = oid_to_hex(oid);
1666+
1667+
strbuf_addf(&dst, "%s/%.2s/", shared_object_dir, hex);
1668+
1669+
if (stat(dst.buf, &st)) {
1670+
if (mkdir(dst.buf, 0777))
1671+
die_errno(_("failed to create directory '%s'"), dst.buf);
1672+
} else if (!S_ISDIR(st.st_mode))
1673+
die(_("expected '%s' to be a directory"), dst.buf);
1674+
1675+
strbuf_addstr(&dst, hex+2);
1676+
rename_or_copy_or_die(path, dst.buf);
1677+
1678+
strbuf_release(&dst);
1679+
return 0;
1680+
}
1681+
1682+
/*
 * The `cache-local-objects` maintenance task: migrate pack files and loose
 * objects from the repository's own object directory into the shared
 * object cache named by `shared_object_dir`.
 *
 * Does nothing when no shared cache is configured, or when the shared
 * cache compares equal (fspathcmp()) to the local object directory.
 * Always returns 0.
 */
static int maintenance_task_cache_local_objs(UNUSED struct maintenance_run_opts *opts,
					     UNUSED struct gc_config *cfg)
{
	struct repository *repo = the_repository;
	struct strbuf pack_dst = STRBUF_INIT;
	const char *local_odb;

	/* This task is only applicable with a VFS/Scalar shared cache. */
	if (!shared_object_dir)
		return 0;

	local_odb = repo->objects->odb->path;

	/* Migrating a directory onto itself would be pointless. */
	if (fspathcmp(local_odb, shared_object_dir)) {
		strbuf_addf(&pack_dst, "%s/pack", shared_object_dir);

		for_each_file_in_pack_dir(local_odb, move_pack_to_shared_cache,
					  pack_dst.buf);

		for_each_loose_object(move_loose_object_to_shared_cache, NULL,
				      FOR_EACH_OBJECT_LOCAL_ONLY);
	}

	strbuf_release(&pack_dst);
	return 0;
}
1708+
15271709
typedef int maintenance_task_fn(struct maintenance_run_opts *opts,
15281710
struct gc_config *cfg);
15291711

@@ -1556,6 +1738,7 @@ enum maintenance_task_label {
15561738
TASK_REFLOG_EXPIRE,
15571739
TASK_WORKTREE_PRUNE,
15581740
TASK_RERERE_GC,
1741+
TASK_CACHE_LOCAL_OBJS,
15591742

15601743
/* Leave as final value */
15611744
TASK__COUNT
@@ -1607,6 +1790,10 @@ static struct maintenance_task tasks[] = {
16071790
maintenance_task_rerere_gc,
16081791
rerere_gc_condition,
16091792
},
1793+
[TASK_CACHE_LOCAL_OBJS] = {
1794+
"cache-local-objects",
1795+
maintenance_task_cache_local_objs,
1796+
},
16101797
};
16111798

16121799
static int compare_tasks_by_selection(const void *a_, const void *b_)
@@ -1701,6 +1888,8 @@ static void initialize_maintenance_strategy(void)
17011888
tasks[TASK_LOOSE_OBJECTS].schedule = SCHEDULE_DAILY;
17021889
tasks[TASK_PACK_REFS].enabled = 1;
17031890
tasks[TASK_PACK_REFS].schedule = SCHEDULE_WEEKLY;
1891+
tasks[TASK_CACHE_LOCAL_OBJS].enabled = 1;
1892+
tasks[TASK_CACHE_LOCAL_OBJS].schedule = SCHEDULE_WEEKLY;
17041893
}
17051894
}
17061895

t/t7900-maintenance.sh

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,25 @@ test_systemd_analyze_verify () {
3131
fi
3232
}
3333

34+
# Stream five blobs (with the contents "1" through "5") into
# `git fast-import`, run with fastimport.unpackLimit=0 -- presumably so
# that the imported objects stay in a packfile instead of being unpacked
# into loose objects (confirm against git-fast-import(1)).
test_import_packfile () {
	printf "blob\ndata <<END\n%s\nEND\n\n" 1 2 3 4 5 |
	git -c fastimport.unpackLimit=0 fast-import
}
38+
39+
# List the regular files below .git/objects/pack.
#
# Without arguments every file is listed. Otherwise each argument is used
# as a `find -name` pattern (e.g. "*.pack") and the matches are listed
# pattern by pattern, in argument order.
test_get_packdir_files () {
	if test "$#" -eq 0
	then
		find .git/objects/pack -type f
	else
		for arg in "$@"
		do
			# Quote the pattern so that find, not the shell,
			# expands it; unquoted, it would be matched against
			# the files in the current directory first.
			find .git/objects/pack -type f -name "$arg"
		done
	fi
}
48+
49+
# List the regular files found below any two-character subdirectory of
# .git/objects, i.e. where loose objects are stored.
test_get_loose_object_files () {
	find .git/objects -path ".git/objects/??/*" -type f
}
52+
3453
test_expect_success 'help text' '
3554
test_expect_code 129 git maintenance -h >actual &&
3655
test_grep "usage: git maintenance <subcommand>" actual &&
@@ -1186,4 +1205,114 @@ test_expect_success 'maintenance aborts with existing lock file' '
11861205
test_grep "Another scheduled git-maintenance(1) process seems to be running" err
11871206
'
11881207

1208+
# Without gvfs.sharedCache configured the task must leave every pack
# file and loose object where it is.
test_expect_success 'cache-local-objects task with no shared cache no op' '
	test_when_finished "rm -rf repo" &&
	git init repo &&
	(
		cd repo &&

		test_commit something &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.cache-local-objects.enabled true &&
		git config set maintenance.cache-local-objects.auto 1 &&

		test_import_packfile &&
		test_get_packdir_files "*.pack" "*.idx" "*.keep" "*.rev" \
			>files.txt &&
		test_get_loose_object_files >>files.txt &&

		git maintenance run &&
		while IFS= read -r f
		do
			test_path_exists "$f" || exit 1
		done <files.txt
	)
'
1230+
1231+
# With gvfs.sharedCache pointing at the repository object directory itself
# the task must leave every pack file and loose object where it is.
test_expect_success 'cache-local-objects task cache path same as local odb no op' '
	test_when_finished "rm -rf repo" &&
	git init repo &&
	(
		cd repo &&

		test_commit something &&
		git config set gvfs.sharedcache .git/objects &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.cache-local-objects.enabled true &&
		git config set maintenance.cache-local-objects.auto 1 &&

		test_import_packfile &&
		test_get_packdir_files "*.pack" "*.idx" "*.keep" "*.rev" \
			>files.txt &&
		test_get_loose_object_files >>files.txt &&

		git maintenance run &&
		while IFS= read -r f
		do
			test_path_exists "$f" || exit 1
		done <files.txt
	)
'
1254+
1255+
# A pack that has neither a .rev nor a .keep file must still be migrated:
# the .pack and .idx files and the loose objects have to disappear from
# the repository and show up in the shared cache.
test_expect_success 'cache-local-objects task no .rev or .keep' '
	test_when_finished "rm -rf repo cache" &&
	mkdir -p cache/pack &&
	git init repo &&
	(
		cd repo &&

		test_commit something &&
		git config set gvfs.sharedcache ../cache &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.cache-local-objects.enabled true &&
		git config set maintenance.cache-local-objects.auto 1 &&

		test_import_packfile &&
		test_get_packdir_files "*.pack" "*.idx" >src.txt &&
		test_get_loose_object_files >>src.txt &&

		rm -f .git/objects/pack/*.rev .git/objects/pack/*.keep &&

		# Map each source path to its expected place in the cache.
		sed "s|^\.git/objects/|../cache/|" src.txt >dst.txt &&

		git maintenance run &&
		while IFS= read -r f
		do
			test_path_is_missing "$f" || exit 1
		done <src.txt &&

		while IFS= read -r f
		do
			test_path_exists "$f" || exit 1
		done <dst.txt
	)
'
1286+
1287+
# Full migration: every pack file (including any .keep and .rev) and every
# loose object has to disappear from the repository and show up in the
# shared cache.
test_expect_success 'cache-local-objects task success' '
	test_when_finished "rm -rf repo cache" &&
	mkdir -p cache/pack &&
	git init repo &&
	(
		cd repo &&

		test_commit something &&
		git config set gvfs.sharedcache ../cache &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.cache-local-objects.enabled true &&
		git config set maintenance.cache-local-objects.auto 1 &&

		test_import_packfile &&
		test_get_packdir_files "*.pack" "*.idx" "*.keep" "*.rev" \
			>src.txt &&
		test_get_loose_object_files >>src.txt &&

		# Map each source path to its expected place in the cache.
		sed "s|^\.git/objects/|../cache/|" src.txt >dst.txt &&

		git maintenance run &&
		while IFS= read -r f
		do
			test_path_is_missing "$f" || exit 1
		done <src.txt &&

		while IFS= read -r f
		do
			test_path_exists "$f" || exit 1
		done <dst.txt
	)
'
1317+
11891318
test_done

0 commit comments

Comments
 (0)