1# Generate a reproducible archive from the content of a directory
2#
3# $1    : input directory
4# $2    : leading component in archive
5# $3    : ISO8601 date: YYYY-MM-DDThh:mm:ssZZ
6# $4    : output file
7# $5... : globs of filenames to exclude from the archive, suitable for
8#         find's -path option, and relative to the input directory $1
9#
10# Notes :
11#   - the timestamp is internally rounded to the highest entire second
12#     less than or equal to the timestamp (i.e. any sub-second fractional
13#     part is ignored)
14#   - must not be called with CWD as, or below, the input directory
15#   - some temporary files are created in CWD, and removed at the end
16#
17# Example:
18#   $ find /path/to/temp/dir
19#   /path/to/temp/dir/
20#   /path/to/temp/dir/some-file
21#   /path/to/temp/dir/some-dir/
22#   /path/to/temp/dir/some-dir/some-other-file
23#
#   $ mk_tar_gz /path/to/temp/dir \
25#               foo_bar-1.2.3 \
26#               1970-01-01T00:00:00Z \
27#               /path/to/foo.tar.gz \
28#               '.git/*' '.svn/*'
29#
30#   $ tar tzf /path/to/foo.tar.gz
31#   foo_bar-1.2.3/some-file
32#   foo_bar-1.2.3/some-dir/some-other-file
33#
mk_tar_gz() {
    # Returns 0 on success, or the status of the first step that failed.
    # The temporary files created in CWD are removed in both cases.
    local in_dir="${1}"
    local base_dir="${2}"
    local date="${3}"
    local out="${4}"
    shift 4
    local glob tmp pax_options
    local -a find_opts=()
    local ret=0

    # Every remaining argument is an exclusion glob: turn each one into an
    # extra "-or -path ./<glob>" clause for find (a leading "./" in the
    # glob is accepted and normalised).
    for glob; do
        find_opts+=( -or -path "./${glob#./}" )
    done

    # Drop sub-second precision to play nice with GNU tar's valid_timespec check
    date="$(date -d "${date}" -u +%Y-%m-%dT%H:%M:%S+00:00)" || return 1

    # Keep per-file atime/ctime/mtime out of the pax extended headers, and
    # give the extended headers themselves a fixed name and mtime.
    pax_options="delete=atime,delete=ctime,delete=mtime"
    pax_options+=",exthdr.name=%d/PaxHeaders/%f,exthdr.mtime={${date}}"

    # mktemp creates ${tmp} itself. It is only used as a unique, absolute
    # prefix for the .list/.sorted/.tar work files, but it still has to be
    # removed together with them.
    tmp="$(mktemp --tmpdir="$(pwd)")" || return 1
    if ! pushd "${in_dir}" >/dev/null; then
        # Without this guard, find would archive whatever directory we are in.
        rm -f "${tmp}"
        return 1
    fi

    # 1. Establish the list of everything that is not a directory and does
    #    not match an exclusion glob.
    # 2. Sort the list byte-wise, for reproducibility.
    # 3. Create a POSIX tarball, since that's the most reproducible format.
    #    ${TAR} is left unquoted, as before, so a value carrying extra
    #    arguments keeps being word-split.
    # 4. Compress the archive, without storing a name or timestamp.
    # The chain stops at the first failing step, whose status is kept.
    {
        find . -not -type d -and -not \( -false "${find_opts[@]}" \) >"${tmp}.list" &&
        LC_ALL=C sort <"${tmp}.list" >"${tmp}.sorted" &&
        ${TAR} cf - --transform="s#^\./#${base_dir}/#S" \
                 --numeric-owner --owner=0 --group=0 --mtime="${date}" \
                 --format=posix --pax-option="${pax_options}" --mode='go=u,go-w' \
                 -T "${tmp}.sorted" >"${tmp}.tar" &&
        gzip -6 -n <"${tmp}.tar" >"${out}"
    } || ret=$?

    # Remove the work files and the mktemp placeholder itself
    rm -f "${tmp}"{,.list,.sorted,.tar}

    popd >/dev/null || ret=$?
    return "${ret}"
}
74
# Extract a gzip-compressed tarball into a newly created directory, and
# record the modification time of one of the extracted files.
#
# $1    : destination directory; created here, so it must not exist yet
# $2    : tarball to extract; its leading path component is stripped
#
# The first regular file, in byte-wise sorted order, is used as the
# reference for the modification time of "${1}.timestamp".
#
# Returns non-zero if the directory cannot be created, the extraction
# fails, or the archive contains no regular file.
post_process_unpack() {
    local dest="${1}"
    local tarball="${2}"
    local one_file

    mkdir "${dest}" || return 1
    ${TAR} -C "${dest}" --strip-components=1 -xzf "${tarball}" || return 1

    # Read exactly one NUL-terminated record, so that file names containing
    # newlines (anywhere in the tree) cannot corrupt the selected path.
    if ! IFS= read -r -d '' one_file \
            < <(find "${dest}" -type f -print0 | LC_ALL=C sort -z); then
        printf 'post_process_unpack: no regular file found in %s\n' "${dest}" >&2
        return 1
    fi

    touch -r "${one_file}" "${dest}.timestamp"
}
85
# Re-create a reproducible archive from a directory tree, dated from the
# modification time of the "<base_dir>.timestamp" file sitting next to it.
#
# $1    : directory containing both the tree and its timestamp file
# $2    : name of the tree in $1; also the leading component in the archive
# $3    : output file
post_process_repack() {
    local in_dir="${1}" base_dir="${2}" out="${3}"
    local epoch

    # Modification time of the timestamp file, in seconds since the Epoch
    epoch="$(stat -c '%Y' "${in_dir}/${base_dir}.timestamp")"

    # The "@" prefix makes the value an epoch-seconds date
    mk_tar_gz "${in_dir}/${base_dir}" "${base_dir}" "@${epoch}" "${out}"
}
96
97# Keep this line and the following as last lines in this file.
98# vim: ft=bash
99