1#!/usr/bin/env bash
2
3# NOTE: if the output of this backend has to change (e.g. we change what gets
4# included in the archive (e.g. LFS), or we change the format of the archive
5# (e.g. tar options, compression ratio or method)), we MUST update the format
6# version in the variable BR_FMT_VERSION_git, in package/pkg-download.mk.
7
# Abort as soon as a command fails unexpectedly
set -o errexit
10
11# Download helper for git, to be called from the download wrapper script
12#
# Options:
#   -q          Be quiet.
#   -l          Fetch the large files (git LFS).
#   -r          Clone and archive sub-modules.
#   -o FILE     Generate archive in FILE.
#   -u URI      Clone from repository at URI.
#   -c CSET     Use changeset CSET.
#   -d DIR      Keep the local git cache in DIR/git.
#   -n NAME     Use basename NAME.
#
# Environment:
#   GIT      : the git command to call
#   BR_BACKEND_DL_GETOPTS : the option string passed to getopts
23
# Load the helpers that sit in the same directory as this script.
# shellcheck disable=SC1090 # Only provides mk_tar_gz()
source "${0%/*}/helpers"

# Remember how we were called, in case we have to re-execute ourselves
myname="$0"
OPTS=( "$@" )
30
# Error handler, meant to be installed as the ERR trap. It considers the
# git cache to be borked (corrupted tree, unexpected situation with
# submodules, uncommitted stuff left by a user who mucked around in the
# cache...), removes it, and re-executes this script with its original
# arguments to start again from scratch.
#
# The retry is attempted only once: BR_GIT_BACKEND_FIRST_FAULT is exported
# to the re-executed script, and when the handler finds it set to "true",
# it bails out instead of retrying.
#
# Globals read: myname, OPTS, git_cache, BR_GIT_BACKEND_FIRST_FAULT
# Exits with the status of the command that triggered the handler when
# bailing out, or when the script cannot be re-executed.
_on_error() {
    # Must come first: any other command would overwrite ${?}
    local ret=${?}

    printf "Detected a corrupted git cache.\n" >&2
    # Compare the value as a string, rather than running it as a command
    if [ "${BR_GIT_BACKEND_FIRST_FAULT:-false}" = "true" ]; then
        printf "This is the second time in a row; bailing out\n" >&2
        exit "${ret}"
    fi
    export BR_GIT_BACKEND_FIRST_FAULT=true

    printf "Removing it and starting afresh.\n" >&2

    # Leave the cache before removing it. When the failure happens after
    # the script has already left the cache (e.g. while generating the
    # archive), the directory stack is empty and popd fails; under
    # 'set -e' that would kill the handler before it could recover, so
    # the failure is ignored: we are then already out of the cache.
    popd >/dev/null 2>&1 || true
    # ':?' guards against ever expanding to an empty path
    rm -rf -- "${git_cache:?}"

    exec "${myname}" "${OPTS[@]}" || exit "${ret}"
}
53
# Parse the options passed on the command line. The set of accepted
# options comes from BR_BACKEND_DL_GETOPTS; options listed there that are
# not handled below are silently ignored.
#
# Error messages go to stderr: stdout may already have been redirected
# to /dev/null by an earlier -q, which would otherwise hide them.
#
# Arguments: the command-line arguments of the script
# Globals written: quiet, large_file, recurse, output, uri, cset, dl_dir,
#                  basename; OPTIND is left pointing past the options.
# Exits with status 1 on an unknown option or a missing option argument.
_parse_opts() {
    while getopts "${BR_BACKEND_DL_GETOPTS}" OPT; do
        case "${OPT}" in
        q)  quiet=-q; exec >/dev/null;;
        l)  large_file=1;;
        r)  recurse=1;;
        o)  output="${OPTARG}";;
        u)  uri="${OPTARG}";;
        c)  cset="${OPTARG}";;
        d)  dl_dir="${OPTARG}";;
        n)  basename="${OPTARG}";;
        :)  printf "option '%s' expects a mandatory argument\n" "${OPTARG}" >&2; exit 1;;
        \?) printf "unknown option '%s'\n" "${OPTARG}" >&2; exit 1;;
        esac
    done
}

# Defaults, for the options that are not given
quiet=
large_file=0
recurse=0
_parse_opts "${@}"

shift $((OPTIND-1)) # Get rid of our options
73
# Create and cd into the directory that will contain the local git cache.
# Refuse to go on without a download directory (-d): with an empty dl_dir
# the cache would be "/git", a path the error handler may 'rm -rf'.
git_cache="${dl_dir:?download directory not set (missing -d option)}/git"
mkdir -p "${git_cache}"
pushd "${git_cache}" >/dev/null

# From now on, any error should try to recover. 'set -E' makes shell
# functions, command substitutions and subshells inherit the ERR trap.
trap _on_error ERR
set -E
82
# Run git on the cache, tracing the command on stdout first, unless we
# were asked to be quiet.
#
# The arguments get expanded a second time before git is run, so the
# caller needs to single-quote those that may contain spaces, to prevent
# them from being split.
_git() {
    [ -n "${quiet}" ] || {
        printf '%s ' "GIT_DIR=${git_cache}/.git" "${GIT}" "${@}"
        printf '\n'
    }
    _plain_git "${@}"
}
# Run git on the cache, without tracing the command.
# Note: please keep the command below aligned with what _git prints.
#
# ${GIT} may be made of several words (the command and its options), and
# the arguments are parsed a second time by 'eval', so the caller needs
# to single-quote those that may contain spaces.
_plain_git() {
    # GIT_DIR is handed to eval unexpanded and double-quoted: the path of
    # the cache is then expanded only once, by eval, as a single word,
    # even if it contains spaces or other characters special to the shell.
    # shellcheck disable=SC2016,SC2086 # Expanded by eval; word-splitting for GIT
    eval 'GIT_DIR="${git_cache}/.git"' ${GIT} "${@}"
}
96
# Leave a note next to the git cache, to warn the user that they should
# not use it. It's ours. Our precious.
cat >"${dl_dir}/git.readme" <<_EOF_
IMPORTANT NOTE!

The git tree located in this directory is for the exclusive use
by Buildroot, which uses it as a local cache to reduce bandwidth
usage.

Buildroot *will* trash any changes in that tree whenever it needs
to use it. Buildroot may even remove it in case it detects the
repository may have been damaged or corrupted.

Do *not* work in that directory; your changes will eventually get
lost. Do *not* even use it as a remote, or as the source for new
worktrees; your commits will eventually get lost.
_EOF_
114
# Make sure the git cache holds a repository. When there is already a
# sane one, 'init' is a noop; when it is broken, 'init' restores it to
# working conditions. Blobs might then be missing, which is not a
# problem, as we fetch what we need later anyway.
#
# Going through the wrapper is fine here: 'init' does not use the path
# pointed to by GIT_DIR, but the directory passed as argument.
_git init .

# A previous run might have been killed before it registered the remote:
# add 'origin' when it is missing; then, in all cases, make it point to
# the URI we were given.
_plain_git remote |grep -q -E '^origin$' \
|| _git remote add origin "'${uri}'"
_git remote set-url origin "'${uri}'"

printf "Fetching all references\n"
_git fetch origin
_git fetch origin -t -f
136
# Some forges expose special refs (pull-requests for github, changes for
# gerrit...), so try to fetch the cset as if it were one. There is no
# easy way to know whether the cset is such a special ref, a tag, a sha1
# or whatever else; should there be an issue, verifying the cset, just
# below, fails anyway. As most csets are not special refs, the output of
# the fetch is consigned to oblivion so as not to alarm unsuspecting
# users; a failure is only traced, on stdout, as an informational message.
if ! _git fetch origin "'${cset}:${cset}'" >/dev/null 2>&1; then
    printf "Could not fetch special ref '%s'; assuming it is not special.\n" "${cset}"
fi

# Check that the changeset does exist. If it does not, re-cloning from
# scratch would not help, so we do not want to trash the repository for
# a missing commit: exit directly, which does not go through the ERR
# trap. The message goes to stderr, so that it is still visible in quiet
# mode (-q redirects stdout to /dev/null).
if ! _git rev-parse --quiet --verify "'${cset}^{commit}'" >/dev/null 2>&1; then
    printf "Commit '%s' does not exist in this repository.\n" "${cset}" >&2
    exit 1
fi
155
156# The new cset we want to checkout might have different submodules, or
157# have sub-dirs converted to/from a submodule. So we would need to
158# deregister _current_ submodules before we checkout.
159#
160# Using "git submodule deinit --all" would remove all the files for
161# all submodules, including the corresponding .git files or directories.
# However, it was only introduced with git-1.8.3, which is too recent
163# for some enterprise-grade distros.
164#
165# So, we fall-back to just removing all submodules directories. We do
166# not need to be recursive, as removing a submodule will de-facto remove
167# its own submodules.
168#
169# For recent git versions, the repository for submodules is stored
170# inside the repository of the super repository, so the following will
171# only remove the working copies of submodules, effectively caching the
172# submodules.
173#
174# For older versions however, the repository is stored in the .git/ of
# the submodule directory, so the following will effectively remove the
# working copy as well as the repository, which means submodules
177# will not be cached for older versions.
178#
# Remove the directory of each submodule of the current checkout; the
# command below is run by git-foreach, once per submodule.
# shellcheck disable=SC2016 # Will be expanded by git-foreach
deregister='printf "Deregistering submodule \"%s\"\n" "${path}" && cd .. && rm -rf "${path##*/}"'
_git submodule --quiet foreach "'${deregister}'"

# Switch to the changeset we were asked for; this has to be done before
# the submodules it requires can be updated.
_git checkout -f -q "'${cset}'"

# Drop what is now untracked: leftovers of a git operation interrupted
# in a previous run, or empty directories that were the parents of the
# submodules removed above.
_git clean -ffdx

# The date of the commit is needed to generate a reproducible archive.
# %ci is ISO 8601, so it's fully qualified, with TZ and all.
date="$( _plain_git log -1 --pretty=format:%ci )"
195
# Turn an absolute "gitdir:" pointer, in the .git file of a submodule,
# into a pointer relative to that submodule.
#   $1: path of the submodule, relative to the current directory, with
#       a trailing '/'
# Nothing is done when the .git of the submodule is not a regular file,
# or when it does not point below the current directory.
_relative_gitdir() {
    local module_dir="${1}"
    local relative_dir top_re

    [ -f "${module_dir}/.git" ] || return 0

    # One '../' for each component of the path of the submodule
    relative_dir="$( sed -r -e 's,/+,/,g; s,[^/]+/,../,g' <<<"${module_dir}" )"
    # The current directory is matched as a literal string: escape what
    # is special in an extended regexp, as well as the ':' delimiter.
    top_re="$( sed -e 's/[][\\.*^$+?(){}|:]/\\&/g' <<<"$(pwd)" )"
    sed -r -i -e "s:^gitdir\: ${top_re}/:gitdir\: ${relative_dir}:" "${module_dir}/.git"
}

# There might be submodules, so fetch them.
if [ "${recurse}" -eq 1 ]; then
    _git submodule update --init --recursive

    # Older versions of git will store the absolute path of the git tree
    # in the .git of submodules, while newer versions just use relative
    # paths. Detect and fix the older variants to use relative paths, so
    # that the archives are reproducible across a wider range of git
    # versions. However, we can't do that if git is too old and uses
    # full repositories for submodules.
    #
    # The paths are read one per line, so that a path with spaces or
    # glob characters is neither split nor expanded.
    # shellcheck disable=SC2016 # Will be expanded by git-foreach
    cmd='printf "%s\n" "${path}/"'
    while IFS= read -r module_dir; do
        _relative_gitdir "${module_dir}"
    done < <( _plain_git submodule --quiet foreach "'${cmd}'" )
fi
214
# Bring in the large files (git LFS), when we were asked to.
if [ ${large_file} -eq 1 ]; then
    # First in the super repository...
    for lfs_step in "install --local" "fetch" "checkout"; do
        _git lfs "${lfs_step}"
    done
    # ... then, if there are also submodules, the same steps in each of
    # them, recursively.
    if [ ${recurse} -eq 1 ]; then
        for lfs_step in "install --local" "fetch" "checkout"; do
            # shellcheck disable=SC2086 # We want word-splitting for GIT
            _git submodule foreach --recursive ${GIT} lfs "${lfs_step}"
        done
    fi
fi
228
# Filter the output of "git check-attr -z", read from stdin, which uses
# \0 as separator for everything, so there is no difference between
# fields and records (but there is a trailing \0):
#   path_1\0attr_name\0attr_state\0path_2\0attr_name\0attr_state\0....
# Print, each terminated by \0, the paths for which the state is "set".
#
# IFS is emptied for each read, so that a path that begins or ends with
# whitespace is passed on unchanged, rather than trimmed.
_export_subst_paths() {
    local path state

    # The second field is the attribute name; it is of no use here
    while IFS= read -r -d "" path \
       && IFS= read -r -d "" _ \
       && IFS= read -r -d "" state; do
        if [ "${state}" = "set" ]; then
            printf "%s\0" "${path}"
        fi
    done
}

# Find files that are affected by the export-subst git-attribute.
# There might be a .gitattributes at the root of the repository, as well
# as in any arbitrary sub-directory, whether from the master repository
# or a submodule.
mapfile -d "" files < <(
    set -o pipefail  # Constrained to this sub-shell
    find . -print0 \
    |_plain_git check-attr --stdin -z export-subst \
    |_export_subst_paths
)
# Replace, in place, the $Format:...$ hints of one file with the output
# of "git log" for the corresponding pretty-format.
#   $1: path of the file to process
# Globals read: GIT, git_cache
#
# The format comes straight from the content of the repository and ends
# up in a shell command run by awk, so any single quote it contains is
# escaped, to keep it from terminating the quoting of the format and
# running arbitrary commands.
#
# There might be any arbitrary number of hints on each line, so they are
# handled one by one. The new content is written to a temporary file,
# which then replaces the original.
_expand_export_subst() {
    local f="${1}"

    TZ=UTC \
    LC_ALL=C \
    GIT_DIR="${git_cache}/.git" \
    awk -v GIT="${GIT}" '
    BEGIN {
        sq = "\047";  # A single quote, spelled in octal
    }
    {
        l = $(0);
        while( (i = match(l, /\$Format:[^\$]+\$/)) > 0 ) {
            len = RLENGTH;
            printf("%s", substr(l, 1, i-1) );
            fmt = substr(l, i, RLENGTH);
            # Strip the leading "$Format:" and the trailing "$"
            pretty = substr(fmt, 9, length(fmt)-9);
            # Quote the format for the shell: each single quote of the
            # format closes the quoting, adds an escaped single quote,
            # and opens the quoting again.
            n = split(pretty, parts, sq);
            quoted = parts[1];
            for (k = 2; k <= n; k++) {
                quoted = quoted sq "\\" sq sq parts[k];
            }
            cmd = GIT " -c core.abbrev=40 log -s -n1 --pretty=format:" sq quoted sq;
            while ( (cmd | getline replace) > 0) {
                printf("%s", replace);
            }
            ret = close(cmd);
            if (ret != 0) {
                printf("%s:%d: error while executing command \"%s\"\n", FILENAME, NR, cmd) > "/dev/stderr";
                exit 1;
            }
            l = substr(l, i+len);
        }
        printf("%s\n", l);
    }
    ' "${f}" >"${f}.br-temp"
    mv -f "${f}.br-temp" "${f}"
}

# Replace format hints in the files affected by export-subst. Always use
# the master repository as the source of the git metadata, even for files
# found in submodules as this is the most practical: there is no way to
# chdir() in (g)awk, and recomputing GIT_DIR for each submodule would
# really be tedious...
for f in "${files[@]}"; do
    _expand_export_subst "${f}"
done
288
# We are done with the git cache: go back to where we came from
popd >/dev/null

# Generate the archive.
# The .git dir is left out (hence the last argument), while other .git
# files are kept, in case they are the only files in their directory.
# With the .git dir, the tarballs would not be reproducible, as its
# content depends on the state of the remote server; they would also be
# large (gigabytes for some linux trees) when a full clone took place.
mk_tar_gz \
    "${git_cache}" \
    "${basename}" \
    "${date}" \
    "${output}" \
    ".git/*"
298