#!/usr/bin/env bash

# NOTE: whenever the output of this backend changes (e.g. what ends up in
# the archive, such as LFS content, or how the archive is produced, such
# as tar options, compression ratio or method), the format version held
# in the variable BR_FMT_VERSION_git, in package/pkg-download.mk, MUST be
# updated accordingly.

# Abort on any unexpected failure.
set -e

# Download helper for git, to be called from the download wrapper script
#
# Options:
#   -q          Be quiet.
#   -l          Also fetch large files (git LFS).
#   -r          Clone and archive sub-modules.
#   -o FILE     Generate archive in FILE.
#   -u URI      Clone from repository at URI.
#   -c CSET     Use changeset CSET.
#   -d DIR      Keep the local git cache below DIR.
#   -n NAME     Use basename NAME.
#
# Environment:
#   GIT                     : the git command to call
#   BR_BACKEND_DL_GETOPTS   : the option string handed to getopts

# shellcheck disable=SC1090 # Only provides mk_tar_gz()
source "${0%/*}/helpers"

# Remember how we were invoked, so that we can re-execute ourselves.
myname=${0}
OPTS=("${@}")

# The function below is the error handler. It attempts one (and only
# one!) clone from scratch, for the case where the git tree is borked,
# or where an unexpected and unsupported situation arises with
# submodules or uncommitted stuff (e.g. if the user manually mucked
# around in the git cache).
_on_error() {
    # Grab the status of the failed command before anything clobbers it.
    local ret=${?}

    printf "Detected a corrupted git cache.\n" >&2
    # BR_GIT_BACKEND_FIRST_FAULT is exported below before re-executing
    # ourselves, so seeing it set means the retry failed as well.
    if ${BR_GIT_BACKEND_FIRST_FAULT:-false}; then
        printf "This is the second time in a row; bailing out\n" >&2
        exit ${ret}
    fi
    export BR_GIT_BACKEND_FIRST_FAULT=true

    printf "Removing it and starting afresh.\n" >&2

    # Leave the cache directory before removing it.
    popd >/dev/null
    rm -rf "${git_cache}"

    # Start over with the very same command line.
    exec "${myname}" "${OPTS[@]}" || exit ${ret}
}

# Parse the command line. The option string is provided by the caller
# through the environment.
quiet=
large_file=0
recurse=0
while getopts "${BR_BACKEND_DL_GETOPTS}" OPT; do
    case "${OPT}" in
    # From here on, stdout is discarded: every diagnostic that must
    # reach the user has to be sent to stderr.
    q)  quiet=-q; exec >/dev/null;;
    l)  large_file=1;;
    r)  recurse=1;;
    o)  output="${OPTARG}";;
    u)  uri="${OPTARG}";;
    c)  cset="${OPTARG}";;
    d)  dl_dir="${OPTARG}";;
    n)  basename="${OPTARG}";;
    # Errors go to stderr, otherwise they would be lost when -q was
    # seen earlier on the command line.
    :)  printf "option '%s' expects a mandatory argument\n" "${OPTARG}" >&2; exit 1;;
    \?) printf "unknown option '%s'\n" "${OPTARG}" >&2; exit 1;;
    esac
done

shift $((OPTIND-1)) # Get rid of our options

# Create and cd into the directory that will contain the local git cache
git_cache="${dl_dir}/git"
mkdir -p "${git_cache}"
pushd "${git_cache}" >/dev/null

# Any error now should try to recover
trap _on_error ERR
set -E

# Run git on the cache, tracing the command first unless we are quiet.
#
# Caller needs to single-quote its arguments to prevent them from
# being expanded a second time (in case there are spaces in them),
# because the command line goes through eval in _plain_git().
_git() {
    if [ -z "${quiet}" ]; then
        printf '%s ' GIT_DIR="${git_cache}/.git" "${GIT}" "${@}"; printf '\n'
    fi
    _plain_git "$@"
}
# Run git on the cache, without any tracing.
# Note: please keep command below aligned with what is printed above
_plain_git() {
    # shellcheck disable=SC2086 # We want word-splitting for GIT
    eval GIT_DIR="${git_cache}/.git" ${GIT} "${@}"
}

# Create a warning file, that the user should not use the git cache.
# It's ours. Our precious.
cat <<-_EOF_ >"${dl_dir}/git.readme"
IMPORTANT NOTE!

The git tree located in this directory is for the exclusive use
by Buildroot, which uses it as a local cache to reduce bandwidth
usage.

Buildroot *will* trash any changes in that tree whenever it needs
to use it. Buildroot may even remove it in case it detects the
repository may have been damaged or corrupted.

Do *not* work in that directory; your changes will eventually get
lost. Do *not* even use it as a remote, or as the source for new
worktrees; your commits will eventually get lost.
_EOF_

# Initialise a repository in the git cache. If the repository already
# existed, this is a noop, unless the repository was broken, in which
# case this magically restores it to working conditions. In the latter
# case, we might be missing blobs, but that's not a problem: we'll
# fetch what we need later anyway.
#
# We can still go through the wrapper, because 'init' does not use the
# path pointed to by GIT_DIR, but really uses the directory passed as
# argument.
_git init .

# Ensure the repo has an origin (in case a previous run was killed).
if ! _plain_git remote |grep -q -E '^origin$'; then
    _git remote add origin "'${uri}'"
fi

# Always point origin at the requested URI, even if it already existed.
_git remote set-url origin "'${uri}'"

printf "Fetching all references\n"
_git fetch origin
_git fetch origin -t -f

# Try to get the special refs exposed by some forges (pull-requests for
# github, changes for gerrit...). There is no easy way to know whether
# the cset the user passed us is such a special ref or a tag or a sha1
# or whatever else. We'll eventually fail at checking out that cset,
# below, if there is an issue anyway. Since most of the cset we're gonna
# have to clone are not such special refs, consign the output to oblivion
# so as not to alarm unsuspecting users, but still trace it as a warning.
if ! _git fetch origin "'${cset}:${cset}'" >/dev/null 2>&1; then
    printf "Could not fetch special ref '%s'; assuming it is not special.\n" "${cset}"
fi

# Check that the changeset does exist.
# If it does not, re-cloning from scratch won't help, so we don't want
# to trash the repository for a missing commit. We just exit without
# going through the ERR trap. The message goes to stderr, so that it
# is still visible when stdout is discarded (quiet mode).
if ! _git rev-parse --quiet --verify "'${cset}^{commit}'" >/dev/null 2>&1; then
    printf "Commit '%s' does not exist in this repository.\n" "${cset}" >&2
    exit 1
fi

# The new cset we want to checkout might have different submodules, or
# have sub-dirs converted to/from a submodule. So we would need to
# deregister _current_ submodules before we checkout.
#
# Using "git submodule deinit --all" would remove all the files for
# all submodules, including the corresponding .git files or directories.
# However, it was only introduced with git-1.8.3, which is too recent
# for some enterprise-grade distros.
#
# So, we fall-back to just removing all submodules directories. We do
# not need to be recursive, as removing a submodule will de-facto remove
# its own submodules.
#
# For recent git versions, the repository for submodules is stored
# inside the repository of the super repository, so the following will
# only remove the working copies of submodules, effectively caching the
# submodules.
#
# For older versions however, the repository is stored in the .git/ of
# the submodule directory, so the following will effectively remove the
# working copy as well as the repository, which means submodules will
# not be cached for older versions.
#
# shellcheck disable=SC2016 # Will be expanded by git-foreach
cmd='printf "Deregistering submodule \"%s\"\n" "${path}" && cd .. && rm -rf "${path##*/}"'
_git submodule --quiet foreach "'${cmd}'"

# Checkout the required changeset, so that we can update the required
# submodules.
_git checkout -f -q "'${cset}'"

# Get rid of now-untracked directories (in case a git operation was
# interrupted in a previous run, or to get rid of empty directories
# that were parents of submodules removed above).
_git clean -ffdx

# Get date of commit to generate a reproducible archive.
# %ci is ISO 8601, so it's fully qualified, with TZ and all.
date="$( _plain_git log -1 --pretty=format:%ci )"

# There might be submodules, so fetch them.
if [ "${recurse}" -eq 1 ]; then
    _git submodule update --init --recursive

    # Older versions of git will store the absolute path of the git tree
    # in the .git of submodules, while newer versions just use relative
    # paths. Detect and fix the older variants to use relative paths, so
    # that the archives are reproducible across a wider range of git
    # versions. However, we can't do that if git is too old and uses
    # full repositories for submodules.
    # shellcheck disable=SC2016 # Will be expanded by git-foreach
    cmd='printf "%s\n" "${path}/"'
    # Collect the whole list, one path per line, before touching any
    # .git file; reading it into an array (rather than iterating over
    # an unquoted command substitution) keeps paths that contain blanks
    # or glob characters intact.
    mapfile -t module_dirs < <( _plain_git submodule --quiet foreach "'${cmd}'" )
    for module_dir in "${module_dirs[@]}"; do
        # A directory here means git keeps a full repository in the
        # submodule: nothing we can fix.
        [ -f "${module_dir}/.git" ] || continue
        # Turn "a/b/" into "../../": squash repeated slashes, then
        # replace each path component with "..".
        relative_dir="$( sed -r -e 's,/+,/,g; s,[^/]+/,../,g' <<<"${module_dir}" )"
        sed -r -i -e "s:^gitdir\: $(pwd)/:gitdir\: ${relative_dir}:" "${module_dir}/.git"
    done
fi

# If there are large files then fetch them.
if [ "${large_file}" -eq 1 ]; then
    _git lfs install --local
    _git lfs fetch
    _git lfs checkout
    # If there are also submodules, recurse into them,
    # shellcheck disable=SC2086 # We want word-splitting for GIT
    if [ "${recurse}" -eq 1 ]; then
        _git submodule foreach --recursive ${GIT} lfs install --local
        _git submodule foreach --recursive ${GIT} lfs fetch
        _git submodule foreach --recursive ${GIT} lfs checkout
    fi
fi

# Find files that are affected by the export-subst git-attribute.
# There might be a .gitattributes at the root of the repository, as well
# as in any arbitrary sub-directory, whether from the master repository
# or a submodule.
# "git check-attr -z" outputs results using \0 as separator for everything,
# so there is no difference between fields and records (but there is a
# trailing \0):
#   path_1\0attr_name\0attr_state\0path_2\0attr_name\0attr_state\0....
mapfile -d "" files < <(
    set -o pipefail  # Constrained to this sub-shell
    find . -print0 \
    |_plain_git check-attr --stdin -z export-subst \
    |(i=0
      # IFS is emptied so that read does not strip leading or trailing
      # whitespace from the NUL-delimited values (paths, notably).
      while IFS= read -r -d "" val; do
        # Values come in groups of three: path, attribute, state.
        case "$((i++%3))" in
          (0)   path="${val}";;
          (1)   ;; # Attribute name, always "export-subst", as requested
          (2)
            if [ "${val}" = "set" ]; then
                printf "%s\0" "${path}"
            fi;;
        esac
      done
    )
)
# Replace format hints in those files. Always use the master repository
# as the source of the git metadata, even for files found in submodules
# as this is the most practical: there is no way to chdir() in (g)awk,
# and recomputing GIT_DIR for each submodule would really be tedious...
# There might be any arbitrary number of hints on each line, so iterate
# over those one by one.
for f in "${files[@]}"; do
    # Each line is scanned for "$Format:<pretty>$" hints; every hint is
    # replaced with what "git log --pretty=format:<pretty>" prints, and
    # the result is written next to the file before being moved over it.
    TZ=UTC LC_ALL=C GIT_DIR="${git_cache}/.git" \
    awk -v GIT="${GIT}" '
    {
        rest = $(0);
        while( (pos = match(rest, /\$Format:[^\$]+\$/)) > 0 ) {
            hint_len = RLENGTH;
            # Text ahead of the hint is copied verbatim.
            printf("%s", substr(rest, 1, pos-1) );
            # Drop the leading "$Format:" (8 chars) and the closing "$".
            pretty = substr(rest, pos+8, hint_len-9);
            cmd = GIT " -c core.abbrev=40 log -s -n1 --pretty=format:'\''" pretty "'\''";
            while ( (cmd | getline replace) > 0) {
                printf("%s", replace);
            }
            ret = close(cmd);
            if (ret != 0) {
                printf("%s:%d: error while executing command \"%s\"\n", FILENAME, NR, cmd) > "/dev/stderr";
                exit 1;
            }
            # Carry on with whatever follows the hint.
            rest = substr(rest, pos+hint_len);
        }
        printf("%s\n", rest);
    }
    ' "${f}" >"${f}.br-temp"
    mv -f "${f}.br-temp" "${f}"
done

# Leave the git cache directory.
popd >/dev/null

# Generate the archive.
# The .git dir is left out: it would make the tarballs non reproducible,
# as its content depends on the state of the remote server, and it would
# make them large (gigabytes for some linux trees) when a full clone took
# place. Other .git files are kept, in case they are the only files in
# their directory.
mk_tar_gz "${git_cache}" "${basename}" "${date}" "${output}" ".git/*"