From 0a72f9c8ea191e668d94dd1be7185d49daa36806 Mon Sep 17 00:00:00 2001 From: tosu Date: Sun, 5 Mar 2023 18:07:48 +0100 Subject: [PATCH] non-standard IFS prevents multi-argument variable command invocation, soooo refactor --- downloadPOSIX.sh | 208 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 148 insertions(+), 60 deletions(-) diff --git a/downloadPOSIX.sh b/downloadPOSIX.sh index 94838ad..2eaad81 100755 --- a/downloadPOSIX.sh +++ b/downloadPOSIX.sh @@ -1,81 +1,139 @@ #!/bin/sh +# Run entire script in subshell to prevent variable pollution +( + +# https://unix.stackexchange.com/a/188365 +# If and only if bash is in POSIX-mode, which can be forced by setting the +# POSIXLY_CORRECT variable (to any value), then the special built-ins, see +# https://www.gnu.org/software/bash/manual/html_node/Special-Builtins.html, +# are found BEFORE functions during lookup. `unset' is one of these special +# built-ins. However, when using `shopt -s expand_aliases', alias expansion in +# non-interactive shells is supported. This can be circumvented by escaping any +# of the letters of the command, usually the first. +# For portability and security, we want to use the `builtin' built-in command, so +# we must unset it if it is a pre-defined function and unalias it if it is an alias. +# This stub does not unset a variable called `builtin', but that is irrelevant. +# We also have to unset `unalias', since it is not a special builtin and calling it +# in POSIX-mode could call a function instead. +# We also unalias unset, since it is used at the bottom of the script. +# The assignment below sets POSIXLY_CORRECT, which puts bash into POSIX-mode. +POSIXLY_CORRECT='1' +COMMANDS_='builtin unalias unset read printf command exit type .' 
+\unset -f -- ${COMMANDS_} +\unalias -- ${COMMANDS_} 2>/dev/null || true + +WHICH_='type -P' # helper for below + +# GNU coreutils +TR_="$(${WHICH_} tr)" +MKDIR_="$(${WHICH_} mkdir)" +WC_="$(${WHICH_} wc)" + +# GNU non-coreutils +SED_="$(${WHICH_} sed)" +GREP_="$(${WHICH_} grep)" +XARGS_="$(${WHICH_} xargs)" + +# third-party +FFMPEG_="$(${WHICH_} ffmpeg)" +JQ_="$(${WHICH_} jq)" +WGET_="$(${WHICH_} wget)" + + +command -v -- wget >/dev/null 2>&1 || { + printf '\033[31m%s\033[m\n' "Please install wget" + exit 1 +} + +command -v -- jq >/dev/null 2>&1 || { + printf '\033[31m%s\033[m\n' "Please install jq" + exit 1 +} + +command -v -- ffmpeg >/dev/null 2>&1 || { + printf '\033[31m%s\033[m\n' "Please install ffmpeg" + exit 1 +} + # .env contains a line like # BASE64_BEARER_TOKEN='' # Retrieve that base64 token from a request from the browser # network tab network tabfrom a logged in MS Stream (Classic) tab -. ./.env +. './.env' -OUTDIR="./videos" +OUTDIR='./videos' -read -r -d '' API_URL <<- 'EOM' -https://euwe-1.api.microsoftstream.com/api/videos? 
-$top=100 -& -$skip=0 -& -$orderby=metrics/trendingScore desc -& -$filter= - published{SPACE} - and{SPACE} - ( - ( - state eq 'Completed' and contentSource ne 'livestream' - ){SPACE} - or{SPACE} - ( - contentSource eq 'livestream'{SPACE} - and not - ( - state eq 'Completed'{SPACE} - and not - liveEvent/LiveEventOptions/OnDemandOptions/EnablePlayback - ) - ) - ) -& +read -r API_URL <<- EOM +https://euwe-1.api.microsoftstream.com/api/videos?\ +\$top=100\ +&\ +\$skip=0\ +&\ +\$orderby=metrics/trendingScore asc\ +&\ +\$filter=\ + published \ + and \ + (\ + (\ + state eq 'Completed' and contentSource ne 'livestream'\ + ) \ + or \ + (\ + contentSource eq 'livestream' \ + and not \ + (\ + state eq 'Completed' \ + and not \ + liveEvent/LiveEventOptions/OnDemandOptions/EnablePlayback\ + )\ + )\ + )\ +&\ api-version=1.4-private EOM API_URL="$( printf '%s' "${API_URL}" | - sed 's/{SPACE}/ /g' | - tr -d '\n' + ${TR_} -d '\t' )" -mkdir -p -- "${OUTDIR}" || { - printf '\033[31m%s\033[m\n' 'Aborting' - exit +${MKDIR_} -p -- "${OUTDIR}" || { + printf '\033[31m%s\033[m\n' 'Aborting' + exit 2 } -# '>' without quotes is delimiter, since it's one of the few chars disallowed in a URL. Title must not include this char +# '>' without quotes is delimiter, since it is one of the few characters disallowed in a URL. Title must not include this character # https://www.ietf.org/rfc/rfc2396.txt # 2.4.3. 
Excluded US-ASCII Characters m3u8_manifest_urls_and_metadata="$( - wget --no-verbose --quiet -O- --header="authorization: Bearer ${BASE64_BEARER_TOKEN}" -- "${API_URL}" | - jq -r '.value[] | .name + ">" + .media.duration[2:] + ">" + (.playbackUrls | map(select(.mimeType == "application/vnd.apple.mpegurl")) | .[].playbackUrl)' + ${WGET_} --no-verbose \ + --quiet \ + --output-document - \ + --header="authorization: Bearer ${BASE64_BEARER_TOKEN}" \ + -- "${API_URL}" | + ${JQ_} --raw-output \ + '.value[] | .name + ">" + .media.duration[2:] + ">" + (.playbackUrls | map(select(.mimeType == "application/vnd.apple.mpegurl")) | .[].playbackUrl)' )" [ -z "${m3u8_manifest_urls_and_metadata}" ] && { - printf '\033[31m%s\033[m\n' 'Error GETting manifest urls (response empty) or Bearer token invalid/expired. Exiting.' - exit + printf '\033[31m%s\033[m\n' 'Error GETting manifest urls (response empty) or Bearer token invalid/expired. Exiting.' + exit 3 } -idx=1 -total="$(printf '%s\n' "${m3u8_manifest_urls_and_metadata}" | wc -l)" +idx='1' +total="$(printf '%s\n' "${m3u8_manifest_urls_and_metadata}" | ${WC_} -l)" -IFS=" -" -for m3u8_manifest_url_and_metadata in ${m3u8_manifest_urls_and_metadata}; do - IFS='>' read -ra META <<- EOM - ${m3u8_manifest_url_and_metadata} +IFS=' +' +for m3u8_manifest_url_and_metadata in ${m3u8_manifest_urls_and_metadata} +do + IFS='>' read -r title length m3u8_manifest_url <<- EOM + ${m3u8_manifest_url_and_metadata} EOM - title="${META[0]}" - length="${META[1]}" - length="$(printf '%s' "${length}" | sed -e 's/\..*S/S/g' -e 's/M/:/g' -e 's/H/:/g')" - m3u8_manifest_url="${META[2]}" + length="$(printf '%s' "${length}" | ${SED_} -e 's/\..*S/S/g' -e 's/M/:/g' -e 's/H/:/g')" m3u8_highest_res_url="$( curl --silent -- "${m3u8_manifest_url}" | @@ -85,24 +143,54 @@ for m3u8_manifest_url_and_metadata in ${m3u8_manifest_urls_and_metadata}; do rev | cut -d'&' -f2 | cut -c13- | - sed 's/keyframes/fragments/g' + ${SED_} 's/keyframes/fragments/g' )" 
filepath="${OUTDIR}/${title}.mp4" printf '\033[32m%s\033[m\n' "Download '${filepath}' (${idx}/${total})" - false && ffmpeg -hide_banner \ - -loglevel error \ - -headers "authorization: Bearer ${BASE64_BEARER_TOKEN}" \ - -i "${m3u8_highest_res_url}" \ - -progress - \ - -c copy -- "${filepath}" | - grep --line-buffered 'out_time=' | - sed -u -e 's/out_time=//g' -e 's/\..*//g' -e 's/^-//g' | - xargs -I{} printf "{} of ${length} \r" + ${FFMPEG_} -hide_banner \ + -loglevel error \ + -headers "authorization: Bearer ${BASE64_BEARER_TOKEN}" \ + -i "${m3u8_highest_res_url}" \ + -progress - \ + -c copy -- "${filepath}" | + ${GREP_} --line-buffered 'out_time=' | + ${SED_} -u -e 's/out_time=//g' -e 's/\..*//g' -e 's/^-//g' | + ${XARGS_} -I{} printf '%s\r' "{} of ${length} " printf '\n\033[33m%s\033[m\n' "Done" idx="$(( idx + 1 ))" done + +unset -v POSIXLY_CORRECT \ +WHICH_ \ +EXIT_ \ +PRINTF_ \ +COMMAND_ \ +READ_ \ +TR_ \ +MKDIR_ \ +SED_ \ +GREP_ \ +WC_ \ +FFMPEG_ \ +JQ_ \ +WGET_ \ +idx \ +total \ +API_URL \ +filepath \ +m3u8_manifest_url \ +m3u8_highest_res_url \ +m3u8_manifest_url_and_metadata \ +m3u8_manifest_urls_and_metadata \ +OUTDIR \ +BASE64_BEARER_TOKEN + +# \n cannot be the trailing character: command substitution strips trailing newlines +IFS="$(printf ' \n\t')" + +)