A non-standard IFS prevents invoking multi-argument commands stored in variables, so refactor

This commit is contained in:
tosu 2023-03-05 18:07:48 +01:00
parent debdd31a9a
commit 0a72f9c8ea
1 changed files with 148 additions and 60 deletions

View File

@ -1,81 +1,139 @@
#!/bin/sh #!/bin/sh
# Run entire script in subshell to prevent variable pollution
(
# https://unix.stackexchange.com/a/188365
# If and only if bash is in POSIX-mode, which can be forced by setting the
# POSIXLY_CORRECT variable (to any value), then the special built-ins, see
# https://www.gnu.org/software/bash/manual/html_node/Special-Builtins.html,
# are found BEFORE functions during lookup. `unset' is one of these special
# built-ins. However, when using `shopt -s expand_aliases', alias expansion in
# non-interactive shells is supported. This can be circumvented by escaping any
# of the letters of the command, usually the first.
# For portability and security, we want to use the `builtin' built-in command, so
# we must unset it if it is a pre-defined function and unalias it if it is an alias.
# This stub does not unset a variable called `builtin', but that is irrelevant.
# We also have to unset `unalias', since it is not a special builtin and calling it
# in POSIX-mode could call a function instead.
# We also unalias unset, since it is used at the bottom of the script.
# The assignment below sets POSIXLY_CORRECT to force POSIX-mode before the clean-up runs.
# Setting POSIXLY_CORRECT (to any value) forces bash into POSIX-mode.
POSIXLY_CORRECT='1'
# Names that must not be shadowed by a function or an alias.
COMMANDS_='builtin unalias unset read printf command exit type .'
# The leading backslash keeps `unset'/`unalias' themselves from being
# alias-expanded. ${COMMANDS_} is intentionally unquoted so that it splits into
# one argument per name.
\unset -f -- ${COMMANDS_}
# `unalias' reports an error for every name that is not an alias; that is
# expected, so its diagnostics and exit status are deliberately discarded.
\unalias -- ${COMMANDS_} 2>/dev/null || true
# NOTE(review): `type -P' is a bash extension -- confirm that this script is
# never executed by a different sh implementation.
WHICH_='type -P' # helper for below
# GNU coreutils
TR_="$(${WHICH_} tr)"
MKDIR_="$(${WHICH_} mkdir)"
WC_="$(${WHICH_} wc)"
# GNU non-coreutils
SED_="$(${WHICH_} sed)"
GREP_="$(${WHICH_} grep)"
XARGS_="$(${WHICH_} xargs)"
# third-party
FFMPEG_="$(${WHICH_} ffmpeg)"
JQ_="$(${WHICH_} jq)"
WGET_="$(${WHICH_} wget)"
# Abort early when a required third-party tool could not be resolved.
# The resolved path is validated (rather than `command -v <name>') because
# `command -v' also succeeds for an alias or a function, in which case the
# variable above would be empty and every later invocation would break.
# Each entry has the form <name>:<resolved path>.
for TOOL_ in "wget:${WGET_}" "jq:${JQ_}" "ffmpeg:${FFMPEG_}"
do
	[ -n "${TOOL_#*:}" ] || {
		# Diagnostics belong on stderr so that stdout stays clean.
		printf '\033[31m%s\033[m\n' "Please install ${TOOL_%%:*}" >&2
		exit 1
	}
done
unset -v TOOL_
# .env contains a line like # .env contains a line like
# BASE64_BEARER_TOKEN='<literal base64 encoded string>' # BASE64_BEARER_TOKEN='<literal base64 encoded string>'
# Retrieve that base64 token from a request from the browser # Retrieve that base64 token from a request from the browser
# network tab from a logged in MS Stream (Classic) tab # network tab from a logged in MS Stream (Classic) tab
. ./.env . './.env'
OUTDIR="./videos" OUTDIR='./videos'
read -r -d '' API_URL <<- 'EOM' read -r API_URL <<- EOM
https://euwe-1.api.microsoftstream.com/api/videos? https://euwe-1.api.microsoftstream.com/api/videos?\
$top=100 \$top=100\
& &\
$skip=0 \$skip=0\
& &\
$orderby=metrics/trendingScore desc \$orderby=metrics/trendingScore asc\
& &\
$filter= \$filter=\
published{SPACE} published \
and{SPACE} and \
( (\
( (\
state eq 'Completed' and contentSource ne 'livestream' state eq 'Completed' and contentSource ne 'livestream'\
){SPACE} ) \
or{SPACE} or \
( (\
contentSource eq 'livestream'{SPACE} contentSource eq 'livestream' \
and not and not \
( (\
state eq 'Completed'{SPACE} state eq 'Completed' \
and not and not \
liveEvent/LiveEventOptions/OnDemandOptions/EnablePlayback liveEvent/LiveEventOptions/OnDemandOptions/EnablePlayback\
) )\
) )\
) )\
& &\
api-version=1.4-private api-version=1.4-private
EOM EOM
API_URL="$( API_URL="$(
printf '%s' "${API_URL}" | printf '%s' "${API_URL}" |
sed 's/{SPACE}/ /g' | ${TR_} -d '\t'
tr -d '\n'
)" )"
mkdir -p -- "${OUTDIR}" || { ${MKDIR_} -p -- "${OUTDIR}" || {
printf '\033[31m%s\033[m\n' 'Aborting' printf '\033[31m%s\033[m\n' 'Aborting'
exit exit 2
} }
# '>' without quotes is delimiter, since it's one of the few chars disallowed in a URL. Title must not include this char # '>' without quotes is delimiter, since it is one of the few characters disallowed in a URL. Title must not include this character
# https://www.ietf.org/rfc/rfc2396.txt # https://www.ietf.org/rfc/rfc2396.txt
# 2.4.3. Excluded US-ASCII Characters # 2.4.3. Excluded US-ASCII Characters
m3u8_manifest_urls_and_metadata="$( m3u8_manifest_urls_and_metadata="$(
wget --no-verbose --quiet -O- --header="authorization: Bearer ${BASE64_BEARER_TOKEN}" -- "${API_URL}" | ${WGET_} --no-verbose \
jq -r '.value[] | .name + ">" + .media.duration[2:] + ">" + (.playbackUrls | map(select(.mimeType == "application/vnd.apple.mpegurl")) | .[].playbackUrl)' --quiet \
--output-document - \
--header="authorization: Bearer ${BASE64_BEARER_TOKEN}" \
-- "${API_URL}" |
${JQ_} --raw-output \
'.value[] | .name + ">" + .media.duration[2:] + ">" + (.playbackUrls | map(select(.mimeType == "application/vnd.apple.mpegurl")) | .[].playbackUrl)'
)" )"
[ -z "${m3u8_manifest_urls_and_metadata}" ] && { [ -z "${m3u8_manifest_urls_and_metadata}" ] && {
printf '\033[31m%s\033[m\n' 'Error GETting manifest urls (response empty) or Bearer token invalid/expired. Exiting.' printf '\033[31m%s\033[m\n' 'Error GETting manifest urls (response empty) or Bearer token invalid/expired. Exiting.'
exit exit 3
} }
idx=1 idx='1'
total="$(printf '%s\n' "${m3u8_manifest_urls_and_metadata}" | wc -l)" total="$(printf '%s\n' "${m3u8_manifest_urls_and_metadata}" | ${WC_} -l)"
IFS=" IFS='
" '
for m3u8_manifest_url_and_metadata in ${m3u8_manifest_urls_and_metadata}; do for m3u8_manifest_url_and_metadata in ${m3u8_manifest_urls_and_metadata}
IFS='>' read -ra META <<- EOM do
${m3u8_manifest_url_and_metadata} IFS='>' read -r title length m3u8_manifest_url <<- EOM
${m3u8_manifest_url_and_metadata}
EOM EOM
title="${META[0]}" length="$(printf '%s' "${length}" | ${SED_} -e 's/\..*S/S/g' -e 's/M/:/g' -e 's/H/:/g')"
length="${META[1]}"
length="$(printf '%s' "${length}" | sed -e 's/\..*S/S/g' -e 's/M/:/g' -e 's/H/:/g')"
m3u8_manifest_url="${META[2]}"
m3u8_highest_res_url="$( m3u8_highest_res_url="$(
curl --silent -- "${m3u8_manifest_url}" | curl --silent -- "${m3u8_manifest_url}" |
@ -85,24 +143,54 @@ for m3u8_manifest_url_and_metadata in ${m3u8_manifest_urls_and_metadata}; do
rev | rev |
cut -d'&' -f2 | cut -d'&' -f2 |
cut -c13- | cut -c13- |
sed 's/keyframes/fragments/g' ${SED_} 's/keyframes/fragments/g'
)" )"
filepath="${OUTDIR}/${title}.mp4" filepath="${OUTDIR}/${title}.mp4"
printf '\033[32m%s\033[m\n' "Download '${filepath}' (${idx}/${total})" printf '\033[32m%s\033[m\n' "Download '${filepath}' (${idx}/${total})"
false && ffmpeg -hide_banner \ ${FFMPEG_} -hide_banner \
-loglevel error \ -loglevel error \
-headers "authorization: Bearer ${BASE64_BEARER_TOKEN}" \ -headers "authorization: Bearer ${BASE64_BEARER_TOKEN}" \
-i "${m3u8_highest_res_url}" \ -i "${m3u8_highest_res_url}" \
-progress - \ -progress - \
-c copy -- "${filepath}" | -c copy -- "${filepath}" |
grep --line-buffered 'out_time=' | ${GREP_} --line-buffered 'out_time=' |
sed -u -e 's/out_time=//g' -e 's/\..*//g' -e 's/^-//g' | ${SED_} -u -e 's/out_time=//g' -e 's/\..*//g' -e 's/^-//g' |
xargs -I{} printf "{} of ${length} \r" ${XARGS_} -I{} printf '%s\r' "{} of ${length} "
printf '\n\033[33m%s\033[m\n' "Done" printf '\n\033[33m%s\033[m\n' "Done"
idx="$(( idx + 1 ))" idx="$(( idx + 1 ))"
done done
# Drop every variable the script assigned, including the helper variables that
# hold resolved tool paths and the per-video loop variables.
# BASE64_BEARER_TOKEN is the credential sourced from ./.env.
# Names that are not set are silently ignored by `unset -v'.
unset -v POSIXLY_CORRECT \
	COMMANDS_ \
	WHICH_ \
	EXIT_ \
	PRINTF_ \
	COMMAND_ \
	READ_ \
	TR_ \
	MKDIR_ \
	SED_ \
	GREP_ \
	WC_ \
	XARGS_ \
	FFMPEG_ \
	JQ_ \
	WGET_ \
	idx \
	total \
	title \
	length \
	API_URL \
	filepath \
	m3u8_manifest_url \
	m3u8_highest_res_url \
	m3u8_manifest_url_and_metadata \
	m3u8_manifest_urls_and_metadata \
	OUTDIR \
	BASE64_BEARER_TOKEN
# Restore IFS to space, newline and tab. Command substitution strips trailing
# newlines, so \n cannot be the trailing character; the tab is placed last.
IFS="$(printf ' \n\t')"
)