Rework the protection against command pollution (via aliases and function definitions) into a more readable and extensible format (POSIX), and refactor some parts (Bash)

This commit is contained in:
tosu 2023-03-05 21:13:36 +01:00
parent 0a72f9c8ea
commit c994ee603f
4 changed files with 41 additions and 185 deletions

3
.gitignore vendored
View File

@ -1 +1,4 @@
.env .env
Makefile
videos
videos2

View File

@ -1,91 +0,0 @@
#!/bin/bash
# Download every video returned by the Microsoft Stream video listing API
# into ${OUTDIR}, one "<title>.mp4" per listing entry.
#
# External tools used: wget, jq, curl, ffmpeg (plus sed, tr, grep, xargs).
#
# .env contains a line like
# BASE64_BEARER_TOKEN='<literal base64 encoded string>'
#
# Exit status:
#   1  .env could not be read or BASE64_BEARER_TOKEN is empty
#   2  output directory could not be created
#   3  the API returned no usable manifest URLs

# Print the message ($2...) in red on stderr and exit with status $1.
die() {
  local status="$1"
  shift
  printf '\033[31m%s\033[m\n' "$*" >&2
  exit "${status}"
}

# shellcheck source=/dev/null
source .env || die 1 'Cannot read .env. Aborting'
[[ -n "${BASE64_BEARER_TOKEN:-}" ]] ||
  die 1 'BASE64_BEARER_TOKEN is not set in .env. Aborting'

OUTDIR="./videos"

# The query is written one fragment per line for readability and joined
# below. {SPACE} marks the places where a literal space must survive the
# join. `read -d ''` returns non-zero when it hits end of input, which is
# expected here, so its status is deliberately ignored.
read -r -d '' API_URL <<- 'EOM' || true
https://euwe-1.api.microsoftstream.com/api/videos?
$top=100
&
$skip=0
&
$orderby=metrics/trendingScore desc
&
$filter=
published{SPACE}
and{SPACE}
(
(
state eq 'Completed' and contentSource ne 'livestream'
){SPACE}
or{SPACE}
(
contentSource eq 'livestream'{SPACE}
and not
(
state eq 'Completed'{SPACE}
and not
liveEvent/LiveEventOptions/OnDemandOptions/EnablePlayback
)
)
)
&
api-version=1.4-private
EOM
# Replace the {SPACE} markers and collapse the fragments onto one line.
API_URL="$(
  printf '%s' "${API_URL}" |
    sed 's/{SPACE}/ /g' |
    tr -d '\n'
)"

mkdir -p "${OUTDIR}" || die 2 'Aborting'

# '>' without quotes is delimiter, since it's one of the few chars disallowed in a URL. Title must not include this char
# https://www.ietf.org/rfc/rfc2396.txt
# 2.4.3. Excluded US-ASCII Characters
# Each output line has the form: <title>><duration without "PT">><manifest url>
m3u8_manifest_urls_and_metadata=$(
  wget --no-verbose --quiet -O- \
    --header="authorization: Bearer ${BASE64_BEARER_TOKEN}" "${API_URL}" |
    jq -r '.value[] | .name + ">" + .media.duration[2:] + ">" + (.playbackUrls | map(select(.mimeType == "application/vnd.apple.mpegurl")) | .[].playbackUrl)'
)
if [[ -z "${m3u8_manifest_urls_and_metadata}" ]]; then
  die 3 'Error GETting manifest urls (response empty) or Bearer token invalid/expired. Exiting.'
fi

# Split the listing strictly on newlines. Iterating over the unquoted
# variable would also split on the spaces inside titles and expand globs.
entries=()
while IFS= read -r entry; do
  entries+=("${entry}")
done <<< "${m3u8_manifest_urls_and_metadata}"

total="${#entries[@]}"
idx=1
for entry in "${entries[@]}"; do
  IFS='>' read -r title length m3u8_manifest_url <<< "${entry}"

  # Turn the duration into a clock-like string: drop fractional seconds,
  # then replace the H and M unit letters with ':'.
  length="$(sed -e 's/\..*S/S/g' -e 's/M/:/g' -e 's/H/:/g' <<< "${length}")"

  # Derive the stream URL from the manifest: take its last line, drop the
  # final two characters, keep the second '&'-separated field, drop that
  # field's first 12 characters and swap "keyframes" for "fragments".
  # NOTE(review): this depends on the exact manifest layout - confirm
  # against a current response.
  m3u8_highest_res_url="$(
    curl --silent -- "${m3u8_manifest_url}" |
      tail -n1 |
      rev |
      cut -c3- |
      rev |
      cut -d'&' -f2 |
      cut -c13- |
      sed 's/keyframes/fragments/g'
  )"

  filepath="${OUTDIR}/${title}.mp4"
  printf '\033[32m%s\033[m\n' "Download '${filepath}' (${idx}/${total})"

  if [[ -z "${m3u8_highest_res_url}" ]]; then
    # Without a stream URL ffmpeg has nothing to read; skip this entry.
    printf '\033[31m%s\033[m\n' "Could not resolve stream URL for '${title}'. Skipping." >&2
    idx=$(( idx + 1 ))
    continue
  fi

  # ffmpeg writes key=value progress records to stdout (-progress -). Only
  # out_time is kept, trimmed to whole seconds, and redrawn on one line.
  # ${length} is passed as printf data, never as part of the format string.
  ffmpeg -hide_banner \
    -loglevel error \
    -headers "authorization: Bearer ${BASE64_BEARER_TOKEN}" \
    -i "${m3u8_highest_res_url}" \
    -progress - \
    -c copy "${filepath}" |
    grep --line-buffered 'out_time=' |
    sed -u -e 's/out_time=//g' -e 's/\..*//g' -e 's/^-//g' |
    xargs -I{} printf '%s\r' "{} of ${length} "

  printf '\n\033[33m%s\033[m\n' "Done"
  idx=$(( idx + 1 ))
done

View File

@ -14,7 +14,7 @@ $top=100
& &
$skip=0 $skip=0
& &
$orderby=metrics/trendingScore desc $orderby=metrics/trendingScore asc
& &
$filter= $filter=
published{SPACE} published{SPACE}
@ -39,12 +39,14 @@ api-version=1.4-private
EOM EOM
API_URL="$( API_URL="$(
printf '%s' "${API_URL}" | sed 's/{SPACE}/ /g' <<< "${API_URL}" |
sed 's/{SPACE}/ /g' |
tr -d '\n' tr -d '\n'
)" )"
mkdir -p "${OUTDIR}" || { printf '\033[31m%s\033[m\n' 'Aborting'; exit; } mkdir -p "${OUTDIR}" || {
printf '\033[31m%s\033[m\n' 'Aborting'
exit 2
}
# '>' without quotes is delimiter, since it's one of the few chars disallowed in a URL. Title must not include this char # '>' without quotes is delimiter, since it's one of the few chars disallowed in a URL. Title must not include this char
# https://www.ietf.org/rfc/rfc2396.txt # https://www.ietf.org/rfc/rfc2396.txt
@ -54,33 +56,19 @@ m3u8_manifest_urls_and_metadata=$(
jq -r '.value[] | .name + ">" + .media.duration[2:] + ">" + (.playbackUrls | map(select(.mimeType == "application/vnd.apple.mpegurl")) | .[].playbackUrl)' jq -r '.value[] | .name + ">" + .media.duration[2:] + ">" + (.playbackUrls | map(select(.mimeType == "application/vnd.apple.mpegurl")) | .[].playbackUrl)'
) )
if [ -z "${m3u8_manifest_urls_and_metadata}" ]; then [ -z "${m3u8_manifest_urls_and_metadata}" ] && {
printf '\033[31m%s\033[m\n' 'Error GETting manifest urls (response empty) or Bearer token invalid/expired. Exiting.' printf '\033[31m%s\033[m\n' 'Error GETting manifest urls (response empty) or Bearer token invalid/expired. Exiting.'
exit exit 3
fi }
idx=1 idx=1
total="$(printf '%s\n' "${m3u8_manifest_urls_and_metadata}" | wc -l)" total="$(wc -l <<< "${m3u8_manifest_urls_and_metadata}")"
IFS=$'\n' IFS=$'\n'
for m3u8_manifest_url_and_metadata in ${m3u8_manifest_urls_and_metadata}; do for m3u8_manifest_url_and_metadata in ${m3u8_manifest_urls_and_metadata}; do
IFS='>' read -ra META <<< "${m3u8_manifest_url_and_metadata}" IFS='>' read -r title length m3u8_manifest_url <<< "${m3u8_manifest_url_and_metadata}"
title="${META[0]}" length="$(sed -e 's/\..*S/S/g' -e 's/M/:/g' -e 's/H/:/g' <<< "${length}")"
length="${META[1]}"
length="$(echo "${length}" | sed -e 's/\..*S/S/g' -e 's/M/:/g' -e 's/H/:/g')"
m3u8_manifest_url="${META[2]}"
m3u8_highest_res_url="$(
curl --silent "${m3u8_manifest_url}" |
tail -n1 |
rev |
cut -c3- |
rev |
cut -d'&' -f2 |
cut -c13- |
sed 's/keyframes/fragments/g'
)"
filepath="${OUTDIR}/${title}.mp4" filepath="${OUTDIR}/${title}.mp4"
@ -89,9 +77,10 @@ for m3u8_manifest_url_and_metadata in ${m3u8_manifest_urls_and_metadata}; do
ffmpeg -hide_banner \ ffmpeg -hide_banner \
-loglevel error \ -loglevel error \
-headers "authorization: Bearer ${BASE64_BEARER_TOKEN}" \ -headers "authorization: Bearer ${BASE64_BEARER_TOKEN}" \
-i "${m3u8_highest_res_url}" \ -i "${m3u8_manifest_url}" \
-progress - \ -progress - \
-c copy "${filepath}" | -codec copy \
-- "${filepath}" |
grep --line-buffered 'out_time=' | grep --line-buffered 'out_time=' |
sed -u -e 's/out_time=//g' -e 's/\..*//g' -e 's/^-//g' | sed -u -e 's/out_time=//g' -e 's/\..*//g' -e 's/^-//g' |
xargs -I{} printf "{} of ${length} \r" xargs -I{} printf "{} of ${length} \r"

View File

@ -2,45 +2,24 @@
# Run entire script in subshell to prevent variable pollution # Run entire script in subshell to prevent variable pollution
( (
# https://unix.stackexchange.com/a/188365 # https://unix.stackexchange.com/a/188365
# If and only if bash is in POSIX-mode, which can be forced by setting the # If and only if bash is in POSIX-mode, which can be forced by setting the
# POSIXLY_CORRECT variable (to any value), then the special built-ins, see # POSIXLY_CORRECT variable (to any value), then the special built-ins, see
# https://www.gnu.org/software/bash/manual/html_node/Special-Builtins.html, # https://www.gnu.org/software/bash/manual/html_node/Special-Builtins.html,
# are found BEFORE functions during lookup. `unset' is one of these special # are found BEFORE functions during lookup. `unset' is one of these special
# built-ins. However, when using `shopt -s expand_aliases', alias expansion in # built-ins. However, when using `shopt -s expand_aliases', alias expansion in
# non-interactive shells is supported. This can be circumvented by escaping any # non-interactive shells is supported, so aliasing unset is a problem.
# of the letters of the command, usually the first. # This can be circumvented by escaping any of the letters of the command, usually the first.
# For portability and security, we want to use the `builtin' built-in command, so #
# we must unset it if it is a pre-defined function and unalias it if it is an alias. # The COMMANDS_ variable contains the commands, space delimited (therefore the IFS),
# This stub does not unset a variable called `builtin', but that is irrelevant. # which are supposed to be "made safe".
# We also have to unset `unalias', since it is not a special builtin and calling it IFS="
# in POSIX-mode could call a function instead. "
# We also unalias unset, since it is used at the bottom of the script.
# The
POSIXLY_CORRECT='1' POSIXLY_CORRECT='1'
COMMANDS_='builtin unalias unset read printf command exit type .' COMMANDS_='builtin unalias unset read printf command exit type . tr mkdir wc sed grep xargs ffmpeg jq wget'
\unset -f -- ${COMMANDS_} \unset -f -- ${COMMANDS_}
\unalias -- ${COMMANDS_} 2>/dev/null || true \unalias -- ${COMMANDS_} 2>/dev/null || true
WHICH_='type -P' # helper for below
# GNU coreutils
TR_="$(${WHICH_} tr)"
MKDIR_="$(${WHICH_} mkdir)"
WC_="$(${WHICH_} wc)"
# GNU non-coreutils
SED_="$(${WHICH_} sed)"
GREP_="$(${WHICH_} grep)"
XARGS_="$(${WHICH_} xargs)"
# third-party
FFMPEG_="$(${WHICH_} ffmpeg)"
JQ_="$(${WHICH_} jq)"
WGET_="$(${WHICH_} wget)"
command -v -- wget >/dev/null 2>&1 || { command -v -- wget >/dev/null 2>&1 || {
printf '\033[31m%s\033[m\n' "Please install wget" printf '\033[31m%s\033[m\n' "Please install wget"
exit 1 exit 1
@ -96,10 +75,10 @@ EOM
API_URL="$( API_URL="$(
printf '%s' "${API_URL}" | printf '%s' "${API_URL}" |
${TR_} -d '\t' tr -d '\t'
)" )"
${MKDIR_} -p -- "${OUTDIR}" || { mkdir -p -- "${OUTDIR}" || {
printf '\033[31m%s\033[m\n' 'Aborting' printf '\033[31m%s\033[m\n' 'Aborting'
exit 2 exit 2
} }
@ -108,12 +87,12 @@ ${MKDIR_} -p -- "${OUTDIR}" || {
# https://www.ietf.org/rfc/rfc2396.txt # https://www.ietf.org/rfc/rfc2396.txt
# 2.4.3. Excluded US-ASCII Characters # 2.4.3. Excluded US-ASCII Characters
m3u8_manifest_urls_and_metadata="$( m3u8_manifest_urls_and_metadata="$(
${WGET_} --no-verbose \ wget --no-verbose \
--quiet \ --quiet \
--output-document - \ --output-document - \
--header="authorization: Bearer ${BASE64_BEARER_TOKEN}" \ --header="authorization: Bearer ${BASE64_BEARER_TOKEN}" \
-- "${API_URL}" | -- "${API_URL}" |
${JQ_} --raw-output \ jq --raw-output \
'.value[] | .name + ">" + .media.duration[2:] + ">" + (.playbackUrls | map(select(.mimeType == "application/vnd.apple.mpegurl")) | .[].playbackUrl)' '.value[] | .name + ">" + .media.duration[2:] + ">" + (.playbackUrls | map(select(.mimeType == "application/vnd.apple.mpegurl")) | .[].playbackUrl)'
)" )"
@ -123,7 +102,7 @@ m3u8_manifest_urls_and_metadata="$(
} }
idx='1' idx='1'
total="$(printf '%s\n' "${m3u8_manifest_urls_and_metadata}" | ${WC_} -l)" total="$(printf '%s\n' "${m3u8_manifest_urls_and_metadata}" | wc -l)"
IFS=' IFS='
' '
@ -133,32 +112,22 @@ do
${m3u8_manifest_url_and_metadata} ${m3u8_manifest_url_and_metadata}
EOM EOM
length="$(printf '%s' "${length}" | ${SED_} -e 's/\..*S/S/g' -e 's/M/:/g' -e 's/H/:/g')" length="$(printf '%s' "${length}" | sed -e 's/\..*S/S/g' -e 's/M/:/g' -e 's/H/:/g')"
m3u8_highest_res_url="$(
curl --silent -- "${m3u8_manifest_url}" |
tail -n1 |
rev |
cut -c3- |
rev |
cut -d'&' -f2 |
cut -c13- |
${SED_} 's/keyframes/fragments/g'
)"
filepath="${OUTDIR}/${title}.mp4" filepath="${OUTDIR}/${title}.mp4"
printf '\033[32m%s\033[m\n' "Download '${filepath}' (${idx}/${total})" printf '\033[32m%s\033[m\n' "Download '${filepath}' (${idx}/${total})"
${FFMPEG_} -hide_banner \ ffmpeg -hide_banner \
-loglevel error \ -loglevel error \
-headers "authorization: Bearer ${BASE64_BEARER_TOKEN}" \ -headers "authorization: Bearer ${BASE64_BEARER_TOKEN}" \
-i "${m3u8_highest_res_url}" \ -i "${m3u8_manifest_url}" \
-progress - \ -progress - \
-c copy -- "${filepath}" | -c copy \
${GREP_} --line-buffered 'out_time=' | -- "${filepath}" |
${SED_} -u -e 's/out_time=//g' -e 's/\..*//g' -e 's/^-//g' | grep --line-buffered 'out_time=' |
${XARGS_} -I{} printf '%s\r' "{} of ${length} " sed -u -e 's/out_time=//g' -e 's/\..*//g' -e 's/^-//g' |
xargs -I{} printf '%s\r' "{} of ${length} "
printf '\n\033[33m%s\033[m\n' "Done" printf '\n\033[33m%s\033[m\n' "Done"
@ -166,19 +135,6 @@ do
done done
unset -v POSIXLY_CORRECT \ unset -v POSIXLY_CORRECT \
WHICH_ \
EXIT_ \
PRINTF_ \
COMMAND_ \
READ_ \
TR_ \
MKDIR_ \
SED_ \
GREP_ \
WC_ \
FFMPEG_ \
JQ_ \
WGET_ \
idx \ idx \
total \ total \
API_URL \ API_URL \
@ -190,7 +146,6 @@ m3u8_manifest_urls_and_metadata \
OUTDIR \ OUTDIR \
BASE64_BEARER_TOKEN BASE64_BEARER_TOKEN
# \n cannot be trailing character IFS="
IFS="$(printf ' \n\t')" "
) )