Rework the guard against command pollution by aliases and function definitions into a more readable and extensible format (posix) and refactor some parts (bash)

This commit is contained in:
tosu 2023-03-05 21:13:36 +01:00
parent 0a72f9c8ea
commit c994ee603f
4 changed files with 41 additions and 185 deletions

3
.gitignore vendored
View File

@ -1 +1,4 @@
.env
Makefile
videos
videos2

View File

@ -1,91 +0,0 @@
#!/bin/bash
# .env contains a line like
# BASE64_BEARER_TOKEN='<literal base64 encoded string>'
source .env
OUTDIR="./videos"
read -r -d '' API_URL <<- 'EOM'
https://euwe-1.api.microsoftstream.com/api/videos?
$top=100
&
$skip=0
&
$orderby=metrics/trendingScore desc
&
$filter=
published{SPACE}
and{SPACE}
(
(
state eq 'Completed' and contentSource ne 'livestream'
){SPACE}
or{SPACE}
(
contentSource eq 'livestream'{SPACE}
and not
(
state eq 'Completed'{SPACE}
and not
liveEvent/LiveEventOptions/OnDemandOptions/EnablePlayback
)
)
)
&
api-version=1.4-private
EOM
API_URL="$(
printf '%s' "${API_URL}" |
sed 's/{SPACE}/ /g' |
tr -d '\n'
)"
mkdir -p "${OUTDIR}" || { printf '\033[31m%s\033[m\n' 'Aborting'; exit; }
# '>' without quotes is delimiter, since it's one of the few chars disallowed in a URL. Title must not include this char
# https://www.ietf.org/rfc/rfc2396.txt
# 2.4.3. Excluded US-ASCII Characters
m3u8_manifest_urls_and_metadata=$(
wget --no-verbose --quiet -O- --header="authorization: Bearer ${BASE64_BEARER_TOKEN}" "${API_URL}" |
jq -r '.value[] | .name + ">" + .media.duration[2:] + ">" + (.playbackUrls | map(select(.mimeType == "application/vnd.apple.mpegurl")) | .[].playbackUrl)'
)
if [ -z "${m3u8_manifest_urls_and_metadata}" ]; then
printf '\033[31m%s\033[m\n' 'Error GETting manifest urls (response empty) or Bearer token invalid/expired. Exiting.'
exit
fi
idx=1
total="$(printf '%s\n' "${m3u8_manifest_urls_and_metadata}" | wc -l)"
for m3u8_manifest_url_and_metadata in ${m3u8_manifest_urls_and_metadata}; do
IFS='>' read -ra META <<< "${m3u8_manifest_url_and_metadata}"
title="${META[0]}"
length="${META[1]}"
length="$(echo "${length}" | sed -e 's/\..*S/S/g' -e 's/M/:/g' -e 's/H/:/g')"
m3u8_manifest_url="${META[2]}"
m3u8_highest_res_url="$(
curl --silent "${m3u8_manifest_url}" |
tail -n1 |
rev |
cut -c3- |
rev |
cut -d'&' -f2 |
cut -c13- |
sed 's/keyframes/fragments/g'
)"
filepath="${OUTDIR}/${title}.mp4"
printf '\033[32m%s\033[m\n' "Download '${filepath}' (${idx}/${total})"
ffmpeg -hide_banner -loglevel error -headers "authorization: Bearer ${BASE64_BEARER_TOKEN}" -i "${m3u8_highest_res_url}" -progress - -c copy "${filepath}" | grep --line-buffered 'out_time=' | sed -u -e 's/out_time=//g' -e 's/\..*//g' -e 's/^-//g' | xargs -I{} printf "{} of ${length} \r"
printf '\n\033[33m%s\033[m\n' "Done"
idx=$(( idx + 1 ))
done

View File

@ -14,7 +14,7 @@ $top=100
&
$skip=0
&
$orderby=metrics/trendingScore desc
$orderby=metrics/trendingScore asc
&
$filter=
published{SPACE}
@ -39,12 +39,14 @@ api-version=1.4-private
EOM
API_URL="$(
printf '%s' "${API_URL}" |
sed 's/{SPACE}/ /g' |
sed 's/{SPACE}/ /g' <<< "${API_URL}" |
tr -d '\n'
)"
mkdir -p "${OUTDIR}" || { printf '\033[31m%s\033[m\n' 'Aborting'; exit; }
mkdir -p "${OUTDIR}" || {
printf '\033[31m%s\033[m\n' 'Aborting'
exit 2
}
# '>' without quotes is delimiter, since it's one of the few chars disallowed in a URL. Title must not include this char
# https://www.ietf.org/rfc/rfc2396.txt
@ -54,33 +56,19 @@ m3u8_manifest_urls_and_metadata=$(
jq -r '.value[] | .name + ">" + .media.duration[2:] + ">" + (.playbackUrls | map(select(.mimeType == "application/vnd.apple.mpegurl")) | .[].playbackUrl)'
)
if [ -z "${m3u8_manifest_urls_and_metadata}" ]; then
[ -z "${m3u8_manifest_urls_and_metadata}" ] && {
printf '\033[31m%s\033[m\n' 'Error GETting manifest urls (response empty) or Bearer token invalid/expired. Exiting.'
exit
fi
exit 3
}
idx=1
total="$(printf '%s\n' "${m3u8_manifest_urls_and_metadata}" | wc -l)"
total="$(wc -l <<< "${m3u8_manifest_urls_and_metadata}")"
IFS=$'\n'
for m3u8_manifest_url_and_metadata in ${m3u8_manifest_urls_and_metadata}; do
IFS='>' read -ra META <<< "${m3u8_manifest_url_and_metadata}"
IFS='>' read -r title length m3u8_manifest_url <<< "${m3u8_manifest_url_and_metadata}"
title="${META[0]}"
length="${META[1]}"
length="$(echo "${length}" | sed -e 's/\..*S/S/g' -e 's/M/:/g' -e 's/H/:/g')"
m3u8_manifest_url="${META[2]}"
m3u8_highest_res_url="$(
curl --silent "${m3u8_manifest_url}" |
tail -n1 |
rev |
cut -c3- |
rev |
cut -d'&' -f2 |
cut -c13- |
sed 's/keyframes/fragments/g'
)"
length="$(sed -e 's/\..*S/S/g' -e 's/M/:/g' -e 's/H/:/g' <<< "${length}")"
filepath="${OUTDIR}/${title}.mp4"
@ -89,9 +77,10 @@ for m3u8_manifest_url_and_metadata in ${m3u8_manifest_urls_and_metadata}; do
ffmpeg -hide_banner \
-loglevel error \
-headers "authorization: Bearer ${BASE64_BEARER_TOKEN}" \
-i "${m3u8_highest_res_url}" \
-i "${m3u8_manifest_url}" \
-progress - \
-c copy "${filepath}" |
-codec copy \
-- "${filepath}" |
grep --line-buffered 'out_time=' |
sed -u -e 's/out_time=//g' -e 's/\..*//g' -e 's/^-//g' |
xargs -I{} printf "{} of ${length} \r"

View File

@ -2,45 +2,24 @@
# Run entire script in subshell to prevent variable pollution
(
# https://unix.stackexchange.com/a/188365
# If and only if bash is in POSIX-mode, which can be forced by setting the
# POSIXLY_CORRECT variable (to any value), then the special built-ins, see
# https://www.gnu.org/software/bash/manual/html_node/Special-Builtins.html,
# are found BEFORE functions during lookup. `unset' is one of these special
# built-ins. However, when using `shopt -s expand_aliases', alias expansion in
# non-interactive shells is supported. This can be circumvented by escaping any
# of the letters of the command, usually the first.
# For portability and security, we want to use the `builtin' built-in command, so
# we must unset it if it is a pre-defined function and unalias it if it is an alias.
# This stub does not unset a variable called `builtin', but that is irrelevant.
# We also have to unset `unalias', since it is not a special builtin and calling it
# in POSIX-mode could call a function instead.
# We also unalias unset, since it is used at the bottom of the script.
# The
# non-interactive shells is supported, so aliasing unset is a problem.
# This can be circumvented by escaping any of the letters of the command, usually the first.
#
# The COMMANDS_ variable contains the commands, space delimited (therefore the IFS),
# which are supposed to be "made safe".
IFS="
"
POSIXLY_CORRECT='1'
COMMANDS_='builtin unalias unset read printf command exit type .'
COMMANDS_='builtin unalias unset read printf command exit type . tr mkdir wc sed grep xargs ffmpeg jq wget'
\unset -f -- ${COMMANDS_}
\unalias -- ${COMMANDS_} 2>/dev/null || true
WHICH_='type -P' # helper for below
# GNU coreutils
TR_="$(${WHICH_} tr)"
MKDIR_="$(${WHICH_} mkdir)"
WC_="$(${WHICH_} wc)"
# GNU non-coreutils
SED_="$(${WHICH_} sed)"
GREP_="$(${WHICH_} grep)"
XARGS_="$(${WHICH_} xargs)"
# third-party
FFMPEG_="$(${WHICH_} ffmpeg)"
JQ_="$(${WHICH_} jq)"
WGET_="$(${WHICH_} wget)"
command -v -- wget >/dev/null 2>&1 || {
printf '\033[31m%s\033[m\n' "Please install wget"
exit 1
@ -96,10 +75,10 @@ EOM
API_URL="$(
printf '%s' "${API_URL}" |
${TR_} -d '\t'
tr -d '\t'
)"
${MKDIR_} -p -- "${OUTDIR}" || {
mkdir -p -- "${OUTDIR}" || {
printf '\033[31m%s\033[m\n' 'Aborting'
exit 2
}
@ -108,12 +87,12 @@ ${MKDIR_} -p -- "${OUTDIR}" || {
# https://www.ietf.org/rfc/rfc2396.txt
# 2.4.3. Excluded US-ASCII Characters
m3u8_manifest_urls_and_metadata="$(
${WGET_} --no-verbose \
wget --no-verbose \
--quiet \
--output-document - \
--header="authorization: Bearer ${BASE64_BEARER_TOKEN}" \
-- "${API_URL}" |
${JQ_} --raw-output \
jq --raw-output \
'.value[] | .name + ">" + .media.duration[2:] + ">" + (.playbackUrls | map(select(.mimeType == "application/vnd.apple.mpegurl")) | .[].playbackUrl)'
)"
@ -123,7 +102,7 @@ m3u8_manifest_urls_and_metadata="$(
}
idx='1'
total="$(printf '%s\n' "${m3u8_manifest_urls_and_metadata}" | ${WC_} -l)"
total="$(printf '%s\n' "${m3u8_manifest_urls_and_metadata}" | wc -l)"
IFS='
'
@ -133,32 +112,22 @@ do
${m3u8_manifest_url_and_metadata}
EOM
length="$(printf '%s' "${length}" | ${SED_} -e 's/\..*S/S/g' -e 's/M/:/g' -e 's/H/:/g')"
m3u8_highest_res_url="$(
curl --silent -- "${m3u8_manifest_url}" |
tail -n1 |
rev |
cut -c3- |
rev |
cut -d'&' -f2 |
cut -c13- |
${SED_} 's/keyframes/fragments/g'
)"
length="$(printf '%s' "${length}" | sed -e 's/\..*S/S/g' -e 's/M/:/g' -e 's/H/:/g')"
filepath="${OUTDIR}/${title}.mp4"
printf '\033[32m%s\033[m\n' "Download '${filepath}' (${idx}/${total})"
${FFMPEG_} -hide_banner \
ffmpeg -hide_banner \
-loglevel error \
-headers "authorization: Bearer ${BASE64_BEARER_TOKEN}" \
-i "${m3u8_highest_res_url}" \
-i "${m3u8_manifest_url}" \
-progress - \
-c copy -- "${filepath}" |
${GREP_} --line-buffered 'out_time=' |
${SED_} -u -e 's/out_time=//g' -e 's/\..*//g' -e 's/^-//g' |
${XARGS_} -I{} printf '%s\r' "{} of ${length} "
-c copy \
-- "${filepath}" |
grep --line-buffered 'out_time=' |
sed -u -e 's/out_time=//g' -e 's/\..*//g' -e 's/^-//g' |
xargs -I{} printf '%s\r' "{} of ${length} "
printf '\n\033[33m%s\033[m\n' "Done"
@ -166,19 +135,6 @@ do
done
unset -v POSIXLY_CORRECT \
WHICH_ \
EXIT_ \
PRINTF_ \
COMMAND_ \
READ_ \
TR_ \
MKDIR_ \
SED_ \
GREP_ \
WC_ \
FFMPEG_ \
JQ_ \
WGET_ \
idx \
total \
API_URL \
@ -190,7 +146,6 @@ m3u8_manifest_urls_and_metadata \
OUTDIR \
BASE64_BEARER_TOKEN
# \n cannot be the trailing character: command substitution strips trailing newlines
IFS="$(printf ' \n\t')"
IFS="
"
)