From c994ee603f090b76caa686d4c01ee209c6ef8b5c Mon Sep 17 00:00:00 2001 From: tosu Date: Sun, 5 Mar 2023 21:13:36 +0100 Subject: [PATCH] Improve command pollution by aliasing and function defs to a more readable and extensible format (posix) and refactor some parts (bash) --- .gitignore | 3 ++ download.sh | 91 ------------------------------------------------ downloadBash.sh | 41 ++++++++-------------- downloadPOSIX.sh | 91 ++++++++++++------------------------------------ 4 files changed, 41 insertions(+), 185 deletions(-) delete mode 100755 download.sh diff --git a/.gitignore b/.gitignore index 4c49bd7..e367f4d 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ .env +Makefile +videos +videos2 diff --git a/download.sh b/download.sh deleted file mode 100755 index 5b29bcd..0000000 --- a/download.sh +++ /dev/null @@ -1,91 +0,0 @@ -#!/bin/bash - -# .env contains a line like -# BASE64_BEARER_TOKEN='' -source .env - -OUTDIR="./videos" - -read -r -d '' API_URL <<- 'EOM' -https://euwe-1.api.microsoftstream.com/api/videos? -$top=100 -& -$skip=0 -& -$orderby=metrics/trendingScore desc -& -$filter= - published{SPACE} - and{SPACE} - ( - ( - state eq 'Completed' and contentSource ne 'livestream' - ){SPACE} - or{SPACE} - ( - contentSource eq 'livestream'{SPACE} - and not - ( - state eq 'Completed'{SPACE} - and not - liveEvent/LiveEventOptions/OnDemandOptions/EnablePlayback - ) - ) - ) -& -api-version=1.4-private -EOM - -API_URL="$( - printf '%s' "${API_URL}" | - sed 's/{SPACE}/ /g' | - tr -d '\n' -)" - -mkdir -p "${OUTDIR}" || { printf '\033[31m%s\033[m\n' 'Aborting'; exit; } - -# '>' without quotes is delimiter, since it's one of the few chars disallowed in a URL. Title must not include this char -# https://www.ietf.org/rfc/rfc2396.txt -# 2.4.3. Excluded US-ASCII Characters -m3u8_manifest_urls_and_metadata=$( - wget --no-verbose --quiet -O- --header="authorization: Bearer ${BASE64_BEARER_TOKEN}" "${API_URL}" | - jq -r '.value[] | .name + ">" + .media.duration[2:] + ">" + (.playbackUrls | map(select(.mimeType == "application/vnd.apple.mpegurl")) | .[].playbackUrl)' -) - -if [ -z "${m3u8_manifest_urls_and_metadata}" ]; then - printf '\033[31m%s\033[m\n' 'Error GETting manifest urls (response empty) or Bearer token invalid/expired. Exiting.' - exit -fi - -idx=1 -total="$(printf '%s\n' "${m3u8_manifest_urls_and_metadata}" | wc -l)" - -for m3u8_manifest_url_and_metadata in ${m3u8_manifest_urls_and_metadata}; do - IFS='>' read -ra META <<< "${m3u8_manifest_url_and_metadata}" - - title="${META[0]}" - length="${META[1]}" - length="$(echo "${length}" | sed -e 's/\..*S/S/g' -e 's/M/:/g' -e 's/H/:/g')" - m3u8_manifest_url="${META[2]}" - - m3u8_highest_res_url="$( - curl --silent "${m3u8_manifest_url}" | - tail -n1 | - rev | - cut -c3- | - rev | - cut -d'&' -f2 | - cut -c13- | - sed 's/keyframes/fragments/g' - )" - - filepath="${OUTDIR}/${title}.mp4" - - printf '\033[32m%s\033[m\n' "Download '${filepath}' (${idx}/${total})" - - ffmpeg -hide_banner -loglevel error -headers "authorization: Bearer ${BASE64_BEARER_TOKEN}" -i "${m3u8_highest_res_url}" -progress - -c copy "${filepath}" | grep --line-buffered 'out_time=' | sed -u -e 's/out_time=//g' -e 's/\..*//g' -e 's/^-//g' | xargs -I{} printf "{} of ${length} \r" - - printf '\n\033[33m%s\033[m\n' "Done" - - idx=$(( idx + 1 )) -done diff --git a/downloadBash.sh b/downloadBash.sh index 7dd37b0..3f8ffc6 100755 --- a/downloadBash.sh +++ b/downloadBash.sh @@ -14,7 +14,7 @@ $top=100 & $skip=0 & -$orderby=metrics/trendingScore desc +$orderby=metrics/trendingScore asc & $filter= published{SPACE} @@ -39,12 +39,14 @@ api-version=1.4-private EOM API_URL="$( - printf '%s' "${API_URL}" | - sed 's/{SPACE}/ /g' | + sed 's/{SPACE}/ /g' <<< "${API_URL}" | tr -d '\n' )" -mkdir -p "${OUTDIR}" || { printf '\033[31m%s\033[m\n' 'Aborting'; exit; } +mkdir -p "${OUTDIR}" || { + printf '\033[31m%s\033[m\n' 'Aborting' + exit 2 +} # '>' without quotes is delimiter, since it's one of the few chars disallowed in a URL. Title must not include this char # https://www.ietf.org/rfc/rfc2396.txt @@ -54,33 +56,19 @@ m3u8_manifest_urls_and_metadata=$( jq -r '.value[] | .name + ">" + .media.duration[2:] + ">" + (.playbackUrls | map(select(.mimeType == "application/vnd.apple.mpegurl")) | .[].playbackUrl)' ) -if [ -z "${m3u8_manifest_urls_and_metadata}" ]; then +[ -z "${m3u8_manifest_urls_and_metadata}" ] && { printf '\033[31m%s\033[m\n' 'Error GETting manifest urls (response empty) or Bearer token invalid/expired. Exiting.' - exit -fi + exit 3 +} idx=1 -total="$(printf '%s\n' "${m3u8_manifest_urls_and_metadata}" | wc -l)" +total="$(wc -l <<< "${m3u8_manifest_urls_and_metadata}")" IFS=$'\n' for m3u8_manifest_url_and_metadata in ${m3u8_manifest_urls_and_metadata}; do - IFS='>' read -ra META <<< "${m3u8_manifest_url_and_metadata}" + IFS='>' read -r title length m3u8_manifest_url <<< "${m3u8_manifest_url_and_metadata}" - title="${META[0]}" - length="${META[1]}" - length="$(echo "${length}" | sed -e 's/\..*S/S/g' -e 's/M/:/g' -e 's/H/:/g')" - m3u8_manifest_url="${META[2]}" - - m3u8_highest_res_url="$( - curl --silent "${m3u8_manifest_url}" | - tail -n1 | - rev | - cut -c3- | - rev | - cut -d'&' -f2 | - cut -c13- | - sed 's/keyframes/fragments/g' - )" + length="$(sed -e 's/\..*S/S/g' -e 's/M/:/g' -e 's/H/:/g' <<< "${length}")" filepath="${OUTDIR}/${title}.mp4" @@ -89,9 +77,10 @@ for m3u8_manifest_url_and_metadata in ${m3u8_manifest_urls_and_metadata}; do ffmpeg -hide_banner \ -loglevel error \ -headers "authorization: Bearer ${BASE64_BEARER_TOKEN}" \ - -i "${m3u8_highest_res_url}" \ + -i "${m3u8_manifest_url}" \ -progress - \ - -c copy "${filepath}" | + -codec copy \ + -- "${filepath}" | grep --line-buffered 'out_time=' | sed -u -e 's/out_time=//g' -e 's/\..*//g' -e 's/^-//g' | xargs -I{} printf "{} of ${length} \r" diff --git a/downloadPOSIX.sh b/downloadPOSIX.sh index 2eaad81..c89656a 100755 --- a/downloadPOSIX.sh +++ b/downloadPOSIX.sh @@ -2,45 +2,24 @@ # Run entire script in subshell to prevent variable pollution ( - # https://unix.stackexchange.com/a/188365 # If and only if bash is in POSIX-mode, which can be forced by setting the # POSIXLY_CORRECT variable (to any value), then the special built-ins, see # https://www.gnu.org/software/bash/manual/html_node/Special-Builtins.html, # are found BEFORE functions during lookup. `unset' is one of these special # built-ins. However, when using `shopt -s expand_aliases', alias expansion in -# non-interactive shells is supported. This can be circumvented by escaping any -# of the letters of the command, usually the first. -# For portability and security, we want to use the `builtin' built-in command, so -# we must unset it if it is a pre-defined function and unalias it if it is an alias. -# This stub does not unset a variable called `builtin', but that is irrelevant. -# We also have to unset `unalias', since it is not a special builtin and calling it -# in POSIX-mode could call a function instead. -# We also unalias unset, since it is used at the bottom of the script. -# The +# non-interactive shells is supported, so aliasing unset is a problem. +# This can be circumvented by escaping any of the letters of the command, usually the first. +# +# The COMMANDS_ variable contains the commands, space delimited (therefore the IFS), +# which are supposed to be "made safe". +IFS=" + " POSIXLY_CORRECT='1' -COMMANDS_='builtin unalias unset read printf command exit type .' +COMMANDS_='builtin unalias unset read printf command exit type . tr mkdir wc sed grep xargs ffmpeg jq wget' \unset -f -- ${COMMANDS_} \unalias -- ${COMMANDS_} 2>/dev/null || true -WHICH_='type -P' # helper for below - -# GNU coreutils -TR_="$(${WHICH_} tr)" -MKDIR_="$(${WHICH_} mkdir)" -WC_="$(${WHICH_} wc)" - -# GNU non-coreutils -SED_="$(${WHICH_} sed)" -GREP_="$(${WHICH_} grep)" -XARGS_="$(${WHICH_} xargs)" - -# third-party -FFMPEG_="$(${WHICH_} ffmpeg)" -JQ_="$(${WHICH_} jq)" -WGET_="$(${WHICH_} wget)" - - command -v -- wget >/dev/null 2>&1 || { printf '\033[31m%s\033[m\n' "Please install wget" exit 1 @@ -96,10 +75,10 @@ EOM API_URL="$( printf '%s' "${API_URL}" | - ${TR_} -d '\t' + tr -d '\t' )" -${MKDIR_} -p -- "${OUTDIR}" || { +mkdir -p -- "${OUTDIR}" || { printf '\033[31m%s\033[m\n' 'Aborting' exit 2 } @@ -108,12 +87,12 @@ ${MKDIR_} -p -- "${OUTDIR}" || { # https://www.ietf.org/rfc/rfc2396.txt # 2.4.3. Excluded US-ASCII Characters m3u8_manifest_urls_and_metadata="$( - ${WGET_} --no-verbose \ + wget --no-verbose \ --quiet \ --output-document - \ --header="authorization: Bearer ${BASE64_BEARER_TOKEN}" \ -- "${API_URL}" | - ${JQ_} --raw-output \ + jq --raw-output \ '.value[] | .name + ">" + .media.duration[2:] + ">" + (.playbackUrls | map(select(.mimeType == "application/vnd.apple.mpegurl")) | .[].playbackUrl)' )" @@ -123,7 +102,7 @@ m3u8_manifest_urls_and_metadata="$( } idx='1' -total="$(printf '%s\n' "${m3u8_manifest_urls_and_metadata}" | ${WC_} -l)" +total="$(printf '%s\n' "${m3u8_manifest_urls_and_metadata}" | wc -l)" IFS=' ' @@ -133,32 +112,22 @@ do ${m3u8_manifest_url_and_metadata} EOM - length="$(printf '%s' "${length}" | ${SED_} -e 's/\..*S/S/g' -e 's/M/:/g' -e 's/H/:/g')" - - m3u8_highest_res_url="$( - curl --silent -- "${m3u8_manifest_url}" | - tail -n1 | - rev | - cut -c3- | - rev | - cut -d'&' -f2 | - cut -c13- | - ${SED_} 's/keyframes/fragments/g' - )" + length="$(printf '%s' "${length}" | sed -e 's/\..*S/S/g' -e 's/M/:/g' -e 's/H/:/g')" filepath="${OUTDIR}/${title}.mp4" printf '\033[32m%s\033[m\n' "Download '${filepath}' (${idx}/${total})" - ${FFMPEG_} -hide_banner \ + ffmpeg -hide_banner \ -loglevel error \ -headers "authorization: Bearer ${BASE64_BEARER_TOKEN}" \ - -i "${m3u8_highest_res_url}" \ + -i "${m3u8_manifest_url}" \ -progress - \ - -c copy -- "${filepath}" | - ${GREP_} --line-buffered 'out_time=' | - ${SED_} -u -e 's/out_time=//g' -e 's/\..*//g' -e 's/^-//g' | - ${XARGS_} -I{} printf '%s\r' "{} of ${length} " + -c copy \ + -- "${filepath}" | + grep --line-buffered 'out_time=' | + sed -u -e 's/out_time=//g' -e 's/\..*//g' -e 's/^-//g' | + xargs -I{} printf '%s\r' "{} of ${length} " printf '\n\033[33m%s\033[m\n' "Done" @@ -166,19 +135,6 @@ do done unset -v POSIXLY_CORRECT \ -WHICH_ \ -EXIT_ \ -PRINTF_ \ -COMMAND_ \ -READ_ \ -TR_ \ -MKDIR_ \ -SED_ \ -GREP_ \ -WC_ \ -FFMPEG_ \ -JQ_ \ -WGET_ \ idx \ total \ API_URL \ @@ -190,7 +146,6 @@ m3u8_manifest_urls_and_metadata \ OUTDIR \ BASE64_BEARER_TOKEN -# \n cannnot be trailing character -IFS="$(printf ' \n\t')" - +IFS=" + " )