From f96888447d8122f0f01f066fd816df99e87ef806 Mon Sep 17 00:00:00 2001 From: Gabriel Arazas Date: Tue, 7 Sep 2021 17:54:23 +0800 Subject: [PATCH] Refactor the custom scripts A lot of them are reaching to be bigger so I've refactored them similarly to C codebases with the `main()` entrypoint. Apparently, this is how bigger shell scripts are written like Neofetch, pfetch, and some Kubernetes helper scripts. --- bin/bangs | 171 ++++++++++++++------------- bin/convert-newpipe-db | 28 ++++- bin/split-album | 260 +++++++++++++++++++++++------------------ 3 files changed, 253 insertions(+), 206 deletions(-) diff --git a/bin/bangs b/bin/bangs index 1ec8023..cae1181 100755 --- a/bin/bangs +++ b/bin/bangs @@ -38,8 +38,7 @@ HELP # Simply prints the given string into percent-encoded equivalent. # -# > urlencode "Hello world" -# Hello%20world +# `urlencode "Hello world"` will give "Hello%20world" # # Stolen from https://gist.github.com/cdown/1163649 and https://gist.github.com/cdown/1163649#gistcomment-1256298. # Just ported it in Oil script. @@ -65,100 +64,100 @@ proc warnf(format, @msg) { # The entry point of this program. proc main { - # Config-related variables. - # For now, there is no system-related config. - # This is primarily a user script, after all. :) - const config_dir = "${XDG_CONFIG_HOME:-"$HOME/.config"}/bangs" - const config_file = "${config_dir}/config.json" + # Config-related variables. + # For now, there is no system-related config. + # This is primarily a user script, after all. :) + const config_dir = "${XDG_CONFIG_HOME:-"$HOME/.config"}/bangs" + const config_file = "${config_dir}/config.json" - # Note you can configure these variables through the respective environment variables. - const bangs_prefix = "${BANGS_PREFIX:-~}" - const bangs_placeholder = "${BANGS_PLACEHOLDER:-{{{s}}}}" + # Note you can configure these variables through the respective environment variables. 
+ const bangs_prefix = "${BANGS_PREFIX:-~}" + const bangs_placeholder = "${BANGS_PLACEHOLDER:-{{{s}}}}" - # These are the default bangs available. - # Bangs are any keys that shouldn't have whitespace characters. - # We'll use this in case there is no user configuration. - # - # We also made the default config to be more flexible with the placeholder. - const default_config = { - 'aw': { - 'name': 'Arch Wiki', - 'url': 'https://wiki.archlinux.org/index.php?title=Special%3ASearch&search=' + bangs_placeholder - }, - 'gh': { - 'name': 'GitHub', - 'url': 'https://github.com/search?utf8=%E2%9C%93&q=' + bangs_placeholder - }, - 'g': { - 'name': 'Google', - 'url': 'https://www.google.com/search?q=' + bangs_placeholder - }, - 'so': { - 'name': 'Stack Overflow', - 'url': 'http://stackoverflow.com/search?q=' + bangs_placeholder - }, - 'w': { - 'name': 'Wikipedia', - 'url': 'https://en.wikipedia.org/wiki/Special:Search?search=' + bangs_placeholder - } + # These are the default bangs available. + # Bangs are any keys that shouldn't have whitespace characters. + # We'll use this in case there is no user configuration. + # + # We also made the default config to be more flexible with the placeholder. + const default_config = { + 'aw': { + 'name': 'Arch Wiki', + 'url': 'https://wiki.archlinux.org/index.php?title=Special%3ASearch&search=' + bangs_placeholder + }, + 'gh': { + 'name': 'GitHub', + 'url': 'https://github.com/search?utf8=%E2%9C%93&q=' + bangs_placeholder + }, + 'g': { + 'name': 'Google', + 'url': 'https://www.google.com/search?q=' + bangs_placeholder + }, + 'so': { + 'name': 'Stack Overflow', + 'url': 'http://stackoverflow.com/search?q=' + bangs_placeholder + }, + 'w': { + 'name': 'Wikipedia', + 'url': 'https://en.wikipedia.org/wiki/Special:Search?search=' + bangs_placeholder + } + } + + # Setting up some variables. + const bangs_format = / %start $bangs_prefix !space+ %end / + const valid_bangs = %() + const search_query = %() + + # Config file detection. 
+ # Otherwise, we'll just use the default config. + if test -f $config_file { + json read :bangs < $config_file + } else { + var bangs = default_config + } + + # Show the usage when no arguments were given like any sane program. + if (len(ARGV) == 0) { + usage + exit 0 + } + + # Filter out the bangs from the search query. + # The bangs are just words prefixed with a certain sequence of characters. + # We put both bangs and the search query in separate arrays for easier processing. + # E.g., in the search query `hello ~ddg world ~g`, `~ddg` and `~g` are the bangs. + for i in @ARGV { + # If the argument is not a bang, append it to the search query queue. + write -- $i | rg --quiet $bangs_format || { + append :search_query $i + continue } - # Setting up some variables. - const bangs_format = / %start $bangs_prefix !space+ %end / - const valid_bangs = %() - const search_query = %() - - # Config file detection. - # Otherwise, we'll just use the default config. - if test -f $config_file { - json read :bangs < $config_file + # Otherwise, put it in the bangs array. + # Keep in mind, we do throw out bangs that are not in the bangs database. + var bang = $(write -- $i | sed --regexp-extended --expression "s/^${bangs_prefix}//") + if (bang in bangs) { + append :valid_bangs $bang + warnf "%s will be used to search." $bang } else { - var bangs = default_config + warnf "%s is not found in the database." $bang } + } - # Show the usage when no arguments was given like any sane program. - if (len(ARGV) == 0) { - usage - exit 0 - } + # Encode the query for consistent formatting. + # Even though this script is in Oil where it has fewer problems with splitting strings, we still might want to encode the query for good measure. + var query = join(search_query, " ") + var encoded_query = $(urlencode $query) - # Filter out the bangs from the search query. - # The bangs are just words prefixed with a certain sequence of characters. 
- # We put both bangs and the search query in separate arrays for easier processing. - # E.g., in the search query `hello ~ddg world ~g`, `~ddg~ and `~g` are the bangs. - for i in @ARGV { - # If the argument is not a bang, append in the search query queue. - write -- $i | rg --quiet $bangs_format || { - append :search_query $i - continue - } + warnf "Search query is '%s'" $query + warnf "Encoded form is '%s'" $encoded_query - # Otherwise, put it in the bangs array. - # Keep in mind, we do throw out bangs that are not in the bangs database. - var bang = $(write -- $i | sed --regexp-extended --expression "s/^${bangs_prefix}//") - if (bang in bangs) { - append :valid_bangs $bang - warnf "%s will be used to search." $bang - } else { - warnf "%s is not found in the database." $bang - } - } + # Search the query with the given bangs. + for bang in @valid_bangs { + var metadata = bangs[bang] + var url = $(write -- ${metadata['url']} | sed --expression "s/${bangs_placeholder}/${encoded_query}/") - # Encode the query for a consistent formatting. - # Even though this script is in Oil where it has less problems with splitting strings, we still might want to encode the query for a good measure. - var query = join(search_query, " ") - var encoded_query = $(urlencode $query) - - warnf "Search query is '%s'" $query - warnf "Encoded form is '%s'" $encoded_query - - # Search the query with the given bangs. 
- for bang in @valid_bangs { - var metadata = bangs[bang] - var url = $(write -- ${metadata['url']} | sed --expression "s/${bangs_placeholder}/${encoded_query}/") - - handlr open $url - } + handlr open $url + } } main @ARGV diff --git a/bin/convert-newpipe-db b/bin/convert-newpipe-db index f18742c..777ffc2 100755 --- a/bin/convert-newpipe-db +++ b/bin/convert-newpipe-db @@ -23,11 +23,16 @@ var channel_id_eggex = / 'https://www.youtube.com/channel/' (word) / var NEWPIPE_DB = "newpipe.db" var TEMP_FOLDER_NAME = "newpipe" -var NEWPIPE_DB_QUERY = "SELECT name, url, service_id, group_concat(tag, ',') AS tags FROM (SELECT subscriptions.name, subscriptions.url, subscriptions.service_id, '/' || feed_group.name AS tag -FROM subscriptions -LEFT JOIN feed_group_subscription_join AS subs_join -LEFT JOIN feed_group -ON subs_join.subscription_id = subscriptions.uid AND feed_group.uid = subs_join.group_id) GROUP BY name ORDER BY name COLLATE NOCASE;" + +# The SQL query to get the required metadata. +var NEWPIPE_DB_QUERY = " + SELECT name, url, service_id, group_concat(tag, ',') AS tags FROM ( + SELECT subscriptions.name, subscriptions.url, subscriptions.service_id, '/' || feed_group.name AS tag + FROM subscriptions + LEFT JOIN feed_group_subscription_join AS subs_join + LEFT JOIN feed_group + ON subs_join.subscription_id = subscriptions.uid AND feed_group.uid = subs_join.group_id + ) GROUP BY name ORDER BY name COLLATE NOCASE;" # Print the beginning of the template. cat <&2 printf "${msg}\\n" @args @@ -167,62 +105,149 @@ proc kebab-case(word) { write -- $word | sed --regexp-extended --expression 's/./\L&/g' --expression 's/\s+/-/g' --expression 's/[^a-z0-9-]//g' --expression 's/^-+|-+$//g' --expression 's/-+/-/g' } -test -f $audio_file || errorf '%s is not a regular file' $audio_file -test -f $timestamp_file || errorf '%s is not a regular file' $timestamp_file +proc main { + # This could be configured by setting the 'EXTENSION' environment variable. 
+ const EXTENSION = ${EXTENSION:-"opus"} -test $album || prompt "What is the title of the album?" :album -test $author || prompt "Who is the author of the album?" :author -test $pub_date || prompt "When is the album published?" :pub_date + # Set up the variables. + var audio_file = '' + var timestamp_file = '' -const output_data = {} -setvar output_data['file'] = $audio_file -setvar output_data['chapters'] = [] -setvar output_data['album'] = $album -setvar output_data['author'] = $author -setvar output_data['date'] = $pub_date -setvar output_data['extension'] = $EXTENSION + var album = '' + var author = '' + var pub_date = '' -const timestamp_regex = / %start digit{2,} ':' digit{2} ':' digit{2} <'.' digit+>? %end / -var has_error = false + var prints_json = false + var strict_mode = false + var skip = false -# Deserialize the given input into the chapters data. -case $(file --mime-type --brief $timestamp_file) { + # Parse the arguments. + while test $len(ARGV) -gt 0 { + case $[ARGV[0]] { + -h|--help) + write -- $show_help + exit + ;; + --tutorial) + write -- $show_descriptive_help + exit + ;; + --audio-file) + setvar audio_file = ARGV[1] + shift 2 + ;; + --timestamp-file) + setvar timestamp_file = ARGV[1] + shift 2 + ;; + -a|--author) + setvar author = ARGV[1] + shift 2 + ;; + -d|--date) + setvar pub_date = ARGV[1] + shift 2 + ;; + -t|--title) + setvar album = ARGV[1] + shift 2 + ;; + --strict) + setvar strict_mode = true + shift + ;; + --skip) + setvar skip = true + shift + ;; + --json) + setvar prints_json = true + shift + ;; + *) + setvar audio_file = ARGV[0] + setvar timestamp_file = ARGV[1] + shift 2 + ;; + } + } + + # Check that the files are valid. + test -f $audio_file || errorf '%s is not a regular file' $audio_file + test -f $timestamp_file || errorf '%s is not a regular file' $timestamp_file + + # Prompt for the missing values if not passed from the command line. + test $album || prompt "What is the title of the album?" 
+ :album + test $author || prompt "Who is the author of the album?" :author + test $pub_date || prompt "When is the album published?" :pub_date + + # Populate the output data. + # This is going to be used throughout the processing. + # Additionally, the object will be printed when the `--json` flag is passed. + const output_data = {} + setvar output_data['file'] = $audio_file + setvar output_data['chapters'] = [] + setvar output_data['album'] = $album + setvar output_data['author'] = $author + setvar output_data['date'] = $pub_date + setvar output_data['extension'] = $EXTENSION + + # The following variable stores an eggex, a simplified notation for regular expressions. + # Pretty nice to use especially that literals are quoted and classes are not. + const timestamp_regex = / %start digit{2,} ':' digit{2} ':' digit{2} <'.' digit+>? %end / + + # We'll keep track of whether the pipeline has encountered an error. + # If it has, the script will exit at various points of the process. + var has_error = false + + # Deserialize the given input into the chapters data. + # This script accepts several formats from a JSON file to a plain-text file derived from Luke Smith's 'booksplit' script. + case $(file --mime-type --brief $timestamp_file) { "application/json") - json read :chapters < $timestamp_file - setvar output_data['chapters'] = chapters - ;; + json read :chapters < $timestamp_file + setvar output_data['chapters'] = chapters + ;; - # Also cleans up the timestamp file with comments (i.e., lines starting with '#') and empty lines allowing for more commenting options. - # I just want to improve the timestamp format a little bit. + # The text file is formatted quite similarly to the required format from the booksplit script. + # I improved some things in the format such as allowing comments (i.e., lines starting with '#') and empty lines, allowing for cleaner input. 
"text/plain") - sed --regexp-extended --expression '/^\s*$/d' --expression '/^#/d' $timestamp_file | while read --line { - var chapter = {} - setvar chapter['title'] = $(write -- $_line | cut -d' ' -f2-) - setvar chapter['timestamp'] = $(write -- $_line | cut -d' ' -f1) + sed --regexp-extended --expression '/^\s*$/d' --expression '/^#/d' $timestamp_file | while read --line { + # We'll build the chapter data to be added later to the output data. + var chapter = {} + setvar chapter['title'] = $(write -- $_line | cut -d' ' -f2-) + setvar chapter['timestamp'] = $(write -- $_line | cut -d' ' -f1) - write -- ${chapter['timestamp']} | rg --quiet $timestamp_regex || { - warnf "'%s' %s is not a valid timestamp" ${chapter['timestamp']} ${chapter['title']} - setvar has_error = true - continue - } - - _ output_data['chapters'].append(chapter) + # Mark the input to be erroneous if the timestamp format is not valid. + # This will cause the script to exit in the next part of the process. + # We won't be exiting immediately so that all possible errors are reported. + write -- ${chapter['timestamp']} | rg --quiet $timestamp_regex || { + warnf "'%s' %s is not a valid timestamp" ${chapter['timestamp']} ${chapter['title']} + setvar has_error = true + continue } - ;; -} -if (strict_mode and has_error) { exit 1 } + _ output_data['chapters'].append(chapter) + } + ;; + } -const title_slug = $(kebab-case $album) -setvar output_data['directory'] = $(realpath --canonicalize-missing $title_slug) -mkdir -p $title_slug + # Exit if the script is set as strict and has erroneous input. + # If the user cares about the input, they have to set it to strict mode. + if (strict_mode and has_error) { exit 1 } -# Rather than sequentially segmenting the audio, we'll extract the starting and ending timestamps of each segment then feed it to a job queue that can execute jobs in parallel. -# Take note we don't have the ending timestamp of each segment so we need a way to look back into items. 
-const chapter_len = len(output_data['chapters']) -var job_queue = %() + # Set parts of the output data and prepare for the splitting process. + const title_slug = $(kebab-case $album) + setvar output_data['directory'] = $(realpath --canonicalize-missing $title_slug) + mkdir -p $title_slug -for index in @(seq $[chapter_len]) { + # Rather than sequentially segmenting the audio, we'll extract the starting and ending timestamps of each segment then feed it to a job queue that can execute jobs in parallel. + # Take note we don't have the ending timestamp of each segment so we need a way to look back into items. + const chapter_len = len(output_data['chapters']) + var job_queue = %() + + # Iterate through the chapters and populate the job queue. + # We'll also fill up the rest of the chapter-related data into the output data. + for index in @(seq $[chapter_len]) { var index = Int(index) var chapter = output_data['chapters'][index - 1] var start = chapter['timestamp'] @@ -230,17 +255,24 @@ for index in @(seq $[chapter_len]) { var filename = $(printf "%.2d-%s.%s" $index $(kebab-case ${chapter['title']}) $EXTENSION) setvar output_data['chapters'][index - 1]['file'] = filename - # Check for incorrect timestamp order. + # Check for incorrect timestamp order and set the pipeline as erroneous if it is. + # We can't let the splitting process proceed since it will surely make problematic output. if (start > end and end is not null) { - warnf '%s (start) is ahead compared to %s (end)' $start $end - setvar has_error = true + warnf '%s (start) is ahead compared to %s (end)' $start $end + setvar has_error = true } append :job_queue ">&2 printf '[%d/%d] %s\\n' $[index] $[chapter_len] \"$[output_data['chapters'][index - 1]['title']]\"; ffmpeg -loglevel quiet -nostdin -i '${audio_file}' -ss ${start} $['-to ' + end if index != chapter_len else ''] ${title_slug}/${filename}" + } + + # Exit the process if an error was detected. 
+ if (has_error) { exit 1 } + + # Start the splitting process if the `--skip` is absent. + if (not skip) { parallel -- @job_queue } + + # Print the output data as JSON if the `--json` flag is passed. + if (prints_json) { json write :output_data } } -if (has_error) { exit 1 } - -if (not skip) { parallel -- @job_queue } -if (prints_json) { json write :output_data } - +main @ARGV