Create a split album script in Oil

2025-01-30 22:57:54 +00:00 · 2021-04-12 21:50:43 +08:00 · 2021-04-12 21:50:43 +08:00 · 37036a1e2c
commit 37036a1e2c
parent 90be31cc52
1 changed files with 222 additions and 0 deletions
--- a/bin/split-album
+++ b/bin/split-album
@ -0,0 +1,222 @@
+#!/usr/bin/env oil
+
+# The following are the dependencies for this script.
+#
+# * Oil shell v0.8.8
+# * coreutils v8.32
+# * file 5-32
+# * ffmpeg v4.3.1
+# * ripgrep v12.1.1 with SIMD runtime
+
+shopt --set strict:all
+
+const show_help = "A small script for splitting files into tracks, perfect for full album releases and audiobooks.
+
+split-album [options...] -tf \$TIMESTAMP_FILE -af \$ALBUM_FILE
+
+Options:
+    -h, --help                      Show the help section.
+    --descriptive-help              Show a helpful tutorial-esque description of the program.
+    -af, --audio-file [file]        Set the audio file to be split.
+    -tf, --timestamp-file [file]    Set the timestamp file to be used for splitting.
+    -t, --title [title]             Set the title of the album.
+    -d, --date [date]               Set the publication date of the album.
+    -a, --author [author]           Set the author of the album.
+    --json                          Print the JSON data for use with other applications.
+    --strict                        Set to be strict, exiting when an invalid timestamp is encountered.
+
+Environment variables:
+    EXTENSION                       The extension to be used. This is used in conjunction with FFmpeg
+                                    where it can detect the codec to be converted automatically with it.
+
+When any of the required metadata is missing (i.e., title, date, author), it will be prompted.
+"
+
+const show_descriptive_help = "Split an album with a timestamp file.
+The timestamp file contains a starting timestamp (any format that ffmpeg accepts in seeking but it is recommended in HH:MM:SS) and the title of the chapter/track.
+For more information, see https://trac.ffmpeg.org/wiki/Seeking.
+Lines starting with '#' and empty lines will be ignored.
+
+The following is an example of the content of a timestamp file.
+
+\`\`\`
+00:00:00 Introduction
+00:03:54 It's the R-rated scene
+00:05:43 Boring exposition at the cafe
+00:16:54 Expedition time
+00:21:51 Fighting time
+00:22:22 Shower scene
+\`\`\`
+
+You can also create a timestamp file in JSON format.
+It is the equivalent of '.chapters' from the JSON output (i.e., '--json').
+The equivalent JSON of the previous example would be this:
+
+\`\`\`
+[
+  {
+    \"timestamp\": \"00:00:00\",
+    \"title\": \"Introduction\"
+  },
+  {
+    \"timestamp\": \"00:03:54\",
+    \"title\": \"It's the R-rated scene\"
+  },
+  {
+    \"timestamp\": \"00:05:43\",
+    \"title\": \"Boring exposition at the cafe\"
+  },
+  {
+    \"timestamp\": \"00:16:54\",
+    \"title\": \"Expedition time\"
+  },
+  {
+    \"timestamp\": \"00:21:51\",
+    \"title\": \"Fighting time\"
+  },
+  {
+    \"timestamp\": \"00:22:22\",
+    \"title\": \"Shower scene\"
+  }
+]
+\`\`\`
+
+There will be a folder created with the safe name of the album (in kebab-case) containing the split tracks.
+The original file will be kept, do what you want with it.
+"
+
+const EXTENSION = ${EXTENSION:-"ogg"}
+
+var audio_file = ''
+var timestamp_file = ''
+
+var album = ''
+var author = ''
+var pub_date = ''
+
+var prints_json = false
+var strict_mode = false
+
+while test $len(ARGV) -gt 0 {
+  case $[ARGV[0]] {
+    -h|--help)
+        write -- $show_help
+        exit
+        ;;
+    --descriptive-help)
+        write -- $show_descriptive_help
+        exit
+        ;;
+    -af|--audio-file)
+        set audio_file = $2
+        shift 2
+        ;;
+    -tf|--timestamp-file)
+        set timestamp_file = $2
+        shift 2
+        ;;
+    -a|--author)
+        set author = $2
+        shift 2
+        ;;
+    -d|--date)
+        set pub_date = $2
+        shift 2
+        ;;
+    -t|--title)
+        set album = $2
+        shift 2
+        ;;
+    --strict)
+        set strict_mode = true
+        shift
+        ;;
+    --json)
+        set prints_json = true
+        shift
+        ;;
+    *)
+        shift
+        ;;
+    }
+}
+
+proc warnf(msg, @args) {
+    >&2 printf "${msg}\\n" @args
+}
+
+proc errorf(msg, @args) {
+    >&2 printf "${msg}\\n" @args
+    exit 1
+}
+
+proc prompt(msg, :out, prefix = ">> ") {
+    >&2 printf '%s\n%s' $msg $prefix
+    read --line
+    setref out = $_line
+}
+
+proc kebab-case(word) {
+    write -- $word | sed --regexp-extended --expression 's/./\L&/g' --expression 's/\s+/-/g' --expression 's/[^.a-z0-9-]//g' --expression 's/^-+|-+$//g' --expression 's/-+/-/g'
+}
+
+test -f $audio_file || errorf '%s is not a regular file' $audio_file
+test -f $timestamp_file || errorf '%s is not a regular file' $timestamp_file
+
+test $album || prompt "What is the title of the album?" :album
+test $author || prompt "Who is the author of the album?" :author
+test $pub_date || prompt "When is the album published?" :pub_date
+
+const output_data = {}
+set output_data['file'] = $audio_file
+set output_data['chapters'] = []
+
+const timestamp_regex = / %start digit{2,} ':' digit{2} ':' digit{2} %end /
+var has_error = false
+
+# Also cleans up the timestamp file with comments and empty lines.
+# I just want to improve the timestamp format (a little bit).
+case $(file --mime-type --brief $timestamp_file) {
+    "application/json")
+        json read :output_data['chapters'] < $audio_file
+        ;;
+    "text/plain")
+        cat $timestamp_file | sed --regexp-extended --expression '/^\s*$/d' --expression '/^#/d' | while read --line {
+            var chapter = {}
+            set chapter['title'] = $(write -- $_line | cut -d' ' -f2-)
+            set chapter['timestamp'] = $(write -- $_line | cut -d' ' -f1)
+        
+            write -- ${chapter['timestamp']} | rg --quiet $timestamp_regex || {
+                warnf "'%s' %s is not a valid timestamp" ${chapter['timestamp']} ${chapter['title']}
+                set has_error = true
+                continue
+            }
+        
+            _ output_data['chapters'].append(chapter)
+        }
+        ;;
+}
+
+if (strict_mode and has_error) { exit 1 }
+
+const title_slug = $(kebab-case $album)
+mkdir -p $title_slug
+
+const chapter_len = len(output_data['chapters'])
+
+for index in @(seq $[chapter_len]) {
+    set chapter = output_data['chapters'][Int(index) - 1]
+    var start = chapter['timestamp']
+    var end = output_data['chapters'][Int(index)]['timestamp'] if Int(index) != chapter_len else null
+    var filename = $(printf "%s/%.2d-%s.%s" $title_slug $index $(kebab-case ${chapter['title']}) $EXTENSION)
+
+    warnf "[%d/%d] %s\\r" ${index} ${chapter_len} ${chapter['title']}
+    if (Int(index) != chapter_len) {
+        ffmpeg -loglevel quiet -nostdin -i $audio_file -ss $start -to $end $filename
+    } else {
+        ffmpeg -loglevel quiet -nostdin -i $audio_file -ss $start $filename
+    }
+}
+
+if (prints_json) { json write :output_data }
+