dotfiles/bin/split-album

#!/usr/bin/env oil

# The following are the dependencies for this script.
#
# * Oil shell v0.8.8
# * coreutils v8.32
# * file 5-32
# * ffmpeg v4.3.1
# * ripgrep v12.1.1

shopt --set strict:all

# Usage summary printed by the -h/--help option.
# The `\$` escapes keep the dollar signs literal inside the double-quoted string.
const show_help = "A small script for splitting files into tracks, perfect for full album releases and audiobooks.

split-album [options...] -tf \$TIMESTAMP_FILE -af \$ALBUM_FILE

Options:
    -h, --help                      Show the help section.
    --tutorial                      Show a helpful tutorial-esque description of the program.
    -af, --audio-file [file]        Set the audio file to be split.
    -tf, --timestamp-file [file]    Set the timestamp file to be used for splitting.
    -t, --title [title]             Set the title of the album.
    -d, --date [date]               Set the publication date of the album.
    -a, --author [author]           Set the author of the album.
    --json                          Print the JSON data for use with other applications.
    --strict                        Set to be strict, exiting when an invalid timestamp is encountered.

Environment variables:
    EXTENSION                       The extension to be used. This is used in conjunction with FFmpeg
                                    where it can detect the codec to be converted automatically with it.

When any of the required metadata is missing (i.e., title, date, author), it will be prompted.
"

# Long-form walkthrough printed by the --tutorial option.
# It documents both accepted timestamp file formats (plain text and JSON).
# Backticks, dollar signs and double quotes are backslash-escaped so they stay
# literal inside the double-quoted string.
const show_descriptive_help = "This script splits an album with a timestamp file.
You're always going to see using this script like the following:

split-album -af \$AUDIO_FILE -tf \$TIMESTAMP_FILE

The timestamp file contains a starting timestamp (in HH:MM:SS[.MS]) and the title of the chapter/track.
For more information, see https://trac.ffmpeg.org/wiki/Seeking.
Lines starting with '#' and empty lines will be ignored.
It's for the best and you don't want some future migraines. :)

The following is an example of the content of a timestamp file.

\`\`\`
00:00:00 Introduction
00:03:54 It's the R-rated scene
00:25:43 Boring exposition at the cafe
00:36:54 Expedition time
00:41:51 Fighting time
00:42:22 Shower scene
\`\`\`

You can also create a timestamp file in JSON format.
It is the equivalent of the 'chapters' key from the JSON output (i.e., '--json').
The equivalent JSON of the previous example would be this:

\`\`\`
[
  {
    \"timestamp\": \"00:00:00\",
    \"title\": \"Introduction\"
  },
  {
    \"timestamp\": \"00:03:54\",
    \"title\": \"It's the R-rated scene\"
  },
  {
    \"timestamp\": \"00:25:43\",
    \"title\": \"Boring exposition at the cafe\"
  },
  {
    \"timestamp\": \"00:36:54\",
    \"title\": \"Expedition time\"
  },
  {
    \"timestamp\": \"00:41:51\",
    \"title\": \"Fighting time\"
  },
  {
    \"timestamp\": \"00:42:22\",
    \"title\": \"Shower scene\"
  }
]
\`\`\`

There will be a folder created with the safe name of the album (in kebab-case) containing the split tracks.
The original file will be kept, do what you want with it.
"

# Boolean switches, turned on by --json and --strict respectively.
var prints_json = false
var strict_mode = false

# Album metadata; whatever is still empty after option parsing is asked for
# interactively later on.
var album = ""
var author = ""
var pub_date = ""

# Input paths, filled in by -af/--audio-file and -tf/--timestamp-file.
var audio_file = ""
var timestamp_file = ""

# Extension of the produced tracks; taken from the environment when set,
# otherwise "opus".
const EXTENSION = ${EXTENSION:-"opus"}

# Abort with a readable message when a value-taking option has no value.
#
# option    - the option currently being parsed (used in the message)
# remaining - number of arguments left, the option itself included
proc require-value(option, remaining) {
    test $remaining -ge 2 || {
        >&2 printf 'Option %s requires a value.\n' $option
        exit 1
    }
}

# Consume the command line from left to right.
# Options that take a value read it from $2 and shift two arguments; flags
# shift one. -h/--help and --tutorial print their text and exit immediately.
# Unrecognised arguments are skipped.
while test $len(ARGV) -gt 0 {
  case $[ARGV[0]] {
    -h|--help)
        write -- $show_help
        exit
        ;;
    --tutorial)
        write -- $show_descriptive_help
        exit
        ;;
    -af|--audio-file)
        require-value $1 $len(ARGV)
        set audio_file = $2
        shift 2
        ;;
    -tf|--timestamp-file)
        require-value $1 $len(ARGV)
        set timestamp_file = $2
        shift 2
        ;;
    -a|--author)
        require-value $1 $len(ARGV)
        set author = $2
        shift 2
        ;;
    -d|--date)
        require-value $1 $len(ARGV)
        set pub_date = $2
        shift 2
        ;;
    -t|--title)
        require-value $1 $len(ARGV)
        set album = $2
        shift 2
        ;;
    --strict)
        set strict_mode = true
        shift
        ;;
    --json)
        set prints_json = true
        shift
        ;;
    *)
        shift
        ;;
    }
}

# Write a printf-style message to standard error, terminated by a newline.
#
# msg  - printf format string
# args - values substituted into the format
proc warnf(msg, @args) {
    printf "${msg}\\n" @args >&2
}

# Report a printf-style message on standard error, then end the script with
# exit status 1.
#
# msg  - printf format string
# args - values substituted into the format
proc errorf(msg, @args) {
    warnf $msg @args
    exit 1
}

# Ask a question on standard error and hand the typed answer back to the caller.
#
# msg    - question text, printed on its own line
# out    - name of the caller's variable that receives the answer
# prefix - marker printed in front of the input position (defaults to ">> ")
proc prompt(msg, :out, prefix = ">> ") {
    printf '%s\n%s' $msg $prefix >&2

    # `read --line` stores the line it read in $_line.
    read --line
    setref out = $_line
}

# Print `word` as a lowercase, dash-separated slug.
#
# The sed expressions run in order: lowercase every character, turn each run
# of whitespace into a dash, drop everything except '.', a-z, 0-9 and '-',
# trim dashes from both ends, and squeeze repeated dashes into one.
proc kebab-case(word) {
    write -- $word | sed -E \
        -e 's/./\L&/g' \
        -e 's/\s+/-/g' \
        -e 's/[^.a-z0-9-]//g' \
        -e 's/^-+|-+$//g' \
        -e 's/-+/-/g'
}

# Both inputs must exist as regular files before any work is done.
if ! test -f $audio_file {
    errorf '%s is not a regular file' $audio_file
}
if ! test -f $timestamp_file {
    errorf '%s is not a regular file' $timestamp_file
}

# Ask interactively for any metadata that was not given on the command line.
if test -z $album {
    prompt "What is the title of the album?" :album
}
if test -z $author {
    prompt "Who is the author of the album?" :author
}
if test -z $pub_date {
    prompt "When is the album published?" :pub_date
}

# Record describing the whole run; it is what --json prints at the end.
# The keys are filled in one by one; 'chapters' starts empty and is populated
# from the timestamp file further down.
const output_data = {}
set output_data['file'] = $audio_file
set output_data['chapters'] = []
set output_data['album'] = $album
set output_data['author'] = $author
set output_data['date'] = $pub_date
set output_data['extension'] = $EXTENSION

# A whole-string match for a timestamp: two or more digits, ':', two digits,
# ':', two digits, optionally followed by '.' and one or more digits.
const timestamp_regex = / %start digit{2,} ':' digit{2} ':' digit{2} <'.' digit+>? %end /
# Set to true when a plain-text timestamp line fails the check above;
# consulted together with --strict after the timestamp file has been read.
var has_error = false

# Load the chapter list from the timestamp file, picking the parser from the
# MIME type reported by `file`.
const timestamp_mime = $(file --mime-type --brief $timestamp_file)

case $timestamp_mime {
    # JSON input is taken as-is as the list of chapters.
    "application/json")
        json read :chapters < $timestamp_file
        set output_data['chapters'] = chapters
        ;;

    # Also cleans up the timestamp file with comments and empty lines.
    # I just want to improve the timestamp format (a little bit).
    "text/plain")
        # sed drops blank lines and lines starting with '#'; every remaining
        # line is "<timestamp> <title>", split at the first space.
        sed --regexp-extended --expression '/^\s*$/d' --expression '/^#/d' -- $timestamp_file | while read --line {
            var chapter = {}
            set chapter['title'] = $(write -- $_line | cut -d' ' -f2-)
            set chapter['timestamp'] = $(write -- $_line | cut -d' ' -f1)

            # Lines with a malformed timestamp are reported and skipped;
            # has_error lets --strict abort afterwards.
            write -- ${chapter['timestamp']} | rg --quiet $timestamp_regex || {
                warnf "'%s' %s is not a valid timestamp" ${chapter['timestamp']} ${chapter['title']}
                set has_error = true
                continue
            }

            _ output_data['chapters'].append(chapter)
        }
        ;;

    # Any other type used to fall through silently, leaving zero chapters and
    # a "successful" run that split nothing. Fail loudly instead.
    *)
        errorf "%s has an unsupported type '%s'; expected a JSON or plain-text timestamp file" $timestamp_file $timestamp_mime
        ;;
}

# With --strict, a single rejected timestamp stops the run before anything is
# written.
if (strict_mode and has_error) {
    exit 1
}

# Tracks are written into a directory named after the kebab-cased album title.
const title_slug = $(kebab-case $album)
set output_data['directory'] = title_slug
mkdir -p $title_slug

const chapter_len = len(output_data['chapters'])

# `index` is the 1-based track number produced by seq (as a string), so the
# chapter list is addressed with track_no - 1.
for index in @(seq $[chapter_len]) {
    var track_no = Int(index)
    # NOTE(review): `chapter` is only declared with `var` in the plain-text
    # branch of the timestamp loader; confirm `set` is fine here for JSON input.
    set chapter = output_data['chapters'][track_no - 1]
    var begin_at = chapter['timestamp']
    var track_file = $(printf "%.2d-%s.%s" $index $(kebab-case ${chapter['title']}) $EXTENSION)
    set output_data['chapters'][track_no - 1]['file'] = track_file

    # Progress goes to stderr so stdout stays clean for --json.
    warnf "[%d/%d] %s" ${index} ${chapter_len} ${chapter['title']}
    if (track_no != chapter_len) {
        # A track ends where the next chapter begins.
        var stop_at = output_data['chapters'][track_no]['timestamp']
        ffmpeg -loglevel quiet -nostdin -i $audio_file -ss $begin_at -to $stop_at "${title_slug}/${track_file}"
    } else {
        # The last track has no end timestamp, so -to is omitted.
        ffmpeg -loglevel quiet -nostdin -i $audio_file -ss $begin_at "${title_slug}/${track_file}"
    }
}

if (prints_json) {
    json write :output_data
}
Create a split album script in Oil 2021-04-12 13:50:43 +00:00			`#!/usr/bin/env oil`

			`# The following are the dependencies for this script.`
			`#`
			`# * Oil shell v0.8.8`
			`# * coreutils v8.32`
			`# * file 5-32`
			`# * ffmpeg v4.3.1`
Revise album-related scripts 2021-04-16 15:25:24 +00:00			`# * ripgrep v12.1.1`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00
			`shopt --set strict:all`

			`const show_help = "A small script for splitting files into tracks, perfect for full album releases and audiobooks.`

			`split-album [options...] -tf \$TIMESTAMP_FILE -af \$ALBUM_FILE`

			`Options:`
			`-h, --help Show the help section.`
Revise album-related scripts 2021-04-16 15:25:24 +00:00			`--tutorial Show a helpful tutorial-esque description of the program.`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00			`-af, --audio-file [file] Set the audio file to be split.`
			`-tf, --timestamp-file [file] Set the timestamp file to be used for splitting.`
			`-t, --title [title] Set the title of the album.`
			`-d, --date [date] Set the publication date of the album.`
			`-a, --author [author] Set the author of the album.`
			`--json Print the JSON data for use with other applications.`
			`--strict Set to be strict, exiting when an invalid timestamp is encountered.`

			`Environment variables:`
			`EXTENSION The extension to be used. This is used in conjunction with FFmpeg`
			`where it can detect the codec to be converted automatically with it.`

			`When any of the required metadata is missing (i.e., title, date, author), it will be prompted.`
			`"`

Revise album-related scripts 2021-04-16 15:25:24 +00:00			`const show_descriptive_help = "This script splits an album with a timestamp file.`
			`You're always going to see using this script like the following:`

			`split-album -af \$AUDIO_FILE -tf \$TIMESTAMP_FILE`

			`The timestamp file contains a starting timestamp (in HH:MM:SS[.MS]) and the title of the chapter/track.`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00			`For more information, see https://trac.ffmpeg.org/wiki/Seeking.`
			`Lines starting with '#' and empty lines will be ignored.`
Revise album-related scripts 2021-04-16 15:25:24 +00:00			`It's for the best and you don't want some future migraines. :)`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00
			`The following is an example of the content of a timestamp file.`

			\`\`\`
			`00:00:00 Introduction`
			`00:03:54 It's the R-rated scene`
Revise album-related scripts 2021-04-16 15:25:24 +00:00			`00:25:43 Boring exposition at the cafe`
			`00:36:54 Expedition time`
			`00:41:51 Fighting time`
			`00:42:22 Shower scene`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00			\`\`\`

			`You can also create a timestamp file in JSON format.`
Revise album-related scripts 2021-04-16 15:25:24 +00:00			`It is the equivalent of the 'chapters' key from the JSON output (i.e., '--json').`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00			`The equivalent JSON of the previous example would be this:`

			\`\`\`
			`[`
			`{`
			`\"timestamp\": \"00:00:00\",`
			`\"title\": \"Introduction\"`
			`},`
			`{`
			`\"timestamp\": \"00:03:54\",`
			`\"title\": \"It's the R-rated scene\"`
			`},`
			`{`
Revise album-related scripts 2021-04-16 15:25:24 +00:00			`\"timestamp\": \"00:25:43\",`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00			`\"title\": \"Boring exposition at the cafe\"`
			`},`
			`{`
Revise album-related scripts 2021-04-16 15:25:24 +00:00			`\"timestamp\": \"00:36:54\",`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00			`\"title\": \"Expedition time\"`
			`},`
			`{`
Revise album-related scripts 2021-04-16 15:25:24 +00:00			`\"timestamp\": \"00:41:51\",`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00			`\"title\": \"Fighting time\"`
			`},`
			`{`
Revise album-related scripts 2021-04-16 15:25:24 +00:00			`\"timestamp\": \"00:42:22\",`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00			`\"title\": \"Shower scene\"`
			`}`
			`]`
			\`\`\`

			`There will be a folder created with the safe name of the album (in kebab-case) containing the split tracks.`
			`The original file will be kept, do what you want with it.`
			`"`

Revise album-related scripts 2021-04-16 15:25:24 +00:00			`const EXTENSION = ${EXTENSION:-"opus"}`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00
			`var audio_file = ''`
			`var timestamp_file = ''`

			`var album = ''`
			`var author = ''`
			`var pub_date = ''`

			`var prints_json = false`
			`var strict_mode = false`

			`while test $len(ARGV) -gt 0 {`
			`case $[ARGV[0]] {`
			`-h\|--help)`
			`write -- $show_help`
			`exit`
			`;;`
Revise album-related scripts 2021-04-16 15:25:24 +00:00			`--tutorial)`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00			`write -- $show_descriptive_help`
			`exit`
			`;;`
			`-af\|--audio-file)`
			`set audio_file = $2`
			`shift 2`
			`;;`
			`-tf\|--timestamp-file)`
			`set timestamp_file = $2`
			`shift 2`
			`;;`
			`-a\|--author)`
			`set author = $2`
			`shift 2`
			`;;`
			`-d\|--date)`
			`set pub_date = $2`
			`shift 2`
			`;;`
			`-t\|--title)`
			`set album = $2`
			`shift 2`
			`;;`
			`--strict)`
			`set strict_mode = true`
			`shift`
			`;;`
			`--json)`
			`set prints_json = true`
			`shift`
			`;;`
			`*)`
			`shift`
			`;;`
			`}`
			`}`

			`proc warnf(msg, @args) {`
			`>&2 printf "${msg}\\n" @args`
			`}`

			`proc errorf(msg, @args) {`
			`>&2 printf "${msg}\\n" @args`
			`exit 1`
			`}`

			`proc prompt(msg, :out, prefix = ">> ") {`
			`>&2 printf '%s\n%s' $msg $prefix`
			`read --line`
			`setref out = $_line`
			`}`

			`proc kebab-case(word) {`
			`write -- $word \| sed --regexp-extended --expression 's/./\L&/g' --expression 's/\s+/-/g' --expression 's/[^.a-z0-9-]//g' --expression 's/^-+\|-+$//g' --expression 's/-+/-/g'`
			`}`

			`test -f $audio_file \|\| errorf '%s is not a regular file' $audio_file`
			`test -f $timestamp_file \|\| errorf '%s is not a regular file' $timestamp_file`

			`test $album \|\| prompt "What is the title of the album?" :album`
			`test $author \|\| prompt "Who is the author of the album?" :author`
			`test $pub_date \|\| prompt "When is the album published?" :pub_date`

			`const output_data = {}`
			`set output_data['file'] = $audio_file`
			`set output_data['chapters'] = []`
Revise album-related scripts 2021-04-16 15:25:24 +00:00			`set output_data['album'] = $album`
			`set output_data['author'] = $author`
			`set output_data['date'] = $pub_date`
			`set output_data['extension'] = $EXTENSION`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00
Revise album-related scripts 2021-04-16 15:25:24 +00:00			`const timestamp_regex = / %start digit{2,} ':' digit{2} ':' digit{2} <'.' digit+>? %end /`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00			`var has_error = false`

			`case $(file --mime-type --brief $timestamp_file) {`
			`"application/json")`
Revise album-related scripts 2021-04-16 15:25:24 +00:00			`json read :chapters < $timestamp_file`
			`set output_data['chapters'] = chapters`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00			`;;`
Revise album-related scripts 2021-04-16 15:25:24 +00:00
			`# Also cleans up the timestamp file with comments and empty lines.`
			`# I just want to improve the timestamp format (a little bit).`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00			`"text/plain")`
			`cat $timestamp_file \| sed --regexp-extended --expression '/^\s*$/d' --expression '/^#/d' \| while read --line {`
			`var chapter = {}`
			`set chapter['title'] = $(write -- $_line \| cut -d' ' -f2-)`
			`set chapter['timestamp'] = $(write -- $_line \| cut -d' ' -f1)`

			`write -- ${chapter['timestamp']} \| rg --quiet $timestamp_regex \|\| {`
			`warnf "'%s' %s is not a valid timestamp" ${chapter['timestamp']} ${chapter['title']}`
			`set has_error = true`
			`continue`
			`}`

			`_ output_data['chapters'].append(chapter)`
			`}`
			`;;`
			`}`

			`if (strict_mode and has_error) { exit 1 }`

			`const title_slug = $(kebab-case $album)`
Revise album-related scripts 2021-04-16 15:25:24 +00:00			`set output_data['directory'] = title_slug`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00			`mkdir -p $title_slug`

			`const chapter_len = len(output_data['chapters'])`

			`for index in @(seq $[chapter_len]) {`
			`set chapter = output_data['chapters'][Int(index) - 1]`
			`var start = chapter['timestamp']`
			`var end = output_data['chapters'][Int(index)]['timestamp'] if Int(index) != chapter_len else null`
Revise album-related scripts 2021-04-16 15:25:24 +00:00			`var filename = $(printf "%.2d-%s.%s" $index $(kebab-case ${chapter['title']}) $EXTENSION)`
			`set output_data['chapters'][Int(index) - 1]['file'] = filename`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00
Revise album-related scripts 2021-04-16 15:25:24 +00:00			`warnf "[%d/%d] %s" ${index} ${chapter_len} ${chapter['title']}`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00			`if (Int(index) != chapter_len) {`
Revise album-related scripts 2021-04-16 15:25:24 +00:00			`ffmpeg -loglevel quiet -nostdin -i $audio_file -ss $start -to $end "${title_slug}/${filename}"`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00			`} else {`
Revise album-related scripts 2021-04-16 15:25:24 +00:00			`ffmpeg -loglevel quiet -nostdin -i $audio_file -ss $start "${title_slug}/${filename}"`
Create a split album script in Oil 2021-04-12 13:50:43 +00:00			`}`
			`}`

			`if (prints_json) { json write :output_data }`