dotfiles/bin/split-album

247 lines
7.9 KiB
Plaintext
Raw Normal View History

#! /usr/bin/env nix-shell
#! nix-shell -i oil -p coreutils moreutils ffmpeg gnused ripgrep file
# External tools used by this script and the package that provides each:
#   coreutils -> cut, mkdir, printf, realpath, seq
#   moreutils -> parallel
#   gnused    -> sed
#   ripgrep   -> rg
#   file      -> file (MIME detection of the timestamp file)
#   ffmpeg    -> ffmpeg
2021-04-12 13:50:43 +00:00
# Enable the strict:all option group for the rest of the script.
shopt --set strict:all
# Short usage text; written to stdout when -h/--help is given.
const show_help = "A small script for splitting files into tracks, perfect for full album releases and audiobooks.
2021-07-22 11:24:53 +00:00
Based from Luke Smith's booksplit script
(https://raw.githubusercontent.com/LukeSmithxyz/voidrice/091d7e54c5c1ed77201ce1254aa2623a2801c9f2/.local/bin/booksplit).
2021-04-12 13:50:43 +00:00
2021-04-30 14:21:11 +00:00
split-album [options...] [\$ALBUM_FILE \$TIMESTAMP_FILE]
2021-04-12 13:50:43 +00:00
Options:
-h, --help Show the help section.
2021-04-16 15:25:24 +00:00
--tutorial Show a helpful tutorial-esque description of the program.
2021-06-24 07:19:21 +00:00
--audio-file [file] Set the audio file to be split.
--timestamp-file [file] Set the timestamp file to be used for splitting.
2021-04-12 13:50:43 +00:00
-t, --title [title] Set the title of the album.
-d, --date [date] Set the publication date of the album.
-a, --author [author] Set the author of the album.
--json Print the JSON data for use with other applications.
--skip Skip the extraction part, useful for printing data with '--json' or testing the timestamp file.
2021-04-12 13:50:43 +00:00
--strict Set to be strict, exiting when an invalid timestamp is encountered.
Environment variables:
EXTENSION The extension to be used. This is used in conjunction with FFmpeg
where it can detect the codec to be converted automatically with it.
When any of the required metadata is missing (i.e., title, date, author), it will be prompted.
"
2021-04-16 15:25:24 +00:00
# Long-form walkthrough; written to stdout when --tutorial is given.
# It describes both accepted timestamp file formats (plain text and JSON).
const show_descriptive_help = "This script splits an album with a timestamp file.
You're always going to see using this script like the following:
2021-04-30 14:21:11 +00:00
split-album \$AUDIO_FILE \$TIMESTAMP_FILE
2021-04-16 15:25:24 +00:00
The timestamp file contains a starting timestamp (in HH:MM:SS[.MS]) and the title of the chapter/track.
2021-04-12 13:50:43 +00:00
For more information, see https://trac.ffmpeg.org/wiki/Seeking.
Lines starting with '#' and empty lines will be ignored.
The following is an example of the content of a timestamp file.
\`\`\`
00:00:00 Introduction
00:03:54 It's the R-rated scene
2021-04-16 15:25:24 +00:00
00:25:43 Boring exposition at the cafe
00:36:54 Expedition time
00:41:51 Fighting time
00:42:22 Shower scene
2021-04-12 13:50:43 +00:00
\`\`\`
You can also create a timestamp file in JSON format.
2021-04-16 15:25:24 +00:00
It is the equivalent of the 'chapters' key from the JSON output (i.e., '--json').
2021-04-12 13:50:43 +00:00
The equivalent JSON of the previous example would be this:
\`\`\`
[
{
\"timestamp\": \"00:00:00\",
\"title\": \"Introduction\"
},
{
\"timestamp\": \"00:03:54\",
\"title\": \"It's the R-rated scene\"
},
{
2021-04-16 15:25:24 +00:00
\"timestamp\": \"00:25:43\",
2021-04-12 13:50:43 +00:00
\"title\": \"Boring exposition at the cafe\"
},
{
2021-04-16 15:25:24 +00:00
\"timestamp\": \"00:36:54\",
2021-04-12 13:50:43 +00:00
\"title\": \"Expedition time\"
},
{
2021-04-16 15:25:24 +00:00
\"timestamp\": \"00:41:51\",
2021-04-12 13:50:43 +00:00
\"title\": \"Fighting time\"
},
{
2021-04-16 15:25:24 +00:00
\"timestamp\": \"00:42:22\",
2021-04-12 13:50:43 +00:00
\"title\": \"Shower scene\"
}
]
\`\`\`
There will be a folder created with the safe name of the album (in kebab-case) containing the split tracks.
The original file will be kept, do what you want with it.
"
2021-04-16 15:25:24 +00:00
# Extension given to every produced track file; taken from the EXTENSION
# environment variable and falling back to "opus" when it is unset or empty.
const EXTENSION = ${EXTENSION:-"opus"}
2021-04-12 13:50:43 +00:00
# State filled in by the command-line parser below.
# Input paths: set by --audio-file/--timestamp-file or by the two positional arguments.
var audio_file = ''
var timestamp_file = ''
# Album metadata: set by -t/--title, -a/--author and -d/--date; prompted for later when left empty.
var album = ''
var author = ''
var pub_date = ''
# Behaviour switches: --json, --strict and --skip respectively.
var prints_json = false
var strict_mode = false
var skip = false
2021-04-12 13:50:43 +00:00
# Report an option that was given without its value, then exit with status 1.
# Kept next to the parser because the parser runs before the script's other
# helper procs are defined.
#   flag  the option as typed by the user
proc missing-value(flag) {
  >&2 printf '%s requires a value\n' $flag
  exit 1
}

# Command-line parser: consume ARGV from the front until nothing is left.
# Options taking a value consume two words, switches consume one. Anything
# unrecognised is treated as the `AUDIO_FILE TIMESTAMP_FILE` positional pair.
while test $len(ARGV) -gt 0 {
  case $[ARGV[0]] {
    -h|--help)
      write -- $show_help
      exit
      ;;
    --tutorial)
      write -- $show_descriptive_help
      exit
      ;;
    --audio-file)
      # Reading ARGV[1] when the option is the last word would be an
      # out-of-range access, so check first and fail with a usage message.
      if (len(ARGV) < 2) { missing-value $[ARGV[0]] }
      setvar audio_file = ARGV[1]
      shift 2
      ;;
    --timestamp-file)
      if (len(ARGV) < 2) { missing-value $[ARGV[0]] }
      setvar timestamp_file = ARGV[1]
      shift 2
      ;;
    -a|--author)
      if (len(ARGV) < 2) { missing-value $[ARGV[0]] }
      setvar author = ARGV[1]
      shift 2
      ;;
    -d|--date)
      if (len(ARGV) < 2) { missing-value $[ARGV[0]] }
      setvar pub_date = ARGV[1]
      shift 2
      ;;
    -t|--title)
      if (len(ARGV) < 2) { missing-value $[ARGV[0]] }
      setvar album = ARGV[1]
      shift 2
      ;;
    --strict)
      setvar strict_mode = true
      shift
      ;;
    --skip)
      setvar skip = true
      shift
      ;;
    --json)
      setvar prints_json = true
      shift
      ;;
    *)
      # The positional form needs both files; a lone word cannot be split
      # into an audio file and a timestamp file.
      if (len(ARGV) < 2) {
        >&2 printf 'expected both an audio file and a timestamp file, got only %s\n' $[ARGV[0]]
        exit 1
      }
      setvar audio_file = ARGV[0]
      setvar timestamp_file = ARGV[1]
      shift 2
      ;;
  }
}
# Write a printf-style message to standard error.
#   msg   printf format string; a newline escape is appended to it
#   args  values substituted into the format string
proc warnf(msg, @args) {
  printf "${msg}\\n" @args >&2
}
# Write a printf-style message to standard error, then terminate the script
# with exit status 1.
#   msg   printf format string; a newline escape is appended to it
#   args  values substituted into the format string
proc errorf(msg, @args) {
  # Same output path as a warning; only the exit differs.
  warnf $msg @args
  exit 1
}
# Ask the user a question and hand the answer back through an out parameter.
#   msg     question text, printed on its own line
#   out     name of the caller's variable that receives the line read from stdin
#   prefix  marker printed right before the cursor (default ">> ")
proc prompt(msg, :out, prefix = ">> ") {
  # The question goes to stderr, keeping stdout free for `--json` output.
  printf '%s\n%s' $msg $prefix >&2
  read --line
  setref out = $_line
}
# Print a file-name-safe, kebab-case version of the given text on stdout.
#   word  arbitrary text, e.g. an album or chapter title
# The sed program runs these steps in order:
#   1. lowercase every character
#   2. turn each run of whitespace into a single '-'
#   3. drop everything that is not '.', a-z, 0-9 or '-'
#   4. strip dashes from both ends
#   5. squeeze repeated dashes into one
proc kebab-case(word) {
  write -- $word | sed -E \
    -e 's/./\L&/g' \
    -e 's/\s+/-/g' \
    -e 's/[^.a-z0-9-]//g' \
    -e 's/^-+|-+$//g' \
    -e 's/-+/-/g'
}
# Both inputs have to be existing regular files before anything else is done.
if ! test -f $audio_file {
  errorf '%s is not a regular file' $audio_file
}
if ! test -f $timestamp_file {
  errorf '%s is not a regular file' $timestamp_file
}

# Any metadata not supplied on the command line is asked for interactively.
if ! test $album {
  prompt "What is the title of the album?" :album
}
if ! test $author {
  prompt "Who is the author of the album?" :author
}
if ! test $pub_date {
  prompt "When is the album published?" :pub_date
}

# Record handed to `json write` when --json is given. The chapter list starts
# empty and is filled in while the timestamp file is read.
const output_data = {}
setvar output_data['file'] = $audio_file
setvar output_data['chapters'] = []
setvar output_data['album'] = $album
setvar output_data['author'] = $author
setvar output_data['date'] = $pub_date
setvar output_data['extension'] = $EXTENSION
2021-04-12 13:50:43 +00:00
2021-04-16 15:25:24 +00:00
# A timestamp is HH:MM:SS with an optional fractional part; the hour field
# may have more than two digits.
const timestamp_regex = / %start digit{2,} ':' digit{2} ':' digit{2} <'.' digit+>? %end /
var has_error = false

# Deserialize the given input into the chapters data.
# The format is chosen from the MIME type reported by `file`.
const timestamp_mime = $(file --mime-type --brief $timestamp_file)
case $timestamp_mime {
  "application/json")
    # JSON input is taken as-is: it replaces the chapter list wholesale.
    json read :chapters < $timestamp_file
    setvar output_data['chapters'] = chapters
    ;;
  # Plain text: one "<timestamp> <title>" pair per line.
  # Empty lines and lines starting with '#' are removed before parsing.
  "text/plain")
    sed --regexp-extended --expression '/^\s*$/d' --expression '/^#/d' $timestamp_file | while read --line {
      var chapter = {}
      # First space-separated field is the timestamp, the rest is the title.
      setvar chapter['title'] = $(write -- $_line | cut -d' ' -f2-)
      setvar chapter['timestamp'] = $(write -- $_line | cut -d' ' -f1)

      # An invalid line is reported and skipped; whether that is fatal is
      # decided after the loop.
      write -- ${chapter['timestamp']} | rg --quiet $timestamp_regex || {
        warnf "'%s' %s is not a valid timestamp" ${chapter['timestamp']} ${chapter['title']}
        setvar has_error = true
        continue
      }

      _ output_data['chapters'].append(chapter)
    }
    ;;
  # Any other type used to fall through silently and leave the chapter list
  # empty; fail loudly so the user knows why nothing would be split.
  *)
    errorf "%s has an unsupported type '%s' (expected JSON or plain text)" $timestamp_file $timestamp_mime
    ;;
}

# In strict mode a single invalid timestamp aborts the run.
if (strict_mode and has_error) { exit 1 }
# The tracks go into a directory named after the kebab-cased album title,
# created relative to the current working directory.
const title_slug = $(kebab-case $album)
setvar output_data['directory'] = $(realpath --canonicalize-missing $title_slug)
mkdir -p $title_slug

# Rather than sequentially segmenting the audio, we'll extract the starting and ending timestamps of each segment then feed it to a job queue that can execute jobs in parallel.
# Take note we don't have the ending timestamp of each segment so we need a way to look back into items.
const chapter_len = len(output_data['chapters'])
var job_queue = %()

# Ordering problems are tracked separately from `has_error`. That flag may
# already be set by invalid lines that non-strict mode deliberately skipped,
# and reusing it here would abort the run even without --strict.
var has_order_error = false

for index in @(seq $[chapter_len]) {
  # `seq` yields 1-based strings; the chapter list is 0-based.
  var index = Int(index)
  var chapter = output_data['chapters'][index - 1]
  var start = chapter['timestamp']
  # A segment ends where the next chapter starts; the last one runs to the end of the file.
  var end = output_data['chapters'][index]['timestamp'] if index != chapter_len else null
  var filename = $(printf "%.2d-%s.%s" $index $(kebab-case ${chapter['title']}) $EXTENSION)
  setvar output_data['chapters'][index - 1]['file'] = filename

  # Check for incorrect timestamp order.
  # The null test comes first so the last chapter (which has no end) is never compared.
  # NOTE(review): this compares the timestamp strings directly - confirm the
  # ordering is right when the hour fields differ in width.
  if (end is not null and start > end) {
    warnf '%s (start) is ahead compared to %s (end)' $start $end
    setvar has_order_error = true
  }

  # Each job is one command string: a progress line on stderr, then the ffmpeg cut.
  # NOTE(review): title and audio path are pasted into the string unescaped;
  # presumably `parallel` runs it through a shell, so quotes in them would break it - confirm.
  push :job_queue ">&2 printf '[%d/%d] %s\\n' $[index] $[chapter_len] \"$[output_data['chapters'][index - 1]['title']]\" && ffmpeg -loglevel quiet -nostdin -i '${audio_file}' -ss ${start} $['-to ' + end if index != chapter_len else ''] ${title_slug}/${filename}"
}

# Out-of-order chapters always abort, in strict and non-strict mode alike.
if (has_order_error) { exit 1 }
if (not skip) { parallel -- @job_queue }
if (prints_json) { json write :output_data }