dotfiles/bin/convert-newpipe-db
Gabriel Arazas f96888447d Refactor the custom scripts
A lot of them are getting bigger, so I've refactored them
similarly to C codebases with a `main()` entrypoint.

Apparently, this is how bigger shell scripts like Neofetch,
pfetch, and some Kubernetes helper scripts are written.
2021-09-07 17:54:23 +08:00

#!/usr/bin/env nix-shell
#! nix-shell -i oil -p coreutils sqlite unzip ripgrep jq file dasel curl
# Convert a Newpipe database (assuming it was exported within the app) into OPML v2.
# Dependencies:
# * osh (oil shell) v0.8.12
# * sqlite3 v3.34.0
# * unzip
# * ripgrep v12.1.1
# * jq
# * file
# * dasel
# * curl
# Use the current Oil features in strict mode.
# This enables stricter parsing and error handling throughout the script.
shopt -s strict:all
var FILENAME = $1
# Testing if the given file is a zip file.
file --mime $FILENAME | rg "application/zip" --quiet || exit 1
var channel_id_eggex = / 'https://www.youtube.com/channel/' (word) /
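# When this eggex is spliced into `sed --regexp-extended` further below, Oil renders it
# as a POSIX ERE, so the capture group can be referenced as \1 in the replacement.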
var NEWPIPE_DB = "newpipe.db"
var TEMP_FOLDER_NAME = "newpipe"
# The SQL query to get the required metadata.
var NEWPIPE_DB_QUERY = "
SELECT name, url, service_id, group_concat(tag, ',') AS tags FROM (
SELECT subscriptions.name, subscriptions.url, subscriptions.service_id, '/' || feed_group.name AS tag
FROM subscriptions
LEFT JOIN feed_group_subscription_join AS subs_join
LEFT JOIN feed_group
ON subs_join.subscription_id = subscriptions.uid AND feed_group.uid = subs_join.group_id
) GROUP BY name ORDER BY name COLLATE NOCASE;"
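# Each resulting row has four columns: name, url, service_id, and tags (the channel's
# feed group names, each prefixed with '/' and joined with commas).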
# Print the beginning of the template.
cat <<OPML
<opml version="2.0">
<head>
<title>Newpipe subscriptions</title>
<dateCreated>$(date "+%F %T %z")</dateCreated>
<ownerName>$(whoami)</ownerName>
<docs>http://dev.opml.org/spec2.html</docs>
</head>
<body>
OPML
# Simply prints an `<outline>` element formatted appropriately for the resulting output.
# Don't mind how it is printed right now. :)
proc print-outline(title, xml_url, html_url, tags = "") {
printf ' <outline type="rss" xmlUrl="%s" htmlUrl="%s" title="%s" text="%s"' $xml_url $html_url $title $title
if test -n $tags {
printf ' category="%s"' $tags
}
printf '/>\n'
}
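# As an illustrative example (the values are made up, not from a real subscription), a call like
#   print-outline "Some Channel" "https://example.com/feed.atom" "https://example.com" "/Music"
# emits:
#   <outline type="rss" xmlUrl="https://example.com/feed.atom" htmlUrl="https://example.com" title="Some Channel" text="Some Channel" category="/Music"/>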
# Print the channels in the OPML body.
# This only occurs if the given archive does contain a Newpipe database.
if unzip -l $FILENAME | rg --quiet $NEWPIPE_DB {
# Make the temporary directory (preferably on the current directory to make cleanup easier).
mkdir $TEMP_FOLDER_NAME && unzip -q -u $FILENAME -d $TEMP_FOLDER_NAME
# Setting up some automatic cleanup upon exit.
trap "rm --recursive $TEMP_FOLDER_NAME" EXIT
# Quickly check if a SQLite database is in the Newpipe database folder.
file --mime "${TEMP_FOLDER_NAME}/${NEWPIPE_DB}" | rg --quiet "application/x-sqlite3" || exit 1
# Extract the data from the database and process them individually.
# Note that we format the data as CSV with one line per object since, as of version 0.8.11, Oil has some problems taking fully nested data from external commands (not yet completely verified).
# We have to rewrite this part once Oil fixes the issue with nested objects.
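# A single line fed into the loop below looks roughly like this (values are made up for illustration):
# {"name":"Some Channel","url":"https://www.youtube.com/channel/...","service_id":"0","tags":"/Music"}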
sqlite3 "${TEMP_FOLDER_NAME}/${NEWPIPE_DB}" "${NEWPIPE_DB_QUERY}" --csv --header \
| dasel select --parser csv --multiple --selector '.[*]' --compact --write json \
| while read channel {
# Each channel is a JSON object on its own line; read it into an Oil variable.
echo $channel | json read :channel
setvar name = channel['name']
setvar url = channel['url']
setvar service_id = channel['service_id']
setvar tags = channel['tags']
# The `service_id` column indicates which of the platforms supported by Newpipe the channel came from.
# Since the way to get the required data differs per platform, we're handling each one in a case switch.
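# Only YouTube (0) and Peertube (3) are handled here; subscriptions from other services
# fall through the case without setting `xml_url`.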
case $service_id {
# YouTube
'0') {
setvar channel_id = $(echo $url | sed --quiet --regexp-extended "s|$channel_id_eggex|\\1|p")
setvar xml_url = "https://www.youtube.com/feeds/videos.xml?channel_id=${channel_id}"
}
;;
# Peertube instances
'3') {
# This naive solution just probes for the instance domain, assuming the database was exported properly from the app and not tampered with.
# It can go into an infinite loop, so take caution for now.
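# As a made-up example, for a channel URL like https://peertube.example.com/video-channels/some_channel,
# `domain` starts out as https://peertube.example.com and `channel_url` ends up as
# video-channels/some_channel.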
setvar domain = $(echo $url | cut --delimiter='/' --fields='-3')
setvar _domain_part_index = 4
until (Bool($(curl --silent "$domain/api/v1/config/about" | dasel --parser json --selector ".instance.name"))) {
setvar domain = $(echo $url | cut --delimiter='/' --fields="-$_domain_part_index")
setvar _domain_part_index = Int($_domain_part_index) + 1
}
setvar channel_url = $(echo $url | cut --delimiter='/' --fields='4-')
setvar feed_type = $(echo $channel_url | rg "video-channels" --quiet && echo "videoChannelId" || echo "accountId")
setvar channel_id = $(curl "${domain}/api/v1/${channel_url}" --silent | dasel --parser json --selector '.id')
setvar xml_url = "$domain/feeds/videos.atom?$feed_type=$channel_id"
}
;;
}
print-outline $name $xml_url $url $tags
}
}
# Print the remaining parts of the document.
cat <<OPML
</body>
</opml>
OPML