From 567c52d80aafdd940e095bc8f3708ec4aa9a5097 Mon Sep 17 00:00:00 2001 From: Gabriel Arazas Date: Sat, 11 Mar 2023 16:34:12 +0800 Subject: [PATCH] Add SWHID include processor --- lib/asciidoctor/foodogsquared-extensions.rb | 2 + .../swhid-include-processor/README.adoc | 43 ++++++++++++++++ .../swhid-include-processor/extension.rb | 51 +++++++++++++++++++ 3 files changed, 96 insertions(+) create mode 100644 lib/asciidoctor/swhid-include-processor/README.adoc create mode 100644 lib/asciidoctor/swhid-include-processor/extension.rb diff --git a/lib/asciidoctor/foodogsquared-extensions.rb b/lib/asciidoctor/foodogsquared-extensions.rb index adb965a..af6de49 100644 --- a/lib/asciidoctor/foodogsquared-extensions.rb +++ b/lib/asciidoctor/foodogsquared-extensions.rb @@ -4,6 +4,7 @@ require 'asciidoctor' require 'asciidoctor/extensions' require_relative 'man-inline-macro/extension' require_relative 'swhid-inline-macro/extension' +require_relative 'swhid-include-processor/extension' require_relative 'github-link-inline-macro/extension' require_relative 'github-raw-content-include-processor/extension' require_relative 'gitlab-link-inline-macro/extension' @@ -13,6 +14,7 @@ Asciidoctor::Extensions.register do inline_macro ManInlineMacro inline_macro SWHInlineMacro + include_processor SWHIDIncludeProcessor inline_macro GitHubLinkInlineMacro include_processor GitHubRawIncludeProcessor diff --git a/lib/asciidoctor/swhid-include-processor/README.adoc b/lib/asciidoctor/swhid-include-processor/README.adoc new file mode 100644 index 0000000..6c8ed46 --- /dev/null +++ b/lib/asciidoctor/swhid-include-processor/README.adoc @@ -0,0 +1,43 @@ += SWHID include processor +:toc: + + +This is an include processor extension for easily fetching SWHIDs, only with the `cnt` schema type. + + +== Synopsis + +[source, asciidoc] +---- +\include::$SWHID[] +---- + +Where `$SWHID` is a link:https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html[SWHID]. 
# frozen_string_literal: true

require 'json'
require 'open-uri'
require 'uri'

# Include processor that handles `include::swh:...[]` directives by fetching
# the referenced object from the Software Heritage archive over HTTPS.
#
# Only SWHIDs whose object type is `cnt` are fetched; any other object type
# is skipped with a warning. Network access is only attempted when the
# document's safe mode is SERVER or lower and `allow-uri-read` is set.
class SWHIDIncludeProcessor < Asciidoctor::Extensions::IncludeProcessor
  # Version segment used in the API request URLs.
  API_VERSION = '1'

  # Common prefix of every endpoint this processor calls.
  API_BASE_URL = "https://archive.softwareheritage.org/api/#{API_VERSION}"

  # @param target [String] the include target as written in the document
  # @return [Boolean] true when the target looks like a SWHID (`swh:` prefix)
  def handles?(target)
    target.start_with? 'swh:'
  end

  # Resolves the SWHID in +target+ and pushes the fetched content onto
  # +reader+.
  #
  # @param doc the document being converted (queried for safe mode and the
  #   `allow-uri-read` attribute)
  # @param reader the reader that receives the included content
  # @param target [String] the SWHID, optionally followed by `;`-separated
  #   qualifiers
  # @param attributes the attributes given on the include directive
  # @return the same +reader+ that was passed in
  # @raise [RuntimeError] when the safe mode is above SERVER or
  #   `allow-uri-read` is not set
  def process(doc, reader, target, attributes)
    # Qualifiers follow the core identifier after the first ';'.
    swhid_core_identifier = target.split(';').first
    # Core identifier layout: swh:<version>:<object type>:<hash>.
    swhid_object_type = swhid_core_identifier.split(':')[2]

    if doc.safe > Asciidoctor::SafeMode::SERVER || !doc.attr?('allow-uri-read')
      raise %('swh:' include cannot be used in safe mode level > SERVER and without attribute 'allow-uri-read')
    end

    # We're already going to throw out anything that is not content object type
    # just to make the later pipelines easier to construct.
    if swhid_object_type != 'cnt'
      warn %(SWHID '#{swhid_core_identifier}' is not of 'cnt' type; ignoring)
      return reader
    end

    content = begin
      fetch_raw_content target
    rescue OpenURI::HTTPError => e
      # On an HTTP failure the warning text itself becomes the included
      # content, so the failure is visible in the rendered output as well.
      warning = %(error while getting '#{swhid_core_identifier}': #{e})
      warn warning
      warning
    end

    # The SWHID doubles as both the file and the path label of the include.
    reader.push_include content, target, target, 1, attributes
    reader
  end

  private

  # Builds the HTTP headers sent with every API request.
  #
  # The bearer token is read once so that the variable that is tested is the
  # same one that is sent. Both `SWH_API_BEARER_TOKEN` and `SWH_BEARER_TOKEN`
  # are accepted because the previous code tested one name and interpolated
  # the other; an empty value is treated as "no token".
  def request_headers
    headers = { 'Accept' => 'application/json' }

    token = ENV['SWH_API_BEARER_TOKEN'] || ENV['SWH_BEARER_TOKEN']
    headers['Authorization'] = "Bearer #{token}" unless token.nil? || token.empty?

    headers
  end

  # Resolves +swhid+ to its object hash, then downloads the raw content.
  #
  # @param swhid [String] the full include target, qualifiers included
  # @return [String] the body returned by the raw-content endpoint
  # @raise [OpenURI::HTTPError] when either request fails at the HTTP level
  def fetch_raw_content(swhid)
    headers = request_headers

    resolve_uri = URI.parse %(#{API_BASE_URL}/resolve/#{swhid}/)
    metadata = OpenURI.open_uri(resolve_uri, headers) { |f| JSON.parse(f.read) }
    object_hash = metadata['object_id']

    raw_uri = URI.parse %(#{API_BASE_URL}/content/sha1_git:#{object_hash}/raw/)
    OpenURI.open_uri(raw_uri, headers, &:read)
  end
end