Add SWHID include processor

This commit is contained in:
Gabriel Arazas 2023-03-11 16:34:12 +08:00
parent 7d6f345122
commit a1800d898e
No known key found for this signature in database
GPG Key ID: ADE0C41DAB221FCC
3 changed files with 96 additions and 0 deletions

View File

@ -4,6 +4,7 @@ require 'asciidoctor'
require 'asciidoctor/extensions'
require_relative 'man-inline-macro/extension'
require_relative 'swhid-inline-macro/extension'
require_relative 'swhid-include-processor/extension'
require_relative 'github-link-inline-macro/extension'
require_relative 'github-raw-content-include-processor/extension'
require_relative 'gitlab-link-inline-macro/extension'
@ -13,6 +14,7 @@ Asciidoctor::Extensions.register do
inline_macro ManInlineMacro
inline_macro SWHInlineMacro
include_processor SWHIDIncludeProcessor
inline_macro GitHubLinkInlineMacro
include_processor GitHubRawIncludeProcessor

View File

@ -0,0 +1,43 @@
= SWHID include processor
:toc:
This is an include processor extension for easily including content referenced by SWHIDs; only SWHIDs of the `cnt` (content) object type are supported.
== Synopsis
[source, asciidoc]
----
\include::$SWHID[]
----
Where `$SWHID` is a link:https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html[SWHID].
SWHIDs with qualifiers are also accepted.
Take note this include processor will only give the raw content of SWHIDs with the `cnt` object type.
Anything else will be skipped and a warning will be logged instead.
== Extra notes
[source, asciidoc]
----
= doctitle
:swhid-gpl3: swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2
\include::{swhid-gpl3}[]
----
This include processor also respects the safe mode setting.
This means in order to permit including by SWHID, you have to permit link:https://docs.asciidoctor.org/asciidoc/latest/directives/include-uri/[includes by URIs].
Lastly, this include processor uses the Software Heritage API, which comes with its own limitations such as rate limiting.
== Example usage
- SWHID with a bare core identifier: `include::swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2[]`.
- SWHID with full contextual information: `include::swh:1:cnt:4c6ad635164b25b9bc2ebe17d2c3b7c0835f6035;origin=https://github.com/NixOS/nixpkgs;visit=swh:1:snp:6ea7d28dfd4789609e0be2b64179fc9c12931beb;anchor=swh:1:rev:7f5639fa3b68054ca0b062866dc62b22c3f11505;path=/README.md[]`.

View File

@ -0,0 +1,51 @@
# frozen_string_literal: true
require 'json'
require 'open-uri'
require 'uri'
# Include processor that resolves `include::swh:...[]` targets through the
# Software Heritage web API and pushes the raw content of the referenced
# object onto the reader.
#
# Only SWHIDs whose object type is `cnt` are fetched; any other object type is
# skipped with a warning.
class SWHIDIncludeProcessor < Asciidoctor::Extensions::IncludeProcessor
  # Version segment used in the Software Heritage API paths.
  API_VERSION = '1'

  # @param target [String] the include target
  # @return [Boolean] whether the target looks like a SWHID
  def handles?(target)
    target.start_with? 'swh:'
  end

  # Fetches the content referenced by the SWHID in `target` and includes it.
  #
  # @param doc the document being processed; its safe mode and the
  #   `allow-uri-read` attribute gate whether the include is permitted
  # @param reader the reader that receives the included content
  # @param target [String] the SWHID, optionally followed by `;`-separated qualifiers
  # @param attributes the attributes given on the include directive
  # @return the reader that was passed in
  # @raise [RuntimeError] if the safe mode is above SERVER or `allow-uri-read` is unset
  def process(doc, reader, target, attributes)
    # Qualifiers follow the core identifier after the first ';'.
    swhid_core_identifier = target.split(';').first
    # Core identifier layout is `swh:<version>:<object type>:<hash>`.
    swhid_object_type = swhid_core_identifier.split(':').at(2)

    if doc.safe > Asciidoctor::SafeMode::SERVER || !doc.attr?('allow-uri-read')
      raise %('swh:' include cannot be used in safe mode level > SERVER and without attribute 'allow-uri-read')
    end

    # We're already going to throw out anything that is not content object type
    # just to make the later pipelines easier to construct.
    if swhid_object_type != 'cnt'
      warn %(SWHID '#{swhid_core_identifier}' is not of 'cnt' type; ignoring)
      return reader
    end

    content = fetch_content(target, swhid_core_identifier)
    reader.push_include content, target, target, 1, attributes
    reader
  end

  private

  # Resolves the SWHID to its object hash, then downloads the raw content.
  # On an HTTP error the error message is logged and returned in place of the
  # content, so it ends up in the included text. Other exceptions are not
  # rescued here.
  def fetch_content(target, swhid_core_identifier)
    headers = request_headers

    resolve_uri = URI.parse %(https://archive.softwareheritage.org/api/#{API_VERSION}/resolve/#{target}/)
    metadata = OpenURI.open_uri(resolve_uri, headers) { |f| JSON.parse(f.read) }
    object_hash = metadata['object_id']

    raw_uri = URI.parse %(https://archive.softwareheritage.org/api/#{API_VERSION}/content/sha1_git:#{object_hash}/raw/)
    OpenURI.open_uri(raw_uri, headers, &:read)
  rescue OpenURI::HTTPError => e
    warning = %(error while getting '#{swhid_core_identifier}': #{e})
    warn warning
    warning
  end

  # Builds the HTTP headers sent with both API requests.
  #
  # The bearer token is read once so the presence check and the header value
  # always refer to the same variable. `SWH_API_BEARER_TOKEN` takes precedence;
  # `SWH_BEARER_TOKEN` is accepted as a fallback because the earlier code
  # checked that name before reading the other one.
  def request_headers
    headers = { 'Accept' => 'application/json' }
    token = ENV['SWH_API_BEARER_TOKEN'] || ENV['SWH_BEARER_TOKEN']
    headers['Authorization'] = "Bearer #{token}" if token
    headers
  end
end