#!/usr/bin/env oil

# Create an interface for selecting Unicode characters with rofi and copy the chosen one into the clipboard.
# The characters are extracted from the Unicode Character Database (https://www.unicode.org/ucd/).
# It requires a tag name or an XPath similar to the ones accepted by Python's `xml.etree.ElementTree.Element.findall`.
# See the following link for more information.
# (https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.Element.findall)

# Dependencies:
# * Oil shell
# * curl
# * python (at least v3.7)
# * rofi
# * awk

# Set to use Oil features and strictness.
shopt --set strict:all

# The tag name or XPath to search for; it is the first command-line argument.
var QUERY = ARGV[0]
var UCD_VERSION = "13.0.0"
var UCD_XML_URL = "https://www.unicode.org/Public/$UCD_VERSION/ucdxml/ucd.nounihan.grouped.zip"

# The XDG Base Directory specification names the cache root XDG_CACHE_HOME.
# XDG_CACHE_DIR is not part of that specification, but it is still honoured first (and used verbatim)
# so that setups relying on the previous behaviour keep their cache location.
var CACHE_HOME = ${XDG_CACHE_HOME:-$HOME/.cache}
var CACHE = ${XDG_CACHE_DIR:-$CACHE_HOME/unicode-character-database}

mkdir -p "$CACHE"
if test ! -f "$CACHE/ucd.zip" {
  # Download to a temporary name and rename only on success.
  # Without --fail an HTTP error page would be stored as ucd.zip and never refreshed.
  curl "$UCD_XML_URL" --fail --location --silent --show-error --output "$CACHE/ucd.zip.part" && mv "$CACHE/ucd.zip.part" "$CACHE/ucd.zip"
}

# The remaining part is written in Python because I want to use the UCD with no Unihan data and the grouped variation.
# Compared to the other variations, it is only ~6MB compared to ~55MB for the flat variation.
# Also, it requires more conditional handling than a simple shell script at this point.
# I could've made this script entirely in Python but I want to see what Oil shell is capable of.
#
# NOTE(review): the original text is cut off right after "python <". The here-document opener below is
# restored from the CODE terminator at the end of the file. Anything else that followed on that line
# (presumably a pipeline into rofi/awk/the clipboard) and the Python preamble that defines `root` are
# not visible here and must be restored from version control.
python <<CODE

# Print every 'char' element and recurse into each 'group' element.
def print_char(element, inherited=None):
  """Print one line, the character followed by its name, for each code point under element.

  element is an ElementTree element from the UCD XML namespace:
    * a 'char' element prints its own code point(s);
    * a 'group' element is walked recursively;
    * any other tag is ignored.

  inherited is an optional mapping holding the 'na' / 'na1' defaults of the
  enclosing group. It is filled in by the recursion; callers normally omit it.

  Differences from a plain cp lookup (see UAX 42):
    * a 'char' may describe a range through 'first-cp' / 'last-cp' instead of
      'cp'; every code point of the range is printed instead of failing on the
      missing 'cp' attribute;
    * the '#' placeholder in a name is replaced by the code point in uppercase
      hexadecimal (at least four digits);
    * name attributes missing on a 'char' fall back to the ones of its group.

  NOTE: this code is embedded in a shell here-document, so keep it free of
  dollar signs, backticks and backslashes.
  """
  namespace = '{http://www.unicode.org/ns/2003/ucd/1.0}'
  inherited = inherited or {}
  # Attributes written on the element itself win over the group defaults.
  names = {key: element.get(key, inherited.get(key)) for key in ('na', 'na1')}

  if element.tag == namespace + 'group':
    for child in element:
      print_char(child, names)
    return
  if element.tag != namespace + 'char':
    return

  single = element.get('cp')
  first = single or element.get('first-cp')
  last = single or element.get('last-cp')
  if not (first and last):
    # Neither a single code point nor a complete range: nothing to print.
    return

  # Prefer the current name and fall back to the Unicode 1 name when it is empty or absent.
  alias = names['na'] or names['na1']
  for codepoint in range(int(first, 16), int(last, 16) + 1):
    label = alias.replace('#', format(codepoint, '04X')) if alias else alias
    print("{code} {alias}".format(code=chr(codepoint), alias=label))

# Look up every element matching the QUERY placeholder (presumably substituted
# by the shell before Python runs; confirm against the here-document opener)
# and print each match.
for node in root.findall('${QUERY}'):
  print_char(node)
CODE