# Atom feed generator in GNU AWK
# Author: Philip Kaludercic
# Version: 1.0.0
# Time-stamp: <2025-06-14 17:25:21 phi>
# Copyright (C) 2018-2021
#
# This file is in the public domain, to the extent possible under law,
# published under the CC0 1.0 Universal license.
#
# For a full copy of the CC0 license see
# https://creativecommons.org/publicdomain/zero/1.0/legalcode

# The script parses the standard input stream, and interprets every
# tab delimited line as a post.  The first column is the title of an
# entry, the second a unix timestamp of when it was created.  In case a
# fourth column is given, the third column will be interpreted as a
# path to the actual website.  Otherwise, the third column will be used
# for the content of the post itself.

# The first line's timestamp is assumed to be the newest article, and
# will thus be used to set the global "updated" attribute.  To ensure
# this is the case, sort the input by the second column, numerically
# and in descending order, using the tab as field separator:
#
# $ ... | sort -t "$(printf '\t')" -k2,2nr | awk -f atom.awk > feed.atom

# The following environmental variables are used for metadata:
#
# - TITLE: name of the feed
# - NAME: the owners' name
# - EMAIL: the owners' email (optional)
# - HOMEPAGE: the owners' homepage
# - FPATH: path to atom file (on the server)
# - RIGHTS: how content is published

# Written in accordance to https://validator.w3.org/feed/docs/atom.html

BEGIN {
    FS = "\t"
    # The trailing "Z" declares UTC, so every strftime() call using this
    # format must pass a true utc-flag as its third argument.
    RFC3339 = "%Y-%m-%dT%H:%M:%SZ"

    # Base URL of the site; a "/" is appended when it is missing.
    site = ENVIRON["HOMEPAGE"]
    if (site !~ /\/$/)
        site = site "/"

    # XML boilerplate
    print "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
    print "<feed xmlns=\"http://www.w3.org/2005/Atom\">"

    # Feed metadata.  Compare against "" rather than testing truthiness,
    # so that a value such as "0" is not mistaken for "unset".  The
    # explicit "%s" keeps "%" in the data from being read as a format.
    if (ENVIRON["TITLE"] != "")
        printf "%s", xml("title", ENVIRON["TITLE"])
    if (ENVIRON["RIGHTS"] != "")
        printf "%s", xml("rights", ENVIRON["RIGHTS"])
    printf "%s", xml("generator", "pkal's atom.awk")
    printf "%s", xml("id", site)
    printf "%s", xml("author", \
        (ENVIRON["NAME"]  != "" ? xml("name",  ENVIRON["NAME"])  : "") \
        (ENVIRON["EMAIL"] != "" ? xml("email", ENVIRON["EMAIL"]) : "") \
        xml("uri", ENVIRON["HOMEPAGE"]))

    # Link to this feed
    link["rel"] = "self"
    link["href"] = site_url(ENVIRON["FPATH"])
    printf "%s", xml("link", "", link)

    # Prepare link for next entries ("type" is never set in this file,
    # so the delete is a harmless no-op kept from the original).
    delete link["type"]
    link["rel"] = "alternate"

    # Extract domain from $HOMEPAGE.  Digits and hyphens are accepted so
    # that hosts like "my-site42.org" are no longer cut off at "my".
    # Entry ids change only for home pages whose host was truncated by
    # the previous letters-and-dots-only pattern.
    match(ENVIRON["HOMEPAGE"], /(https?:\/\/)?([[:alnum:].-]+)(\/.*)?/, data)
    domain = data[2]
}

# Join PATH onto the site base URL without producing a double slash.
function site_url(path) {
    sub(/^\/+/, "", path)
    return site path
}

# Escape the characters that are special in XML text.  When QUOT is
# true the double quote is escaped as well, for use in attribute values.
# https://www.w3.org/TR/xml/#dt-escape
function xml_escape(s, quot) {
    gsub(/&/, "\\&amp;", s)     # must come first: later entities add "&"
    gsub(/>/, "\\&gt;", s)
    gsub(/</, "\\&lt;", s)
    if (quot)
        gsub(/"/, "\\&quot;", s)
    return s
}

# Make S safe to place inside a CDATA section by splitting every "]]>"
# across two sections, since that sequence would otherwise end it.
function cdata_safe(s) {
    gsub(/\]\]>/, "]]]]><![CDATA[>", s)
    return s
}

# Quote S as a single word for /bin/sh.
function shell_quote(s) {
    gsub(/'/, "'\\''", s)       # ' becomes '\'' (close, escaped ', reopen)
    return "'" s "'"
}

# Function to generate compact xml tags.
#
#   tag      element name
#   content  element body; "" yields a self-closing tag.  Content that
#            starts with "<" is taken to be already-generated XML and is
#            inserted verbatim on its own lines, anything else is
#            escaped as text.
#   attr     optional array of attribute name -> value
#
# Returns the element followed by a newline.  STR and KEY are locals.
function xml(tag, content, attr,    str, key) {
    str = "<" tag
    for (key in attr)
        str = str " " key "=\"" xml_escape(attr[key], 1) "\""

    if (content == "")
        str = str " />"
    else if (content ~ /^</)
        str = str ">\n" content "</" tag ">"
    else
        str = str ">" xml_escape(content) "</" tag ">"

    return str "\n"
}

# Skip empty lines.  This rule comes first so that an empty leading line
# is never used as the source of the feed-level "updated" stamp.
$0 == "" { next }

!init {
    # The first line is the newest post (assumption) therefore, before
    # the first entry is printed, we add an "updated" tag with the date
    # of the first entry.
    printf "%s", xml("updated", strftime(RFC3339, $2, 1))
    init = 1
}

{
    print "<entry>"
    printf "%s", xml("title", $1)
    printf "%s", xml("updated", strftime(RFC3339, $2, 1))

    # The date inside the tag: id is deliberately still formatted in
    # local time, so that ids of already published entries stay stable.
    if ($4 == "") {
        # Entry with literal content
        printf "%s", xml("id", "tag:" domain "," strftime("%F", $2) ":/" $2)
        print "<content type=\"html\"><![CDATA[" cdata_safe($3) "]]></content>"
    } else {
        # Entry with file
        link["href"] = site_url($3)
        printf "%s", xml("id", "tag:" domain "," strftime("%F", $2) ":/" $3)
        printf "%s", xml("link", "", link)
        print "<content type=\"html\"><![CDATA["
        # NOTE(review): the command that originally produced the content
        # from column four could not be recovered; copying the file
        # verbatim is an assumption -- confirm and substitute the
        # intended filter if there was one.
        cmd = "cat -- " shell_quote($4)
        while ((cmd | getline line) > 0)
            print cdata_safe(line)
        close(cmd)
        print "]]></content>"
    }

    print "</entry>"
}

END {
    # Atom requires a feed-level "updated" element; with no input lines
    # there was no first entry to take it from, so use the current time.
    if (!init)
        printf "%s", xml("updated", strftime(RFC3339, systime(), 1))
    print "</feed>"
}