summaryrefslogtreecommitdiff
path: root/lisp/calendar/iso8601.el
diff options
context:
space:
mode:
authorLars Ingebrigtsen <larsi@gnus.org>2019-07-29 14:22:31 +0200
committerLars Ingebrigtsen <larsi@gnus.org>2019-07-29 14:22:38 +0200
commitfa04c8b87e50a2e2b0d021958f637be8f475d8bc (patch)
tree304b07bc09258970ecf7c4a97fd53e85d4cddfe8 /lisp/calendar/iso8601.el
parent6cfda69d72cb9debefc48d0d95e341d389e7303a (diff)
downloademacs-fa04c8b87e50a2e2b0d021958f637be8f475d8bc.tar.gz
Add an ISO 8601 parsing library
* doc/lispref/os.texi (Time Parsing): Document it. * lisp/calendar/iso8601.el: New file. * test/lisp/calendar/iso8601-tests.el: Test ISO8601 parsing functions.
Diffstat (limited to 'lisp/calendar/iso8601.el')
-rw-r--r--lisp/calendar/iso8601.el370
1 files changed, 370 insertions, 0 deletions
diff --git a/lisp/calendar/iso8601.el b/lisp/calendar/iso8601.el
new file mode 100644
index 00000000000..ab0077ac58d
--- /dev/null
+++ b/lisp/calendar/iso8601.el
@@ -0,0 +1,370 @@
+;;; iso8601.el --- parse ISO 8601 date/time strings -*- lexical-binding:t -*-
+
+;; Copyright (C) 2019 Free Software Foundation, Inc.
+
+;; Keywords: dates
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; ISO8601 times basically look like 1985-04-01T15:23:49... Or so
+;; you'd think. This is what everybody means when they say "ISO8601",
+;; but it's in reality a quite large collection of syntaxes, including
+;; week numbers, ordinal dates, durations and intervals. This package
+;; has functions for parsing them all.
+;;
+;; The interface functions are `iso8601-parse', `iso8601-parse-date',
+;; `iso8601-parse-time', `iso8601-parse-zone',
+;; `iso8601-parse-duration' and `iso8601-parse-interval'. They all
+;; return decoded time objects, except the last one, which returns a
+;; list of three of them.
+;;
+;; (iso8601-parse-interval "P1Y2M10DT2H30M/2008W32T153000-01")
+;; '((0 0 13 24 5 2007 nil nil -3600)
+;; (0 30 15 3 8 2008 nil nil -3600)
+;; (0 30 2 10 2 1 nil nil nil))
+;;
+;;
+;; The standard can be found at:
+;;
+;; http://www.loc.gov/standards/datetime/iso-tc154-wg5_n0038_iso_wd_8601-1_2016-02-16.pdf
+;;
+;; The Wikipedia page on the standard is also informative:
+;;
+;; https://en.wikipedia.org/wiki/ISO_8601
+;;
+;; RFC3339 defines the subset that everybody thinks of as "ISO8601".
+
+;;; Code:
+
+(require 'time-date)
+(require 'cl-lib)
+
+(defun iso8601--concat-regexps (regexps)
+ (mapconcat (lambda (regexp)
+ (concat "\\(?:"
+ (replace-regexp-in-string "(" "(?:" regexp)
+ "\\)"))
+ regexps "\\|"))
+
+(defconst iso8601--year-match
+ "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)")
+(defconst iso8601--full-date-match
+ "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-?\\([0-9][0-9]\\)-?\\([0-9][0-9]\\)")
+(defconst iso8601--without-day-match
+ "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-\\([0-9][0-9]\\)")
+(defconst iso8601--outdated-date-match
+ "--\\([0-9][0-9]\\)-?\\([0-9][0-9]\\)")
+(defconst iso8601--week-date-match
+ "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-?W\\([0-9][0-9]\\)-?\\([0-9]\\)?")
+(defconst iso8601--ordinal-date-match
+ "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-?\\([0-9][0-9][0-9]\\)")
+(defconst iso8601--date-match
+ (iso8601--concat-regexps
+ (list iso8601--year-match
+ iso8601--full-date-match
+ iso8601--without-day-match
+ iso8601--outdated-date-match
+ iso8601--week-date-match
+ iso8601--ordinal-date-match)))
+
+(defconst iso8601--time-match
+ "\\([0-9][0-9]\\):?\\([0-9][0-9]\\)?:?\\([0-9][0-9]\\)?\\.?\\([0-9][0-9][0-9]\\)?")
+
+(defconst iso8601--zone-match
+ "\\(Z\\|\\([-+]\\)\\([0-9][0-9]\\):?\\([0-9][0-9]\\)?\\)")
+
+(defconst iso8601--full-time-match
+ (concat "\\(" (replace-regexp-in-string "(" "(?:" iso8601--time-match) "\\)"
+ "\\(" iso8601--zone-match "\\)?"))
+
+(defconst iso8601--combined-match
+ (concat "\\(" iso8601--date-match "\\)"
+ "\\(?:T\\("
+ (replace-regexp-in-string "(" "(?:" iso8601--time-match)
+ "\\)"
+ "\\(" iso8601--zone-match "\\)?\\)?"))
+
+(defconst iso8601--duration-full-match
+ "P\\([0-9]+Y\\)?\\([0-9]+M\\)?\\([0-9]+D\\)?\\(T\\([0-9]+H\\)?\\([0-9]+M\\)?\\([0-9]+S\\)?\\)?")
+(defconst iso8601--duration-week-match
+ "P\\([0-9]+\\)W")
+(defconst iso8601--duration-combined-match
+ (concat "P" iso8601--combined-match))
+(defconst iso8601--duration-match
+ (iso8601--concat-regexps
+ (list iso8601--duration-full-match
+ iso8601--duration-week-match
+ iso8601--duration-combined-match)))
+
+(defun iso8601-parse (string)
+ "Parse an ISO 8601 date/time string and return a `decoded-time' structure.
+
+The ISO 8601 date/time strings look like \"2008-03-02T13:47:30\",
+but shorter, incomplete strings like \"2008-03-02\" are valid, as
+well as variants like \"2008W32\" (week number) and
+\"2008-234\" (ordinal day number)."
+ (if (not (iso8601-valid-p string))
+ (signal 'wrong-type-argument string)
+ (let* ((date-string (match-string 1 string))
+ (time-string (match-string 2 string))
+ (zone-string (match-string 3 string))
+ (date (iso8601-parse-date date-string)))
+ ;; The time portion is optional.
+ (when time-string
+ (let ((time (iso8601-parse-time time-string)))
+ (setf (decoded-time-hour date) (decoded-time-hour time))
+ (setf (decoded-time-minute date) (decoded-time-minute time))
+ (setf (decoded-time-second date) (decoded-time-second time))))
+ ;; The time zone is optional.
+ (when zone-string
+ (setf (decoded-time-zone date)
+ ;; The time zone in decoded times are in seconds.
+ (* (iso8601-parse-zone zone-string) 60)))
+ date)))
+
+(defun iso8601-parse-date (string)
+ "Parse STRING (which should be on ISO 8601 format) and return a time value."
+ (cond
+ ;; Just a year: [-+]YYYY.
+ ((iso8601--match iso8601--year-match string)
+ (iso8601--decoded-time
+ :year (iso8601--adjust-year (match-string 1 string)
+ (match-string 2 string))))
+ ;; Calendar dates: YYYY-MM-DD and variants.
+ ((iso8601--match iso8601--full-date-match string)
+ (iso8601--decoded-time
+ :year (iso8601--adjust-year (match-string 1 string)
+ (match-string 2 string))
+ :month (match-string 3 string)
+ :day (match-string 4 string)))
+ ;; Calendar date without day: YYYY-MM.
+ ((iso8601--match iso8601--without-day-match string)
+ (iso8601--decoded-time
+ :year (iso8601--adjust-year (match-string 1 string)
+ (match-string 2 string))
+ :month (match-string 3 string)))
+ ;; Outdated date without year: --MM-DD
+ ((iso8601--match iso8601--outdated-date-match string)
+ (iso8601--decoded-time
+ :month (match-string 1 string)
+ :day (match-string 2 string)))
+ ;; Week dates: YYYY-Www-D
+ ((iso8601--match iso8601--week-date-match string)
+ (let* ((year (iso8601--adjust-year (match-string 1 string)
+ (match-string 2 string)))
+ (week (string-to-number (match-string 3 string)))
+ (day-of-week (and (match-string 4 string)
+ (string-to-number (match-string 4 string))))
+ (jan-start (decoded-time-weekday
+ (decode-time
+ (iso8601--encode-time
+ (iso8601--decoded-time :year year
+ :month 1
+ :day 4)))))
+ (correction (+ (if (zerop jan-start) 7 jan-start)
+ 3))
+ (ordinal (+ (* week 7) (or day-of-week 0) (- correction))))
+ (cond
+ ;; Monday 29 December 2008 is written "2009-W01-1".
+ ((< ordinal 1)
+ (setq year (1- year)
+ ordinal (+ ordinal (if (date-leap-year-p year)
+ 366 365))))
+ ;; Sunday 3 January 2010 is written "2009-W53-7".
+ ((> ordinal (if (date-leap-year-p year)
+ 366 365))
+ (setq ordinal (- ordinal (if (date-leap-year-p year)
+ 366 365))
+ year (1+ year))))
+ (let ((month-day (date-ordinal-to-time year ordinal)))
+ (iso8601--decoded-time :year year
+ :month (decoded-time-month month-day)
+ :day (decoded-time-day month-day)))))
+ ;; Ordinal dates: YYYY-DDD
+ ((iso8601--match iso8601--ordinal-date-match string)
+ (let* ((year (iso8601--adjust-year (match-string 1 string)
+ (match-string 2 string)))
+ (ordinal (string-to-number (match-string 3 string)))
+ (month-day (date-ordinal-to-time year ordinal)))
+ (iso8601--decoded-time :year year
+ :month (decoded-time-month month-day)
+ :day (decoded-time-day month-day))))
+ (t
+ (signal 'wrong-type-argument string))))
+
+(defun iso8601--adjust-year (sign year)
+ (save-match-data
+ (let ((year (if (stringp year)
+ (string-to-number year)
+ year)))
+ (if (string= sign "-")
+ ;; -0001 is 2 BCE.
+ (1- (- year))
+ year))))
+
+(defun iso8601-parse-time (string)
+ "Parse STRING, which should be an ISO 8601 time string, and return a time value."
+ (if (not (iso8601--match iso8601--full-time-match string))
+ (signal 'wrong-type-argument string)
+ (let ((time (match-string 1 string))
+ (zone (match-string 2 string)))
+ (if (not (iso8601--match iso8601--time-match time))
+ (signal 'wrong-type-argument string)
+ (let ((hour (string-to-number (match-string 1 time)))
+ (minute (and (match-string 2 time)
+ (string-to-number (match-string 2 time))))
+ (second (and (match-string 3 time)
+ (string-to-number (match-string 3 time))))
+ ;; Hm...
+ (_millisecond (and (match-string 4 time)
+ (string-to-number (match-string 4 time)))))
+ (iso8601--decoded-time :hour hour
+ :minute (or minute 0)
+ :second (or second 0)
+ :zone (and zone
+ (* 60 (iso8601-parse-zone
+ zone)))))))))
+
+(defun iso8601-parse-zone (string)
+ "Parse STRING, which should be an ISO 8601 time zone.
+Return the number of minutes."
+ (if (not (iso8601--match iso8601--zone-match string))
+ (signal 'wrong-type-argument string)
+ (if (match-string 2 string)
+ ;; HH:MM-ish.
+ (let ((hour (string-to-number (match-string 3 string)))
+ (minute (and (match-string 4 string)
+ (string-to-number (match-string 4 string)))))
+ (* (if (equal (match-string 2 string) "-")
+ -1
+ 1)
+ (+ (* hour 60)
+ (or minute 0))))
+ ;; "Z".
+ 0)))
+
+(defun iso8601-valid-p (string)
+ "Say whether STRING is a valid ISO 8601 representation."
+ (iso8601--match iso8601--combined-match string))
+
+(defun iso8601-parse-duration (string)
+ "Parse ISO 8601 durations on the form P3Y6M4DT12H30M5S."
+ (cond
+ ((and (iso8601--match iso8601--duration-full-match string)
+ ;; Just a "P" isn't valid; there has to be at least one
+ ;; element, like P1M.
+ (> (length (match-string 0 string)) 2))
+ (iso8601--decoded-time :year (or (match-string 1 string) 0)
+ :month (or (match-string 2 string) 0)
+ :day (or (match-string 3 string) 0)
+ :hour (or (match-string 5 string) 0)
+ :minute (or (match-string 6 string) 0)
+ :second (or (match-string 7 string) 0)))
+ ;; PnW: Weeks.
+ ((iso8601--match iso8601--duration-week-match string)
+ (let ((weeks (string-to-number (match-string 1 string))))
+ ;; Does this make sense? Hm...
+ (iso8601--decoded-time :day (* weeks 7))))
+ ;; P<date>T<time>
+ ((iso8601--match iso8601--duration-combined-match string)
+ (iso8601-parse (substring string 1)))
+ (t
+ (signal 'wrong-type-argument string))))
+
+(defun iso8601-parse-interval (string)
+ "Parse ISO 8601 intervals."
+ (let ((bits (split-string string "/"))
+ start end duration)
+ (if (not (= (length bits) 2))
+ (signal 'wrong-type-argument string)
+ ;; The intervals may be an explicit start/end times, or either a
+ ;; start or an end, and an accompanying duration.
+ (cond
+ ((and (string-match "\\`P" (car bits))
+ (iso8601-valid-p (cadr bits)))
+ (setq duration (iso8601-parse-duration (car bits))
+ end (iso8601-parse (cadr bits))))
+ ((and (string-match "\\`P" (cadr bits))
+ (iso8601-valid-p (car bits)))
+ (setq duration (iso8601-parse-duration (cadr bits))
+ start (iso8601-parse (car bits))))
+ ((and (iso8601-valid-p (car bits))
+ (iso8601-valid-p (cadr bits)))
+ (setq start (iso8601-parse (car bits))
+ end (iso8601-parse (cadr bits))))
+ (t
+ (signal 'wrong-type-argument string))))
+ (unless end
+ (setq end (decoded-time-add start duration)))
+ (unless start
+ (setq start (decoded-time-add end
+ ;; We negate the duration so that
+ ;; we get a subtraction.
+ (mapcar (lambda (elem)
+ (if (numberp elem)
+ (- elem)
+ elem))
+ duration))))
+ (list start end
+ (or duration
+ (decode-time (time-subtract (iso8601--encode-time end)
+ (iso8601--encode-time start))
+ (or (decoded-time-zone end) 0))))))
+
+(defun iso8601--match (regexp string)
+ (string-match (concat "\\`" regexp "\\'") string))
+
+(defun iso8601--value (elem &optional default)
+ (if (stringp elem)
+ (string-to-number elem)
+ (or elem default)))
+
+(cl-defun iso8601--decoded-time (&key second minute hour
+ day month year
+ dst zone)
+ (list (iso8601--value second)
+ (iso8601--value minute)
+ (iso8601--value hour)
+ (iso8601--value day)
+ (iso8601--value month)
+ (iso8601--value year)
+ nil
+ dst
+ zone))
+
+(defun iso8601--encode-time (time)
+ "Like `encode-time', but fill in nil values in TIME."
+ (setq time (copy-sequence time))
+ (unless (decoded-time-second time)
+ (setf (decoded-time-second time) 0))
+ (unless (decoded-time-minute time)
+ (setf (decoded-time-minute time) 0))
+ (unless (decoded-time-hour time)
+ (setf (decoded-time-hour time) 0))
+
+ (unless (decoded-time-day time)
+ (setf (decoded-time-day time) 1))
+ (unless (decoded-time-month time)
+ (setf (decoded-time-month time) 1))
+ (unless (decoded-time-year time)
+ (setf (decoded-time-year time) 0))
+ (encode-time time))
+
+(provide 'iso8601)
+
+;;; iso8601.el ends here