[Checkins] SVN: zc.iso8601/branches/dev/src/zc/iso8601/ free bonus:
add datetime() function that does not use timezone offset info
Fred L. Drake, Jr.
fdrake at gmail.com
Mon May 12 18:23:21 EDT 2008
Log message for revision 86669:
free bonus: add datetime() function that does not use timezone offset info
Changed:
U zc.iso8601/branches/dev/src/zc/iso8601/README.txt
U zc.iso8601/branches/dev/src/zc/iso8601/parse.py
-=-
Modified: zc.iso8601/branches/dev/src/zc/iso8601/README.txt
===================================================================
--- zc.iso8601/branches/dev/src/zc/iso8601/README.txt 2008-05-12 21:40:38 UTC (rev 86668)
+++ zc.iso8601/branches/dev/src/zc/iso8601/README.txt 2008-05-12 22:23:20 UTC (rev 86669)
@@ -5,7 +5,167 @@
This package collects together functions supporting the data formats described
in ISO 8601.
+For the parsing functions, both the "verbose" and "short" forms of ISO 8601
+times are accepted. The verbose form includes hyphens in the date and colons
+in the time, and the short form omits both. For each function, we'll start
+with verbose examples, and will then repeat all of the examples in short form.
+The verbose form is generally preferred in practice since it is substantially
+more readable for humans.
+
+Parsing date/time values
+---------------------------------------------------
+
+There is a function that parses text and returns date/time values:
+
+ >>> from zc.iso8601.parse import datetime
+
+This function does not support or accept values that include time zone
+information:
+
+ >>> datetime(u"2006-12-02T23:40:42Z")
+ Traceback (most recent call last):
+ ValueError: could not parse ISO 8601 datetime: u'2006-12-02T23:40:42Z'
+
+ >>> datetime(u"2006-12-02T23:40:42+00:00")
+ Traceback (most recent call last):
+ ValueError: could not parse ISO 8601 datetime: u'2006-12-02T23:40:42+00:00'
+
+ >>> datetime(u"2006-12-02T23:40:42-00:00")
+ Traceback (most recent call last):
+ ValueError: could not parse ISO 8601 datetime: u'2006-12-02T23:40:42-00:00'
+
+ >>> datetime(u"2006-12-02T23:40:42-01:00")
+ Traceback (most recent call last):
+ ValueError: could not parse ISO 8601 datetime: u'2006-12-02T23:40:42-01:00'
+
+For times that don't include zone offsets, the results are as expected:
+
+ >>> datetime(u"2006-12-02T23:40:42")
+ datetime.datetime(2006, 12, 2, 23, 40, 42)
+
+The seconds field, as shown above, is optional. If omitted, the seconds field
+of the time will be zero:
+
+ >>> datetime(u"2006-12-02T23:40")
+ datetime.datetime(2006, 12, 2, 23, 40)
+
+When the seconds are specified, fractional seconds are supported:
+
+ >>> datetime(u"2008-05-12T14:30:32.000")
+ datetime.datetime(2008, 5, 12, 14, 30, 32)
+
+ >>> datetime(u"2008-05-12T14:30:32.5")
+ datetime.datetime(2008, 5, 12, 14, 30, 32, 500000)
+
+ >>> datetime(u"2008-05-12T14:30:32.01")
+ datetime.datetime(2008, 5, 12, 14, 30, 32, 10000)
+
+ >>> datetime(u"2008-05-12T14:30:32.000001")
+ datetime.datetime(2008, 5, 12, 14, 30, 32, 1)
+
+Fractional seconds smaller than 1 microsecond are simply thrown away:
+
+ >>> datetime(u"2008-05-12T14:30:32.00000099999")
+ datetime.datetime(2008, 5, 12, 14, 30, 32)
+
+If a space is used instead of the "T" separator, the input is still
+interpreted properly:
+
+ >>> datetime(u"2006-12-02 23:40:42")
+ datetime.datetime(2006, 12, 2, 23, 40, 42)
+
+ >>> datetime(u"2008-05-12 14:30:32.01")
+ datetime.datetime(2008, 5, 12, 14, 30, 32, 10000)
+
+Surrounding whitespace is ignored, and multiple whitespace characters between
+the date and time fields are collapsed and treated as if the extra whitespace
+characters were not present:
+
+ >>> datetime(u"""
+ ... 2006-12-02
+ ... \t\r\f
+ ... 23:40:42
+ ... """)
+ datetime.datetime(2006, 12, 2, 23, 40, 42)
+
+ >>> datetime(u"""
+ ... 2008-05-12
+ ... \t\r\f
+ ... 14:30:32.01
+ ... """)
+ datetime.datetime(2008, 5, 12, 14, 30, 32, 10000)
+
+Other whitespace is considered an error:
+
+ >>> datetime(u" 2006 -12-02 23:40:42 ")
+ Traceback (most recent call last):
+ ValueError: could not parse ISO 8601 datetime: u'2006 -12-02 23:40:42'
+
+Now, let's look at how the same examples do in the short form:
+
+ >>> datetime(u"20061202T234042Z")
+ Traceback (most recent call last):
+ ValueError: could not parse ISO 8601 datetime: u'20061202T234042Z'
+
+ >>> datetime(u"20061202T234042+0000")
+ Traceback (most recent call last):
+ ValueError: could not parse ISO 8601 datetime: u'20061202T234042+0000'
+
+ >>> datetime(u"20061202T234042-0000")
+ Traceback (most recent call last):
+ ValueError: could not parse ISO 8601 datetime: u'20061202T234042-0000'
+
+ >>> datetime(u"20061202T234042-0100")
+ Traceback (most recent call last):
+ ValueError: could not parse ISO 8601 datetime: u'20061202T234042-0100'
+
+ >>> datetime(u"20061202T234042")
+ datetime.datetime(2006, 12, 2, 23, 40, 42)
+
+ >>> datetime(u"20061202T2340")
+ datetime.datetime(2006, 12, 2, 23, 40)
+
+ >>> datetime(u"20080512T143032.000")
+ datetime.datetime(2008, 5, 12, 14, 30, 32)
+
+ >>> datetime(u"20080512T143032.5")
+ datetime.datetime(2008, 5, 12, 14, 30, 32, 500000)
+
+ >>> datetime(u"20080512T143032.01")
+ datetime.datetime(2008, 5, 12, 14, 30, 32, 10000)
+
+ >>> datetime(u"20080512T143032.000001")
+ datetime.datetime(2008, 5, 12, 14, 30, 32, 1)
+
+ >>> datetime(u"20080512T143032.00000099999")
+ datetime.datetime(2008, 5, 12, 14, 30, 32)
+
+ >>> datetime(u"20061202 234042")
+ datetime.datetime(2006, 12, 2, 23, 40, 42)
+
+ >>> datetime(u"20080512 143032.01")
+ datetime.datetime(2008, 5, 12, 14, 30, 32, 10000)
+
+ >>> datetime(u"""
+ ... 20061202
+ ... \t\r\f
+ ... 234042
+ ... """)
+ datetime.datetime(2006, 12, 2, 23, 40, 42)
+
+ >>> datetime(u"""
+ ... 20080512
+ ... \t\r\f
+ ... 143032.01
+ ... """)
+ datetime.datetime(2008, 5, 12, 14, 30, 32, 10000)
+
+ >>> datetime(u" 2006 1202 234042 ")
+ Traceback (most recent call last):
+ ValueError: could not parse ISO 8601 datetime: u'2006 1202 234042'
+
+
Parsing date/time values with time zone information
---------------------------------------------------
@@ -14,12 +174,6 @@
>>> from zc.iso8601.parse import datetimetz
-Both the "verbose" and "short" forms of ISO 8601 times are accepted. The
-verbose form includes hyphens in the date and colons in the time, and the
-short form omits them. We'll start with verbose examples, and will then
-repeat all of the examples in short form. The verbose form is generally
-preferred in practice since it is substantially more readable for humans.
-
Times in UTC may be encoded using either the "Z" notation or "+00:00" (or
"-00:00"). Let's try a few examples:
Modified: zc.iso8601/branches/dev/src/zc/iso8601/parse.py
===================================================================
--- zc.iso8601/branches/dev/src/zc/iso8601/parse.py 2008-05-12 21:40:38 UTC (rev 86668)
+++ zc.iso8601/branches/dev/src/zc/iso8601/parse.py 2008-05-12 22:23:20 UTC (rev 86669)
@@ -17,12 +17,13 @@
"""
__docformat__ = "reStructuredText"
-import datetime
+# We have to use import-as since we mask the module name.
+import datetime as _datetime
import pytz
import re
-_tz_re = "(?:Z|(?P<tzdir>[-+])(?P<tzhour>\d\d):(?P<tzmin>\d\d))"
+_tz_re = "(?:Z|(?P<tzdir>[-+])(?P<tzhour>\d\d):(?P<tzmin>\d\d))$"
# "Verbose" ISO 8601, with hyphens and colons:
_datetime_re1 = """\
@@ -33,8 +34,11 @@
(?P<hour>\d\d)
:(?P<minute>\d\d)
(?::(?P<second>\d\d(?:\.\d+)?))?
- """ + _tz_re
+ """
+_datetimetz_re1 = _datetime_re1 + _tz_re
+_datetime_re1 += "$"
+
# "Compact" ISO 8601, without hyphens and colons:
_datetime_re2 = """\
(?P<year>\d\d\d\d)
@@ -44,24 +48,68 @@
(?P<hour>\d\d)
(?P<minute>\d\d)
(?P<second>\d\d(?:\.\d+)?)?
- """ + _tz_re.replace("):(", "):?(")
+ """
-_datetime_rx1 = re.compile(_datetime_re1, re.IGNORECASE | re.VERBOSE)
-_datetime_rx2 = re.compile(_datetime_re2, re.IGNORECASE | re.VERBOSE)
+_datetimetz_re2 = _datetime_re2 + _tz_re.replace("):(", "):?(")
+_datetime_re2 += "$"
+_datetime_rx1 = re.compile(_datetime_re1, re.VERBOSE)
+_datetime_rx2 = re.compile(_datetime_re2, re.VERBOSE)
+_datetime_rxs = [_datetime_rx1, _datetime_rx2]
+_datetimetz_rx1 = re.compile(_datetimetz_re1, re.VERBOSE)
+_datetimetz_rx2 = re.compile(_datetimetz_re2, re.VERBOSE)
+_datetimetz_rxs = [_datetimetz_rx1, _datetimetz_rx2]
+
+
+def datetime(string):
+ """Parse an ISO 8601 date without timezone information.
+
+ Returns a Python datetime object.
+
+ """
+ m = _find_match(string, _datetime_rxs)
+ parts = _get_datetime_parts(m)
+ return _datetime.datetime(*parts)
+
+
def datetimetz(string):
"""Parse an ISO 8601 date including timezone information.
Returns a Python datetime object.
"""
+ m = _find_match(string, _datetimetz_rxs)
+ parts = _get_datetime_parts(m)
+ year, month, day, hour, minute, second, microsecond = parts
+
+ if m.group("tzhour"):
+ tzhour, tzmin = map(int, m.group("tzhour", "tzmin"))
+ offset = (tzhour * 60) + tzmin
+ if m.group("tzdir") == "-":
+ offset *= -1
+ if offset:
+ tzinfo = pytz.FixedOffset(offset)
+ dt = _datetime.datetime(
+ year, month, day, hour, minute, second, microsecond,
+ tzinfo=tzinfo)
+ return dt.astimezone(pytz.UTC)
+
+ return _datetime.datetime(
+ year, month, day, hour, minute, second, microsecond,
+ tzinfo=pytz.UTC)
+
+
+def _find_match(string, rxs):
string = " ".join(string.split())
- m = _datetime_rx1.match(string)
- if m is None:
- m = _datetime_rx2.match(string)
- if m is None:
- raise ValueError("could not parse ISO 8601 datetime: %r" % string)
+ for rx in rxs:
+ m = rx.match(string)
+ if m is not None:
+ return m
+ raise ValueError("could not parse ISO 8601 datetime: %r" % string)
+
+
+def _get_datetime_parts(m):
year, month, day, hour, minute = map(
int, m.group("year", "month", "day", "hour", "minute"))
second = 0
@@ -81,19 +129,4 @@
fractional += "0"
fractional = fractional[:6]
microsecond = int(fractional)
-
- if m.group("tzhour"):
- tzhour, tzmin = map(int, m.group("tzhour", "tzmin"))
- offset = (tzhour * 60) + tzmin
- if m.group("tzdir") == "-":
- offset *= -1
- if offset:
- tzinfo = pytz.FixedOffset(offset)
- dt = datetime.datetime(
- year, month, day, hour, minute, second, microsecond,
- tzinfo=tzinfo)
- return dt.astimezone(pytz.UTC)
-
- return datetime.datetime(
- year, month, day, hour, minute, second, microsecond,
- tzinfo=pytz.UTC)
+ return year, month, day, hour, minute, second, microsecond
More information about the Checkins
mailing list