[Zope3-checkins] CVS: Zope3/src/zope/i18n - _strptime.py:1.2

Tim Peters tim.one@comcast.net
Sat, 18 Jan 2003 23:37:40 -0500


Update of /cvs-repository/Zope3/src/zope/i18n
In directory cvs.zope.org:/tmp/cvs-serv7131/src/zope/i18n

Modified Files:
	_strptime.py 
Log Message:
Synch up w/ improvements in the Python version.


=== Zope3/src/zope/i18n/_strptime.py 1.1 => 1.2 ===
--- Zope3/src/zope/i18n/_strptime.py:1.1	Fri Jan 10 13:55:32 2003
+++ Zope3/src/zope/i18n/_strptime.py	Sat Jan 18 23:37:38 2003
@@ -37,7 +37,6 @@
 import calendar
 from re import compile as re_compile
 from re import IGNORECASE
-from string import whitespace as whitespace_string
 
 __author__ = "Brett Cannon"
 __email__ = "drifty@bigfoot.com"
@@ -46,6 +45,17 @@
 
 RegexpType = type(re_compile(''))
 
+def _getlang():
+    # Figure out what the current language is set to.
+    current_lang = locale.getlocale(locale.LC_TIME)[0]
+    if current_lang:
+        return current_lang
+    else:
+        current_lang = locale.getdefaultlocale()[0]
+        if current_lang:
+            return current_lang
+        else:
+            return ''
 
 class LocaleTime(object):
     """Stores and handles locale-specific information related to time.
@@ -271,13 +281,12 @@
                     ('17', '%d'), ('03', '%m'), ('3', '%m'),
                     # '3' needed for when no leading zero.
                     ('2', '%w'), ('10', '%I')):
-                try:
-                    # Done this way to deal with possible lack of locale info
-                    # manifesting itself as the empty string (i.e., Swedish's
-                    # lack of AM/PM info).
+                # Must deal with possible lack of locale info
+                # manifesting itself as the empty string (e.g., Swedish's
+                # lack of AM/PM info) or a platform returning a tuple of empty
+                # strings (e.g., MacOS 9 having timezone as ('','')).
+                if old:
                     current_format = current_format.replace(old, new)
-                except ValueError:
-                    pass
             time_tuple = time.struct_time((1999,1,3,1,1,1,6,3,0))
             if time.strftime(directive, time_tuple).find('00'):
                 U_W = '%U'
@@ -299,19 +308,9 @@
         self.__timezone = self.__pad(time.tzname, 0)
 
     def __calc_lang(self):
-        # Set self.__lang by using locale.getlocale() or
-        # locale.getdefaultlocale().  If both turn up empty, set the attribute
-        # to ''.  This is to stop calls to this method and to make sure
-        # strptime() can produce an re object correctly.
-        current_lang = locale.getlocale(locale.LC_TIME)[0]
-        if current_lang:
-            self.__lang = current_lang
-        else:
-            current_lang = locale.getdefaultlocale()[0]
-            if current_lang:
-                self.__lang = current_lang
-            else:
-                self.__lang = ''
+        # Set self.__lang by using _getlang().
+        self.__lang = _getlang()
+
 
 
 class TimeRE(dict):
@@ -364,7 +363,7 @@
                 raise
 
     def __seqToRE(self, to_convert, directive):
-        """Convert a list to a regex string for matching directive."""
+        """Convert a list to a regex string for matching a directive."""
         def sorter(a, b):
             """Sort based on length.
 
@@ -383,6 +382,11 @@
             return cmp(b_length, a_length)
 
         to_convert = to_convert[:]  # Don't want to change value in-place.
+        for value in to_convert:
+            if value != '':
+                break
+        else:
+            return ''
         to_convert.sort(sorter)
         regex = '|'.join(to_convert)
         regex = '(?P<%s>%s' % (directive, regex)
@@ -391,8 +395,8 @@
     def pattern(self, format):
         """Return re pattern for the format string."""
         processed_format = ''
-        for whitespace in whitespace_string:
-            format = format.replace(whitespace, r'\s*')
+        whitespace_replacement = re_compile('\s+')
+        format = whitespace_replacement.sub('\s*', format)
         while format.find('%') != -1:
             directive_index = format.index('%')+1
             processed_format = "%s%s%s" % (processed_format,
@@ -403,106 +407,120 @@
 
     def compile(self, format):
         """Return a compiled re object for the format string."""
-        format = "(?#%s)%s" % (self.locale_time.lang,format)
         return re_compile(self.pattern(format), IGNORECASE)
 
+# Cached TimeRE; probably only need one instance ever so cache it for performance
+_locale_cache = TimeRE()
+# Cached regex objects; same reason as for TimeRE cache
+_regex_cache = dict()
 
 def strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
-    """Return a time struct based on the input data and the format string.
-
-    The format argument may either be a regular expression object compiled by
-    strptime(), or a format string.  If False is passed in for data_string
-    then the re object calculated for format will be returned.  The re object
-    must be used with the same locale as was used to compile the re object.
-    """
-    locale_time = LocaleTime()
-    if isinstance(format, RegexpType):
-        if format.pattern.find(locale_time.lang) == -1:
-            raise TypeError("re object not created with same language as "
-                            "LocaleTime instance")
-        else:
-            compiled_re = format
-    else:
-        compiled_re = TimeRE(locale_time).compile(format)
-    if data_string is False:
-        return compiled_re
-    else:
-        found = compiled_re.match(data_string)
-        if not found:
-            raise ValueError("time data did not match format")
-        year = month = day = hour = minute = second = weekday = julian = tz =-1
-        found_dict = found.groupdict()
-        for group_key in found_dict.iterkeys():
-            if group_key == 'y':
-                year = int("%s%s" %
-                           (time.strftime("%Y")[:-2], found_dict['y']))
-            elif group_key == 'Y':
-                year = int(found_dict['Y'])
-            elif group_key == 'm':
-                month = int(found_dict['m'])
-            elif group_key == 'B':
-                month = _insensitiveindex(locale_time.f_month, found_dict['B'])
-            elif group_key == 'b':
-                month = _insensitiveindex(locale_time.a_month, found_dict['b'])
-            elif group_key == 'd':
-                day = int(found_dict['d'])
-            elif group_key is 'H':
-                hour = int(found_dict['H'])
-            elif group_key == 'I':
-                hour = int(found_dict['I'])
-                ampm = found_dict.get('p', '').lower()
-                # If there was no AM/PM indicator, we'll treat this like AM
-                if ampm in ('', locale_time.am_pm[0].lower()):
-                    # We're in AM so the hour is correct unless we're
-                    # looking at 12 midnight.
-                    # 12 midnight == 12 AM == hour 0
-                    if hour == 12:
-                        hour = 0
-                elif ampm == locale_time.am_pm[1].lower():
-                    # We're in PM so we need to add 12 to the hour unless
-                    # we're looking at 12 noon.
-                    # 12 noon == 12 PM == hour 12
-                    if hour != 12:
-                        hour += 12
-            elif group_key == 'M':
-                minute = int(found_dict['M'])
-            elif group_key == 'S':
-                second = int(found_dict['S'])
-            elif group_key == 'A':
-                weekday = _insensitiveindex(locale_time.f_weekday,
-                                            found_dict['A'])
-            elif group_key == 'a':
-                weekday = _insensitiveindex(locale_time.a_weekday,
-                                            found_dict['a'])
-            elif group_key == 'w':
-                weekday = int(found_dict['w'])
-                if weekday == 0:
-                    weekday = 6
-                else:
-                    weekday -= 1
-            elif group_key == 'j':
-                julian = int(found_dict['j'])
-            elif group_key == 'Z':
-                found_zone = found_dict['Z'].lower()
-                if locale_time.timezone[0] == locale_time.timezone[1]:
-                    pass #Deals with bad locale setup where timezone info is
-                         # the same; first found on FreeBSD 4.4 -current
-                elif locale_time.timezone[0].lower() == found_zone:
-                    tz = 0
-                elif locale_time.timezone[1].lower() == found_zone:
-                    tz = 1
-                elif locale_time.timezone[2].lower() == found_zone:
-                    tz = 0
-        #XXX <bc>: If calculating fxns are never exposed to the general
-        #          populous then just inline calculations.
-        if julian == -1 and year != -1 and month != -1 and day != -1:
+    """Return a time struct based on the input data and the format string."""
+    global _locale_cache
+    global _regex_cache
+    locale_time = _locale_cache.locale_time
+    # If the language changes, caches are invalidated, so clear them
+    if locale_time.lang != _getlang():
+        _locale_cache = TimeRE()
+        _regex_cache.clear()
+    format_regex = _regex_cache.get(format)
+    if not format_regex:
+        # Limit regex cache size to prevent major bloating of the module;
+        # The value 5 is arbitrary
+        if len(_regex_cache) > 5:
+            _regex_cache.clear()
+        format_regex = _locale_cache.compile(format)
+        _regex_cache[format] = format_regex
+    found = format_regex.match(data_string)
+    if not found:
+        raise ValueError("time data did not match format")
+    year = 1900
+    month = day = 1
+    hour = minute = second = 0
+    tz = -1
+    # Defaulted to -1 so as to signal using functions to calc values
+    weekday = julian = -1
+    found_dict = found.groupdict()
+    for group_key in found_dict.iterkeys():
+        if group_key == 'y':
+            year = int(found_dict['y'])
+            # The Open Group specification for strptime() states that a %y
+            # value in the range [00, 68] refers to the years 2000-2068, while
+            # a value in [69, 99] refers to the years 1969-1999.
+            if year <= 68:
+                year += 2000
+            else:
+                year += 1900
+        elif group_key == 'Y':
+            year = int(found_dict['Y'])
+        elif group_key == 'm':
+            month = int(found_dict['m'])
+        elif group_key == 'B':
+            month = _insensitiveindex(locale_time.f_month, found_dict['B'])
+        elif group_key == 'b':
+            month = _insensitiveindex(locale_time.a_month, found_dict['b'])
+        elif group_key == 'd':
+            day = int(found_dict['d'])
+        elif group_key is 'H':
+            hour = int(found_dict['H'])
+        elif group_key == 'I':
+            hour = int(found_dict['I'])
+            ampm = found_dict.get('p', '').lower()
+            # If there was no AM/PM indicator, we'll treat this like AM
+            if ampm in ('', locale_time.am_pm[0].lower()):
+                # We're in AM so the hour is correct unless we're
+                # looking at 12 midnight.
+                # 12 midnight == 12 AM == hour 0
+                if hour == 12:
+                    hour = 0
+            elif ampm == locale_time.am_pm[1].lower():
+                # We're in PM so we need to add 12 to the hour unless
+                # we're looking at 12 noon.
+                # 12 noon == 12 PM == hour 12
+                if hour != 12:
+                    hour += 12
+        elif group_key == 'M':
+            minute = int(found_dict['M'])
+        elif group_key == 'S':
+            second = int(found_dict['S'])
+        elif group_key == 'A':
+            weekday = _insensitiveindex(locale_time.f_weekday,
+                                        found_dict['A'])
+        elif group_key == 'a':
+            weekday = _insensitiveindex(locale_time.a_weekday,
+                                        found_dict['a'])
+        elif group_key == 'w':
+            weekday = int(found_dict['w'])
+            if weekday == 0:
+                weekday = 6
+            else:
+                weekday -= 1
+        elif group_key == 'j':
+            julian = int(found_dict['j'])
+        elif group_key == 'Z':
+            found_zone = found_dict['Z'].lower()
+            if locale_time.timezone[0] == locale_time.timezone[1]:
+                pass #Deals with bad locale setup where timezone info is
+                     # the same; first found on FreeBSD 4.4.
+            elif locale_time.timezone[0].lower() == found_zone:
+                tz = 0
+            elif locale_time.timezone[1].lower() == found_zone:
+                tz = 1
+            elif locale_time.timezone[2].lower() == found_zone:
+                tz = -1
+    #XXX <bc>: If calculating fxns are never exposed to the general
+    #populace then just inline calculations.  Also might be able to use
+    #``datetime`` and the methods it provides.
+    if julian == -1:
             julian = julianday(year, month, day)
-        if (month == -1 or day == -1) and julian != -1 and year != -1:
+    else:  # Assuming that if they bothered to include Julian day it will
+           #be accurate
             year, month, day = gregorian(julian, year)
-        if weekday == -1 and year != -1 and month != -1 and day != -1:
+    if weekday == -1:
             weekday = dayofweek(year, month, day)
-        return time.struct_time(
-            (year,month,day,hour,minute,second,weekday, julian,tz))
+    return time.struct_time((year, month, day,
+                             hour, minute, second,
+                             weekday, julian, tz))
 
 def _insensitiveindex(lst, findme):
     # Perform a case-insensitive index search.
@@ -555,3 +573,4 @@
         return 6
     else:
         return weekday-1
+