[Zope-Checkins] CVS: Zope/lib/python/docutils/parsers/rst - roman.py:1.1.2.1 __init__.py:1.2.10.1 states.py:1.2.10.1 tableparser.py:1.2.10.1

Chris McDonough chrism@zope.com
Mon, 21 Jul 2003 12:39:12 -0400


Update of /cvs-repository/Zope/lib/python/docutils/parsers/rst
In directory cvs.zope.org:/tmp/cvs-serv17213/lib/python/docutils/parsers/rst

Modified Files:
      Tag: Zope-2_7-branch
	__init__.py states.py tableparser.py 
Added Files:
      Tag: Zope-2_7-branch
	roman.py 
Log Message:
Merge changes from HEAD since the release of Zope 2.7a1 into the Zope-2_7-branch in preparation for release of Zope 2.7b1.


=== Added File Zope/lib/python/docutils/parsers/rst/roman.py ===
"""Convert to and from Roman numerals"""

__author__ = "Mark Pilgrim (f8dy@diveintopython.org)"
__version__ = "1.4"
__date__ = "8 August 2001"
__copyright__ = """Copyright (c) 2001 Mark Pilgrim

This program is part of "Dive Into Python", a free Python tutorial for
experienced programmers.  Visit http://diveintopython.org/ for the
latest version.

This program is free software; you can redistribute it and/or modify
it under the terms of the Python 2.1.1 license, available at
http://www.python.org/2.1.1/license.html
"""

import re

# Exception hierarchy: every error raised by this module derives from
# RomanError, so callers can catch that one base class.
class RomanError(Exception):
    """Base class for all errors raised by this module."""


class OutOfRangeError(RomanError):
    """Raised by toRoman for a number outside the range 1..4999."""


class NotIntegerError(RomanError):
    """Raised by toRoman for a number that is not a whole number."""


class InvalidRomanNumeralError(RomanError):
    """Raised by fromRoman for blank or malformed input."""

# (numeral, value) pairs in strictly descending value order.  The
# two-letter subtractive forms (CM, CD, XC, XL, IX, IV) are listed as
# entries of their own, so a single pass over this table from largest to
# smallest is enough for both toRoman and fromRoman.
romanNumeralMap = (
    ('M', 1000),
    ('CM', 900), ('D', 500), ('CD', 400), ('C', 100),
    ('XC', 90), ('L', 50), ('XL', 40), ('X', 10),
    ('IX', 9), ('V', 5), ('IV', 4), ('I', 1),
)

def toRoman(n):
    """convert integer to Roman numeral

    `n` must be a whole number in the range 1..4999.

    Raises OutOfRangeError if `n` is not within 1..4999 and
    NotIntegerError if `n` has a fractional part.
    """
    # Exceptions are raised with call syntax and compared with ``!=``:
    # the older ``raise Class, "message"`` and ``<>`` spellings are
    # syntax errors on Python 3, while these forms work on both.
    if not (0 < n < 5000):
        raise OutOfRangeError("number out of range (must be 1..4999)")
    if int(n) != n:
        raise NotIntegerError("decimals can not be converted")

    pieces = []
    for numeral, integer in romanNumeralMap:
        # Greedy: emit the largest numeral that still fits until it no
        # longer does, then move on to the next smaller one.
        while n >= integer:
            pieces.append(numeral)
            n -= integer
    return "".join(pieces)

# Pattern that accepts only well-formed Roman numerals (it also matches
# the empty string).  The end is anchored with \Z rather than $, because
# $ also matches just before a trailing newline and would therefore let
# input such as "X\n" through as valid.
romanNumeralPattern = re.compile(r"""
    ^                   # beginning of string
    M{0,4}              # thousands - 0 to 4 M's
    (CM|CD|D?C{0,3})    # hundreds - 900 (CM), 400 (CD), 0-300 (0 to 3 C's),
                        #            or 500-800 (D, followed by 0 to 3 C's)
    (XC|XL|L?X{0,3})    # tens - 90 (XC), 40 (XL), 0-30 (0 to 3 X's),
                        #        or 50-80 (L, followed by 0 to 3 X's)
    (IX|IV|V?I{0,3})    # ones - 9 (IX), 4 (IV), 0-3 (0 to 3 I's),
                        #        or 5-8 (V, followed by 0 to 3 I's)
    \Z                  # end of string (no trailing newline allowed)
    """, re.VERBOSE)

def fromRoman(s):
    """convert Roman numeral to integer

    Raises InvalidRomanNumeralError if `s` is empty or is not matched
    by `romanNumeralPattern`.
    """
    # Call syntax is used for raising: the older
    # ``raise Class, "message"`` form is a syntax error on Python 3.
    if not s:
        raise InvalidRomanNumeralError('Input can not be blank')
    if not romanNumeralPattern.search(s):
        raise InvalidRomanNumeralError('Invalid Roman numeral: %s' % s)

    result = 0
    index = 0
    for numeral, integer in romanNumeralMap:
        width = len(numeral)
        # Consume every consecutive occurrence of this numeral at the
        # current position before trying the next smaller one.
        while s[index:index + width] == numeral:
            result += integer
            index += width
    return result



=== Zope/lib/python/docutils/parsers/rst/__init__.py 1.2 => 1.2.10.1 ===
--- Zope/lib/python/docutils/parsers/rst/__init__.py:1.2	Sat Feb  1 04:26:07 2003
+++ Zope/lib/python/docutils/parsers/rst/__init__.py	Mon Jul 21 12:38:05 2003
@@ -95,7 +95,10 @@
           {'action': 'store_true'}),
          ('Set number of spaces for tab expansion (default 8).',
           ['--tab-width'],
-          {'metavar': '<width>', 'type': 'int', 'default': 8}),))
+          {'metavar': '<width>', 'type': 'int', 'default': 8}),
+         ('Remove spaces before footnote references.',
+          ['--trim-footnote-reference-space'],
+          {'action': 'store_true'}),))
 
     def __init__(self, rfc2822=None, inliner=None):
         if rfc2822:


=== Zope/lib/python/docutils/parsers/rst/states.py 1.2 => 1.2.10.1 ===
--- Zope/lib/python/docutils/parsers/rst/states.py:1.2	Sat Feb  1 04:26:07 2003
+++ Zope/lib/python/docutils/parsers/rst/states.py	Mon Jul 21 12:38:05 2003
@@ -107,11 +107,12 @@
 
 import sys
 import re
+import roman
 from types import TupleType
-from docutils import nodes, statemachine, utils, roman, urischemes
+from docutils import nodes, statemachine, utils, urischemes
 from docutils import ApplicationError, DataError
 from docutils.statemachine import StateMachineWS, StateWS
-from docutils.utils import normalize_name
+from docutils.nodes import fully_normalize_name as normalize_name
 from docutils.parsers.rst import directives, languages, tableparser
 from docutils.parsers.rst.languages import en as _fallback_language_module
 
@@ -159,6 +160,7 @@
                            language=self.language,
                            title_styles=[],
                            section_level=0,
+                           section_bubble_up_kludge=0,
                            inliner=inliner)
         self.document = document
         self.attach_observer(document.note_source)
@@ -271,8 +273,10 @@
                           node=node, match_titles=match_titles)
         state_machine.unlink()
         new_offset = state_machine.abs_line_offset()
-        # Adjustment for block if modified in nested parse:
-        self.state_machine.next_line(len(block) - block_length)
+        # No `block.parent` implies disconnected -- lines aren't in sync:
+        if block.parent:
+            # Adjustment for block if modified in nested parse:
+            self.state_machine.next_line(len(block) - block_length)
         return new_offset
 
     def nested_list_parse(self, block, input_offset, node, initial_state,
@@ -340,6 +344,8 @@
                 return None
         if level <= mylevel:            # sibling or supersection
             memo.section_level = level   # bubble up to parent section
+            if len(style) == 2:
+                memo.section_bubble_up_kludge = 1
             # back up 2 lines for underline title, 3 for overline title
             self.state_machine.previous_line(len(style) + 1)
             raise EOFError              # let parent section re-evaluate
@@ -471,13 +477,15 @@
 
     _interpreted_roles = {
         # Values of ``None`` mean "not implemented yet":
-        'title-reference': 'title_reference_role',
-        'abbreviation': None,
-        'acronym': None,
+        'title-reference': 'generic_interpreted_role',
+        'abbreviation': 'generic_interpreted_role',
+        'acronym': 'generic_interpreted_role',
         'index': None,
-        'emphasis': None,
-        'strong': None,
-        'literal': None,
+        'subscript': 'generic_interpreted_role',
+        'superscript': 'generic_interpreted_role',
+        'emphasis': 'generic_interpreted_role',
+        'strong': 'generic_interpreted_role',
+        'literal': 'generic_interpreted_role',
         'named-reference': None,
         'anonymous-reference': None,
         'uri-reference': None,
@@ -487,7 +495,7 @@
         'citation-reference': None,
         'substitution-reference': None,
         'target': None,
-        }
+        'restructuredtext-unimplemented-role': None}
     """Mapping of canonical interpreted text role name to method name.
     Initializes a name to bound-method mapping in `__init__`."""
 
@@ -495,6 +503,18 @@
     """The role to use when no explicit role is given.
     Override in subclasses."""
 
+    generic_roles = {'abbreviation': nodes.abbreviation,
+                     'acronym': nodes.acronym,
+                     'emphasis': nodes.emphasis,
+                     'literal': nodes.literal,
+                     'strong': nodes.strong,
+                     'subscript': nodes.subscript,
+                     'superscript': nodes.superscript,
+                     'title-reference': nodes.title_reference,}
+    """Mapping of canonical interpreted text role name to node class.
+    Used by the `generic_interpreted_role` method for simple, straightforward
+    roles (simple wrapping; no extra processing)."""
+
     def __init__(self, roles=None):
         """
         `roles` is a mapping of canonical role name to role function or bound
@@ -872,9 +892,11 @@
             return uri
 
     def interpreted(self, before, after, rawsource, text, role, lineno):
-        role_function, messages = self.get_role_function(role, lineno)
+        role_function, canonical, messages = self.get_role_function(role,
+                                                                    lineno)
         if role_function:
-            nodelist, messages2 = role_function(role, rawsource, text, lineno)
+            nodelist, messages2 = role_function(canonical, rawsource, text,
+                                                lineno)
             messages.extend(messages2)
             return before, nodelist, after, messages
         else:
@@ -885,34 +907,34 @@
         msg_text = []
         if role:
             name = role.lower()
-            canonical = None
-            try:
-                canonical = self.language.roles[name]
-            except AttributeError, error:
-                msg_text.append('Problem retrieving role entry from language '
-                                'module %r: %s.' % (self.language, error))
-            except KeyError:
-                msg_text.append('No role entry for "%s" in module "%s".'
-                                % (role, self.language.__name__))
-            if not canonical:
-                try:
-                    canonical = _fallback_language_module.roles[name]
-                    msg_text.append('Using English fallback for role "%s".'
-                                    % role)
-                except KeyError:
-                    msg_text.append('Trying "%s" as canonical role name.'
-                                    % role)
-                    # Should be an English name, but just in case:
-                    canonical = name
-            if msg_text:
-                message = self.reporter.info('\n'.join(msg_text), line=lineno)
-                messages.append(message)
+        else:
+            name = self.default_interpreted_role
+        canonical = None
+        try:
+            canonical = self.language.roles[name]
+        except AttributeError, error:
+            msg_text.append('Problem retrieving role entry from language '
+                            'module %r: %s.' % (self.language, error))
+        except KeyError:
+            msg_text.append('No role entry for "%s" in module "%s".'
+                            % (name, self.language.__name__))
+        if not canonical:
             try:
-                return self.interpreted_roles[canonical], messages
+                canonical = _fallback_language_module.roles[name]
+                msg_text.append('Using English fallback for role "%s".'
+                                % name)
             except KeyError:
-                raise UnknownInterpretedRoleError(messages)
-        else:
-            return self.interpreted_roles[self.default_interpreted_role], []
+                msg_text.append('Trying "%s" as canonical role name.'
+                                % name)
+                # Should be an English name, but just in case:
+                canonical = name
+        if msg_text:
+            message = self.reporter.info('\n'.join(msg_text), line=lineno)
+            messages.append(message)
+        try:
+            return self.interpreted_roles[canonical], canonical, messages
+        except KeyError:
+            raise UnknownInterpretedRoleError(messages)
 
     def literal(self, match, lineno):
         before, inlines, remaining, sysmessages, endstring = self.inline_obj(
@@ -936,26 +958,22 @@
               match, lineno, self.patterns.substitution_ref,
               nodes.substitution_reference)
         if len(inlines) == 1:
-            subrefnode = inlines[0]
-            if isinstance(subrefnode, nodes.substitution_reference):
-                subreftext = subrefnode.astext()
-                refname = normalize_name(subreftext)
-                subrefnode['refname'] = refname
-                self.document.note_substitution_ref(
-                      subrefnode)
+            subref_node = inlines[0]
+            if isinstance(subref_node, nodes.substitution_reference):
+                subref_text = subref_node.astext()
+                self.document.note_substitution_ref(subref_node, subref_text)
                 if endstring[-1:] == '_':
-                    referencenode = nodes.reference(
-                          '|%s%s' % (subreftext, endstring), '')
+                    reference_node = nodes.reference(
+                        '|%s%s' % (subref_text, endstring), '')
                     if endstring[-2:] == '__':
-                        referencenode['anonymous'] = 1
+                        reference_node['anonymous'] = 1
                         self.document.note_anonymous_ref(
-                              referencenode)
+                              reference_node)
                     else:
-                        referencenode['refname'] = refname
-                        self.document.note_refname(
-                              referencenode)
-                    referencenode += subrefnode
-                    inlines = [referencenode]
+                        reference_node['refname'] = normalize_name(subref_text)
+                        self.document.note_refname(reference_node)
+                    reference_node += subref_node
+                    inlines = [reference_node]
         return before, inlines, remaining, sysmessages
 
     def footnote_reference(self, match, lineno):
@@ -965,6 +983,9 @@
         """
         label = match.group('footnotelabel')
         refname = normalize_name(label)
+        string = match.string
+        before = string[:match.start('whole')]
+        remaining = string[match.end('whole'):]
         if match.group('citationlabel'):
             refnode = nodes.citation_reference('[%s]_' % label,
                                                refname=refname)
@@ -986,10 +1007,9 @@
             if refname:
                 refnode['refname'] = refname
                 self.document.note_footnote_ref(refnode)
-        string = match.string
-        matchstart = match.start('whole')
-        matchend = match.end('whole')
-        return (string[:matchstart], [refnode], string[matchend:], [])
+            if self.document.settings.trim_footnote_reference_space:
+                before = before.rstrip()
+        return (before, [refnode], remaining, [])
 
     def reference(self, match, lineno, anonymous=None):
         referencename = match.group('refname')
@@ -1084,8 +1104,15 @@
                 '_': reference,
                 '__': anonymous_reference}
 
-    def title_reference_role(self, role, rawtext, text, lineno):
-        return [nodes.title_reference(rawtext, text)], []
+    def generic_interpreted_role(self, role, rawtext, text, lineno):
+        try:
+            role_class = self.generic_roles[role]
+        except KeyError:
+            msg = self.reporter.error('Unknown interpreted text role: "%s".'
+                                      % role, line=lineno)
+            prb = self.problematic(text, text, msg)
+            return [prb], [msg]
+        return [role_class(rawtext, text)], []
 
     def pep_reference_role(self, role, rawtext, text, lineno):
         try:
@@ -1208,16 +1235,72 @@
         """Block quote."""
         indented, indent, line_offset, blank_finish = \
               self.state_machine.get_indented()
-        blockquote = self.block_quote(indented, line_offset)
+        blockquote, messages = self.block_quote(indented, line_offset)
         self.parent += blockquote
+        self.parent += messages
         if not blank_finish:
             self.parent += self.unindent_warning('Block quote')
         return context, next_state, []
 
     def block_quote(self, indented, line_offset):
+        blockquote_lines, attribution_lines, attribution_offset = \
+              self.check_attribution(indented, line_offset)
         blockquote = nodes.block_quote()
-        self.nested_parse(indented, line_offset, blockquote)
-        return blockquote
+        self.nested_parse(blockquote_lines, line_offset, blockquote)
+        messages = []
+        if attribution_lines:
+            attribution, messages = self.parse_attribution(attribution_lines,
+                                                           attribution_offset)
+            blockquote += attribution
+        return blockquote, messages
+
+    attribution_pattern = re.compile(r'--(?![-\n]) *(?=[^ \n])')
+
+    def check_attribution(self, indented, line_offset):
+        """
+        Check for an attribution in the last contiguous block of `indented`.
+
+        * First line after last blank line must begin with "--" (etc.).
+        * Every line after that must have consistent indentation.
+
+        Return a 3-tuple: (block quote lines, attribution lines,
+        attribution offset).
+        """
+        blank = None
+        nonblank_seen = None
+        indent = 0
+        for i in range(len(indented) - 1, 0, -1): # don't check first line
+            this_line_blank = not indented[i].strip()
+            if nonblank_seen and this_line_blank:
+                match = self.attribution_pattern.match(indented[i + 1])
+                if match:
+                    blank = i
+                break
+            elif not this_line_blank:
+                nonblank_seen = 1
+        if blank and len(indented) - blank > 2: # multi-line attribution
+            indent = (len(indented[blank + 2])
+                      - len(indented[blank + 2].lstrip()))
+            for j in range(blank + 3, len(indented)):
+                if indent != (len(indented[j])
+                              - len(indented[j].lstrip())): # bad shape
+                    blank = None
+                    break
+        if blank:
+            a_lines = indented[blank + 1:]
+            a_lines.trim_left(match.end(), end=1)
+            a_lines.trim_left(indent, start=1)
+            return (indented[:blank], a_lines, line_offset + blank + 1)
+        else:
+            return (indented, None, None)
+
+    def parse_attribution(self, indented, line_offset):
+        text = '\n'.join(indented).rstrip()
+        lineno = self.state_machine.abs_line_number() + line_offset
+        textnodes, messages = self.inline_text(text, lineno)
+        node = nodes.attribution(text, '', *textnodes)
+        node.line = lineno
+        return node, messages
 
     def bullet(self, match, context, next_state):
         """Bullet list item."""
@@ -1436,8 +1519,9 @@
             self.parent += msg
             indented, indent, line_offset, blank_finish = \
                   self.state_machine.get_first_known_indented(match.end())
-            blockquote = self.block_quote(indented, line_offset)
+            blockquote, messages = self.block_quote(indented, line_offset)
             self.parent += blockquote
+            self.parent += messages
             if not blank_finish:
                 self.parent += self.unindent_warning('Option list')
             return [], next_state, []
@@ -1689,6 +1773,7 @@
                               (?P=quote)      # close quote if open quote used
                             )
                             %(non_whitespace_escape_before)s
+                            [ ]?            # optional space
                             :               # end of reference name
                             ([ ]+|$)        # followed by whitespace
                             """ % vars(Inliner), re.VERBOSE),
@@ -1864,34 +1949,31 @@
         while block and not block[-1].strip():
             block.pop()
         subname = subdefmatch.group('name')
-        name = normalize_name(subname)
-        substitutionnode = nodes.substitution_definition(
-              blocktext, name=name, alt=subname)
-        substitutionnode.line = lineno
+        substitution_node = nodes.substitution_definition(blocktext)
+        substitution_node.line = lineno
+        self.document.note_substitution_def(
+            substitution_node,subname, self.parent)
         if block:
             block[0] = block[0].strip()
             new_abs_offset, blank_finish = self.nested_list_parse(
-                  block, input_offset=offset, node=substitutionnode,
+                  block, input_offset=offset, node=substitution_node,
                   initial_state='SubstitutionDef', blank_finish=blank_finish)
             i = 0
-            for node in substitutionnode[:]:
+            for node in substitution_node[:]:
                 if not (isinstance(node, nodes.Inline) or
                         isinstance(node, nodes.Text)):
-                    self.parent += substitutionnode[i]
-                    del substitutionnode[i]
+                    self.parent += substitution_node[i]
+                    del substitution_node[i]
                 else:
                     i += 1
-            if len(substitutionnode) == 0:
+            if len(substitution_node) == 0:
                 msg = self.reporter.warning(
                       'Substitution definition "%s" empty or invalid.'
                       % subname,
                       nodes.literal_block(blocktext, blocktext), line=lineno)
                 return [msg], blank_finish
             else:
-                del substitutionnode['alt']
-                self.document.note_substitution_def(
-                      substitutionnode, self.parent)
-                return [substitutionnode], blank_finish
+                return [substitution_node], blank_finish
         else:
             msg = self.reporter.warning(
                   'Substitution definition "%s" missing contents.' % subname,
@@ -2112,6 +2194,7 @@
            re.compile(r"""
                       \.\.[ ]+          # explicit markup start
                       (%s)              # directive name
+                      [ ]?              # optional space
                       ::                # directive delimiter
                       ([ ]+|$)          # whitespace or end of line
                       """ % Inliner.simplename, re.VERBOSE | re.UNICODE))]
@@ -2147,7 +2230,8 @@
               self.state_machine.input_lines[offset:],
               input_offset=self.state_machine.abs_line_offset() + 1,
               node=self.parent, initial_state='Explicit',
-              blank_finish=blank_finish)
+              blank_finish=blank_finish,
+              match_titles=self.state_machine.match_titles)
         self.goto_line(newline_offset)
         if not blank_finish:
             self.parent += self.unindent_warning('Explicit markup')
@@ -2452,11 +2536,8 @@
     initial_transitions = ['embedded_directive', 'text']
 
     def embedded_directive(self, match, context, next_state):
-        if self.parent.has_key('alt'):
-            option_presets = {'alt': self.parent['alt']}
-        else:
-            option_presets = {}
-        nodelist, blank_finish = self.directive(match, **option_presets)
+        nodelist, blank_finish = self.directive(match,
+                                                alt=self.parent['name'])
         self.parent += nodelist
         if not self.state_machine.at_eof():
             self.blank_finish = blank_finish
@@ -2591,8 +2672,9 @@
               self.state_machine.get_indented()
         definitionlistitem = nodes.definition_list_item(
             '\n'.join(termline + list(indented)))
-        termlist, messages = self.term(
-              termline, self.state_machine.abs_line_number() - 1)
+        lineno = self.state_machine.abs_line_number() - 1
+        definitionlistitem.line = lineno
+        termlist, messages = self.term(termline, lineno)
         definitionlistitem += termlist
         definition = nodes.definition('', *messages)
         definitionlistitem += definition
@@ -2678,7 +2760,9 @@
     def eof(self, context):
         """Transition marker at end of section or document."""
         marker = context[0].strip()
-        if len(marker) < 4:
+        if self.memo.section_bubble_up_kludge:
+            self.memo.section_bubble_up_kludge = 0
+        elif len(marker) < 4:
             self.state_correction(context)
         if self.eofcheck:               # ignore EOFError with sections
             lineno = self.state_machine.abs_line_number() - 1
@@ -2741,7 +2825,7 @@
                 self.short_overline(context, blocktext, lineno, 2)
             else:
                 msg = self.reporter.severe(
-                    'Missing underline for overline.',
+                    'Missing matching underline for section title overline.',
                     nodes.literal_block(source, source), line=lineno)
                 self.parent += msg
                 return [], 'Body', []
@@ -2819,8 +2903,13 @@
         start = found + 2               # skip character after escape
 
 def unescape(text, restore_backslashes=0):
-    """Return a string with nulls removed or restored to backslashes."""
+    """
+    Return a string with nulls removed or restored to backslashes.
+    Backslash-escaped spaces are also removed.
+    """
     if restore_backslashes:
         return text.replace('\x00', '\\')
     else:
-        return ''.join(text.split('\x00'))
+        for sep in ['\x00 ', '\x00\n', '\x00']:
+            text = ''.join(text.split(sep))
+        return text


=== Zope/lib/python/docutils/parsers/rst/tableparser.py 1.2 => 1.2.10.1 ===
--- Zope/lib/python/docutils/parsers/rst/tableparser.py:1.2	Sat Feb  1 04:26:07 2003
+++ Zope/lib/python/docutils/parsers/rst/tableparser.py	Mon Jul 21 12:38:06 2003
@@ -131,7 +131,8 @@
     head_body_separator_pat = re.compile(r'\+=[=+]+=\+ *$')
 
     def setup(self, block):
-        self.block = list(block)        # make a copy; it may be modified
+        self.block = block[:]           # make a copy; it may be modified
+        self.block.disconnect()         # don't propagate changes to parent
         self.bottom = len(block) - 1
         self.right = len(block[0]) - 1
         self.head_body_sep = None
@@ -165,7 +166,9 @@
             update_dict_of_lists(self.rowseps, rowseps)
             update_dict_of_lists(self.colseps, colseps)
             self.mark_done(top, left, bottom, right)
-            cellblock = self.get_cell_block(top, left, bottom, right)
+            cellblock = self.block.get_2D_block(top + 1, left + 1,
+                                                bottom, right)
+            cellblock.disconnect()      # lines in cell can't sync with parent
             self.cells.append((top, left, bottom, right, cellblock))
             corners.extend([(top, right), (bottom, left)])
             corners.sort()
@@ -188,19 +191,6 @@
                 return None
         return 1
 
-    def get_cell_block(self, top, left, bottom, right):
-        """Given the corners, extract the text of a cell."""
-        cellblock = []
-        margin = right
-        for lineno in range(top + 1, bottom):
-            line = self.block[lineno][left + 1 : right].rstrip()
-            cellblock.append(line)
-            if line:
-                margin = min(margin, len(line) - len(line.lstrip()))
-        if 0 < margin < right:
-            cellblock = [line[margin:] for line in cellblock]
-        return cellblock
-
     def scan_cell(self, top, left):
         """Starting at the top-left corner, start tracing out a cell."""
         assert self.block[top][left] == '+'
@@ -278,7 +268,7 @@
 
     def structure_from_cells(self):
         """
-        From the data colledted by `scan_cell()`, convert to the final data
+        From the data collected by `scan_cell()`, convert to the final data
         structure.
         """
         rowseps = self.rowseps.keys()   # list of row boundaries
@@ -371,7 +361,8 @@
     span_pat = re.compile('-[ -]*$')
 
     def setup(self, block):
-        self.block = list(block)        # make a copy; it will be modified
+        self.block = block[:]           # make a copy; it will be modified
+        self.block.disconnect()         # don't propagate changes to parent
         # Convert top & bottom borders to column span underlines:
         self.block[0] = self.block[0].replace('=', '-')
         self.block[-1] = self.block[-1].replace('=', '-')
@@ -394,25 +385,26 @@
         self.columns = self.parse_columns(self.block[0], 0)
         self.border_end = self.columns[-1][1]
         firststart, firstend = self.columns[0]
-        block = self.block[1:]
-        offset = 0
-        # Container for accumulating text lines until a row is complete:
-        rowlines = []
-        while block:
-            line = block.pop(0)
-            offset += 1
+        offset = 1                      # skip top border
+        start = 1
+        text_found = None
+        while offset < len(self.block):
+            line = self.block[offset]
             if self.span_pat.match(line):
                 # Column span underline or border; row is complete.
-                self.parse_row(rowlines, (line.rstrip(), offset))
-                rowlines = []
+                self.parse_row(self.block[start:offset], start,
+                               (line.rstrip(), offset))
+                start = offset + 1
+                text_found = None
             elif line[firststart:firstend].strip():
                 # First column not blank, therefore it's a new row.
-                if rowlines:
-                    self.parse_row(rowlines)
-                rowlines = [(line.rstrip(), offset)]
-            else:
-                # Accumulate lines of incomplete row.
-                rowlines.append((line.rstrip(), offset))
+                if text_found and offset != start:
+                    self.parse_row(self.block[start:offset], start)
+                start = offset
+                text_found = 1
+            elif not text_found:
+                start = offset + 1
+            offset += 1
 
     def parse_columns(self, line, offset):
         """
@@ -448,12 +440,12 @@
                     morecols += 1
             except (AssertionError, IndexError):
                 raise TableMarkupError('Column span alignment problem at '
-                                       'line offset %s.' % offset)
-            cells.append((0, morecols, offset, []))
+                                       'line offset %s.' % (offset + 1))
+            cells.append([0, morecols, offset, []])
             i += 1
         return cells
 
-    def parse_row(self, lines, spanline=None):
+    def parse_row(self, lines, start, spanline=None):
         """
         Given the text `lines` of a row, parse it and append to `self.table`.
 
@@ -462,20 +454,30 @@
         text from each line, and check for text in column margins.  Finally,
         adjust for insigificant whitespace.
         """
-        while lines and not lines[-1][0]:
-            lines.pop()                 # Remove blank trailing lines.
-        if lines:
-            offset = lines[0][1]
-        elif spanline:
-            offset = spanline[1]
-        else:
+        if not (lines or spanline):
             # No new row, just blank lines.
             return
         if spanline:
             columns = self.parse_columns(*spanline)
+            span_offset = spanline[1]
         else:
             columns = self.columns[:]
-        row = self.init_row(columns, offset)
+            span_offset = start
+        self.check_columns(lines, start, columns)
+        row = self.init_row(columns, start)
+        for i in range(len(columns)):
+            start, end = columns[i]
+            cellblock = lines.get_2D_block(0, start, len(lines), end)
+            cellblock.disconnect()      # lines in cell can't sync with parent
+            row[i][3] = cellblock
+        self.table.append(row)
+
+    def check_columns(self, lines, first_line, columns):
+        """
+        Check for text in column margins and text overflow in the last column.
+        Raise TableMarkupError if anything but whitespace is in column margins.
+        Adjust the end value for the last column if there is text overflow.
+        """
         # "Infinite" value for a dummy last column's beginning, used to
         # check for text overflow:
         columns.append((sys.maxint, None))
@@ -483,30 +485,20 @@
         for i in range(len(columns) - 1):
             start, end = columns[i]
             nextstart = columns[i+1][0]
-            block = []
-            margin = sys.maxint
-            for line, offset in lines:
+            offset = 0
+            for line in lines:
                 if i == lastcol and line[end:].strip():
                     text = line[start:].rstrip()
-                    columns[lastcol] = (start, start + len(text))
-                    self.adjust_last_column(start + len(text))
+                    new_end = start + len(text)
+                    columns[i] = (start, new_end)
+                    main_start, main_end = self.columns[-1]
+                    if new_end > main_end:
+                        self.columns[-1] = (main_start, new_end)
                 elif line[end:nextstart].strip():
                     raise TableMarkupError('Text in column margin at line '
-                                           'offset %s.' % offset)
-                else:
-                    text = line[start:end].rstrip()
-                block.append(text)
-                if text:
-                    margin = min(margin, len(text) - len(text.lstrip()))
-            if 0 < margin < sys.maxint:
-                block = [line[margin:] for line in block]
-            row[i][3].extend(block)
-        self.table.append(row)
-
-    def adjust_last_column(self, new_end):
-        start, end = self.columns[-1]
-        if new_end > end:
-            self.columns[-1] = (start, new_end)
+                                           'offset %s.' % (first_line + offset))
+                offset += 1
+        columns.pop()
 
     def structure_from_cells(self):
         colspecs = [end - start for start, end in self.columns]