[ZPT] CVS: Packages/TAL/tests - test_htmlparser.py:1.15

"Fred L. Drake" <fdrake@acm.org>
Tue, 4 Sep 2001 10:53:26 -0400


Update of /cvs-repository/Packages/TAL/tests
In directory cvs.zope.org:/tmp/cvs-serv20772

Modified Files:
	test_htmlparser.py 
Log Message:
Add more tests to cover edge cases that are legal in either HTML (bare
pointy brackets & ampersands) or XHTML (hexadecimal character references),
but not both.  Also added a test for DOCTYPE declaration parsing.


=== Packages/TAL/tests/test_htmlparser.py 1.14 => 1.15 ===
         self.append(("pi", data))
 
+    def unknown_decl(self, decl):
+        self.append(("unknown decl", decl))
+
 
 class EventCollectorExtra(EventCollector):
 
@@ -117,6 +120,7 @@
 comment1b-->
 <Img sRc='Bar' isMAP>sample
 text
+&#x201C;
 <!--comment2a-- --comment2b-->
 </Html>
 """, [
@@ -131,13 +135,36 @@
     ("data", "\n"),
     ("starttag", "img", [("src", "Bar"), ("ismap", None)]),
     ("data", "sample\ntext\n"),
+    ("charref", "x201C"),
+    ("data", "\n"),
     ("comment", "comment2a-- --comment2b"),
     ("data", "\n"),
     ("endtag", "html"),
     ("data", "\n"),
     ])
 
+    def check_doctype_decl(self):
+        inside = """\
+DOCTYPE html [
+  <!ELEMENT html - O EMPTY>
+  <!ATTLIST html
+      version CDATA #IMPLIED
+      profile CDATA 'DublinCore'>
+  <!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'>
+  <!ENTITY myEntity 'internal parsed entity'>
+  <!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'>
+  <!ENTITY % paramEntity 'name|name|name'>
+  %paramEntity;
+  <!-- comment -->
+]"""
+        self._run_check("<!%s>" % inside, [
+            ("decl", inside),
+            ])
+
     def check_bad_nesting(self):
+        # Strangely, this *is* supposed to test that overlapping
+        # elements are allowed.  HTMLParser is more geared toward
+        # lexing the input than parsing the structure.
         self._run_check("<a><b></a></b>", [
             ("starttag", "a", []),
             ("starttag", "b", []),
@@ -145,6 +172,16 @@
             ("endtag", "b"),
             ])
 
+    def check_bare_ampersands(self):
+        self._run_check("this text & contains & ampersands &", [
+            ("data", "this text & contains & ampersands &"),
+            ])
+
+    def check_bare_pointy_brackets(self):
+        self._run_check("this < text > contains < bare>pointy< brackets", [
+            ("data", "this < text > contains < bare>pointy< brackets"),
+            ])
+
     def check_attr_syntax(self):
         output = [
           ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)])
@@ -174,6 +211,14 @@
             ("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
             ])
 
+    def check_illegal_declarations(self):
+        s = 'abc<!spacer type="block" height="25">def'
+        self._run_check(s, [
+            ("data", "abc"),
+            ("unknown decl", 'spacer type="block" height="25"'),
+            ("data", "def"),
+            ])
+
     def check_starttag_end_boundary(self):
         self._run_check("""<a b='<'>""", [("starttag", "a", [("b", "<")])])
         self._run_check("""<a b='>'>""", [("starttag", "a", [("b", ">")])])
@@ -196,17 +241,12 @@
         self._run_check(["<a b='>'", ">"], output)
 
     def check_starttag_junk_chars(self):
-        self._parse_error("<")
-        self._parse_error("<>")
         self._parse_error("</>")
         self._parse_error("</$>")
         self._parse_error("</")
         self._parse_error("</a")
-        self._parse_error("</a")
         self._parse_error("<a<a>")
         self._parse_error("</a<a>")
-        self._parse_error("<$")
-        self._parse_error("<$>")
         self._parse_error("<!")
         self._parse_error("<a $>")
         self._parse_error("<a")