// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package html

import (
	"bytes"
	"os"
	"strings"
	"testing"
)

// tokenTest is a single table entry pairing an HTML input with the token
// output expected for it.
type tokenTest struct {
	// A short description of the test case.
	desc string
	// The HTML to parse.
	html string
	// The string representations of the expected tokens, joined by '$'.
	golden string
}

var tokenTests = []tokenTest{
	// A single text node. The tokenizer should not break text nodes on whitespace,
	// nor should it normalize whitespace within a text node.
	{
		"text",
		"foo bar",
		"foo bar",
	},
	// An entity.
	{
		"entity",
		"one < two",
		"one < two",
	},
	// A start, self-closing and end tag. The tokenizer does not care if the start
	// and end tokens don't match; that is the job of the parser.
	{
		"tags",
		"bd",
		"$b$$d$",
	},
	// Some malformed tags that are missing a '>'.
	{
		"malformed tag #0",
		``,
		``,
	},
	{
		"malformed tag #1",
		`

`, `

`, }, { "malformed tag #2", `

`, `

`, }, { "malformed tag #3", `

`, }, { "malformed tag #4", `

`, `

`, }, // Comments. { "comment0", "abcdef", "abc$$$def", }, { "comment1", "az", "a$z", }, { "comment2", "az", "a$z", }, { "comment3", "az", "a$z", }, { "comment4", "az", "a$z", }, { "comment5", "az", "a$<!>z", }, { "comment6", "az", "a$<!->z", }, { "comment7", "a