Parse HTML token by token



examples/parse-html-token/parse_html_token.go
package main

import (
    "fmt"
    "io"
    "log"
    "strings"

    "golang.org/x/net/html"
)

// main tokenizes a small hard-coded HTML document and prints every token
// produced by the tokenizer, one "Token: ..." entry per token. Whitespace
// between tags is reported as text tokens as well, so it shows up in the
// output too.
func main() {
    const doc = `<html>
    <body>
        <h1>Main title</h1>
        <a href="https://code-maven.com/">Code Maven</a>
    </body>
    </html>`

    z := html.NewTokenizer(strings.NewReader(doc))

    // Keep pulling tokens until the tokenizer signals that it stopped.
    for z.Next() != html.ErrorToken {
        fmt.Printf("Token: %v\n", z.Token())
    }

    // An ErrorToken with io.EOF is the normal end of input; any other
    // error is reported before exiting.
    if err := z.Err(); err != io.EOF {
        log.Printf("Error: %v", err)
    }
}

examples/parse-html-token/parse_html_token.out
Token: <html>
Token: 
	
Token: <body>
Token: 
		
Token: <h1>
Token: Main title
Token: </h1>
Token: 
		
Token: <a href="https://code-maven.com/">
Token: Code Maven
Token: </a>
Token: 
	
Token: </body>
Token: 
	
Token: </html>