练习5.1
修改findlinks代码中遍历n.FirstChild链表的部分,将循环调用visit,改成递归调用。
go
// visit appends to links the href value of every <a> element reachable from n
// through FirstChild and NextSibling pointers, and returns the extended slice.
//
// Both the descent into children and the walk along siblings are done by
// recursion rather than by a loop. A nil n ends the recursion and returns
// links unchanged.
func visit(links []string, n *html.Node) []string {
	if n == nil {
		return links
	}
	if n.Type == html.ElementNode && n.Data == "a" {
		for _, a := range n.Attr {
			if a.Key == "href" {
				links = append(links, a.Val)
			}
		}
	}
	// Descend into the children before moving on to the next sibling so that
	// links are reported in document order, as the loop-based version did.
	links = visit(links, n.FirstChild)
	return visit(links, n.NextSibling)
}
练习5.2
编写函数,记录在HTML树中出现的同名元素的次数。
go
package main
import (
"fmt"
"os"
"golang.org/x/net/html"
)
// NodeCount maps an element name to the number of times that element has been
// counted.
type NodeCount map[string]int
// main parses an HTML document from standard input, tallies its elements by
// name, and prints the resulting map to standard output. A parse failure is
// reported on standard error and the program exits with status 1.
func main() {
	root, parseErr := html.Parse(os.Stdin)
	if parseErr != nil {
		fmt.Fprintf(os.Stderr, "findlinks1: %v\n", parseErr)
		os.Exit(1)
	}

	counts := make(NodeCount)
	fill(&counts, root)
	fmt.Printf("%v", counts)
}
// fill walks the tree rooted at cn and, for every element node it meets,
// increments that element name's entry in the map nc points to.
func fill(nc *NodeCount, cn *html.Node) {
	if cn.Type == html.ElementNode {
		counts := *nc
		counts[cn.Data] += 1
	}

	// Recurse into each child in turn.
	child := cn.FirstChild
	for child != nil {
		fill(nc, child)
		child = child.NextSibling
	}
}
练习5.3
编写函数输出所有text结点的内容。注意不要访问<script>和<style>元素,因为这些元素对浏览者是不可见的。
go
// getText appends to texts the content of every text node in the tree rooted
// at n and returns the extended slice.
//
// <script> and <style> elements are skipped together with everything below
// them. A nil n returns texts unchanged.
func getText(texts []string, n *html.Node) []string {
	if n == nil {
		return texts
	}
	switch n.Type {
	case html.ElementNode:
		// Match on the node type as well as the name: a text node whose
		// content is literally "script" or "style" must still be collected.
		if n.Data == "script" || n.Data == "style" {
			return texts
		}
	case html.TextNode:
		texts = append(texts, n.Data)
	}
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		texts = getText(texts, c)
	}
	return texts
}
练习5.4
扩展visit函数,使其能够处理其他类型的结点,如images、scripts和style sheets。
go
// visit appends to links the URL referenced by each <a>, <link>, <img> and
// <script> element in the tree rooted at n and returns the extended slice.
//
// <a> and <link> carry their target in the href attribute; <img> and <script>
// carry it in src. A nil n returns links unchanged.
func visit(links []string, n *html.Node) []string {
	if n == nil {
		return links
	}
	if n.Type == html.ElementNode {
		// Pick the attribute that holds the URL for this kind of element.
		var key string
		switch n.Data {
		case "a", "link":
			key = "href"
		case "img", "script":
			key = "src"
		}
		if key != "" {
			for _, a := range n.Attr {
				if a.Key == key {
					links = append(links, a.Val)
				}
			}
		}
	}
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		links = visit(links, c)
	}
	return links
}
练习5.5
实现countWordsAndImages。(参考练习4.9如何分词)
go
// countWordsAndImages returns the number of words and the number of images
// that visit finds in the tree rooted at n.
//
// Each collected text is split on runs of whitespace, so leading or trailing
// blanks, newlines and repeated spaces do not produce spurious words, and
// every text is counted exactly once.
func countWordsAndImages(n *html.Node) (words, images int) {
	var texts []string
	texts, images = visit(nil, 0, n)
	for _, text := range texts {
		words += len(strings.Fields(text))
	}
	return words, images
}
// visit walks the tree rooted at n, appending the content of every text node
// to texts and adding one to imgs for every <img> element. It returns the
// updated slice and count.
//
// <script> and <style> elements are skipped together with everything below
// them. A nil n returns the arguments unchanged.
func visit(texts []string, imgs int, n *html.Node) ([]string, int) {
	if n == nil {
		return texts, imgs
	}
	switch n.Type {
	case html.TextNode:
		texts = append(texts, n.Data)
	case html.ElementNode:
		// Element names are only meaningful on element nodes; a text node
		// that merely reads "script" or "style" is still collected above.
		switch n.Data {
		case "script", "style":
			return texts, imgs
		case "img":
			imgs++
		}
	}
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		texts, imgs = visit(texts, imgs, c)
	}
	return texts, imgs
}
练习5.6
修改gopl.io/ch3/surface(§3.2)中的corner函数,将返回值命名,并使用bare return。
go
// corner returns the 2-D SVG canvas coordinates (sx, sy) of the corner of grid
// cell (i, j). The results are named and returned with a bare return.
func corner(i, j int) (sx float64, sy float64) {
	// Find point (x,y) at corner of cell (i,j).
	x := xyrange * (float64(i)/cells - 0.5)
	y := xyrange * (float64(j)/cells - 0.5)

	// Compute surface height z.
	z := f(x, y)

	// Project (x,y,z) isometrically onto 2-D SVG canvas (sx,sy).
	// Plain assignment is required here: sx and sy are already declared as
	// named results, so ":=" would not compile.
	sx = width/2 + (x-y)*cos30*xyscale
	sy = height/2 + (x+y)*sin30*xyscale - z*zscale
	return
}