以前在Java中抓取网页,然后利用正则表达式抽取文本,非常方便。学习Go语言时,也产生了相同的想法,于是以英语查单词为例,做了个ICIBA版的英文字典。
代码如下:
<pre name="code" class="go">package main
import (
"bufio"
"flag"
"fmt"
"io/ioutil"
"net/http"
"os"
"regexp"
"strings"
)
// exit is the input line that ends the interactive session. It defaults to
// "-q" and is replaced by the first positional command-line argument when one
// is supplied.
var exit = "-q"

// ICIBA is the base URL of the dictionary site; the keyword being looked up
// is appended to it to form the request URL.
const ICIBA string = "http://www.iciba.com/"

// MEANING_CN is the regular expression applied to a result page. It matches a
// <span class="label_list"> element made up of zero or more <label> elements;
// capture group 1 holds the whole run of labels.
const MEANING_CN string = `<span class="label_list">\s*((<label>[^<]*</label>\s*)*)</span>`
// solveArgs parses the command line. When at least one positional argument is
// present, the first one becomes the command that ends the prompt loop;
// otherwise the default exit command is left untouched.
func solveArgs() {
	flag.Parse()
	if flag.NArg() == 0 {
		return
	}
	exit = flag.Arg(0)
}
func search(keyword string) {
keyword = strings.TrimSpace(keyword)
keyword = strings.Replace(keyword, " ", "_", 0) // more than one word, the iciba join it with _
resp, err := http.Get(ICIBA + keyword)
if err != nil {
fmt.Println("Error raised when query the word, the message is " + err.Error())
} else {
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
fmt.Println("http read error")
return
}
reg, _ := regexp.Compile(MEANING_CN)
content := reg.FindSubmatch(body)
if content != nil {
reg, _ = regexp.Compile(`<(/?)label>\s*`)
val := reg.ReplaceAll(content[1], []byte(""))
fmt.Println("= " + string(val))
} else {
fmt.Println("Not found, Pls input again.")
}
}
}
// main runs the interactive dictionary prompt. After processing the command
// line it prints a banner, then repeatedly reads one line from standard input
// and looks it up, until the line equals the exit command or input ends.
func main() {
	solveArgs()
	// Print with explicit newlines: same output as before (banner followed by
	// a blank line) without passing a trailing "\n" to Println, which go vet
	// rejects.
	fmt.Print("Welcome! This is Jay's English dictionary.\n\n")
	fmt.Println("Please feel free to input any word and the program will help you to find the Chinese meaning of your input.")

	scanner := bufio.NewScanner(os.Stdin)
	for {
		fmt.Print("\n> ")
		if !scanner.Scan() {
			// End of input (Ctrl-D, closed pipe) or a read error. Stop here
			// instead of looping forever on empty reads.
			break
		}
		command := scanner.Text()
		if command == exit {
			fmt.Println("See you!")
			return
		}
		search(command)
	}

	// Finish the dangling prompt line before leaving.
	fmt.Println()
	if err := scanner.Err(); err != nil {
		fmt.Println("Error raised when reading input, the message is " + err.Error())
	}
}