# A small Ruby tool that fetches the phonetic transcription of words from
# iciba.com. Put the words in one file (one word per line); each word and its
# pronunciation are written to another file.

require "net/http"
require "uri"
begin
  # Nothing in this script uses REXML. The require is kept for compatibility,
  # but REXML is a bundled gem since Ruby 3.0 and may be absent, so a missing
  # library must not stop the tool from running.
  require 'rexml/document'
rescue LoadError
  nil
end

# Prints the command-line usage guide to standard output.
def usage
  puts "Usage Guide:"
  puts "\truby #{File.basename(__FILE__)} [input-filename] [output-filename]"
  puts ""
  puts "input-file is a word list. Format is:"
  puts "--------------------------------------"
  puts "word1"
  puts "word2"
  puts "......"
  puts "--------------------------------------"
  puts "output-file is a word and pronunciation list. Format is:"
  puts "--------------------------------------"
  puts "word1<TAB>pronunciation1"
  puts "word2<TAB>pronunciation2"
  puts "......"
  puts "--------------------------------------"
end

# Looks up every word of a word list on a dictionary site and writes
# "word<TAB>[pronunciation]" lines to an output file.
class GetPronunciationFromSite
  # HTML fragment that labels the US ("[美]") pronunciation on the page.
  US_LABEL = '<span class="font_666">[美]</span>'
  # Opening and closing fragments that wrap the phonetic text itself.
  PHONETIC_OPEN = '[<span lang="EN-US" class="phonetic fontb font14">'
  PHONETIC_CLOSE = '</span>]'

  def initialize
    # Per-instance result list (one "word\t[pronunciation]" string per word).
    @word_array = []
  end

  # Runs the whole pipeline: check the input file, look up every word, write
  # the results.
  #
  # @param input_filename [String] path of the word list (one word per line)
  # @param site_name [String] base URL of the dictionary site
  # @param output_filename [String] path of the file to (over)write
  def get_pronunciation_from_site(input_filename, site_name, output_filename)
    is_file_exit(input_filename)
    # Start from a clean slate so a second run does not repeat earlier results.
    @word_array = []
    get_file_pronunciation_from_site(input_filename, site_name)
    write_result_to_file(output_filename)
  end

  # Looks up each non-blank line of +input_filename+ on +site_name+.
  def get_file_pronunciation_from_site(input_filename, site_name)
    File.foreach(input_filename) do |line|
      # Non-destructive strip: `strip!` returns nil when nothing was removed
      # (e.g. a last line without a trailing newline).
      word = line.strip
      next if word.empty?

      get_word_pronunciation_from_url(word, site_name)
    end
  end

  # Fetches the page for +word+ and records "word<TAB>[pronunciation]".
  # The brackets are left empty when the page has no recognizable
  # pronunciation.
  #
  # @return [Array<String>] the accumulated result lines
  def get_word_pronunciation_from_url(word, site_name)
    # Percent-encode the word so phrases and non-ASCII input form a valid URL.
    # Form encoding turns a space into "+", which is only meaningful in a
    # query string, so it is rewritten to "%20" for use in the path.
    escaped_word = URI.encode_www_form_component(word).gsub("+", "%20")
    xml_data = fetch("#{site_name}/#{escaped_word}").body
    pronunciation = get_pronunciation(xml_data)
    @word_array << "#{word}\t[#{pronunciation}]"
  end

  # Performs an HTTP GET, following redirects.
  #
  # @param url_string [String] absolute URL to fetch
  # @param limit [Integer] how many more requests may be made
  # @return [Net::HTTPResponse] the successful response
  # @raise [ArgumentError] when the redirect budget is exhausted
  # @raise [Net::HTTPExceptions] (via +error!+) for non-success responses
  def fetch(url_string, limit = 10)
    puts "try to fetch url `#{url_string}'"
    raise ArgumentError, 'HTTP redirect too deep' if limit == 0

    response = Net::HTTP.get_response(URI.parse(url_string))
    case response
    when Net::HTTPSuccess
      response
    when Net::HTTPRedirection
      # A Location header may be relative; resolve it against the current URL.
      fetch(URI.join(url_string, response['location']).to_s, limit - 1)
    else
      response.error!
    end
  end

  # Extracts the US pronunciation from the page markup.
  #
  # @param xml_data [String, nil] raw page body
  # @return [String, nil] the phonetic text, or nil when any of the expected
  #   markers is missing
  def get_pronunciation(xml_data)
    # Net::HTTP bodies are binary-encoded; searching them for a UTF-8 marker
    # containing non-ASCII characters raises Encoding::CompatibilityError.
    # Work on a UTF-8 copy so the caller's string is left untouched.
    # NOTE(review): assumes the site serves UTF-8 - confirm.
    html = xml_data.to_s.dup.force_encoding(Encoding::UTF_8).scrub

    label_position = html.index(US_LABEL)
    return nil unless label_position

    open_position = html.index(PHONETIC_OPEN, label_position + US_LABEL.length)
    return nil unless open_position

    start_position = open_position + PHONETIC_OPEN.length
    end_position = html.index(PHONETIC_CLOSE, start_position)
    return nil unless end_position

    html[start_position...end_position]
  end

  # Writes the collected result lines to +output_filename+, one per line.
  def write_result_to_file(output_filename)
    # Block form closes the file even if writing raises.
    File.open(output_filename, "w") { |file| file.puts @word_array }
  end

  # Terminates the program with a message when +filename+ does not exist.
  def is_file_exit(filename)
    return if File.exist?(filename)

    puts "Error: File `#{filename}' not found."
    exit
  end
end

# Command-line entry point: expects exactly two arguments (input and output
# file names); otherwise prints the usage guide and does nothing else.
def main(argv)
  unless argv.length == 2
    usage
    return
  end

  inst = GetPronunciationFromSite.new
  inst.get_pronunciation_from_site(argv[0], "http://www.iciba.com", argv[1])
end

# Only act as a script when executed directly, not when loaded as a library.
main(ARGV) if __FILE__ == $PROGRAM_NAME