1.节点内容中存在特殊字符(例如URL中的 & )时的处理方法,请参考:http://blog.csdn.net/zhutulang/article/details/37736407
2.父子节点同名(例如外层的 <product> 与 <URL> 下的 <product>)导致的问题,可以通过count计数的方式避免,具体代码如下:
// Usage example (excerpt): SOURCE shows only the beginning of main; the rest of the
// method, including the catch/finally for this try, is not visible here.
public static void main(String [] args){
try {
// tmpFileStr is declared outside this excerpt -- presumably the directory holding the feed file; verify.
FileInputStream input = new FileInputStream(tmpFileStr+"/"+"farfetch.xml");
// Collect one map per <product> element of the feed.
List<HashMap<String, String>> list = _readXml(input, "product");
// Timestamp used only for the log line printed below.
String starttime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
System.out.println("---"+starttime+"--------Farfetch开始更新-----------");
/**
 * Parses an XML stream with SAX and returns one map per {@code nodeName} element.
 *
 * <p>The stream is always closed before this method returns, whether parsing
 * succeeded or failed.
 *
 * @param input    the XML document to read; closed by this method
 * @param nodeName name of the element that delimits one record (e.g. {@code "product"})
 * @return the records collected by {@link SaxHandler}, or {@code null} if the parser
 *         could not be created or the document could not be read/parsed
 */
public static List<HashMap<String, String>> _readXml(InputStream input, String nodeName){
    try {
        SAXParserFactory spf = SAXParserFactory.newInstance();
        SAXParser parser = spf.newSAXParser();
        SaxHandler handler = new SaxHandler(nodeName);
        parser.parse(input, handler);
        return handler.getList();
    } catch (ParserConfigurationException e) {
        e.printStackTrace();
    } catch (SAXException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close here rather than inside the try so the stream is released
        // even when parser creation or parsing throws.
        if (input != null) {
            try {
                input.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    // Kept for backward compatibility: callers receive null on failure.
    return null;
}
核心代码:
package tools;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX handler that flattens every outermost {@code nodeName} element into one
 * {@code HashMap<String, String>} record.
 *
 * <p>While a record element is open, each attribute is stored under its qualified
 * name and each element's text is stored under the element's name. The
 * {@code currency} attribute of {@code discount} and {@code price} elements is
 * stored as {@code discount_currency} / {@code price_currency} so the two do not
 * overwrite each other.
 *
 * <p>An element with the same name as the record element may be nested inside it
 * (e.g. {@code <product><URL><product>...</product></URL></product>}); nesting
 * depth is tracked so the record is committed only when the outermost element closes.
 *
 * <p>Text chunks that are whitespace-only are ignored.
 */
public class SaxHandler extends DefaultHandler {
    /** Record currently being built; {@code null} while no record element is open. */
    private HashMap<String, String> map = null;
    /** Completed records, in document order. */
    private List<HashMap<String, String>> list = new ArrayList<HashMap<String, String>>();
    /** Name of the element whose text is currently being collected. */
    private String currentTag = null;
    /** Name of the element that delimits one record. */
    private String nodeName = null;
    /**
     * Number of currently open elements named {@code nodeName}. A record is
     * committed only when this drops back to zero, so a same-named child
     * element does not end the record early.
     */
    int count = 0;
    /** Accumulates the text of the current element, which SAX may deliver in several chunks. */
    StringBuilder sb = new StringBuilder();

    /**
     * @return the records collected so far (empty before any document has been parsed)
     */
    public List<HashMap<String, String>> getList() {
        return list;
    }

    /**
     * @param nodeName name of the element that delimits one record
     */
    public SaxHandler(String nodeName) {
        this.nodeName = nodeName;
    }

    /** Called at the beginning of the document: resets all parsing state. */
    @Override
    public void startDocument() throws SAXException {
        list = new ArrayList<HashMap<String, String>>();
        map = null;
        currentTag = null;
        count = 0;
        sb.setLength(0);
    }

    /** Called for every start tag: opens a record if needed and stores the tag's attributes. */
    @Override
    public void startElement(String uri, String localName, String name,
            Attributes attributes) throws SAXException {
        // Text collected for the previous element must not leak into this one.
        sb.setLength(0);
        if (name.equals(nodeName)) {
            if (count == 0) {
                map = new HashMap<String, String>();
            }
            count++;
        }
        // Attributes are only recorded while a record is open.
        if (map != null && attributes != null) {
            if ("discount".equals(name)) {
                // discount and price both carry "currency": store under distinct keys.
                map.put("discount_currency", attributes.getValue("currency"));
            } else if ("price".equals(name)) {
                map.put("price_currency", attributes.getValue("currency"));
            } else {
                for (int i = 0; i < attributes.getLength(); i++) {
                    map.put(attributes.getQName(i), attributes.getValue(i));
                }
            }
        }
        currentTag = name;
    }

    /** Called for each chunk of character data: appends it to the current element's value. */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        if (currentTag == null || map == null) {
            return;
        }
        String chunk = new String(ch, start, length);
        // Skip inter-element whitespace (indentation, line breaks).
        if (chunk.trim().isEmpty()) {
            return;
        }
        // The parser may split one text node into several calls (e.g. around
        // entities), so keep appending and overwrite the stored value each time.
        sb.append(chunk);
        map.put(currentTag, sb.toString());
    }

    /** Called for every end tag: commits the record when its outermost element closes. */
    @Override
    public void endElement(String uri, String localName, String name)
            throws SAXException {
        if (name.equals(nodeName) && count > 0) {
            count--;
            if (count == 0) {
                list.add(map);
                map = null;
            }
        }
        currentTag = null;
        super.endElement(uri, localName, name);
    }
}
xml文件具体内容如下:
<?xml version="1.0" encoding="utf-8"?> <merchandiser xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="merchandiser.xsd"> <header> <merchantId>37938</merchantId> <merchantName>FARFETCH.COM Australia</merchantName> <createdOn>12/05/2015 07:22:29</createdOn> </header> <product product_id="10100630" name="WERKSTATT:MÜNCHEN leather belt" sku_number="10100630" manufacturer_name="WERKSTATT:MÜNCHEN"> <category> <primary>Accessories</primary> <secondary>Belts</secondary> </category> <URL> <product>http://click.linksynergy.com/link?id=DGme2yum/2Y&offerid=389625.10100630&type=15&murl=http%3A%2F%2Fwww.farfetch.com%2Fshopping%2Fmen%2Fitem10100630.aspx</product> <productImage>http://cdn-images.farfetch.com/10/10/06/30/10100630_579509_800.jpg</productImage> </URL> <description> <short>Black leather belt from Werkstatt: Munchen featuring a silver buckle and silver loop.</short> </description> <discount currency="AUD"> <type>amount</type> </discount> <price currency="AUD"> <retail>1265.42</retail> </price> <brand>WERKSTATT:MÜNCHEN</brand> <shipping> <availability>in-stock</availability> </shipping> <pixel>http://ad.linksynergy.com/fs-bin/show?id=DGme2yum/2Y&bids=389625.10100630&type=15&subid=0</pixel> <attributeClass class_id="60"> <Product_Type>Accessories</Product_Type> <Size>S</Size> <Color>Black</Color> <Gender>Male</Gender> </attributeClass> </product> <product product_id="10100676" name="WERKSTATT:MÜNCHEN leather belt" sku_number="10100676" manufacturer_name="WERKSTATT:MÜNCHEN"> <category> <primary>Accessories</primary> <secondary>Belts</secondary> </category> <URL> <product>http://click.linksynergy.com/link?id=DGme2yum/2Y&offerid=389625.10100676&type=15&murl=http%3A%2F%2Fwww.farfetch.com%2Fshopping%2Fmen%2Fitem10100676.aspx</product> <productImage>http://cdn-images.farfetch.com/10/10/06/76/10100676_579578_800.jpg</productImage> </URL> <description> <short>Black leather belt from Werkstatt: Munchen featuring a silver buckle and leather 
loop.</short> </description> <discount currency="AUD"> <type>amount</type> </discount> <price currency="AUD"> <retail>1539.03</retail> </price> <brand>WERKSTATT:MÜNCHEN</brand> <shipping> <availability>in-stock</availability> </shipping> <pixel>http://ad.linksynergy.com/fs-bin/show?id=DGme2yum/2Y&bids=389625.10100676&type=15&subid=0</pixel> <attributeClass class_id="60"> <Product_Type>Accessories</Product_Type> <Size>S-M-L-XL</Size> <Color>Black</Color> <Gender>Male</Gender> </attributeClass> </product> <product product_id="10212594" name="SCUNZANI IVO toad skin belt" sku_number="10212594" manufacturer_name="SCUNZANI IVO"> <category> <primary>Accessories</primary> <secondary>Belts</secondary> </category> <URL> <product>http://click.linksynergy.com/link?id=DGme2yum/2Y&offerid=389625.10212594&type=15&murl=http%3A%2F%2Fwww.farfetch.com%2Fshopping%2Fmen%2Fitem10212594.aspx</product> <productImage>http://cdn-images.farfetch.com/10/21/25/94/10212594_1130649_800.jpg</productImage> </URL> <description> <short>Green and black toad skin belt from Scunzani Ivo featuring a silver-tone buckle. 
Please note that this item cannot be shipped outside the E.U.</short> </description> <discount currency="AUD"> <type>amount</type> </discount> <price currency="AUD"> <retail>461.25</retail> </price> <brand>SCUNZANI IVO</brand> <shipping> <availability>in-stock</availability> </shipping> <pixel>http://ad.linksynergy.com/fs-bin/show?id=DGme2yum/2Y&bids=389625.10212594&type=15&subid=0</pixel> <attributeClass class_id="60"> <Product_Type>Accessories</Product_Type> <Size>M-L</Size> <Color>Green</Color> <Gender>Male</Gender> </attributeClass> </product> <product product_id="11249583" name="JIL SANDER VINTAGE straight leg trousers" sku_number="11249583" manufacturer_name="JIL SANDER VINTAGE"> <category> <primary>Vintage & Archive</primary> <secondary>Trousers</secondary> </category> <URL> <product>http://click.linksynergy.com/link?id=DGme2yum/2Y&offerid=389625.11249583&type=15&murl=http%3A%2F%2Fwww.farfetch.com%2Fshopping%2Fwomen%2Fitem11249583.aspx</product> <productImage>http://cdn-images.farfetch.com/11/24/95/83/11249583_6003611_800.jpg</productImage> </URL> <description> <short>Sky grey linen-wool blend straight leg trousers from Jil Sander Vintage featuring a button and zip fly, side pockets and a back pocket. Circa 1990. Please note that vintage items are not new and therefore might have minor imperfections.</short> </description> <discount currency="AUD"> <type>amount</type> </discount> <price currency="AUD"> <retail>189.77</retail> </price> <brand>JIL SANDER VINTAGE</brand> <shipping> <availability>in-stock</availability> </shipping> <pixel>http://ad.linksynergy.com/fs-bin/show?id=DGme2yum/2Y&bids=389625.11249583&type=15&subid=0</pixel> <attributeClass class_id="60"> <Product_Type>Vintage & Archive</Product_Type> <Size>36</Size> <Color>Grey</Color> <Gender>Female</Gender> </attributeClass> </product> <trailer> <numberOfProducts>118204</numberOfProducts> </trailer> </merchandiser>