import scala.collection.mutable.ArrayBuffer
import scala.xml._
import scala.xml.parsing.XhtmlParser
import scala.io.Source
import scala.xml.transform.{RewriteRule, RuleTransformer}

/**
 * Scratch program exercising scala-xml: pattern matching on elements, querying
 * attributes with `\` and `\\`, converting between a `Map` and a `<dl>` list, and
 * rewriting a parsed XHTML document with a [[scala.xml.transform.RuleTransformer]].
 *
 * Earlier experiments are kept as commented-out code; only the `mapToDl`,
 * `dlToMap` and `imgTodo` demonstrations (plus the separator lines) print output.
 */
object XMLhandleTest extends App {
  // val xml = <fred/>
  // println(xml(0))
  // println(xml(0)(0))

  println("-------------------------------------")

  // val xml = <ul>
  //   <li>
  //     {Text("Opening bracket: [")}
  //   </li>
  //   <li>
  //     {Text("Closing bracket: ]")}
  //   </li>
  //   <li>
  //     {Text("Opening brace: {{")}
  //   </li>
  //   <li>
  //     {Text("Closing brace: }}")}
  //   </li>
  // </ul>
  //
  // println(xml.isInstanceOf[Elem])

  println("-------------------------------------")

  /**
   * Returns the content of an element whose only child is a `Text` node.
   *
   * @param elem element expected to hold exactly one `Text` child
   * @return the text carried by that child
   * @throws scala.MatchError for any other shape of children (deliberate: see the
   *                          `matcherror` experiment below)
   */
  def getTextFromElem(elem: Elem): String = elem match {
    case Elem(_, _, _, _, Text(t)) => t
  }

  /**
   * Returns the string form of an element whose only child is an `Atom`.
   *
   * @param elem element expected to hold exactly one `Atom` child
   * @return `toString()` of that child
   * @throws scala.MatchError for any other shape of children
   */
  def getAtomFromElem(elem: Elem): String = elem match {
    case Elem(_, _, _, _, atom: Atom[_]) => atom.toString()
  }

  // println(getTextFromElem(<li>Fred</li>))
  // println(getTextFromElem(<li>{"Fred"}</li>)) //matcherror
  // println(getAtomFromElem(<li>{"Fred"}</li>))

  println("-------------------------------------")

  /**
   * Selects every `img` descendant of `doc` for which `\ "@alt"` yields nothing.
   *
   * @param doc parsed document to search
   * @return the matching `img` nodes, in the order `\\` returns them
   */
  def getImgWithoutAlt(doc: Document): NodeSeq =
    (doc \\ "img").filter(img => (img \ "@alt").isEmpty) // explicit call instead of postfix `isEmpty`

  val xhtml = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html> <head> <title>Xhtml Valid Page</title> </head> <body> <p> <img src="image.jpg" /> </p> <p> <img src="imageAlt.jpg" alt="diaporama" /> </p> </body> </html>"""

  // val parser = new XhtmlParser(Source.fromString(xhtml))
  // val doc = parser.initialize.document()
  //
  // val img = getImgWithoutAlt(doc)
  // println(img.getClass)//class scala.xml.NodeSeq$$anon$1
  // println(img)//<img src="image.jpg"/>
  // println(img.size) //1
  // for (e <- img) println(e)//<img src="image.jpg"/>
  // assert(img(0) == <img src="image.jpg" />)

  println("-------------------------------------")

  // The value contains a single line break after "<head> "; it is spelled out
  // explicitly so the string stays exactly as it was.
  val xhtml2 = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html> <head> """ + "\n" + """<title>Xhtml Valid Page</title> </head> <body> <p> <img name="image" src="image.jpg" /> </p> <p> <img name="imageAlt" src="imageAlt.jpg" alt="diaporama" /> </p> </body> </html>"""

  // val parser = new XhtmlParser(Source.fromString(xhtml2))
  // val doc = parser.initialize.document()
  //
  // (doc \\ "img").foreach{
  //   n => {
  //     println(n \ "@name")
  //     println(n \ "@src")
  //   }
  // }

  println("-------------------------------------")

  val xhtml3 = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html> <head> <title>Xhtml Valid Page</title> </head> <body> <p> <a href="http://www.siteA.fr">Le site des A</a> </p> <p> <a href="http://www.siteB.fr">Le site des B</a> </p> </body> </html>"""

  // val parser = new XhtmlParser(Source.fromString(xhtml3))
  // val doc = parser.initialize.document()
  // var tr = <tr></tr>
  //
  // (doc \\ "a").foreach{
  //   n => {
  //     tr = tr.copy(child = tr.child ++ <td>{n}</td>)
  //     println(n \\ "@href")
  //   }
  // }
  //
  // println(<table>{tr}</table>)

  println("-------------------------------------")

  /**
   * Renders a map as a definition list: one `<dt>key</dt><dd>value</dd>` pair per
   * entry, in the map's iteration order.
   *
   * @param m entries to render
   * @return the enclosing `<dl>` element
   */
  def mapToDl(m: Map[String, String]): Elem =
    <dl>{for ((k, v) <- m) yield <dt>{k}</dt><dd>{v}</dd>}</dl>

  println(mapToDl(Map("A" -> "1", "B" -> "2")))

  println("-------------------------------------")

  /**
   * Reads a definition list back into a map by pairing the n-th `<dt>` with the
   * n-th `<dd>` among the direct children of `dl`.
   *
   * Children that are neither `dt` nor `dd` elements (for example whitespace
   * between entries) are skipped instead of raising a `MatchError`. Contents are
   * read with `.text`, so content embedded as `{expr}` — which `Text(t)` does not
   * match, per the `matcherror` note above — is accepted too. That makes the
   * output of `mapToDl` a valid input here.
   *
   * @param dl the `<dl>` element to read
   * @return terms mapped to their definitions; unpaired trailing terms or
   *         definitions are dropped by `zip`
   */
  def dlToMap(dl: Elem): Map[String, String] = {
    val terms = dl.child.collect { case e: Elem if e.label == "dt" => e.text }
    val definitions = dl.child.collect { case e: Elem if e.label == "dd" => e.text }
    terms.zip(definitions).toMap
  }

  println(dlToMap(<dl><dt>A</dt><dd>1</dd><dt>B</dt><dd>2</dd></dl>))

  println("-------------------------------------")

  val xhtml4 = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html> <head> <title>Xhtml Valid Page</title> </head> <body> <p> <img src="http://www.imageA.fr" /> </p> <p> <img src="http://www.imageB.fr" alt="imageB" /> </p> </body> </html>"""

  // val rule = new RewriteRule {
  //   override def transform(n: Node) = n match {
  //     case img @ <img/> if img \ "@alt" isEmpty =>
  //       img.asInstanceOf[Elem] % Attribute(null, "alt", "TODO", Null)
  //     case _ => n
  //   }
  // }
  //
  // val parser = new XhtmlParser(Source.fromString(xhtml))
  // val doc = parser.initialize.document()
  //
  // val transformed = new RuleTransformer(rule).transform(doc)
  //
  // println(transformed)

  println("----------------------------------------------")

  val xhtml5 = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html> <head> <title>Xhtml Valid Page</title> </head> <body> <p> <img src="http://www.imageA.fr" /> </p> <p><![CDATA[Cdata <section>]]></p> </body> </html>"""

  /**
   * Returns a new document in which every childless `img` element lacking an
   * `alt` attribute has `alt="TODO"` added.
   *
   * Only `dtd` and `docElem` are set on the returned document; the input
   * document is not modified.
   *
   * @param doc parsed document to rewrite
   * @return a fresh `Document` carrying the original DTD and the rewritten root
   */
  def imgTodo(doc: Document): Document = {
    val addMissingAlt = new RewriteRule {
      override def transform(n: Node): Seq[Node] = n match {
        // Typed pattern replaces the former unchecked cast; the label and
        // no-children conditions mirror the `<img/>` pattern.
        case img: Elem
            if img.label == "img" && img.child.isEmpty && (img \ "@alt").isEmpty =>
          img % Attribute(null, "alt", "TODO", Null)
        case other => other
      }
    }

    // Transforming the document yields a sequence; its first node is the new root.
    val rewrittenRoot = new RuleTransformer(addMissingAlt).transform(doc).head

    val transformed = new Document()
    transformed.dtd = doc.dtd
    transformed.docElem = rewrittenRoot
    transformed
  }

  val parser = new XhtmlParser(Source.fromString(xhtml5))
  val doc = imgTodo(parser.initialize.document())
  println(doc)
}
// Page residue from the web source: "XMLhandleTest" / "Latest recommended article, published 2021-02-02 21:54:42".