2

我错过了什么吗?有一个更好的方法吗?

输入:

<span style="FONT-FAMILY: 'Lucida Sans','sans-serif'; COLOR: #003572; FONT-SIZE: 9pt; 
mso-fareast-font-family: Calibri; mso-ansi-language: EN-US; mso-fareast-language: EN-US; 
mso-bidi-language: AR-SA; mso-fareast-theme-font: minor-latin">Dr. Who is 
<u>usually</u> available for consultations Mon - Thurs afternoons and Friday 9a-
12p at 555-1212. </span>

期望的输出:

<span style="COLOR: #003572; FONT-SIZE: 9pt;">Dr. Who is <u>usually</u> available for consultations Mon - Thurs afternoons and Friday 9a-12p at 555-1212. </span>

到目前为止我的代码:

// Clean the HTML inside the weeklong note before writing it to the database:
// keep only the COLOR and FONT-SIZE declarations of every style attribute,
// then run the result through a jsoup whitelist.

  // simpleText() is a static factory, so a separate "new Whitelist()" is not needed.
  Whitelist wl = Whitelist.simpleText();
  wl.addTags("br", "p", "span");
  wl.addAttributes(":all","style");
  Document doc = 
              Jsoup.parse(
               "<html><head></head><body>"+ds.getWeeklongNote()+"</body></html>");
  // Only elements that actually carry a style attribute need filtering.
  for (Element el : doc.select("[style]")){
      StringBuilder kept = new StringBuilder();
      for (String declaration : el.attr("style").split(";")){
          // Declarations after the first one begin with whitespace or a line
          // break, so isolate and trim the property name before comparing.
          String property = declaration.split(":", 2)[0].trim();
          if (property.equalsIgnoreCase("COLOR")||property.equalsIgnoreCase("FONT-SIZE")){
              if (kept.length() > 0){
                  kept.append(' ');
              }
              // Append the single declaration, not the whole attribute.
              kept.append(declaration.trim()).append(';');
          }
      }
      if (kept.length() == 0){
          // Nothing allowed remains: drop the attribute instead of leaving style="".
          el.removeAttr("style");
      } else {
          el.attr("style", kept.toString());
      }
  }

  // The elements were modified in place, so the body can be cleaned directly;
  // re-inserting select("*").outerHtml() would duplicate nested content.
  ds.setWeekLongNote(Jsoup.clean(doc.body().html(), wl));
4

3 回答 3

2

试试这个:

// Keeps only the COLOR and FONT-SIZE declarations in the style attribute of the
// first <span> in the parsed HTML, then replaces the document content with the
// span markup.
Document doc = Jsoup.parse(html);
  Elements e = doc.getElementsByTag("body");            
  Log.i("Span element: "+e.get(0).nodeName(), ""+e.get(0).nodeName());
  e = e.get(0).getElementsByTag("span");
  // Without any <span> there is nothing to filter, and e.get(0) would throw.
  if(!e.isEmpty()){
     // Look the attribute up by name: "style" is not guaranteed to be the
     // first attribute of the element.
     if(e.get(0).hasAttr("style")){
        String[] items = e.get(0).attr("style").trim().split(";");
        StringBuilder newValue = new StringBuilder();
        for(String item: items){
            // Compare the exact property name so that e.g. BACKGROUND-COLOR
            // is not mistaken for COLOR.
            String property = item.split(":", 2)[0].trim();
            if(property.equalsIgnoreCase("COLOR")||property.equalsIgnoreCase("FONT-SIZE")){
                Log.i("Style Item: ", ""+item);
                newValue.append(item).append(";");
            }
        }
        e.get(0).attr("style", newValue.toString());
        Log.i("New Atrrbute: ",""+newValue);                    
     }

     Log.i("FINAL HTML: ",""+e.outerHtml()); 

     doc.html(e.outerHtml());
  }
    }

输出:

08-17 18:28:07.692: I/FINAL HTML:(8148): <span style=" COLOR: #003572; FONT-SIZE: 9pt;">Dr. Who is <u>usually</u> available for consultations Mon - Thurs afternoons and Friday 9a- 12p at 555-1212. </span>

干杯,

于 2013-08-17T16:30:14.820 回答
0

如果您有多个 span 元素,则可以使用以下代码片段:

    // Filters the style attribute of every <span>: only the declarations whose
    // property name is listed in allowedItems are kept; a span left with no
    // allowed declaration loses its style attribute entirely.
    Document document = Jsoup.parse(html);

    // Allowed property names, stored in lower case for comparison.
    Vector<String> allowedItems = new Vector<String>();
    allowedItems.add("color");
    allowedItems.add("font-size");

    Elements e = document.getElementsByTag("span");
    for (Element element : e) {
        String[] styles = element.attr("style").split(";");
        Vector<String> filteredItems = new Vector<String>();
        for (String item : styles) {
            // split with limit 2 always yields at least one element, so a
            // malformed declaration such as ":" cannot cause an index error.
            // Locale.ROOT keeps the lower-casing independent of the default
            // locale (e.g. Turkish dotless i in "FONT-SIZE").
            String key = item.split(":", 2)[0].trim().toLowerCase(java.util.Locale.ROOT);
            if ( allowedItems.contains(key) ){
                filteredItems.add(item);
            }
        }
        if( filteredItems.isEmpty() ){
            element.removeAttr("style");        
        }else{
            element.attr("style",StringUtils.join(filteredItems, ";"));
        }
    }
于 2015-09-24T09:11:28.753 回答
-1
// Remove the style attribute from every element except <img>.
    Elements elms = doc.select("*").not("img");
    for (Element e : elms) {
        // The previous condition (!"".equals(attr) || null != attr) was always
        // true and the code wrote style="" onto every element. Removing the
        // attribute matches the stated intent and is a no-op when it is absent.
        e.removeAttr("style");
    }
于 2016-01-17T09:32:19.783 回答