代码:
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * HTML helper utilities that pull image references out of markup with regular expressions.
 * Created by Nemo on 2017/9/20.
 */
public class HtmlUtil {

    /**
     * Matches a {@code src="..."} attribute (group 1 is the value). Compiled once and reused
     * because {@link Pattern} instances are immutable.
     */
    private static final Pattern SRC_PATTERN =
            Pattern.compile("src=\"(.*?)\"", Pattern.DOTALL);

    /** Matches a {@code data-original="..."} attribute (group 1 is the value). */
    private static final Pattern DATA_ORIGINAL_PATTERN =
            Pattern.compile("data-original=\"(.*?)\"", Pattern.DOTALL);

    /**
     * Collects every {@code src="..."} attribute found in a piece of HTML.
     *
     * <p>Each returned element is the complete attribute text, including the {@code src=}
     * prefix and the surrounding double quotes. The match is purely textual: it is not
     * restricted to {@code <img>} tags, so a {@code src} attribute on any other element
     * is returned as well.
     *
     * @param s the HTML text to scan; may be {@code null}
     * @return the matched attributes in document order; empty (never {@code null}) when
     *         {@code s} is {@code null} or contains no match
     */
    public static List<String> getImgs(String s) {
        return findAll(SRC_PATTERN, s);
    }

    /**
     * Returns the file-name part (the last {@code /}-separated segment) of every
     * {@code src} attribute value found in the given HTML.
     *
     * @param tar the HTML text to scan; may be {@code null}
     * @return one entry per {@code src} attribute, in document order; an entry is the empty
     *         string when the attribute value is empty or consists only of slashes
     */
    public static String[] getImgaddress(String tar) {
        List<String> imgList = getImgs(tar);
        String[] res = new String[imgList.size()];
        for (int i = 0; i < res.length; i++) {
            res[i] = lastPathSegment(attributeValue(imgList.get(i)));
        }
        return res;
    }

    /**
     * Collects every {@code data-original="..."} attribute found in a piece of HTML
     * (the attribute the ITHome pages are expected to carry their image URLs in).
     *
     * <p>Each returned element is the complete attribute text, including the
     * {@code data-original=} prefix and the surrounding double quotes.
     *
     * @param s the HTML text to scan; may be {@code null}
     * @return the matched attributes in document order; empty (never {@code null}) when
     *         {@code s} is {@code null} or contains no match
     */
    public static List<String> getItHomeImgs(String s) {
        return findAll(DATA_ORIGINAL_PATTERN, s);
    }

    /** Returns the full text of every match of {@code pattern} in {@code html}, in order. */
    private static List<String> findAll(Pattern pattern, String html) {
        List<String> matches = new ArrayList<String>();
        if (html == null) {
            // Nothing to scan; an empty list lets callers iterate without a null check.
            return matches;
        }
        Matcher matcher = pattern.matcher(html);
        while (matcher.find()) {
            matches.add(matcher.group());
        }
        return matches;
    }

    /** Strips the {@code name="} prefix and the closing quote from a matched attribute. */
    private static String attributeValue(String attribute) {
        int begin = attribute.indexOf('"') + 1;
        int end = attribute.lastIndexOf('"');
        return attribute.substring(begin, end);
    }

    /** Returns the last {@code /}-separated segment of {@code url}, or "" if there is none. */
    private static String lastPathSegment(String url) {
        String[] segments = url.split("/");
        // String.split drops trailing empty strings, so a value such as "/" yields a
        // zero-length array; indexing it unconditionally would throw.
        return segments.length == 0 ? "" : segments[segments.length - 1];
    }

    /**
     * Small demo: prints every {@code src} attribute of a sample page to standard output,
     * then the extracted file names to standard error.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        String html = " <div class=\"post-sum mgeeeeker\">\n" +
                "\t\t <p><img src=\"/Kira/new/res/upload/2017-09/i2dOrBdsEEzxL10wWSC.png\" alt=\"d1a20cf431adcbef48896135a7af2edda3cc9f47\" style=\"max-width: 100%;\" class=\"\"></p><p><font face=\"微软雅黑\"><span style=\"color: inherit; font-size: 30px;\">剧情简介</span><br></font></p><p><font face=\"微软雅黑\" size=\"4\">在英雄内战机场大战落幕后,彼得·帕克<i>(<a target=\"_blank\" href=\"https://baike.baidu.com/item/%E6%B1%A4%E5%A7%86%C2%B7%E8%B5%AB%E5%85%B0%E5%BE%B7\">汤姆·赫兰德</a>饰)</i>在恩师托尼·斯塔克<i>(<a target=\"_blank\" href=\"https://baike.baidu.com/item/%E5%B0%8F%E7%BD%97%E4%BC%AF%E7%89%B9%C2%B7%E5%94%90%E5%B0%BC\">小罗伯特·唐尼</a>饰)</i>的协助下,试图在一名普通高中生和打击犯罪的超级英雄蜘蛛侠间保持平衡,但反派秃鹫<i>(<a target=\"_blank\" href=\"https://baike.baidu.com/item/%E8%BF%88%E5%85%8B%E5%B0%94%C2%B7%E5%9F%BA%E9%A1%BF\">迈克尔·基顿</a>饰)</i>的崛起让人们面临新的威胁。钢铁侠作为蜘蛛侠的导师再次出现,做为引导他踏上英雄之路的关键人物,确保他在未来有资格成为复仇者联盟的一分子。</font></p><p><font face=\"微软雅黑\" size=\"4\"><br></font></p><p><img src=\"http://owj9uubs1.bkt.clouddn.com/203fb80e7bec54e719d96845b3389b504fc26ab7.jpg\" style=\"line-height: 1; max-width: 100%;\"><br></p><p><img src=\"http://owj9uubs1.bkt.clouddn.com/8694a4c27d1ed21b8d1a5dbea76eddc450da3f9d.jpg\" style=\"max-width: 100%;\"><br></p><p><img style=\"max-width:100%;\" src=\"http://owj9uubs1.bkt.clouddn.com/8718367adab44aed9103cdbfb91c8701a18bfb18.jpg\"></p><p><img src=\"http://owj9uubs1.bkt.clouddn.com/e4dde71190ef76c69dea89709716fdfaaf5167b4.jpg\" style=\"line-height: 1; max-width: 100%;\" class=\"\"><br></p><p><b><font color=\"#ff0000\" size=\"5\">注意:此版属于精修枪版,画质一般</font></b></p><p><b style=\"color: inherit; font-family: inherit; font-size: large;\"><font face=\"微软雅黑\">百度云:<a href=\"http://pan.baidu.com/s/1i4NUN7v\" target=\"_blank\">http://pan.baidu.com/s/1i4NUN7v</a></font></b></p></img></img></img></img></img></div>";

        // Full src="..." attributes go to standard output.
        for (String img : getImgs(html)) {
            System.out.println(img);
        }

        // Bare file names go to standard error, so the two listings may interleave on a console.
        for (String fileName : getImgaddress(html)) {
            System.err.println(fileName);
        }
    }
}
输出结果(src="..." 行来自 System.out,文件名行来自 System.err,两个流交错输出,顺序不固定):
src="/Kira/new/res/upload/2017-09/i2dOrBdsEEzxL10wWSC.png"
i2dOrBdsEEzxL10wWSC.png
203fb80e7bec54e719d96845b3389b504fc26ab7.jpg
src="http://owj9uubs1.bkt.clouddn.com/203fb80e7bec54e719d96845b3389b504fc26ab7.jpg"
8694a4c27d1ed21b8d1a5dbea76eddc450da3f9d.jpg
8718367adab44aed9103cdbfb91c8701a18bfb18.jpg
src="http://owj9uubs1.bkt.clouddn.com/8694a4c27d1ed21b8d1a5dbea76eddc450da3f9d.jpg"
e4dde71190ef76c69dea89709716fdfaaf5167b4.jpg
src="http://owj9uubs1.bkt.clouddn.com/8718367adab44aed9103cdbfb91c8701a18bfb18.jpg"
src="http://owj9uubs1.bkt.clouddn.com/e4dde71190ef76c69dea89709716fdfaaf5167b4.jpg"