Python:
import requests
from bs4 import BeautifulSoup
# Page to scrape and the file the extracted titles are written to.
url = 'http://top.baidu.com/buzz?b=1&fr=topbuzz_b1'
save_path = 'hot_python.txt'

if __name__ == '__main__':
    # A timeout keeps the script from hanging forever on a stalled connection,
    # and raise_for_status() stops us from parsing an HTTP error page.
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    # Pass the raw bytes so BeautifulSoup performs its own charset detection.
    soup = BeautifulSoup(response.content, 'lxml')
    items = soup.select('table.list-table > tr > .keyword > .list-title')

    # Open the file once in 'w' mode (which truncates any previous content)
    # and pin the encoding so non-ASCII titles are written identically on
    # every platform instead of depending on the locale default.
    with open(save_path, 'w', encoding='utf-8') as f:
        f.writelines(item.text + '\n' for item in items)
Java:
需在 pom.xml 中引入以下 Maven 依赖:
<!-- jsoup: provides the org.jsoup.* classes imported by the Java crawler below. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.3</version>
</dependency>
爬虫代码:
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Fetches the page at {@link #URL}, selects every {@code .list-title} element inside the
 * {@code table.list-table} rows, and writes the text of each one to {@link #SAVE_PATH},
 * one entry per line.
 */
public class BaiduHotSpider {
    private static final String URL = "http://top.baidu.com/buzz?b=1&fr=topbuzz_b1";
    private static final String SAVE_PATH = "hot_java.txt";

    /**
     * Downloads the page, extracts the matching elements and saves their text.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the page cannot be fetched or the output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        Document document = Jsoup.connect(URL).get();
        Elements elements =
                document.select("table.list-table > tbody > tr > .keyword > .list-title");

        // Files.newBufferedWriter creates the file or truncates an existing one, which replaces
        // the manual delete/create/append sequence. UTF-8 is pinned so non-ASCII text is not
        // mangled by the platform default charset, and try-with-resources guarantees the writer
        // is closed (and flushed) even if a write fails part-way through.
        try (BufferedWriter bw =
                Files.newBufferedWriter(Paths.get(SAVE_PATH), StandardCharsets.UTF_8)) {
            for (Element element : elements) {
                bw.write(element.text());
                // Keep a literal '\n' (not newLine()) so the output bytes stay platform-independent.
                bw.write('\n');
            }
        }
    }
}