精选圈子榜单优站
Nemo
Nemo
管理员
路漫漫其修远兮，吾将上下而求索。

[Node.js] 第一个爬虫


var http = require('http');
var cheerio = require('cheerio');
// Address of the page that is downloaded and scanned for article links.
var url = 'http://www.link-nemo.com/Cynthia/index.do';

/**
 * Extract the id and title of every article listed in a page.
 *
 * Each element matching `.article` is inspected: the id is whatever follows
 * `&articleid=` in the href of the link inside it, and the title is the text
 * of the `.mytitle` element inside that link. Entries lacking a link, an id
 * or a title are skipped.
 *
 * @param {string} html - Markup of the page to scan.
 * @returns {Array<{id: string, title: string}>} One record per usable entry,
 *     in the order cheerio yields the matched elements.
 */
function filterChapters(html){
        var $ = cheerio.load(html);
        var articleData = [];

        $('.article').each(function (){
                var link = $(this).find('a');
                var href = link.attr('href');

                // An entry without a link (or without an href on it) cannot yield
                // an id; skip it instead of throwing on `undefined.split`.
                if(typeof href !== 'string'){
                        return;
                }

                var articleId = href.split('&articleid=')[1];
                var articleTitle = link.find('.mytitle').text();

                // `split` leaves index 1 undefined when the marker is absent and
                // an empty string when nothing follows it; both mean "no id".
                if(!articleId || !articleTitle){
                        return;
                }

                articleData.push({
                        id:articleId,
                        title:articleTitle
                });
        });

        return articleData;
}


// Download the page, accumulate its body, then print the parsed article list.
http.get(url,function (res){
        // Anything other than 200 is not the page we want to parse; report it
        // and drain the response so its data is discarded.
        if(res.statusCode !== 200){
                console.error('Error. Unexpected status code: ' + res.statusCode);
                res.resume();
                return;
        }

        // Decode at the stream level: converting each Buffer chunk to a string
        // on its own corrupts any multi-byte character that straddles two chunks.
        // NOTE(review): assumes the page is served as UTF-8 — confirm.
        res.setEncoding('utf8');

        var html = '';
        res.on('data',function (chunk){
                html += chunk;
        });

        res.on('end',function (){
                var articles = filterChapters(html);
                console.log(articles);
        });

}).on('error',function (err){
        // Include the failure reason rather than discarding the error object.
        console.error('Error.', err.message);
});


  • 若文章侵犯了您的权益，请联系站长处理：nemo@link-nemo.com

  • 2016-07-05
  • 3349阅读
评论