cheerio request 实现网络爬虫 - 聂峰军个人博客

先上代码。突然想了解如何写网络爬虫，上网查了一下，发现 Node.js 已经有现成的包（request 和 cheerio）可以解决这个问题：

var request=require("request") ;
var cheerio = require('cheerio') ;

// Fetch the quick-start category page and print, for every article link found
// inside the `.explist` container, its href followed by its title attribute.
request('http://www.nodejs.net/category/quick-start', function (err, rqs, body) {
    // Report a failed request instead of silently doing nothing.
    if (err) {
        console.error(err);
        return;
    }

    console.log('http://www.nodejs.net/category/quick-start');

    // Parse the whole page first, then re-parse only the markup inside
    // `.explist` so the selectors below are scoped to the article list.
    var page = cheerio.load(body);
    var listHtml = page('.explist').html();

    // Nothing to print when the page has no `.explist` content
    // (e.g. the site layout changed or an error page was returned).
    if (!listHtml) {
        return;
    }

    // Declared locally: the original assigned `$` without a declaration,
    // which leaked an implicit global.
    var $ = cheerio.load(listHtml);

    // Query once and iterate over the links themselves. The original bounded
    // the loop by the number of `li` elements while indexing `li h3 a`, and
    // its update clause never incremented `i`, so the loop never terminated.
    var links = $('li h3 a');
    for (var i = 0; i < links.length; i++) {
        console.log(links[i].attribs.href);
        console.log(links[i].attribs.title);
    }
});

爬虫结果

http://www.nodejs.net/category/quick-start
http://www.nodejs.net/a/20150116/231115.html
在ExpressJS中使用Redis缓存和查询数据及Session持久化
http://www.nodejs.net/a/20150116/225656.html
在ExpressJS中设置二级域名跨域共享Cookie
http://www.nodejs.net/a/20141104/232041.html
推荐一本开源图书《Node.js 包教不包会》
http://www.nodejs.net/a/20141016/233306.html
分享十五个NodeJS应用场景
http://www.nodejs.net/a/20141016/232858.html
基于NodeJS的14款Web框架

就这样把网站的文章列表爬出来了，接下来再写个循环，逐个抓取每篇文章的内容就 OK 了。