使用 Node.js 抓取《文心雕龙》
【摘要】
/**
* Created by Administrator on 2017/11/3.
* 获取文心雕龙 保存到数据库
*/
let superagent = require("superagent");
let cheerio = require("cheerio");
let async = require("async"...
/**
 * Created by Administrator on 2017/11/3.
 * Fetches the chapters of "文心雕龙" from gushiwen.org and saves each chapter
 * as a document in MongoDB.
 */
const superagent = require("superagent");
const cheerio = require("cheerio");
// NOTE(review): `async` is not used anywhere in this script; the require is
// kept only so the file's dependency list stays unchanged.
const async = require("async");
const mongoose = require('mongoose');
const Schema = require('mongoose').Schema;

// Headers sent with every outgoing HTTP request.
// NOTE(review): the standard HTTP header is spelled "Referer"; "Referrer" is
// left untouched here because changing it alters what the server receives.
const setData = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
    'Referrer': 'www.baidu.com',
    'Content-Type': 'text/plain; charset=UTF-8'
};

// Host prepended to chapter links that are not already absolute URLs.
const CHAPTER_HOST = 'http://so.gushiwen.org';

/**
 * Poetry collection model.
 */
const Poetry = mongoose.model('poetry', new Schema({
    title  : String, // title of the piece
    author : String, // author
    href   : String, // source link
    content: String, // body of the piece (HTML)
    dynasty: String, // dynasty
    tag    : Array   // tags
}));

mongoose.connect('mongodb://localhost/alone', function (err) {
    if (err) {
        console.log(err.message);
    } else {
        console.log("^_^数据库已连接...请开始你的表演^_^")
    }
});

/**
 * Downloads one chapter page, extracts the HTML inside `.contson`, and saves
 * it as a Poetry document with fixed author/tag/dynasty values.
 *
 * Failures (request error, missing content, save error) are logged and the
 * chapter is skipped; nothing is thrown to the caller.
 *
 * @param {string} href  - absolute URL of the chapter page
 * @param {string} title - chapter title taken from the index page link text
 */
function getContent(href, title) {
    superagent.get(href)
        .set(setData)
        .end((err, res) => {
            // On a request failure `res` may be missing, so never touch
            // `res.text` before checking.
            if (err || !res) {
                console.error(`Failed to fetch "${title}" (${href}): ${err ? err.message : 'empty response'}`);
                return;
            }

            const $ = cheerio.load(res.text);
            const content = $(".contson").html();
            if (content === null || content === undefined) {
                // Selector matched nothing: avoid storing a record with no body.
                console.error(`No ".contson" content found for "${title}" (${href}); skipped.`);
                return;
            }

            const poetry = new Poetry({
                href,
                title,
                author: '刘勰',
                tag: ['文心雕龙'],
                dynasty: '南朝',
                content
            });

            // Report success only once the document has actually been written.
            poetry.save().then(
                () => {
                    console.log("已保存到数据库^_^");
                },
                (saveErr) => {
                    console.error(`Failed to save "${title}": ${saveErr.message}`);
                }
            );
        });
}

/**
 * Program entry point: loads the index page, collects every link matched by
 * `selector`, and hands each one to `getContent`.
 *
 * @param {string} target_url - URL of the index (table of contents) page
 * @param {string} selector   - CSS selector matching the chapter anchors
 */
let mainFun = (target_url, selector) => {
    superagent.get(target_url)
        .set(setData)
        .end((err, res) => {
            if (err || !res) {
                console.error(`Failed to fetch index page ${target_url}: ${err ? err.message : 'empty response'}`);
                return;
            }

            const $ = cheerio.load(res.text);
            $(selector).each((index, element) => {
                const item = $(element);
                let href = item.attr('href');
                const title = item.text();

                // Anchors without an href cannot be followed.
                if (!href) {
                    return;
                }
                // Only prefix the host for links that are not already
                // absolute (http or https).
                if (!/^https?:\/\//.test(href)) {
                    href = CHAPTER_HOST + href;
                }
                getContent(href, title);
            });
        });
};

mainFun('http://www.gushiwen.org/guwen/wenxin.aspx', '.bookcont a');
文章来源: fizzz.blog.csdn.net,作者:拿我格子衫来,版权归原作者所有,如需转载,请联系作者。
原文链接:fizzz.blog.csdn.net/article/details/78618237
【版权声明】本文为华为云社区用户转载文章,如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱:
cloudbbs@huaweicloud.com
- 点赞
- 收藏
- 关注作者
评论(0)