shua.js 笔记
【摘要】
let superagent = require('superagent'); let cheerio = require('cheerio'); let baseUrl = 'https://blog.csdn.net/github_35631540/article/list/1' // https://blog.csdn.net/githu...
-
let superagent = require('superagent')
-
let cheerio = require('cheerio')
-
-
// URL of the first listing page; later pages follow the same pattern:
// https://blog.csdn.net/github_35631540/article/list/1?t=1&
// https://blog.csdn.net/github_35631540/article/list/2?t=1&
// https://blog.csdn.net/github_35631540/article/list/3?t=1&
const baseUrl = 'https://blog.csdn.net/github_35631540/article/list/1'

// Array that the crawl code appends { name, href } blog records to.
const blogHrefArr = []

// Upper bound used by getAllBlogHreef when deciding whether to fetch another page.
const totalPage = 4

// Request headers passed to every superagent call via .set().
const setData = {
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
  // The HTTP request header is spelled "Referer" (single "r" in the middle);
  // "Referrer" is not a recognised header, so the referer was never sent.
  'Referer': 'https://blog.csdn.net/github_35631540?t=1',
  'Content-Type': 'text/html; charset=UTF-8',
}
-
-
/**
 * Fetch the listing page at baseUrl, extract every blog link found on it,
 * request each post through getBlogDetail and append it to blogHrefArr.
 *
 * Works through callbacks and returns nothing.
 */
const mainFun = () => {
  superagent
    .get(baseUrl)
    .set(setData)
    .end((err, res) => {
      // A failed request may deliver no response object at all; bail out
      // instead of crashing on res.text.
      if (err || !res) {
        console.error(`Failed to fetch ${baseUrl}: ${err ? err.message : 'empty response'}`)
        return
      }

      const $ = cheerio.load(res.text)
      // Run the selectors once rather than on every loop iteration.
      const links = $('.article-list .content a')
      const titles = $('.article-list h4')
      const len = links.length
      if (len === 0) {
        return
      }

      console.log(`获取到${len}条博客记录`)
      console.log(`开始爬取博客记录....`)
      for (let i = 0; i < len; i++) {
        const blogItem = {
          // Strip all whitespace from the title text.
          name: titles.eq(i).text().replace(/\s+/g, ''),
          href: links.eq(i).attr('href'),
        }
        getBlogDetail(blogItem)
        blogHrefArr.push(blogItem)
      }
    })
}
-
-
/**
 * Request a single blog post and log a success line when it answers with
 * HTTP 200. Failures are logged instead of throwing inside the callback.
 *
 * @param {{name: string, href: string}} blogItem Record holding the post
 *   title and its URL.
 */
const getBlogDetail = (blogItem) => {
  // An anchor without an href would otherwise be requested as the literal
  // URL "undefined".
  if (!blogItem.href) {
    console.error(`Skipping blog without href: ${blogItem.name}`)
    return
  }

  superagent
    .get(blogItem.href)
    .set(setData)
    .end((err, res) => {
      // A failed request may deliver no response object; guard before
      // reading res.statusCode.
      if (err || !res) {
        console.error(`Failed to fetch ${blogItem.href}: ${err ? err.message : 'empty response'}`)
        return
      }
      if (res.statusCode === 200) {
        console.log(`爬取成功:__${blogItem.name}`)
      }
    })
}
-
-
/**
 * Recursively collect the blog links from every listing page.
 *
 * Requests listing page n + 1, hands each link found to getBlogDetail,
 * records it in blogHrefArr, and then recurses for the next page while the
 * next index is below totalPage. The walk stops early on a request error
 * or on a page with no links.
 *
 * Works through callbacks: the function itself returns nothing, and the
 * results accumulate in blogHrefArr as responses arrive.
 *
 * @param {number} n Zero-based page index; page n + 1 is requested.
 */
const getAllBlogHreef = (n) => {
  const pageUrl = `https://blog.csdn.net/github_35631540/article/list/${n + 1}?t=1&`

  superagent
    .get(pageUrl)
    .set(setData)
    .end((err, res) => {
      // A failed request may deliver no response object; stop here rather
      // than crash on res.text.
      if (err || !res) {
        console.error(`Failed to fetch ${pageUrl}: ${err ? err.message : 'empty response'}`)
        return
      }

      const $ = cheerio.load(res.text)
      // Run the selectors once rather than on every loop iteration.
      const links = $('.article-list .content a')
      const titles = $('.article-list h4')
      const len = links.length
      if (len === 0) {
        return
      }

      console.log(`获取到${len}条博客记录`)
      console.log(`开始获取博客地址....`)
      for (let i = 0; i < len; i++) {
        const blogItem = {
          // Strip all whitespace from the title text.
          name: titles.eq(i).text().replace(/\s+/g, ''),
          href: links.eq(i).attr('href'),
        }
        getBlogDetail(blogItem)
        // Record the link so blogHrefArr actually holds the collected hrefs.
        blogHrefArr.push(blogItem)
      }

      // Advance without mutating the parameter. A value returned from this
      // callback would be discarded, so nothing is returned at the end.
      const nextPage = n + 1
      if (nextPage < totalPage) {
        getAllBlogHreef(nextPage)
      }
    })
}
-
-
// Start the crawl at the first listing page. getAllBlogHreef works through
// callbacks and returns nothing synchronously, so wrapping the call in
// console.log would only print "undefined".
getAllBlogHreef(0)
cd E:\Main_Pro\FUN\upblog & node shua.js
文章来源: fizzz.blog.csdn.net,作者:拿我格子衫来,版权归原作者所有,如需转载,请联系作者。
原文链接:fizzz.blog.csdn.net/article/details/90032834
【版权声明】本文为华为云社区用户转载文章,如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱:cloudbbs@huaweicloud.com
- 点赞
- 收藏
- 关注作者
评论(0)