Node.js 使用 cheerio 解析 HTML
【摘要】 cheerio 的语法类似 jQuery
doc
doc-zh: https://github.com/cheeriojs/cheerio/wiki/Chinese-README
安装
npm i cheerio
1
代码实例
const cheerio = require("cheerio");
const doc = cheerio.load('<h...
cheerio 是一个在 Node.js 中解析 HTML/XML 的库,语法类似 jQuery。
doc
- doc-zh: https://github.com/cheeriojs/cheerio/wiki/Chinese-README
安装
npm i cheerio
- 1
代码实例
const cheerio = require("cheerio");

// Parser options used for this demo: XML mode on, entity decoding off.
const parseOptions = {
  xmlMode: true,
  decodeEntities: false
};
const doc = cheerio.load('<h2 class="title">Hello world</h2>', parseOptions);

// Replace the heading text, then add a second class to it.
doc("h2.title").text("Hello there!");
doc("h2").addClass("welcome");

// Serialize the modified document and print it.
const markup = doc.xml();
console.log(markup);
// <h2 class="title welcome">Hello there!</h2>
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
项目实战
import cheerio from "cheerio";
/**
 * Rewrite external image links in a piece of markup so they point at this site.
 *
 * Every `<img>` whose `src` does not contain `process.env.VUE_APP_BASE_URL`
 * is handed to `saveImage` (defined elsewhere); when that resolves to a truthy
 * value, the value becomes the element's new `src`. Images without a `src`
 * and images whose `src` already contains the base URL are left untouched.
 *
 * @param {string} html - Markup to process; passed straight to `cheerio.load`.
 * @returns {Promise<string>} The processed markup, serialized with `doc.xml()`.
 */
export async function replaceImage(html) {
  const doc = cheerio.load(html, {
    xmlMode: true,
    decodeEntities: false,
  });

  // `each` does not wait for promises, so collect the wrapped elements first
  // and await the replacements one by one in a plain loop.
  const elems = [];
  doc("img").each((_index, elem) => {
    elems.push(doc(elem));
  });

  for (const elem of elems) {
    const src = elem.attr("src");

    // Nothing to do for images without a src or already served from this site.
    if (!src || src.includes(process.env.VUE_APP_BASE_URL)) {
      continue;
    }

    // Swap in your own replacement routine here.
    const imageSrc = await saveImage(src);
    if (imageSrc) {
      elem.attr("src", imageSrc);
    }
  }

  return doc.xml();
}
/**
 * Collect the image links found in a piece of markup.
 *
 * @param {*} html - Markup handed to `cheerio.load`.
 * @returns {string[]} The truthy `src` values of all `<img>` elements, in
 *   the order the selection yields them.
 */
export function extractImages(html) {
  const parseOptions = { xmlMode: true, decodeEntities: false };
  const doc = cheerio.load(html, parseOptions);

  const sources = [];
  doc("img").each((_position, node) => {
    const link = doc(node).attr("src");
    // Images with a missing or empty src contribute nothing.
    if (!link) {
      return;
    }
    sources.push(link);
  });

  return sources;
}

/**
 * Strip the `style` attribute from every element that carries one.
 *
 * @param {*} html - Markup handed to `cheerio.load`.
 * @returns {string} The markup serialized with `doc.xml()` after the removal.
 */
export function removeStyle(html) {
  const parseOptions = { xmlMode: true, decodeEntities: false };
  const doc = cheerio.load(html, parseOptions);

  const styledElements = doc("*[style]");
  styledElements.removeAttr("style");

  return doc.xml();
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
文章来源: pengshiyu.blog.csdn.net,作者:彭世瑜,版权归原作者所有,如需转载,请联系作者。
原文链接:pengshiyu.blog.csdn.net/article/details/106361156
【版权声明】本文为华为云社区用户转载文章,如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据。一经查实,本社区将立刻删除涉嫌侵权内容。举报邮箱:cloudbbs@huaweicloud.com
- 点赞
- 收藏
- 关注作者
评论(0)