哔哩哔哩视频弹幕词云分析
【摘要】
import requests
import parsel
import csv
import time
for page in range(20,32):
time.sleep(1)
...
import requests
import parsel
import csv
import time
for page in range(20,32):
time.sleep(1)
print('正在爬取'.format(page))
url = 'https://api.bilibili.com/x/v2/dm/history?type=1&oid=140610898&date=2020-11-{}'.format(page)
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
"cookie": "__uuid=1896D3F7-4A98-54EB-F7FA-3301CE9EF5F307776infoc; buvid3=B68B2187-4C3E-4466-A896-FBF9B292099B190963infoc; LIVE_BUVID=AUTO4115757254257055; stardustvideo=1; rpdid=|(umu|ulY)JJ0J'ul~l~klRJ); sid=8cq4r229; im_notify_type_65901796=0; laboratory=1-1; DedeUserID=523606542; DedeUserID__ckMd5=909861ec223d26d8; blackside_state=1; CURRENT_FNVAL=80; SESSDATA=a976c0b4%2C1618637313%2C4d792*a1; bili_jct=7f54729ec20660f750661122b80746d2; PVID=1; bp_video_offset_523606542=458111639975213216; CURRENT_QUALITY=16; bfe_id=1e33d9ad1cb29251013800c68af42315"
}
response = requests.get(url=url, headers=headers)
response.encoding = response.apparent_encoding
selector = parsel.Selector(response.text)
data = selector.css('d::text').getall()
print(data)
for i in data:
print(i)
with open('B站弹幕.csv', mode='a', newline='', encoding='utf-8-sig') as f:
writer = csv.writer(f)
links = []
links.append(i)
writer.writerow(links)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
import jieba
import wordcloud
import imageio
from PIL import Image
import numpy as np
# 读取文件内容
f = open('D:\python\爬虫\B站弹幕.csv', encoding='utf-8')#你文件存储的位置
txt = f.read()
# print(txt)
# jiabe 分词 分割词汇
txt_list = jieba.lcut(txt)
string = ' '.join(txt_list)
# 词云图设置
imgobj = Image.open('1.jpg')#你找的图片模型文件
img_mask=np.array(imgobj)
#print(img_mask)
wc = wordcloud.WordCloud(
mask=img_mask,
max_words=400,
width=800, # 图片的宽
height=500, # 图片的高
background_color='snow', # 图片背景颜色
font_path='msyh.ttc', # 词云字体
random_state=120,
max_font_size=60,
scale=15,
)
# 给词云输入文字
wc.generate(string)
# 词云图保存图片地址
wc.to_file('1.png')
print("图片保存成功")
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
文章来源: blog.csdn.net,作者:肥学,版权归原作者所有,如需转载,请联系作者。
原文链接:blog.csdn.net/jiahuiandxuehui/article/details/110324992
【版权声明】本文为华为云社区用户转载文章,如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱:
cloudbbs@huaweicloud.com
- 点赞
- 收藏
- 关注作者
评论(0)