爬虫管道
【摘要】
from datetime import datetime
from scrapy.exporters import JsonItemExporter, CsvItemExporter
import py...
from datetime import datetime
from scrapy.exporters import JsonItemExporter, CsvItemExporter
import pymongo
import redis
from .settings import REDIS_HOST, REDIS_PORT, MONGO_HOST, MONGO_PORT
- 1
- 2
- 3
- 4
- 5
数据源的管道
class AqiDataPipeline(object):
    """Stamp each scraped item with crawl metadata."""

    def process_item(self, item, spider):
        """Record when, and by which spider, the item was scraped.

        Args:
            item: Mutable, mapping-like item; it is updated in place.
            spider: The running spider; only its ``name`` attribute is read.

        Returns:
            The same ``item`` object that was passed in.
        """
        # Crawl time, stored as a naive datetime expressed in UTC.
        item['crawl_time'] = datetime.utcnow()
        # Name of the spider that produced this item.
        item['spider'] = spider.name
        return item
- 1
- 2
- 3
- 4
- 5
- 6
- 7
JSON 的管道
class AqiJsonPipeline(object):
    """Write every scraped item to ``aqi.json`` through a ``JsonItemExporter``."""

    def open_spider(self, spider):
        """Open the output file and start the exporter.

        Args:
            spider: The spider being opened (unused).
        """
        # The exporter is given a file opened in binary mode.
        self.file = open("aqi.json", 'wb')
        self.write = JsonItemExporter(self.file)
        self.write.start_exporting()

    def process_item(self, item, spider):
        """Export one item and hand it on unchanged.

        Args:
            item: The scraped item to export.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object that was passed in.
        """
        self.write.export_item(item)
        return item

    def close_spider(self, spider):
        """Finish the export and close the output file.

        Args:
            spider: The spider being closed (unused).
        """
        try:
            self.write.finish_exporting()
        finally:
            # Release the file handle even if finishing the export fails.
            self.file.close()
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
CSV 的管道
class AqiVscPipeline(object):
    """Write every scraped item to ``aqi.csv`` through a ``CsvItemExporter``.

    NOTE(review): "Vsc" looks like a transposition of "Csv"; the class name is
    left unchanged so that any existing references to it keep working.
    """

    def open_spider(self, spider):
        """Open the output file and start the exporter.

        Args:
            spider: The spider being opened (unused).
        """
        # The exporter is given a file opened in binary mode.
        self.file = open("aqi.csv", 'wb')
        self.write = CsvItemExporter(self.file)
        self.write.start_exporting()

    def process_item(self, item, spider):
        """Export one item and hand it on unchanged.

        Args:
            item: The scraped item to export.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object that was passed in.
        """
        self.write.export_item(item)
        return item

    def close_spider(self, spider):
        """Finish the export and close the output file.

        Args:
            spider: The spider being closed (unused).
        """
        try:
            self.write.finish_exporting()
        finally:
            # Release the file handle even if finishing the export fails.
            self.file.close()
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
mongodb数据库管道
class AqiMongoPipeline(object):
    """Store every scraped item in the ``aqi`` collection of the ``Aqi`` MongoDB database."""

    def open_spider(self, spider):
        """Connect to MongoDB and select the target collection.

        Args:
            spider: The spider being opened (unused).
        """
        self.client = pymongo.MongoClient(host=MONGO_HOST, port=MONGO_PORT)
        self.db = self.client['Aqi']
        self.collection = self.db['aqi']

    def process_item(self, item, spider):
        """Insert one item as a document and hand it on unchanged.

        Args:
            item: The scraped item; it is copied into a plain ``dict`` first.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object that was passed in.
        """
        # ``Collection.insert`` is deprecated in PyMongo 3 and removed in
        # PyMongo 4; ``insert_one`` is the single-document replacement.
        self.collection.insert_one(dict(item))
        return item

    def close_spider(self, spider):
        """Close the MongoDB client.

        Args:
            spider: The spider being closed (unused).
        """
        self.client.close()
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
redis数据库管道
class AqiRedisPipeline(object):
    """Push every scraped item onto the Redis list ``aqi`` as a JSON string."""

    def open_spider(self, spider):
        """Create the Redis client.

        Args:
            spider: The spider being opened (unused).
        """
        self.client = redis.Redis(host=REDIS_HOST, port=REDIS_PORT)

    @staticmethod
    def _json_default(value):
        """Serialize values that ``json`` cannot handle natively.

        Args:
            value: The object ``json.dumps`` failed to encode.

        Returns:
            An ISO-8601 string when ``value`` is a ``datetime``.

        Raises:
            TypeError: For any other unsupported type, so unexpected values
                are reported instead of being silently stringified.
        """
        if isinstance(value, datetime):
            return value.isoformat()
        raise TypeError(
            "Object of type %s is not JSON serializable" % type(value).__name__
        )

    def process_item(self, item, spider):
        """Serialize one item and push it onto the head of the ``aqi`` list.

        Args:
            item: The scraped item; it is copied into a plain ``dict`` first.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object that was passed in.
        """
        import json

        # redis-py 3.0+ only accepts bytes, str and numbers as values; passing
        # a dict raises DataError, so the item is serialized explicitly.
        payload = json.dumps(
            dict(item),
            default=self._json_default,
            ensure_ascii=False,
        )
        self.client.lpush('aqi', payload)
        return item
- 1
- 2
- 3
- 4
- 5
- 6
- 7
文章来源: blog.csdn.net,作者:考古学家lx,版权归原作者所有,如需转载,请联系作者。
原文链接:blog.csdn.net/weixin_43582101/article/details/89679364
【版权声明】本文为华为云社区用户转载文章,如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱:cloudbbs@huaweicloud.com
- 点赞
- 收藏
- 关注作者
评论(0)