在 Scrapy 的 pipeline 中让中文正常显示
基于 Python 3.x 的 Scrapy 采集程序在采集中文并用 json.dumps 写入文件时,中文默认会被输出为 \uXXXX 形式的 Unicode 转义序列(网上经常提到的"乱码"的一种)。在 pipeline 中调用 json.dumps 时传入 ensure_ascii=False,即可解决问题,代码如下。
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# useful for handling different item types with a single interface
import json
class MyspiderPipeline:
    """Item pipeline that writes each scraped item to ``items.jl``.

    Every item becomes one JSON object on its own line (JSON Lines).
    Non-ASCII text such as Chinese is written as readable characters
    instead of ``\\uXXXX`` escape sequences.
    """

    def open_spider(self, spider):
        """Open the output file when the spider starts.

        Args:
            spider: The spider being opened (unused).
        """
        # Pin UTF-8: with ensure_ascii=False the raw characters reach the
        # file, and the platform default encoding may be unable to encode
        # them (or may produce a file that is not valid UTF-8).
        self.file = open('items.jl', 'w', encoding='utf-8')

    def close_spider(self, spider):
        """Close the output file when the spider finishes.

        Args:
            spider: The spider being closed (unused).
        """
        self.file.close()

    def process_item(self, item, spider):
        """Echo the item, append it to the output file, and pass it on.

        Args:
            item: The scraped item; must be dict-like (a plain ``dict`` or
                any mapping accepted by ``dict()``).
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object, unchanged.
        """
        print(item)
        # json only serializes real dicts; dict() also accepts mapping-style
        # items that are not dict subclasses and is a no-op copy for dicts.
        # ensure_ascii=False keeps non-ASCII text readable in the output.
        line = json.dumps(dict(item), ensure_ascii=False) + "\n"
        self.file.write(line)
        return item