我正在尝试将抓取到的每个项目分别保存到单独的 JSON 文件中，但没有看到任何输出文件。管道和项目分别定义在 Scrapy 项目文件夹中的 pipelines.py 和 items.py 文件里。我必须显式调用 process_item()，还是在 scrape() 返回项目时它会被自动调用？我已经在 CrawlerProcess(settings = {'ITEM_PIPELINES'}) 中启用了管道。谢谢。
管道
# Draw two samples of 10,000 normally distributed values each and plot a
# histogram for each sample.
data1 = rnorm(10000,mean=8,sd=1.3)
data2 = rnorm(10000,mean=4,sd=1.0)
# Plot the data1 histogram; breaks=40 asks for about 40 bins.
hist(data1,breaks=40,col="red",xlim=c(2,14),ylim=c(0,800),main="Gaussian deviates : mean=8,sigma=1.3",col.main="blue")
# Mark the first quartile of data1 with a vertical green line.
# NOTE(review): the original call was truncated (unbalanced parentheses, only
# the x position and the value 600 survived). It is reconstructed here as a
# segment from y=0 to y=600 at the 25% quantile -- confirm the intended
# end points.
segments(quantile(data1,0.25),0,quantile(data1,0.25),600,col="green",lwd=4,lty=1)
# Plot the data2 histogram; breaks=40 asks for about 40 bins.
# NOTE(review): the original call was truncated between `data2,` and `900)`.
# breaks=40 comes from the comment above and ylim=c(0,900) from the surviving
# `900)`; any col/xlim arguments that were lost are not guessed -- confirm.
hist(data2,breaks=40,ylim=c(0,900),main="Gaussian deviates : mean=4,sigma=1.0",col.main="blue")
# Print the first quartile of data1.
print(quantile(data1,0.25))
import datetime
import json
import logging
class JsonWriterPipeline(object):
    """Item pipeline that writes every processed item to its own JSON file.

    Files are created in the current working directory and are named after
    the local time at which the item was processed (``YYYYmmddHHMMSS.json``).
    When that name is already taken, a numeric suffix (``_1``, ``_2``, ...)
    is appended so that one item never overwrites another.
    """

    def process_item(self, item, spider):
        """Write *item* to a new JSON file and hand it on unchanged.

        Writing is best effort: a failure is logged with its traceback and
        the item is still returned, so later pipeline stages keep running.

        Args:
            item: The scraped item; anything ``dict()`` can convert.
            spider: The spider that produced the item (not used here).

        Returns:
            The same *item* object that was passed in.
        """
        try:
            # Serialize before touching the file system so that an item which
            # cannot be encoded never leaves an empty or partial file behind.
            payload = json.dumps(dict(item))
            with self._open_unique() as fp:
                fp.write(payload)
        except (OSError, TypeError, ValueError):
            # Report the failure instead of hiding it; a silent handler here
            # makes missing output files impossible to diagnose.
            logging.getLogger(__name__).exception(
                "Could not write item to a JSON file"
            )
        return item

    @staticmethod
    def _open_unique():
        """Create and open a timestamp-named ``.json`` file that is new."""
        stamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
        name = stamp + ".json"
        attempt = 0
        while True:
            try:
                # Mode "x" fails when the file exists instead of truncating
                # it, so items handled within the same second are all kept.
                return open(name, "x", encoding="utf-8")
            except FileExistsError:
                attempt += 1
                name = "{}_{}.json".format(stamp, attempt)
class ProjectItem(scrapy.Item):
    """Scrapy item that declares two fields: ``title`` and ``url``."""

    # Both fields are plain ``scrapy.Field()`` declarations without metadata.
    title = scrapy.Field()
    url = scrapy.Field()