当我试图用scrapy刮刮Google App Store时,我无法将数据保存到MongoDB。我收到一个错误:bson.errors.InvalidDocument: cannot encode object
。我在Google上搜索了此错误,结果表明数据类型存在问题,但是我已经将其转换为字典数据类型。
我的Pipeline.py文件是:
from pymongo import MongoClient
from scrapy import Item
class GpPipeline(object):
def open_spider(self,spider):
db_uri = spider.settings.get('MONGODB_URI','mongodb://localhost:27017')
db_name = spider.settings.get('MONGODB_DBNAME','scrapy_db')
db_post = spider.settings.get('MONGODB_DOCNAME','apkinfo')
self.db_client = MongoClient(db_uri)
self.db = self.db_client[db_name]
self.post = self.db[db_post]
def close_spider(self,spider):
self.db_client.close()
def process_item(self,item,spider):
postItem = dict(item)
self.post.insert(postItem)
return item
def insert_db(self,item):
if isinstance(item,Item):
item = dict(item)
self.db.books.insert(item)
来自终端的错误信息基本上如下:
2019-11-17 00:50:18 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://play.google.com/store/apps/details?id=com.pinterest> (referer: None)
2019-11-17 00:50:18 [scrapy.core.scraper] ERROR: Error processing {'app_icon': [<Selector xpath='//img[@itemprop="image"]/@src' data='https://lh3.googleusercontent.com/dVsv8H'>,<Selector xpath='//img[@itemprop="image"]/@src' data='https://lh3.googleusercontent.com/HotsP0'>,<Selector xpath='//img[@itemprop="image"]/@src' data='https://lh3.googleusercontent.com/jLIpel'>,<Selector xpath='//img[@itemprop="image"]/@src' data='https://lh3.googleusercontent.com/4CInZm'>],'app_name': [],'app_url': 'https://play.google.com/store/apps/details?id=com.pinterest'}
Traceback (most recent call last):
File "/anaconda3/lib/python3.7/site-packages/twisted/internet/defer.py",line 649,in _runCallbacks
current.result = callback(current.result,*args,**kw)
File "/Users/zhanjinyang/Desktop/gp/gp/pipelines.py",line 27,in process_item
self.post.insert(postItem)
File "/anaconda3/lib/python3.7/site-packages/pymongo/collection.py",line 3195,in insert
check_keys,manipulate,write_concern)
File "/anaconda3/lib/python3.7/site-packages/pymongo/collection.py",line 614,in _insert
bypass_doc_val,session)
File "/anaconda3/lib/python3.7/site-packages/pymongo/collection.py",line 602,in _insert_one
acknowledged,_insert_command,session)
File "/anaconda3/lib/python3.7/site-packages/pymongo/mongo_client.py",line 1280,in _retryable_write
return self._retry_with_session(retryable,func,s,None)
File "/anaconda3/lib/python3.7/site-packages/pymongo/mongo_client.py",line 1233,in _retry_with_session
return func(session,sock_info,retryable)
File "/anaconda3/lib/python3.7/site-packages/pymongo/collection.py",line 597,in _insert_command
retryable_write=retryable_write)
File "/anaconda3/lib/python3.7/site-packages/pymongo/pool.py",line 589,in command
self._raise_connection_failure(error)
File "/anaconda3/lib/python3.7/site-packages/pymongo/pool.py",line 750,in _raise_connection_failure
raise error
File "/anaconda3/lib/python3.7/site-packages/pymongo/pool.py",line 584,in command
user_fields=user_fields)
File "/anaconda3/lib/python3.7/site-packages/pymongo/network.py",line 121,in command
codec_options,ctx=compression_ctx)
File "/anaconda3/lib/python3.7/site-packages/pymongo/message.py",line 678,in _op_msg
flags,command,identifier,docs,check_keys,opts)
bson.errors.InvalidDocument: cannot encode object: <Selector xpath='//img[@itemprop="image"]/@src' data='https://lh3.googleusercontent.com/dVsv8H'>,of type: <class 'scrapy.selector.unified.Selector'>