import scrapy
from scrapy.loader import ItemLoader
from test3.items import Test3Item
class TestboxSpider(scrapy.Spider):
    """Spider that scrapes the single page listed in ``start_urls``.

    ``parse`` emits ``Test3Item`` objects in three groups: one item carrying
    the page-level fields, one item per selected row of the summary table,
    and one item per selected row of the ``tbody0`` table.
    """

    name = 'testbox'
    start_urls = ['https://www.tjk.org/TR/YarisSever/Query/ConnectedPage/AtKosuBilgileri?1=1&QueryParameter_AtId=61880']

    # Author's note: module only.
    def parse(self, response):
        """Extract every populated item from the response.

        Each loader gets its own name and is turned into an item with
        ``load_item()``; previously only the loaders of the last loop were
        yielded, so the page-level fields and the summary rows were lost.

        Args:
            response: The downloaded page for the URL in ``start_urls``.

        Yields:
            A ``Test3Item`` with the page-level fields, then one
            ``Test3Item`` per summary row, then one per ``tbody0`` row.
        """
        # Page-level fields: a single value each, read from the whole page.
        page_loader = ItemLoader(item=Test3Item(), response=response)
        page_loader.add_xpath(
            'ikramiye',
            "normalize-space(//div[@class='grid_6 alpha omega kunye']/span[2]/div/text())",
        )
        page_loader.add_xpath(
            'At_Sahibi_Primi',
            "normalize-space(//div[@class='grid_6 alpha omega kunye']/span[4]/div/text())",
        )
        yield page_loader.load_item()

        # Summary table: only the first four rows are selected.
        # NOTE(review): the XPath requires a literal <tbody> in the served
        # HTML -- confirm against the raw response, not the browser DOM.
        summary_rows = response.xpath(
            "//div[@class='grid_10 alpha omega kunye']/table/tbody/tr[position()<5]")
        for summary_row in summary_rows:
            # Bind the loader to the row so the relative XPaths resolve
            # against this <tr>, matching how the second table is handled.
            summary_loader = ItemLoader(
                item=Test3Item(), selector=summary_row, response=response)
            summary_loader.add_xpath('title', ".//td[1]/text()")
            summary_loader.add_xpath('K', ".//td[2]/text()")
            summary_loader.add_xpath('incilik_1', ".//td[3]/text()")
            yield summary_loader.load_item()

        # Author's note on the desired output: every field should be a
        # column, and the data gathered in the loops should appear as rows
        # beneath those columns.
        # NOTE(review): items are emitted one per row here; if summary rows
        # and race rows must share a single output row, pair them up before
        # yielding -- confirm the intended layout.
        race_rows = response.xpath("//*[@id='tbody0']/tr[position()<6]")
        for race_row in race_rows:
            race_loader = ItemLoader(
                item=Test3Item(), selector=race_row, response=response)
            race_loader.add_xpath('Tarih', ".//td[1]/a/text()")
            race_loader.add_xpath('Sehir', ".//td[2]/a/text()")
            race_loader.add_xpath('Msf', ".//td[3]/text()")
            race_loader.add_xpath('Pist', ".//td[4]/text()")
            race_loader.add_xpath('s', ".//td[5]/text()")
            yield race_loader.load_item()
如果我把 yield 放在这里,就无法输出所有数据行。我希望每个字段各占一列,而循环中抓取到的数据作为这些列下方的各行。
我的预期输出是:
title,K,incilik_1,Tarih,Sehir
TOPLAM,51,2/04/2020,Ankara
'','',07/08/2019,05/04/2019,Bnkara