This commit is contained in:
parent
41aa5ed3fe
commit
2b906606b9
|
@ -14,6 +14,7 @@ class ZhihuItem(scrapy.Item):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class ProductSpecItem(scrapy.Item):
|
class ProductSpecItem(scrapy.Item):
|
||||||
|
pid = scrapy.Field()
|
||||||
cpu = scrapy.Field()
|
cpu = scrapy.Field()
|
||||||
rom = scrapy.Field()
|
rom = scrapy.Field()
|
||||||
ram = scrapy.Field()
|
ram = scrapy.Field()
|
||||||
|
|
|
@ -130,7 +130,7 @@ class JDHbasePipeline(object):
|
||||||
"comment:good_rate": str(item["good_rate"])
|
"comment:good_rate": str(item["good_rate"])
|
||||||
})
|
})
|
||||||
elif isinstance(item, ProductSpecItem):
|
elif isinstance(item, ProductSpecItem):
|
||||||
self.product_table.put(item["id"],
|
self.product_table.put(item["pid"],
|
||||||
{
|
{
|
||||||
"spec:cpu": str(item["cpu"]),
|
"spec:cpu": str(item["cpu"]),
|
||||||
"spec:rom": str(item['rom']),
|
"spec:rom": str(item['rom']),
|
||||||
|
|
|
@ -51,6 +51,7 @@ class JdSpider(scrapy.Spider):
|
||||||
|
|
||||||
def parse_spec(self, response):
|
def parse_spec(self, response):
|
||||||
#spec
|
#spec
|
||||||
|
product_id = response.meta["product_id"]
|
||||||
cpu = response.xpath('//dt[text()="CPU型号"]/following-sibling::dd/text()').extract_first()
|
cpu = response.xpath('//dt[text()="CPU型号"]/following-sibling::dd/text()').extract_first()
|
||||||
rom = response.xpath('//dt[text()="ROM"]/following-sibling::dd[2]/text()').extract_first()
|
rom = response.xpath('//dt[text()="ROM"]/following-sibling::dd[2]/text()').extract_first()
|
||||||
ram = response.xpath('//dt[text()="RAM"]/following-sibling::dd[2]/text()').extract_first()
|
ram = response.xpath('//dt[text()="RAM"]/following-sibling::dd[2]/text()').extract_first()
|
||||||
|
@ -62,6 +63,7 @@ class JdSpider(scrapy.Spider):
|
||||||
print(cpu, rom, ram, resolution, charge, weight, brand)
|
print(cpu, rom, ram, resolution, charge, weight, brand)
|
||||||
|
|
||||||
yield ProductSpecItem(
|
yield ProductSpecItem(
|
||||||
|
pid = product_id,
|
||||||
cpu = cpu,
|
cpu = cpu,
|
||||||
rom = rom,
|
rom = rom,
|
||||||
ram = ram,
|
ram = ram,
|
||||||
|
@ -164,7 +166,9 @@ class JdSpider(scrapy.Spider):
|
||||||
# priority=100)
|
# priority=100)
|
||||||
|
|
||||||
if url:
|
if url:
|
||||||
yield SplashRequest(url='https:'+url, callback=self.parse_spec)
|
yield SplashRequest(url='https:'+url,
|
||||||
|
callback=self.parse_spec,
|
||||||
|
meta={'product_id': product_id})
|
||||||
|
|
||||||
# 获取下一页
|
# 获取下一页
|
||||||
next_page = response.xpath('//a[@class="pn-next"]/@href').extract_first()
|
next_page = response.xpath('//a[@class="pn-next"]/@href').extract_first()
|
||||||
|
|
Loading…
Reference in New Issue