items.py
python
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class Sss1Item(scrapy.Item):
    """Scraped item container exposing a single ``name`` field."""

    # Plain field with no extra metadata attached.
    name = scrapy.Field()
spider_title.py
python
import scrapy
from sss1.items import Sss1Item
class SpiderTitleSpider(scrapy.Spider):
    """Crawl chapter pages one after another, starting from ``start_urls``.

    For every page the spider appends the text nodes matched by the first
    XPath to a local text file, yields an ``Sss1Item`` whose ``name`` field
    holds the text nodes matched by the second XPath, and then follows the
    "next" link found on the page (if any).
    """

    name = "spider_title"
    # The start URL is served from the ``read`` subdomain, which
    # "www.zongheng.com" alone does not cover; without this entry the offsite
    # filter would drop follow-up requests to that host.
    allowed_domains = ["www.zongheng.com", "read.zongheng.com"]
    start_urls = ["https://read.zongheng.com/chapter/1215341/68208370.html"]

    def parse(self, response):
        """Process one page: persist its text, emit an item, follow the next link.

        Args:
            response: The downloaded page.

        Yields:
            An ``Sss1Item`` for the page, followed by a ``scrapy.Request`` for
            the next page (handled by this same method) when a next link is
            present.
        """
        titles = response.xpath(
            '//*[@id="Jcontent"]/div/div[1]/div[2]/text()'
        ).getall()
        # Context manager so the handle is closed after each page instead of
        # leaking one open file per response.
        with open('我有一剑.txt', 'a', encoding='utf8') as out:
            out.writelines(f"{title}\n" for title in titles)

        item = Sss1Item()
        item['name'] = response.xpath(
            '//*[@id="Jcontent"]/div/div[4]/p[3]/span[1]/text()'
        ).getall()
        yield item

        raw_href = response.xpath(
            '//*[@id="page_reader"]/div[3]/div[1]/div[3]/div[1]/a[3]/@href'
        ).get()
        # .get() returns None when nothing matches; normalise to "" so the
        # replace below cannot raise AttributeError on such a page.
        next_href = (raw_href or "").replace("?", "")
        if not next_href:
            self.logger.info("no next link on %s; stopping", response.url)
            return

        # urljoin leaves absolute URLs untouched and resolves relative ones,
        # which scrapy.Request would otherwise reject for lacking a scheme.
        next_url = response.urljoin(next_href)
        self.logger.info("next page: %s", next_url)
        yield scrapy.Request(url=next_url, callback=self.parse)