Playwright 爬虫基本用法
等待加载
page.wait_for_load_state('networkidle')  # block until the page reaches the 'networkidle' load state
text = page.content()  # keep the page's content in `text` (full HTML per the Playwright docs)
点击
demo
python
with sync_playwright() as pw:
    # Launch Chromium with headless=True, as in the original demo.
    browser = pw.chromium.launch(headless=True)
    try:
        context = browser.new_context()
        page = context.new_page()
        page.goto(url)
        # Wait for the 'networkidle' load state before snapshotting the page.
        page.wait_for_load_state('networkidle')
        text = page.content()
        page.close()
        context.close()
    finally:
        # Always shut the browser down, even if navigation or waiting raised.
        browser.close()
info = parse_info(text)
buy_now = re.search(r'"Buy now:(.+?)",', text)  # first "Buy now:" entry, if any
title = buy_now.group(1) if buy_now else None  # None instead of IndexError when the marker is absent
禁止加载图片
def _abort_images(route):
    """Abort the request when its resource type is "image"; otherwise let it continue."""
    if route.request.resource_type == "image":
        route.abort()
    else:
        route.continue_()


page.route("**/*", _abort_images)
登录状态
保存
python
context.storage_state(path='login_data.json')  # write this context's storage state to login_data.json for later reuse
使用
python
context = browser.new_context(storage_state='login_data.json')  # create the context from the state stored in login_data.json