Message: element not interactable (selenium.common.exceptions.ElementNotInteractableException)
Source: 4-6 Project Assignment
weixin_慕UI7443657
2022-07-28 20:55:05
Problem description:
I am scraping data from 51job, and the program throws an error every time it reaches page 54. The exact error is shown below:
Related screenshot:
Related code:
import time
from selenium import webdriver
from lxml import etree
import pymongo
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
# Run Chrome headless (no UI), so the script can also run on a Linux machine without a display
chrome_options.add_argument("--headless")
# Work around the 'DevToolsActivePort file doesn't exist' error
chrome_options.add_argument("--no-sandbox")
# Officially recommended option to work around some bugs
chrome_options.add_argument("--disable-gpu")
# Reduce automation fingerprinting (hides the navigator.webdriver flag)
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
client = pymongo.MongoClient(host='localhost',port=27017,username='linzpao-admin', password='linzpao123')
# client = pymongo.MongoClient(host='localhost',port=27017)
# client.admin.authenticate('linzpao-admin','linzpao123')
mydb = client["linzpao"]
mycollection = mydb['_51job']
def job_xpath(job_):
    print("job_xpath")
    # Collect the matching elements for each field on the current result page
    company_s = job_.find_elements_by_xpath("//div[@class = 'er']/a")
    job_name_s = job_.find_elements_by_xpath("//div[@class='e']/a/p/span[@class='jname at']")
    job_href_s = job_.find_elements_by_xpath("//div[@class='e']/a")
    job_salary_s = job_.find_elements_by_xpath("//p/span[@class='sal']")
    job_city_in_edu_s = job_.find_elements_by_xpath("//p/span[@class='d at']")
    job_time_s = job_.find_elements_by_xpath("//p/span[@class='time']")
    for i, j, k, l, m, n in zip(company_s, job_name_s, job_href_s, job_salary_s, job_city_in_edu_s, job_time_s):
        # The "city | experience | education" field is pipe-separated and may have 1 to 3 parts
        job_city_in_edu = m.text.split('|')
        if len(job_city_in_edu) == 3:
            job_info = {
                "公司名称": i.text, '发布时间': n.text, '工作岗位': j.text, '薪资': l.text, '工作地点': job_city_in_edu[0],
                '工作经验': job_city_in_edu[1], '学历要求': job_city_in_edu[2], '链接地址': k.get_attribute('href')
            }
        if len(job_city_in_edu) == 2:
            job_info = {
                "公司名称": i.text, '发布时间': n.text, '工作岗位': j.text, '薪资': l.text, '工作地点': job_city_in_edu[0],
                '工作经验': job_city_in_edu[1], '学历要求': "无", '链接地址': k.get_attribute('href')
            }
        if len(job_city_in_edu) == 1:
            job_info = {
                "公司名称": i.text, '发布时间': n.text, '工作岗位': j.text, '薪资': l.text, '工作地点': job_city_in_edu[0],
                '工作经验': "无", '学历要求': "无", '链接地址': k.get_attribute('href')
            }
        if len(job_city_in_edu) == 0:
            job_info = {
                "公司名称": i.text, '发布时间': n.text, '工作岗位': j.text, '薪资': l.text, '工作地点': "无",
                '工作经验': "无", '学历要求': "无", '链接地址': k.get_attribute('href')
            }
        mycollection.insert_one(job_info)
def main(pages):
    # Clear old records before a fresh crawl
    mycollection.delete_many({})
    job = webdriver.Chrome(options=chrome_options)
    job.maximize_window()
    job.get('https://search.51job.com/list/000000,000000,0000,00,9,99,+,2,1.html')
    # Search keyword "建筑" (construction/architecture)
    job.find_element_by_xpath('//div/input[@id="keywordInput"]').send_keys("建筑")
    job.find_element_by_xpath('//button[@id="search_btn"]').click()
    time.sleep(1)
    i = 1
    while i <= pages:
        print('当前在第{}页获取数据'.format(i))
        job_xpath(job)
        # Click the "next page" button
        job.find_element_by_xpath('/html/body/div[2]/div[3]/div/div[2]/div[4]/div[2]/div/div/div/ul/li[8]').click()
        i = i + 1
        time.sleep(2)
if __name__ == '__main__':
    main(200)
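
For reference, here is a minimal sketch (based on the code above, not verified against the live site) that wraps the next-page click in an explicit wait plus exception handling, so the script reports which page failed instead of crashing. NEXT_PAGE_XPATH reuses the XPath from the question, and click_next_page is a hypothetical helper name:

from selenium.common.exceptions import ElementNotInteractableException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Same next-page XPath as in the question (assumed, not re-verified)
NEXT_PAGE_XPATH = '/html/body/div[2]/div[3]/div/div[2]/div[4]/div[2]/div/div/div/ul/li[8]'

def click_next_page(driver, page_no, timeout=10):
    """Try to click the next-page button; return False instead of raising."""
    try:
        btn = WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable((By.XPATH, NEXT_PAGE_XPATH)))
        btn.click()
        return True
    except (ElementNotInteractableException, TimeoutException) as exc:
        # Typically means the button is hidden/covered (e.g. by a pop-up)
        # or the page did not load as expected.
        print('Could not page past page {}: {!r}'.format(page_no, exc))
        return False

In main(), the while loop could then break when click_next_page(job, i) returns False, so the data collected up to that point is still saved.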
1 Answer
好帮手慕燕燕
2022-07-29
Hello! Your program retrieves the first 40 pages without any problem. After roughly 40 pages, 51job pops up a verification window; this is the site's anti-scraping measure, so the program can no longer retrieve data.
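
As a rough sketch only, the crawl could detect that situation and stop gracefully instead of raising ElementNotInteractableException. The selector below (VERIFY_DIALOG_CSS) is a hypothetical placeholder; the real class or id of the verification pop-up has to be taken from the page in the browser DevTools:

from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

# Hypothetical placeholder selector; replace with the verification
# dialog's actual class/id as seen in DevTools.
VERIFY_DIALOG_CSS = 'div.verify-wrap'

def verification_shown(driver):
    """Return True if an anti-bot verification dialog is present on the page."""
    try:
        driver.find_element(By.CSS_SELECTOR, VERIFY_DIALOG_CSS)
        return True
    except NoSuchElementException:
        return False

The crawl loop could call verification_shown(job) before scraping each page and break (or pause for manual verification when not running headless) once it returns True.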

Wishing you happy learning!