Message: element not interactable (selenium.common.exceptions.ElementNotInteractableException)
Source: Project assignment 4-6
weixin_慕UI7443657
2022-07-28 20:55:05
Problem description:
I am scraping data from 51job; every time the script reaches page 54 it fails with the ElementNotInteractableException shown above. The exact error is as follows:
Related screenshot: (error screenshot not reproduced here)
Related code:
import time

import pymongo
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
# Run Chrome headless (no UI), so the script also works on a Linux box without a display
chrome_options.add_argument("--headless")
# Avoid the "DevToolsActivePort file doesn't exist" error
chrome_options.add_argument("--no-sandbox")
# Officially recommended switch that works around some bugs
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument('--disable-blink-features=AutomationControlled')

client = pymongo.MongoClient(host='localhost', port=27017,
                             username='linzpao-admin', password='linzpao123')
# client = pymongo.MongoClient(host='localhost', port=27017)
# client.admin.authenticate('linzpao-admin', 'linzpao123')
mydb = client["linzpao"]
mycollection = mydb['_51job']


def job_xpath(job_):
    print("job_xpath")
    company_s = job_.find_elements_by_xpath("//div[@class = 'er']/a")
    job_name_s = job_.find_elements_by_xpath("//div[@class='e']/a/p/span[@class='jname at']")
    job_href_s = job_.find_elements_by_xpath("//div[@class='e']/a")
    job_salary_s = job_.find_elements_by_xpath("//p/span[@class='sal']")
    job_city_in_edu_s = job_.find_elements_by_xpath("//p/span[@class='d at']")
    job_time_s = job_.find_elements_by_xpath("//p/span[@class='time']")
    for i, j, k, l, m, n in zip(company_s, job_name_s, job_href_s,
                                job_salary_s, job_city_in_edu_s, job_time_s):
        # The "city | experience | education" field may have 0-3 parts
        job_city_in_edu = m.text.split('|')
        if len(job_city_in_edu) == 3:
            job_info = {"公司名称": i.text, '发布时间': n.text, '工作岗位': j.text, '薪资': l.text,
                        '工作地点': job_city_in_edu[0], '工作经验': job_city_in_edu[1],
                        '学历要求': job_city_in_edu[2], '链接地址': k.get_attribute('href')}
        if len(job_city_in_edu) == 2:
            job_info = {"公司名称": i.text, '发布时间': n.text, '工作岗位': j.text, '薪资': l.text,
                        '工作地点': job_city_in_edu[0], '工作经验': job_city_in_edu[1],
                        '学历要求': "无", '链接地址': k.get_attribute('href')}
        if len(job_city_in_edu) == 1:
            job_info = {"公司名称": i.text, '发布时间': n.text, '工作岗位': j.text, '薪资': l.text,
                        '工作地点': job_city_in_edu[0], '工作经验': "无",
                        '学历要求': "无", '链接地址': k.get_attribute('href')}
        if len(job_city_in_edu) == 0:
            job_info = {"公司名称": i.text, '发布时间': n.text, '工作岗位': j.text, '薪资': l.text,
                        '工作地点': "无", '工作经验': "无",
                        '学历要求': "无", '链接地址': k.get_attribute('href')}
        mycollection.insert_one(job_info)


def main(pages):
    mycollection.delete_many({})
    job = webdriver.Chrome(options=chrome_options)
    job.maximize_window()
    job.get('https://search.51job.com/list/000000,000000,0000,00,9,99,+,2,1.html')
    job.find_element_by_xpath('//div/input[@id="keywordInput"]').send_keys("建筑")
    job.find_element_by_xpath('//button[@id="search_btn"]').click()
    time.sleep(1)
    i = 1
    while i <= pages:
        print('当前在第{}页获取数据'.format(i))
        job_xpath(job)
        # Click the "next page" button; this is the line that raises
        # ElementNotInteractableException on the failing page
        job.find_element_by_xpath('/html/body/div[2]/div[3]/div/div[2]/div[4]/div[2]/div/div/div/ul/li[8]').click()
        i = i + 1
        time.sleep(2)


if __name__ == '__main__':
    main(200)
1 Answer
好帮手慕燕燕
2022-07-29
Hello! Your program retrieves the first 40 or so pages without any problem. After roughly 40 pages, 51job pops up a verification (CAPTCHA) window (shown in a screenshot not reproduced here). This is the site's anti-scraping measure: once the verification page is displayed, the results and the next-page button are no longer interactable, so the program cannot retrieve any more data and the click on the next-page element fails.
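If you want the script to stop cleanly instead of crashing when that happens, one option is to wrap the next-page click in a try/except and break out of the loop once the button can no longer be clicked. Below is a minimal sketch (not from the original answer), assuming the same `job` driver, `job_xpath` helper, `pages` count, Selenium 3-style `find_element_by_xpath` API and next-page XPath as in the question code; the exception classes come from `selenium.common.exceptions`:

from selenium.common.exceptions import (
    ElementNotInteractableException,
    NoSuchElementException,
)

# Same next-page XPath as in the question code
NEXT_PAGE_XPATH = ('/html/body/div[2]/div[3]/div/div[2]/div[4]'
                   '/div[2]/div/div/div/ul/li[8]')

i = 1
while i <= pages:
    print('Fetching data from page {}'.format(i))
    job_xpath(job)
    try:
        # When the verification window is shown, the next-page button is
        # hidden or missing, so this click raises an exception
        job.find_element_by_xpath(NEXT_PAGE_XPATH).click()
    except (ElementNotInteractableException, NoSuchElementException):
        print('Cannot turn the page after page {}; the site has probably '
              'triggered its verification check. Stopping.'.format(i))
        break
    i = i + 1
    time.sleep(2)

This does not get around the verification itself; it only makes the failure explicit and keeps the data already collected for the earlier pages in MongoDB.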
Happy learning!