python爬虫selenium模块详解_Python

selenium模块

selenium基本概念

selenium优势

便捷的获取网站中动态加载的数据
便捷实现模拟登录

selenium使用流程：

1.环境安装：pip install selenium

2.下载一个浏览器的驱动程序（谷歌浏览器）

3.实例化一个浏览器对象

基本使用

代码

				?

									from selenium import webdriver

									from lxml import etree

									from time import sleep

									if __name__ == '__main__':
									    # Start Chrome through the local chromedriver binary.
									    bro = webdriver.Chrome(r"E:\google\Chrome\Application\chromedriver.exe")
									    try:
									        bro.get(url='http://scxk.nmpa.gov.cn:81/xk/')
									        # page_source is the page as currently held by the browser,
									        # which is what gets parsed for the list entries below.
									        page_text = bro.page_source
									        tree = etree.HTML(page_text)
									        li_list = tree.xpath('//*[@id="gzlist"]/li')
									        for li in li_list:
									            titles = li.xpath('./dl/@title')
									            # An entry without a <dl title="..."> yields an empty list;
									            # skip it instead of failing with IndexError.
									            if not titles:
									                continue
									            print(titles[0])
									        sleep(5)
									    finally:
									        # Always shut down the browser and the driver process,
									        # even when loading or parsing raised.
									        bro.quit()

基于浏览器自动化的操作

代码

				?

									#编写基于浏览器自动化的操作代码

									- 发起请求: get(url)

									- 标签定位: find系列的方法

									- 标签交互: send_keys('xxx')

									- 执行js程序: execute_script('jsCode')

									- 前进，后退: forward(), back()

									- 关闭浏览器: quit()

代码

https://www.taobao.com/

				?

									from selenium import webdriver

									from time import sleep

									# Browser-automation walkthrough: locate, scroll, type, click, navigate history.
									DRIVER_PATH = r"E:\google\Chrome\Application\chromedriver.exe"
									browser = webdriver.Chrome(executable_path=DRIVER_PATH)
									browser.get(url='https://www.taobao.com/')
									# Locate the search box by its id.
									search_box = browser.find_element_by_id('q')
									sleep(2)
									# Run a JavaScript snippet that scrolls to the bottom of the page.
									browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')
									sleep(2)
									# Type the query, then find the search button and click it.
									search_box.send_keys('女装')
									browser.find_element_by_class_name('btn-search').click()
									# Open a second page so there is history to step through.
									browser.get('https://www.baidu.com')
									sleep(2)
									# Go back, pause 2s; go forward, pause 5s.
									for navigate, pause in ((browser.back, 2), (browser.forward, 5)):
									    navigate()
									    sleep(pause)
									browser.quit()

selenium处理iframe：

				?

									- 如果定位的标签存在于iframe标签之中，则必须先使用switch_to.frame(id)切换到该iframe，再进行标签定位

									- 动作链(拖动): from selenium.webdriver import ActionChains

									    - 实例化一个动作链对象: action = ActionChains(bro)

									    - click_and_hold(div): 在指定标签上按下鼠标左键并保持不放（长按）

									    - move_by_offset(x,y)

									    - perform( )让动作链立即执行

									    - action.release(): 松开按住的鼠标按键（同样需要调用perform()才会真正执行）

代码

https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable

				?

									from selenium import webdriver

									from time import sleep

									from selenium.webdriver import ActionChains

									# Drag the demo element to the right using an action chain.
									bro = webdriver.Chrome(executable_path=r"E:\google\Chrome\Application\chromedriver.exe")
									try:
									    bro.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
									    # The draggable element is inside an iframe, so switch into that
									    # frame before trying to locate it.
									    bro.switch_to.frame('iframeResult')
									    div = bro.find_element_by_id('draggable')
									    # Action chain: actions are queued and only sent on perform().
									    action = ActionChains(bro)
									    # Queue press-and-hold on the element; it is sent with the first perform().
									    action.click_and_hold(div)
									    for _ in range(5):
									        action.move_by_offset(17,0).perform()
									        sleep(0.3)
									    # release() alone only queues the mouse-up; perform() is required
									    # for the button to actually be released.
									    action.release().perform()
									finally:
									    # Close the browser even if locating or dragging failed.
									    bro.quit()

selenium模拟登录QQ空间

代码

https://qzone.qq.com/

				?

									from selenium import webdriver

									from time import sleep

									# Simulated QQ Zone login with account/password (placeholder credentials).
									DRIVER_PATH = r"E:\google\Chrome\Application\chromedriver.exe"
									browser = webdriver.Chrome(executable_path=DRIVER_PATH)
									browser.get('https://qzone.qq.com/')
									# The login form is inside an iframe; switch into it first.
									browser.switch_to.frame("login_frame")
									# Click the element that switches to the account/password form.
									browser.find_element_by_id('switcher_plogin').click()
									# Locate both input fields first, then fill them in.
									account_box, secret_box = [
									    browser.find_element_by_id(field_id) for field_id in ('u', 'p')
									]
									account_box.send_keys('1234455')
									secret_box.send_keys('qwer123')
									sleep(1)
									browser.find_element_by_id('login_button').click()

无头浏览器和规避检测

代码

				?

									from selenium import webdriver

									from time import sleep

									#实现无可视化界面

									from selenium.webdriver.chrome.options import Options

									#实现规避检测

									from selenium.webdriver import ChromeOptions

									# Build ONE options object carrying both the headless flags and the
									# detection-evasion switch. Passing two separate objects as
									# chrome_options= and options= does not combine them: only one of the
									# two is honoured by webdriver.Chrome, so the other's settings are lost.
									chrome_options = Options()
									# Run without a visible browser window.
									chrome_options.add_argument('--headless')
									chrome_options.add_argument('--disable-gpu')
									# Drop the 'enable-automation' switch to make automation less detectable.
									chrome_options.add_experimental_option('excludeSwitches',['enable-automation'])
									bro = webdriver.Chrome(executable_path=r"E:\google\Chrome\Application\chromedriver.exe",options=chrome_options)
									try:
									    bro.get('https://www.baidu.com')
									    print(bro.page_source)
									    sleep(2)
									finally:
									    # Make sure the headless browser process is not left running.
									    bro.quit()