描述:
这里主要是记录一下使用 Selenium + ChromeDriver 模拟登录知乎,并且保存 cookie。过程中遇到了很多问题:先是在本地运行不起来;本地能运行之后,在服务器上运行又出现了各种问题。
这里使用的服务器是Ubuntu 16.04版本。
过程:
# Download the Google Chrome .deb package
wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
# Install it
sudo dpkg -i google-chrome*
# If the install fails, refresh the package index and let apt repair
# the broken/missing dependencies
sudo apt update
sudo apt install -f
# Run the install command again
sudo dpkg -i google-chrome*
# Print the installed Chrome version to confirm the install worked
google-chrome --version
输出:Google Chrome 75.0.3770.100

安装selenium
pip install selenium
mkdir ZhiHu
cd ZhiHu
# 然后把 chromedriver 放到 ZhiHu 文件夹下,这样便于移到其他项目中
# LoginZhiHu.py
import os
import time
import requests
from http import cookiejar
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Absolute path of the directory that contains this script.
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
# The chromedriver binary is expected to sit next to this script.
DRIVER_BIN = os.path.join(PROJECT_ROOT, "chromedriver")
class ZhiHu(object):
    """Log in to Zhihu through headless Chrome and persist the session cookies.

    Cookies are stored in ``./cookies.txt`` (relative to the current working
    directory) in LWP format and are reused on later runs while they still
    pass ``check_login``.
    """

    def __init__(self, username: str = None, password: str = None):
        """Store the credentials and prepare a ``requests`` session.

        :param username: account name typed into the sign-in form
        :param password: password typed into the sign-in form
        """
        self.username = username
        self.password = password
        self.headers = {
            # Adjacent literals instead of a backslash continuation inside the
            # string, so source indentation can never leak into the header.
            'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 "
                          "(KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
        }
        self.session = requests.session()
        # File-backed jar so cookies can be saved to / loaded from disk.
        self.session.cookies = cookiejar.LWPCookieJar(filename='./cookies.txt')

    def login(self):
        """Log in, reusing saved cookies when they are still accepted.

        Saved cookies are loaded first; if ``check_login`` accepts them no
        browser is started. Otherwise headless Chrome fills in the sign-in
        form, and the browser's cookies are copied into the session and
        written to disk.

        :return: True if ``check_login`` succeeds, otherwise False
        """
        if self.load_cookies():
            print('读取 Cookies 文件')
            if self.check_login():
                print('登陆成功')
                return True
            print('Cookies 已过期')

        chrome_options = Options()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        # NOTE(review): presumably meant to make the automated browser less
        # conspicuous to the site - confirm it is still needed.
        chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])

        browser = webdriver.Chrome(executable_path=DRIVER_BIN, chrome_options=chrome_options)
        try:
            browser.get('https://www.zhihu.com/signin')
            username_input = browser.find_element_by_css_selector("input[name=username]")
            time.sleep(2)
            username_input.send_keys(self.username)
            password_input = browser.find_element_by_css_selector("input[name=password]")
            time.sleep(2)
            password_input.send_keys(self.password)
            button = browser.find_element_by_class_name('SignFlow-submitButton')
            button.click()
            # Fixed wait after submitting, before the cookies are read.
            time.sleep(10)
            cookies = browser.get_cookies()
        finally:
            # Always shut the browser down, even when a lookup or click fails,
            # so no Chrome/chromedriver process is left running.
            browser.quit()

        requests.utils.cookiejar_from_dict(
            {c.get('name'): c.get('value') for c in cookies},
            self.session.cookies,
        )
        self.session.cookies.save(ignore_discard=True, ignore_expires=True)
        return self.check_login()

    def load_cookies(self):
        """Load the cookie file into the session's cookie jar.

        A missing file and an unreadable (corrupt) file are both reported as
        "no cookies", so ``login`` falls back to a fresh browser login.

        :return: True if the file was loaded, otherwise False
        """
        try:
            self.session.cookies.load(ignore_discard=True)
        except FileNotFoundError:
            return False
        except cookiejar.LoadError:
            # The file exists but is not a valid LWP cookie file.
            return False
        return True

    def check_login(self):
        """Check whether the session's cookies are accepted as logged in.

        Requests the sign-in page without following redirects and treats an
        HTTP 302 response as "already logged in".

        :return: True when the response status is 302, otherwise False
        """
        login_url = 'https://www.zhihu.com/signin'
        resp = self.session.get(login_url, headers=self.headers, allow_redirects=False)
        return resp.status_code == 302
if __name__ == '__main__':
    # Script entry point: fill in real credentials before running.
    ZhiHu(username='', password='').login()
os.path.basename(self.path), self.start_error_message)
selenium.common.exceptions.WebDriverException: Message: 'chromedriver' executable needs to be in PATH. Please see https://sites.google.com/a/chromium.org/chromedriver/home
# Build an absolute path to the chromedriver binary located next to this
# script and pass it to Selenium explicitly, so chromedriver does not have
# to be on PATH.
PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__))
DRIVER_BIN = os.path.join(PROJECT_ROOT, "chromedriver")
browser = webdriver.Chrome(executable_path=DRIVER_BIN, chrome_options=chrome_options)
参考文章:https://cloud.tencent.com/developer/ask/137911
在部署到服务器中,因为是没有界面的,所以要设置这几个参数
chrome_options = Options()
# Do not open a graphical window
chrome_options.add_argument('--headless')
# Google's documentation mentions adding this flag to work around a bug
chrome_options.add_argument('--disable-gpu')
# Needed when Chrome runs with root privileges
chrome_options.add_argument('--no-sandbox')
# Chrome apparently uses space under /dev/shm, which is not very large here,
# so disable that unless there is a specific need for it
chrome_options.add_argument('--disable-dev-shm-usage')
博客没百搭呀,内容很丰富,加油~