Press "Enter" to skip to content

selenium chromedriver 使用selenium-stealth 进行反检测处理

我们通常使用 seleniumwire 抓取数据的代码如下:

from seleniumwire import webdriver  # Import from seleniumwire
from selenium.webdriver.chrome.service import Service

# Path to the local chromedriver binary (replace with your own path).
chrome_driver_path = "/Users/cai/Documents/chromedriver-mac-x64/chromedriver"
service = Service(executable_path=chrome_driver_path)
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15"
options = webdriver.ChromeOptions()
options.add_argument(f'--user-agent={user_agent}')

# Create the selenium-wire WebDriver with the explicit chromedriver service.
driver = webdriver.Chrome(service=service, options=options)

try:
    # Open the target site.
    driver.get('https://www.xiaocaicai.com/')

    # Captured traffic is available through the `requests` attribute.
    # NOTE(review): requests issued after get() returns may not be captured
    # yet at this point; selenium-wire's wait_for_request() may be a better
    # fit -- confirm against the page's behaviour.
    for request in driver.requests:
        # Only report answered requests that target the data endpoint.
        if request.response and 'apm-fe.xiaocaicai.com/api/data' in request.url:
            print(
                request.url,
                request.response.status_code,
                request.response.headers['Content-Type'],
                request.body,
            )
finally:
    # Always end the session so the browser and driver processes do not
    # outlive the script, even when navigation or printing fails.
    driver.quit()

我们通过日志能看到,已经被检测到使用chromedriver访问了

那我们使用 selenium-stealth 进行改造:

from seleniumwire import webdriver  # 使用 seleniumwire 的 webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium_stealth import stealth
import os

def create_data_directory():
    """Create (if needed) and return the ``data`` directory next to this script.

    Prints whether the directory was created or was already present.

    Returns:
        str: Absolute path of the ``data`` directory.

    Raises:
        FileExistsError: If ``data`` exists but is not a directory.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(script_dir, 'data')
    # isdir (not exists) so a regular file named "data" is not mistaken for
    # a usable directory.
    already_present = os.path.isdir(data_dir)
    # exist_ok=True removes the check-then-create race while still raising
    # when the path is occupied by something that is not a directory.
    os.makedirs(data_dir, exist_ok=True)
    if already_present:
        print(f"data 目录已存在: {data_dir}")
    else:
        print(f"已创建 data 目录: {data_dir}")
    return data_dir

def configure_chrome_options(data_dir):
    """Build the Chrome options used for the browser session.

    Args:
        data_dir: Directory under which the ``selenium_profile`` browser
            profile directory is placed.

    Returns:
        The ``Options`` instance carrying the command-line arguments
        listed below.
    """
    chrome_options = Options()

    profile_path = os.path.join(data_dir, 'selenium_profile')
    print(f"user_data_dir:{profile_path}")

    ua_string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; zh-CN) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"

    # Arguments are added in this exact order.
    chrome_args = (
        '--disable-blink-features=AutomationControlled',
        f"--user-data-dir={profile_path}",
        f'--user-agent={ua_string}',
        '--start-maximized',
    )
    for arg in chrome_args:
        chrome_options.add_argument(arg)

    return chrome_options

data_dir = create_data_directory()
# Path to the local chromedriver binary (replace with your own path).
chrome_driver_path = "/Users/cai/Documents/chromedriver-mac-x64/chromedriver"
service = Service(executable_path=chrome_driver_path)
# Build the ChromeOptions for this session.
options = configure_chrome_options(data_dir)

# Options passed to selenium-wire itself.
seleniumwire_options = {
    'disable_encoding': True,
}

# Create the selenium-wire WebDriver.
driver = webdriver.Chrome(service=service, options=options, seleniumwire_options=seleniumwire_options)

try:
    # Apply selenium-stealth before navigating, presenting a Chinese-language
    # Windows environment.
    stealth(driver,
            languages=["zh-CN", "zh"],
            vendor="Google Inc.",
            platform="Win32",
            webgl_vendor="Intel Inc.",
            renderer="Intel Iris OpenGL Engine",
            fix_hairline=True,
            )

    # Open the target site.
    driver.get('https://www.xiaocaicai.com/')

    # Captured traffic is available through the `requests` attribute.
    # NOTE(review): requests issued after get() returns may not be captured
    # yet at this point; selenium-wire's wait_for_request() may be a better
    # fit -- confirm against the page's behaviour.
    for request in driver.requests:
        # Only report answered requests that target the data endpoint.
        if request.response and 'apm-fe.xiaocaicai.com/api/data' in request.url:
            print(
                request.url,
                request.response.status_code,
                request.response.headers['Content-Type'],
                request.body,
            )
finally:
    # Always end the session so the browser and driver processes do not
    # outlive the script, even when navigation or printing fails.
    driver.quit()

可以看到我们使用stealth反检测之后,可以避免被检测的情况

发表回复

您的电子邮箱地址不会被公开。 必填项已用 * 标注