how to set proxy with authentication in selenium chromedriver python?

后端 未结 4 743
陌清茗
陌清茗 2020-12-01 06:01

I am creating a script that crawls a website to gather some data. The problem is that the site blocks me after too many requests; by using a proxy I can send more requests.

相关标签:
4条回答
  • 2020-12-01 06:16

    Selenium Chrome Proxy Authentication

    Setting chromedriver proxy with Selenium using Python

    If you need to use a proxy with Python and the Selenium library with chromedriver, you usually use the following code (without any username and password):

    chrome_options = webdriver.ChromeOptions()
    # Format host and port in one step so this works whether `port` is a str
    # or an int, and does not rely on `%` binding tighter than `+`.
    chrome_options.add_argument('--proxy-server=%s:%s' % (hostname, port))
    # `options=` replaces the deprecated `chrome_options=` keyword.
    driver = webdriver.Chrome(options=chrome_options)
    

    It works fine unless the proxy requires authentication. If the proxy requires you to log in with a username and password, it will not work. In this case, you have to use a trickier solution, which is explained below. By the way, if you whitelist your server's IP address with the proxy provider or server, it should not ask for proxy credentials.

    HTTP Proxy Authentication with Chromedriver in Selenium

    To set up proxy authentication we will generate a special file and upload it to chromedriver dynamically using the code below. This code configures Selenium with chromedriver to use an HTTP proxy that requires authentication with a user/password pair.

    import os
    import zipfile
    
    from selenium import webdriver
    
    # Connection details of the authenticating HTTP proxy. These four values
    # are substituted into the background_js template further down.
    PROXY_HOST = '192.168.3.2'  # rotating proxy or host
    PROXY_PORT = 8080 # port
    PROXY_USER = 'proxy-user' # username
    PROXY_PASS = 'proxy-password' # password
    
    
    # Text written as manifest.json inside the generated extension archive.
    # It requests the proxy and blocking webRequest permissions and registers
    # background.js as the extension's background script.
    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Chrome Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version":"22.0.0"
    }
    """
    
    # Text written as background.js inside the generated extension archive.
    # The script sets a fixed HTTP proxy and answers onAuthRequired events
    # with the configured username and password. The four %s slots are filled,
    # in order, with PROXY_HOST, PROXY_PORT, PROXY_USER and PROXY_PASS.
    # NOTE(review): the values are inserted without escaping, so a double
    # quote or backslash in a credential would break the generated script.
    background_js = """
    var config = {
            mode: "fixed_servers",
            rules: {
            singleProxy: {
                scheme: "http",
                host: "%s",
                port: parseInt(%s)
            },
            bypassList: ["localhost"]
            }
        };
    
    chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
    
    function callbackFn(details) {
        return {
            authCredentials: {
                username: "%s",
                password: "%s"
            }
        };
    }
    
    chrome.webRequest.onAuthRequired.addListener(
                callbackFn,
                {urls: ["<all_urls>"]},
                ['blocking']
    );
    """ % (PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS)
    
    
    def get_chromedriver(use_proxy=False, user_agent=None):
        """Create a Chrome WebDriver, optionally using the authenticated proxy.

        Args:
            use_proxy: When true, pack ``manifest_json`` and ``background_js``
                into ``proxy_auth_plugin.zip`` (relative to the current
                working directory) and load that archive as an extension.
            user_agent: Optional value passed to Chrome as ``--user-agent``.

        Returns:
            The ``webdriver.Chrome`` instance that was started.
        """
        path = os.path.dirname(os.path.abspath(__file__))
        chrome_options = webdriver.ChromeOptions()
        if use_proxy:
            pluginfile = 'proxy_auth_plugin.zip'

            # The extension is rebuilt on every call so it always carries the
            # current proxy settings.
            with zipfile.ZipFile(pluginfile, 'w') as zp:
                zp.writestr("manifest.json", manifest_json)
                zp.writestr("background.js", background_js)
            chrome_options.add_extension(pluginfile)
        if user_agent:
            chrome_options.add_argument('--user-agent=%s' % user_agent)
        # `options=` replaces the deprecated `chrome_options=` keyword.
        # NOTE(review): newer Selenium releases may expect the driver path via
        # a Service object instead of positionally -- confirm against the
        # installed version.
        driver = webdriver.Chrome(
            os.path.join(path, 'chromedriver'),
            options=chrome_options)
        return driver
    
    def main():
        """Open https://httpbin.org/ip in a Chrome session that uses the proxy."""
        browser = get_chromedriver(use_proxy=True)
        # Alternative check page: https://www.google.com/search?q=my+ip+address
        browser.get('https://httpbin.org/ip')
    
    if __name__ == '__main__':
        main()
    

    The get_chromedriver function returns a configured Selenium webdriver that you can use in your application. This code is tested and works just fine.

    Read more about onAuthRequired event in Chrome.

    0 讨论(0)
  • 2020-12-01 06:22

    Somewhere along the way, after updates, the solution using the extension stopped working (on Windows at least), while it still works on Mac and Linux. I think chromedriver v2.44 was the last version that worked with extensions.

    0 讨论(0)
  • 2020-12-01 06:23

    After hours of digging around with the same problem that you had, I came across this website https://botproxy.net/docs/how-to/setting-chromedriver-proxy-auth-with-selenium-using-python/. I tested it out and it worked perfectly for me.

    import os
    import zipfile
    
    from selenium import webdriver
    
    # Connection details of the authenticating HTTP proxy. These four values
    # are substituted into the background_js template further down.
    PROXY_HOST = 'x.botproxy.net'  # rotating proxy
    PROXY_PORT = 8080
    PROXY_USER = 'proxy-user'
    PROXY_PASS = 'proxy-password'
    
    
    # Text written as manifest.json inside the generated extension archive.
    # It requests the proxy and blocking webRequest permissions and registers
    # background.js as the extension's background script.
    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Chrome Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version":"22.0.0"
    }
    """
    
    # Text written as background.js inside the generated extension archive.
    # The script sets a fixed HTTP proxy and answers onAuthRequired events
    # with the configured username and password. The four %s slots are filled,
    # in order, with PROXY_HOST, PROXY_PORT, PROXY_USER and PROXY_PASS.
    # NOTE(review): the values are inserted without escaping, so a double
    # quote or backslash in a credential would break the generated script.
    background_js = """
    var config = {
            mode: "fixed_servers",
            rules: {
              singleProxy: {
                scheme: "http",
                host: "%s",
                port: parseInt(%s)
              },
              bypassList: ["localhost"]
            }
          };
    
    chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
    
    function callbackFn(details) {
        return {
            authCredentials: {
                username: "%s",
                password: "%s"
            }
        };
    }
    
    chrome.webRequest.onAuthRequired.addListener(
                callbackFn,
                {urls: ["<all_urls>"]},
                ['blocking']
    );
    """ % (PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS)
    
    
    def get_chromedriver(use_proxy=False, user_agent=None):
        """Create a Chrome WebDriver, optionally using the authenticated proxy.

        Args:
            use_proxy: When true, pack ``manifest_json`` and ``background_js``
                into ``proxy_auth_plugin.zip`` (relative to the current
                working directory) and load that archive as an extension.
            user_agent: Optional value passed to Chrome as ``--user-agent``.

        Returns:
            The ``webdriver.Chrome`` instance that was started.
        """
        path = os.path.dirname(os.path.abspath(__file__))
        chrome_options = webdriver.ChromeOptions()
        if use_proxy:
            pluginfile = 'proxy_auth_plugin.zip'

            # The extension is rebuilt on every call so it always carries the
            # current proxy settings.
            with zipfile.ZipFile(pluginfile, 'w') as zp:
                zp.writestr("manifest.json", manifest_json)
                zp.writestr("background.js", background_js)
            chrome_options.add_extension(pluginfile)
        if user_agent:
            chrome_options.add_argument('--user-agent=%s' % user_agent)
        # `options=` replaces the deprecated `chrome_options=` keyword.
        # NOTE(review): newer Selenium releases may expect the driver path via
        # a Service object instead of positionally -- confirm against the
        # installed version.
        driver = webdriver.Chrome(
            os.path.join(path, 'chromedriver'),
            options=chrome_options)
        return driver
    
    def main():
        """Open https://httpbin.org/ip in a Chrome session that uses the proxy."""
        browser = get_chromedriver(use_proxy=True)
        # Alternative check page: https://www.google.com/search?q=my+ip+address
        browser.get('https://httpbin.org/ip')
    
    if __name__ == '__main__':
        main()
    
    0 讨论(0)
  • 2020-12-01 06:26

    Here is a quick, creative solution that doesn't require modifying Selenium's Options or uploading a file to chromedriver. It makes use of pyautogui (you can use any Python package that simulates key presses) to enter the proxy auth details. It also uses threading to account for the Chrome authentication popup window that would otherwise pause the script.

    import time
    from threading import Thread
    import pyautogui
    from selenium.webdriver.chrome.options import Options
    from selenium import webdriver
    
    # Placeholder proxy settings; replace with real values before running.
    hostname = "HOST_NAME"
    port = "PORT"
    proxy_username = "USERNAME"
    proxy_password = "PASSWORD"
    
    # Only host:port is given to Chrome here; the username and password are
    # typed in later by enter_proxy_auth.
    chrome_options = Options()
    chrome_options.add_argument('--proxy-server={}'.format(hostname + ":" + port))
    driver = webdriver.Chrome(options=chrome_options)
    
    
    def enter_proxy_auth(proxy_username, proxy_password):
        """Type the proxy credentials with simulated key presses.

        Waits one second, then types the username, presses Tab, types the
        password and presses Enter.
        NOTE(review): presumably the Chrome proxy-auth dialog has keyboard
        focus by then -- nothing here verifies that.
        """
        time.sleep(1)
        keystroke_plan = (
            (proxy_username, 'tab'),
            (proxy_password, 'enter'),
        )
        for text, follow_up_key in keystroke_plan:
            pyautogui.typewrite(text)
            pyautogui.press(follow_up_key)
    
    
    def open_a_page(driver, url):
        """Navigate ``driver`` to ``url``; used as a thread target below."""
        driver.get(url)
    
    
    # Load the page and type the credentials on separate threads, so the
    # credential typing can proceed while driver.get() is still running.
    Thread(target=open_a_page, args=(driver, "http://www.example.com/")).start()
    Thread(target=enter_proxy_auth, args=(proxy_username, proxy_password)).start()
    

    NOTE: For any serious project or test suite I would recommend opting for a more robust solution. However, if you are just experimenting and require a quick and effective solution, this is an option.

    0 讨论(0)
提交回复
热议问题