Selenium get .har file | 易学教程

I have a two page application:
/login
/profile
I want to get a .har file for the /profile page.
When I go to the /login page, a cookie is created with key=connect.sid and value = "example value". This cookie is not yet active, so I added a cookie with an active connect.sid.

// First attempt: load the login page, then add the connectsSId cookie
// to the driver's cookie store.
WebDriver webDriver = getDriver();
webDriver.get(LOGIN_PAGE);
webDriver.manage().addCookie(connectsSId);

It does not work because, after the page loads, /login creates a new cookie. I also tried this code:

// Second attempt: load the profile page, drop every cookie the driver holds,
// then add the connectsSId cookie. The cookie is only added after the page
// request has already been made.
WebDriver webDriver = getDriver();
webDriver.get(PROFILE_PAGE);
webDriver.manage().deleteAllCookies();
webDriver.manage().addCookie(connectsSId);

This does not work either: the cookie is added, but it seems to be too late.

 // Third attempt: log in through the page objects, then open the profile page.
 // NOTE(review): the local webDriver is not used below; LoginPage receives the
 // result of a separate getDriver() call.
 WebDriver webDriver = getDriver();
 LoginPage loginPage = new LoginPage(getDriver());
 LandingPage landingPage = loginPage.login();
 landingPage.openProfilePage();

This code created a .har file for the /login page.
For some reason, the file is created only after the first call to a page. I cannot solve this problem.

You can use BrowserMob Proxy to capture all the request and response data. See here.

Set preferences in your Selenium code:

    // Set the devtools.netmonitor.har.* preferences on the browser profile:
    // automatic export to file, the log directory (dir) and a timestamped
    // file-name pattern.
    // NOTE(review): profile is presumably a FirefoxProfile - confirm against the caller.
    profile.setPreference("devtools.netmonitor.har.enableAutoExportToFile", true);
profile.setPreference("devtools.netmonitor.har.defaultLogDir", String.valueOf(dir));
profile.setPreference("devtools.netmonitor.har.defaultFileName", "network-log-file-%Y-%m-%d-%H-%M-%S");

and open console:

// Send the Ctrl+Shift+Q key chord to the browser: hold both modifiers,
// type "q", then release the modifiers.
Actions keyAction = new Actions(driver);
keyAction.keyDown(Keys.LEFT_CONTROL).keyDown(Keys.LEFT_SHIFT).sendKeys("q").keyUp(Keys.LEFT_CONTROL).keyUp(Keys.LEFT_SHIFT).perform();

Use PhantomJS with BrowserMobProxy. PhantomJS helps with JavaScript-enabled pages. The following code works for HTTPS web addresses, too.

Place 'phantomjs.exe' in C drive and you get the 'HAR-Information.har' file in C drive itself.

Make sure you DO NOT put a ' / ' at the end of the url, like

driver.get("https://www.google.co.in/")

It should be

driver.get("https://www.google.co.in");

Otherwise, it won't work.

package makemyhar;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import net.lightbody.bmp.BrowserMobProxy;
import net.lightbody.bmp.BrowserMobProxyServer;
import net.lightbody.bmp.core.har.Har;
import net.lightbody.bmp.proxy.CaptureType;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.phantomjs.PhantomJSDriver;
import org.openqa.selenium.phantomjs.PhantomJSDriverService;
import org.openqa.selenium.remote.CapabilityType;
import org.openqa.selenium.remote.DesiredCapabilities;

/**
 * Records the network traffic of one page load as a HAR file.
 *
 * <p>A BrowserMob proxy is started on a free port, PhantomJS is pointed at it,
 * the page is loaded, and the captured HAR is written to
 * {@code C:\HAR-Information.har}.
 */
public class MakeMyHAR {
    /**
     * Runs the capture.
     *
     * @param args unused
     * @throws IOException if the HAR file cannot be written
     * @throws InterruptedException declared for callers; kept from the original signature
     */
    public static void main(String[] args) throws IOException, InterruptedException {

        //BrowserMobProxy
        BrowserMobProxy server = new BrowserMobProxyServer();
        server.start(0);

        WebDriver driver = null;
        try {
            server.setHarCaptureTypes(CaptureType.getAllContentCaptureTypes());
            server.enableHarCaptureTypes(CaptureType.REQUEST_CONTENT, CaptureType.RESPONSE_CONTENT);
            server.newHar("Google");

            //PHANTOMJS_CLI_ARGS
            ArrayList<String> cliArgsCap = new ArrayList<>();
            cliArgsCap.add("--proxy=localhost:" + server.getPort());
            cliArgsCap.add("--ignore-ssl-errors=yes");

            //DesiredCapabilities
            DesiredCapabilities capabilities = new DesiredCapabilities();
            capabilities.setCapability(CapabilityType.ACCEPT_SSL_CERTS, true);
            capabilities.setCapability(CapabilityType.SUPPORTS_JAVASCRIPT, true);
            capabilities.setCapability(PhantomJSDriverService.PHANTOMJS_CLI_ARGS, cliArgsCap);
            capabilities.setCapability(PhantomJSDriverService.PHANTOMJS_EXECUTABLE_PATH_PROPERTY, "C:\\phantomjs.exe");

            //WebDriver
            driver = new PhantomJSDriver(capabilities);
            driver.get("https://www.google.co.in");

            //HAR
            Har har = server.getHar();
            // try-with-resources closes (and flushes) the file even if writing fails.
            try (FileOutputStream fos = new FileOutputStream("C:\\HAR-Information.har")) {
                har.writeTo(fos);
            }
        } finally {
            // Shut the browser down before the proxy it is routed through, and
            // make sure the proxy is stopped even if quitting the browser fails.
            try {
                if (driver != null) {
                    // quit() ends the whole WebDriver session; close() only closes the current window.
                    driver.quit();
                }
            } finally {
                server.stop();
            }
        }
    }
}

I have tried as well to get the har file using a proxy like browsermob proxy

I did a lot of research because the file which I've received was always empty.

What I did was to enable the browser performance log.

Note this will work only with chrome driver.

This is my driver class (in python)

from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium import webdriver
from lib.config import config


class Driver:
    """Headless Chrome wrapper that keeps the performance log of the last page visited.

    The Chrome options are built once, in the class body, and shared by every
    instance. After each ``navigate`` call the browser's ``'performance'`` log
    is stored on ``performance_log``.
    """

    # NOTE(review): this mutates the shared DesiredCapabilities.CHROME dict in
    # place, and `capabilities` is never passed to webdriver.Chrome below.
    # Presumably the logging preference reaches the driver through that shared
    # dict - confirm before refactoring this.
    capabilities = DesiredCapabilities.CHROME
    capabilities['loggingPrefs'] = {'performance': 'ALL'}

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument("--headless")
    mobile_emulation = {"deviceName": "Nexus 5"}

    # Mobile emulation is only switched on when the environment asks for it.
    if config.Env().is_mobile():
        chrome_options.add_experimental_option(
            "mobileEmulation", mobile_emulation)

    chrome_options.add_experimental_option(
        'perfLoggingPrefs', {"enablePage": True})

    def __init__(self):
        """Start a Chrome session using the shared class-level options."""
        # Defined up front so the attribute can be read before the first navigate().
        self.performance_log = []
        self.instance = webdriver.Chrome(
            executable_path='/usr/local/bin/chromedriver', options=self.chrome_options)

    def navigate(self, url):
        """Load ``url`` and store the browser's performance log for it.

        Args:
            url: Address to open; must be a ``str``.

        Raises:
            TypeError: If ``url`` is not a string.
        """
        if not isinstance(url, str):
            raise TypeError("URL must be a string.")
        self.instance.get(url)
        self.performance_log = self.instance.get_log('performance')

The amount of information found in the output is huge, so you'll have to filter the raw data and keep only the network response-received and request-sent objects.

import json
import secrets


def digest_log_data(performance_log):
    """Round-trip the raw performance log through ``data.json``.

    The log is serialised to ``data.json`` in the current working directory
    (overwriting any previous dump) and then parsed back from that file, so
    the returned value is the JSON-decoded form of ``performance_log``.

    Args:
        performance_log: JSON-serialisable log data.

    Returns:
        The data read back from ``data.json``.
    """
    dump_path = 'data.json'

    # Persist the raw data first so it is also available on disk.
    with open(dump_path, 'w', encoding='utf-8') as sink:
        json.dump(performance_log, sink)

    # Read the dump back using the same encoding it was written with.
    with open(dump_path, encoding='utf-8') as source:
        return json.load(source)


def digest_raw_data(data, mongo_object=None):
    """Collect the network request/response events from a performance log.

    Each entry's ``'message'`` field is a JSON string. Entries whose decoded
    ``['message']['method']`` is ``Network.responseReceived`` or
    ``Network.requestWillBeSent`` are stored in ``mongo_object`` under a
    freshly generated random hex key; all other entries are ignored.

    Args:
        data: Iterable of log entries, each a mapping with a ``'message'`` key.
        mongo_object: Dict to fill in place. A new dict is created when omitted,
            so results never leak from one call into the next.

    Returns:
        The dict that was filled (``mongo_object`` itself when one was passed).
    """
    if mongo_object is None:
        mongo_object = {}

    network_methods = ('Network.responseReceived', 'Network.requestWillBeSent')
    for entry in data:
        data_object = json.loads(entry['message'])
        if data_object['message']['method'] in network_methods:
            mongo_object[secrets.token_hex(30)] = data_object
    return mongo_object

We chose to push this data into MongoDB, where it will be analysed later by an ETL and pushed into a Redshift database to create statistics.

I hope this is what you are looking for.

The way I'm running the script is:

import codecs
from pprint import pprint
import urllib
from lib import mongo_client
from lib.test_data import test_data as data
from jsonpath_ng.ext import parse
from IPython import embed
from lib.output_data import process_output_data as output_data
from lib.config import config
from lib import driver

browser = driver.Driver()

try:
    # get the list of urls which we need to navigate
    urls = data.url_list()

    for url in urls:
        browser.navigate(config.Env().base_url() + url)
        print('Visiting ' + url)
        # get performance log
        performance_log = browser.performance_log
        # digest the performance log; kept under its own name so the imported
        # `data` module is not shadowed inside the loop
        log_entries = output_data.digest_log_data(performance_log)
        # initiate an empty dict
        mongo_object = {}
        # prepare the data for the mongo document
        output_data.digest_raw_data(log_entries, mongo_object)
        # load data into the mongo db
        mongo_client.populate_mongo(mongo_object)
finally:
    # always shut the browser down, even if a page or the database step fails
    browser.instance.quit()

My main source was this one which I've adjusted it to my needs. https://www.reddit.com/r/Python/comments/97m9iq/headless_browsers_export_to_har/ Thanks

来源：https://stackoverflow.com/questions/29081290/selenium-get-har-file

标签

java

selenium

browsermob