How to download an image using Selenium (any version)?

前端 未结 13 1565
夕颜
夕颜 2020-11-29 04:11

I was wondering, how can one use selenium/webdriver to download an image for a page. Assuming that the user session is required to download the image hence having pure URL i

13条回答
  •  情歌与酒
    2020-11-29 04:43

    How to download to a file, taking URL from element text or attribute

    The complete extension code can be found here:

    https://github.com/gravity-api/gravity-core/blob/master/src/csharp/Gravity.Core/Gravity.Core/Extensions/WebElementExtensions.cs

    If you want to use this method without writing the code yourself, install the NuGet package: https://www.nuget.org/packages/Gravity.Core/

    Install-Package Gravity.Core -Version 2020.7.5.3
    

    Usage

    using OpenQA.Selenium.Extensions;
     
    ...
     
    var driver = new ChromeDriver();

    // from element attribute: the address is read from the image's "src" attribute
    var imageElement = driver.FindElement(By.XPath("//img[@id='my_img']")).DownloadResource(path: @"C:\images\cap_image_01.png", attribute: "src");

    // from element text: the address is read from the element's text
    // (a distinct variable name is required; redeclaring the same local does not compile)
    var textElement = driver.FindElement(By.XPath("//div[1]")).DownloadResource(path: @"C:\images\cap_image_01.png");
    

    It is recommended to use the NuGet package, since it contains many more tools and extensions for Selenium.

    To use it without the NuGet package (implementing it on your own)

    Extension Class

    using System;
    using System.IO;
    using System.Net.Http;
    using System.Text.RegularExpressions;
     
    namespace Extensions
    {
        /// <summary>
        /// Fluent extensions for downloading a resource whose address is exposed
        /// by a web element, either as its text or as one of its attributes.
        /// </summary>
        public static class WebElementExtensions
        {
            // One shared client for all downloads: creating and disposing an HttpClient
            // per call can exhaust the available sockets under load.
            private static readonly HttpClient Client = new HttpClient();

            // Matches a trailing "name.ext" segment (3-4 character extension) of an address,
            // ignoring any query string that follows it.
            private static readonly Regex FileNamePattern = new Regex(@"[^/\\&\?]+\.\w{3,4}(?=([\?&].*$|$))");

            /// <summary>
            /// Downloads the resource whose address is the element's text.
            /// </summary>
            /// <param name="element">The element whose text holds the resource address.</param>
            /// <param name="path">Target file path, or a directory to save the resource into.</param>
            /// <returns>The same <paramref name="element"/>, to keep the call chain fluent.</returns>
            public static IWebElement DownloadResource(this IWebElement element, string path)
            {
                return DoDownloadResource(element, path, "");
            }

            /// <summary>
            /// Downloads the resource whose address is held in the given element attribute.
            /// </summary>
            /// <param name="element">The element which exposes the resource address.</param>
            /// <param name="path">Target file path, or a directory to save the resource into.</param>
            /// <param name="attribute">Attribute holding the address (e.g. "src"); when null or empty the element text is used.</param>
            /// <returns>The same <paramref name="element"/>, to keep the call chain fluent.</returns>
            public static IWebElement DownloadResource(this IWebElement element, string path, string attribute)
            {
                return DoDownloadResource(element, path, attribute);
            }

            /// <summary>
            /// Resolves the resource address, requests it and writes the response body to disk.
            /// Nothing is written when the address is blank or the response status is not successful.
            /// </summary>
            /// <exception cref="ArgumentNullException"><paramref name="element"/> or <paramref name="path"/> is null.</exception>
            /// <exception cref="ArgumentException"><paramref name="path"/> is empty.</exception>
            private static IWebElement DoDownloadResource(this IWebElement element, string path, string attribute)
            {
                if (element == null)
                {
                    throw new ArgumentNullException(nameof(element));
                }
                if (path == null)
                {
                    throw new ArgumentNullException(nameof(path));
                }
                if (path.Length == 0)
                {
                    throw new ArgumentException("A target path is required.", nameof(path));
                }

                // get resource address
                var resource = string.IsNullOrEmpty(attribute)
                    ? element.Text
                    : element.GetAttribute(attribute);

                // nothing to download (missing attribute or empty text)
                if (string.IsNullOrWhiteSpace(resource))
                {
                    return element;
                }
                resource = resource.Trim();

                // The public API is synchronous, so the asynchronous calls are blocked on here.
                // The response is disposed so its connection is released promptly.
                using (var response = Client.GetAsync(resource).GetAwaiter().GetResult())
                {
                    // exit condition
                    if (!response.IsSuccessStatusCode)
                    {
                        return element;
                    }

                    var target = ResolveTargetFile(path, resource);

                    // create the directory which will CONTAIN the file, never the file path itself
                    var directory = Path.GetDirectoryName(target);
                    if (!string.IsNullOrEmpty(directory))
                    {
                        Directory.CreateDirectory(directory);
                    }

                    // write the file
                    File.WriteAllBytes(target, response.Content.ReadAsByteArrayAsync().GetAwaiter().GetResult());
                }

                // keep the fluent
                return element;
            }

            // Decides where the resource is written: a path that has an extension (and is not an
            // existing directory or terminated by a separator) is used as the file itself; any
            // other path is treated as a directory and the file name is taken from the address.
            private static string ResolveTargetFile(string path, string resource)
            {
                var last = path[path.Length - 1];
                var endsWithSeparator = last == Path.DirectorySeparatorChar || last == Path.AltDirectorySeparatorChar;

                if (!endsWithSeparator && Path.HasExtension(path) && !Directory.Exists(path))
                {
                    return path;
                }

                return Path.Combine(path, FileNamePattern.Match(resource).Value);
            }
        }
    }
    

    Usage

    using Extensions;
     
    ...
     
    var driver = new ChromeDriver();

    // from element attribute: the address is read from the image's "src" attribute
    var imageElement = driver.FindElement(By.XPath("//img[@id='my_img']")).DownloadResource(path: @"C:\images\cap_image_01.png", attribute: "src");

    // from element text: the address is read from the element's text
    // (a distinct variable name is required; redeclaring the same local does not compile)
    var textElement = driver.FindElement(By.XPath("//div[1]")).DownloadResource(path: @"C:\images\cap_image_01.png");
    

提交回复
热议问题