Get ReadyState from WebBrowser control without DoEvents

前端 未结 3 2058
天命终不由人
天命终不由人 2020-11-30 10:29

This has been answered many times here and at other sites, and it's working, but I would like ideas for other ways to:

get the ReadyState = Complete after using a navig

相关标签:
3条回答
  • 2020-11-30 11:21

    Below is a basic WinForms app code, illustrating how to wait for the DocumentCompleted event asynchronously, using async/await. It navigates to multiple pages, one after another. Everything is taking place on the main UI thread.

    Instead of calling this.webBrowser.Navigate(url), it might be simulating a form button click, to trigger a POST-style navigation.

    The webBrowser.IsBusy async loop logic is optional; its purpose is to account (non-deterministically) for the page's dynamic AJAX code, which may run after the window.onload event.

    using System;
    using System.Diagnostics;
    using System.Threading;
    using System.Threading.Tasks;
    using System.Windows.Forms;
    
    namespace WebBrowserApp
    {
        /// <summary>
        /// Demo form that hosts a <see cref="WebBrowser"/> and navigates it through a list of
        /// URLs, awaiting each page load with async/await instead of Application.DoEvents.
        /// </summary>
        public partial class MainForm : Form
        {
            WebBrowser webBrowser;

            public MainForm()
            {
                InitializeComponent();

                // create a WebBrowser
                this.webBrowser = new WebBrowser();
                this.webBrowser.Dock = DockStyle.Fill;
                this.Controls.Add(this.webBrowser);

                this.Load += MainForm_Load;
            }

            // Form Load event handler
            async void MainForm_Load(object sender, EventArgs e)
            {
                var urls = new String[] {
                        "http://www.example.com",
                        "http://www.gnu.org",
                        "http://www.debian.org" };

                // cancel the whole operation in 30 sec; the source owns a timer, so dispose it
                using (var cts = new CancellationTokenSource(30000))
                {
                    try
                    {
                        await NavigateInLoopAsync(urls, cts.Token);
                    }
                    catch (OperationCanceledException)
                    {
                        // This is an async void handler: an exception escaping from here would be
                        // rethrown on the UI synchronization context. The timeout is an expected
                        // outcome, so report it instead of letting it propagate.
                        Debug.Print("Navigation was cancelled before all pages were loaded.");
                    }
                }
            }

            /// <summary>
            /// Navigates to each URL in turn and prints the resulting HTML to the debug output.
            /// </summary>
            /// <param name="urls">URLs to visit, in order.</param>
            /// <param name="ct">Token that aborts the loop; cancellation surfaces as an exception.</param>
            async Task NavigateInLoopAsync(string[] urls, CancellationToken ct)
            {
                foreach (var url in urls)
                {
                    ct.ThrowIfCancellationRequested();
                    var html = await NavigateAsync(ct, () =>
                        this.webBrowser.Navigate(url));
                    Debug.Print("url: {0}, html: \n{1}", url, html);
                }
            }

            /// <summary>
            /// Starts a navigation and asynchronously waits for the DOM "onload" event of the
            /// resulting document.
            /// </summary>
            /// <param name="ct">Token that cancels the wait.</param>
            /// <param name="startNavigation">Action that triggers the navigation.</param>
            /// <returns>
            /// The outer HTML of the document's "html" element, or an empty string when no
            /// document or no such element is available.
            /// </returns>
            async Task<string> NavigateAsync(CancellationToken ct, Action startNavigation)
            {
                var onloadTcs = new TaskCompletionSource<bool>();
                EventHandler onloadEventHandler = null;

                WebBrowserDocumentCompletedEventHandler documentCompletedHandler = delegate
                {
                    // DocumentCompleted may be called several time for the same page,
                    // if the page has frames
                    if (onloadEventHandler != null)
                        return;

                    var completedDocument = this.webBrowser.Document;
                    var completedWindow = completedDocument != null ? completedDocument.Window : null;
                    if (completedWindow == null)
                        return; // nothing to attach to yet; a later DocumentCompleted may succeed

                    // so, observe DOM onload event to make sure the document is fully loaded
                    onloadEventHandler = (s, e) =>
                        onloadTcs.TrySetResult(true);
                    completedWindow.AttachEventHandler("onload", onloadEventHandler);
                };

                this.webBrowser.DocumentCompleted += documentCompletedHandler;
                try
                {
                    using (ct.Register(() => onloadTcs.TrySetCanceled(), useSynchronizationContext: true))
                    {
                        startNavigation();
                        // wait for DOM onload event, throw if cancelled
                        await onloadTcs.Task;
                    }
                }
                finally
                {
                    this.webBrowser.DocumentCompleted -= documentCompletedHandler;
                    if (onloadEventHandler != null)
                    {
                        // Guard the lookups: a null document/window here would otherwise throw
                        // from the finally block and hide the original (e.g. cancellation) exception.
                        var currentDocument = this.webBrowser.Document;
                        var currentWindow = currentDocument != null ? currentDocument.Window : null;
                        if (currentWindow != null)
                            currentWindow.DetachEventHandler("onload", onloadEventHandler);
                    }
                }

                // the page has fully loaded by now

                // optional: let the page run its dynamic AJAX code,
                // we might add another timeout for this loop
                do { await Task.Delay(500, ct); }
                while (this.webBrowser.IsBusy);

                // return the page's HTML content
                var loadedDocument = this.webBrowser.Document;
                if (loadedDocument == null)
                    return String.Empty;

                var htmlElements = loadedDocument.GetElementsByTagName("html");
                return htmlElements.Count > 0 ? htmlElements[0].OuterHtml : String.Empty;
            }
        }
    }
    

    If you're looking to do something similar from a console app, here is an example of that.

    0 讨论(0)
  • 2020-11-30 11:26

    The solution is simple:

        // MAKE SURE ReadyState = Complete
                // Keep processing pending window messages until the control reports Complete.
                while (WebBrowser1.ReadyState != WebBrowserReadyState.Complete)
                {
                    Application.DoEvents();
                }
    

    // Move on to your subsequent code...


    Quick and dirty. I am a VBA guy, and this logic has always worked for me there; it took me days of searching without finding a C# equivalent, so I figured this one out myself.

    Following is my complete function, the objective is to obtain a segment of info from a webpage:

    // Maximum number of page loads GetCarrier performs before giving up.
        private int maxReloadAttempt = 3;
        // 1-based number of the load attempt in progress; always reset to 1 when GetCarrier returns.
        private int currentAttempt = 1;

        /// <summary>
        /// Loads <paramref name="webAddress"/> in a temporary WebBrowser control and extracts the
        /// text that follows the "subtitle block pull-left" marker, reloading the page up to
        /// <c>maxReloadAttempt</c> times when the marker is not found.
        /// </summary>
        /// <param name="webAddress">Address of the page to load.</param>
        /// <returns>
        /// The value produced by <c>Sub_String</c> for the first load that contains the marker,
        /// or "Unavailable" when every attempt fails or an error occurs.
        /// </returns>
        /// <remarks>
        /// The wait loop pumps messages with Application.DoEvents and has no timeout, so a page
        /// that never reaches the Complete state keeps this method spinning.
        /// </remarks>
        private string GetCarrier(string webAddress)
        {
            const string strStartSearchFor = "subtitle block pull-left\">";
            const string strEndSearchFor = "<";

            try
            {
                // Iterate instead of recursing: the recursive form discarded the nested result,
                // re-scanned stale HTML, and left currentAttempt above the limit after a failure,
                // which made every later call return "Unavailable" immediately.
                for (currentAttempt = 1; currentAttempt <= maxReloadAttempt; currentAttempt++)
                {
                    // Each attempt gets its own browser, disposed as soon as the attempt ends.
                    using (WebBrowser WebBrowser_4MobileCarrier = new WebBrowser())
                    {
                        WebBrowser_4MobileCarrier.ScriptErrorsSuppressed = true;
                        WebBrowser_4MobileCarrier.Navigate(webAddress);

                        // MAKE SURE ReadyState = Complete
                        while (WebBrowser_4MobileCarrier.ReadyState != WebBrowserReadyState.Complete)
                        {
                            Application.DoEvents();
                        }

                        // LOAD HTML (the document or its body may be missing)
                        var document = WebBrowser_4MobileCarrier.Document;
                        var body = document != null ? document.Body : null;
                        string innerHtml = body != null ? body.InnerHtml : null;

                        // ATTEMPT TO EXTRACT CARRIER STRING
                        if (innerHtml != null
                            && innerHtml.IndexOf(strStartSearchFor, System.StringComparison.Ordinal) >= 0)
                        {
                            return Sub_String(innerHtml, strStartSearchFor, strEndSearchFor, "0"); // Method: "Sub_String" is my custom function
                        }
                    }
                }
            }
            catch (System.Exception ex)
            {
                // Still best-effort (the caller gets "Unavailable"), but leave a trace of the failure.
                System.Diagnostics.Debug.WriteLine("GetCarrier failed for '" + webAddress + "': " + ex);
            }
            finally
            {
                currentAttempt = 1; // Reset attempt counter so the next call starts fresh
            }

            return "Unavailable";
        }
    
    0 讨论(0)
  • 2020-11-30 11:32

    Here is a "quick & dirty" solution. It's not 100% foolproof but it doesn't block UI thread and it should be satisfactory to prototype WebBrowser control Automation procedures:

        /// <summary>
        /// Click handler that runs a fixed sequence of browser automation steps on a
        /// background task, passing each step to stepTheWeb.
        /// </summary>
        private async void testButton_Click(object sender, EventArgs e)
        {
            Action automationScript = () =>
            {
                stepTheWeb(() => wb.Navigate("www.yahoo.com"));
                stepTheWeb(() => wb.Navigate("www.microsoft.com"));
                stepTheWeb(() => wb.Navigate("asp.net"));
                stepTheWeb(() => wb.Document.InvokeScript("eval", new[] { "$('p').css('background-color','yellow')" }));

                // The flag is written inside the step and read here once the step has finished.
                bool containsGetStarted = false;
                stepTheWeb(() => containsGetStarted = wb.DocumentText.Contains("Get Started"));
                if (containsGetStarted)
                {
                    /* TODO */
                }
                // ...
            };

            await Task.Factory.StartNew(automationScript);
        }
    
        /// <summary>
        /// Runs one automation step through this form's Invoke, then polls the browser's
        /// ReadyState (also through Invoke) every 300 ms until it reports Complete.
        /// </summary>
        /// <param name="task">The automation step to execute.</param>
        private void stepTheWeb(Action task)
        {
            this.Invoke(new Action(task));

            WebBrowserReadyState observedState;
            do
            {
                // Read the state via Invoke, then pause before deciding whether to poll again.
                observedState = (WebBrowserReadyState)this.Invoke(
                    new Func<WebBrowserReadyState>(() => wb.ReadyState));
                System.Threading.Thread.Sleep(300);
            }
            while (observedState != WebBrowserReadyState.Complete);
        }
    

    Here is a bit more generic version of testButton_Click method:

        /// <summary>
        /// Click handler that builds a list of automation steps and replays them in order
        /// on a background task, one stepTheWeb call per step.
        /// </summary>
        private async void testButton_Click(object sender, EventArgs e)
        {
            var steps = new List<Action>();
            steps.Add(() => wb.Navigate("www.yahoo.com"));
            steps.Add(() => wb.Navigate("www.microsoft.com"));
            steps.Add(() => wb.Navigate("asp.net"));
            steps.Add(() => wb.Document.InvokeScript("eval", new[] { "$('p').css('background-color','yellow')" }));
            steps.Add(() =>
            {
                bool testFlag = wb.DocumentText.Contains("Get Started");
                if (testFlag)
                {
                    /*  TODO */
                }
            });
            //...

            await Task.Factory.StartNew(() =>
            {
                foreach (Action step in steps)
                {
                    stepTheWeb(step);
                }
            });
        }
    

    [Update]

    I have adapted my "quick & dirty" sample by borrowing and slightly refactoring @Noseratio's NavigateAsync method from this topic. The new code version executes asynchronously, in the UI thread context, not only navigation operations but also JavaScript/AJAX calls - any lambda or method that implements one automation step.

    Any and all code reviews/comments are very welcome, especially from @Noseratio. Together, we will make this world better ;)

        /// <summary>
        /// Classifies a WebBrowser automation step.
        /// </summary>
        /// <remarks>
        /// In the code shown here only <c>Navigation</c> changes behavior: for that value
        /// ExecuteWebBrowserAutomationAction waits for the DOM onload event. The other values
        /// are not treated differently there — presumably informational; confirm against other callers.
        /// </remarks>
        public enum ActionTypeEnumeration
        {
            // Step that starts a page navigation.
            Navigation = 1,
            // Step that runs script in the loaded document.
            Javascript = 2,
            // Step that touches form controls.
            UIThreadDependent = 3,
            // Fallback used when no type is given (the default of ExecuteWebBrowserAutomationAction).
            UNDEFINED = 99
        }
    
        /// <summary>
        /// Pairs one automation step with the classification of that step.
        /// </summary>
        public class ActionDescriptor
        {
            /// <summary>The delegate that performs the step.</summary>
            public Action Action { get; set; }
            /// <summary>The kind of step that <see cref="Action"/> performs.</summary>
            public ActionTypeEnumeration ActionType { get; set; }
        }
    
        /// <summary>
        /// Executes a set of WebBrowser control's Automation actions one after another and
        /// accumulates the length of the HTML returned by each step.
        /// </summary>
        /// <remarks>
        ///  The test form should have the following controls:
        ///    wb - WebBrowser,
        ///    testButton - Button,
        ///    testCheckBox - CheckBox,
        ///    totalHtmlLengthTextBox - TextBox
        ///  The whole sequence is cancelled after 60 seconds; any failure, including that
        ///  timeout, is reported in a message box.
        /// </remarks>
        private async void testButton_Click(object sender, EventArgs e)
        {
            try
            {
                // The source owns a timer for the 60 sec timeout, so dispose it when done.
                using (var cts = new CancellationTokenSource(60000))
                {
                    var actions = new List<ActionDescriptor>()
                    {
                        new ActionDescriptor() { Action = ()=>  wb.Navigate("www.yahoo.com"), ActionType = ActionTypeEnumeration.Navigation}  ,
                        new ActionDescriptor() { Action = () => wb.Navigate("www.microsoft.com"), ActionType = ActionTypeEnumeration.Navigation}  ,
                        new ActionDescriptor() { Action = () => wb.Navigate("asp.net"), ActionType = ActionTypeEnumeration.Navigation}  ,
                        new ActionDescriptor() { Action = () => wb.Document.InvokeScript("eval", new[] { "$('p').css('background-color','yellow')" }), ActionType = ActionTypeEnumeration.Javascript},
                        new ActionDescriptor() { Action =
                        () => {
                                 testCheckBox.Checked = wb.DocumentText.Contains("Get Started");
                               },
                               ActionType = ActionTypeEnumeration.UIThreadDependent}
                        //...
                    };

                    foreach (var action in actions)
                    {
                        string html = await ExecuteWebBrowserAutomationAction(cts.Token, action.Action, action.ActionType);
                        // count HTML web page stats - just for fun
                        int totalLength = 0;
                        Int32.TryParse(totalHtmlLengthTextBox.Text, out totalLength);
                        totalLength += !string.IsNullOrWhiteSpace(html) ? html.Length : 0;
                        totalHtmlLengthTextBox.Text = totalLength.ToString();
                    }
                }
            }
            catch (Exception ex)
            {
                // async void handler: report here rather than letting the exception escape.
                MessageBox.Show(ex.Message, "Error");
            }
        }
    
        /// <summary>
        /// Runs one WebBrowser automation step and, for navigation steps, asynchronously
        /// waits for the DOM "onload" event of the resulting document.
        /// </summary>
        /// <param name="ct">Token that cancels the wait and the settle-down delay.</param>
        /// <param name="runWebBrowserAutomationAction">The step to execute.</param>
        /// <param name="actionType">
        /// Kind of step; only <c>Navigation</c> makes this method wait for onload.
        /// </param>
        /// <returns>
        /// The outer HTML of the document's "html" element, or an empty string when no
        /// document or no such element is available.
        /// </returns>
        async Task<string> ExecuteWebBrowserAutomationAction(
                                CancellationToken ct,
                                Action runWebBrowserAutomationAction,
                                ActionTypeEnumeration actionType = ActionTypeEnumeration.UNDEFINED)
        {
            var onloadTcs = new TaskCompletionSource<bool>();
            EventHandler onloadEventHandler = null;

            WebBrowserDocumentCompletedEventHandler documentCompletedHandler = delegate
            {
                // DocumentCompleted may be called several times for the same page,
                // if the page has frames
                if (onloadEventHandler != null)
                    return;

                var completedDocument = this.wb.Document;
                var completedWindow = completedDocument != null ? completedDocument.Window : null;
                if (completedWindow == null)
                    return; // nothing to attach to yet; a later DocumentCompleted may succeed

                // so, observe DOM onload event to make sure the document is fully loaded
                onloadEventHandler = (s, e) =>
                    onloadTcs.TrySetResult(true);
                completedWindow.AttachEventHandler("onload", onloadEventHandler);
            };

            this.wb.DocumentCompleted += documentCompletedHandler;
            try
            {
                using (ct.Register(() => onloadTcs.TrySetCanceled(), useSynchronizationContext: true))
                {
                    runWebBrowserAutomationAction();

                    if (actionType == ActionTypeEnumeration.Navigation)
                    {
                        // wait for DOM onload event, throw if cancelled
                        await onloadTcs.Task;
                    }
                }
            }
            finally
            {
                this.wb.DocumentCompleted -= documentCompletedHandler;
                if (onloadEventHandler != null)
                {
                    // Guard the lookups: a null document/window here would otherwise throw
                    // from the finally block and hide the original (e.g. cancellation) exception.
                    var currentDocument = this.wb.Document;
                    var currentWindow = currentDocument != null ? currentDocument.Window : null;
                    if (currentWindow != null)
                        currentWindow.DetachEventHandler("onload", onloadEventHandler);
                }
            }

            // the page has fully loaded by now

            // optional: let the page run its dynamic AJAX code,
            // we might add another timeout for this loop
            do { await Task.Delay(500, ct); }
            while (this.wb.IsBusy);

            // return the page's HTML content; the caller already treats an empty result as "no HTML"
            var loadedDocument = this.wb.Document;
            if (loadedDocument == null)
                return String.Empty;

            var htmlElements = loadedDocument.GetElementsByTagName("html");
            return htmlElements.Count > 0 ? htmlElements[0].OuterHtml : String.Empty;
        }
    
    0 讨论(0)
提交回复
热议问题