VBA Interact with HTML + WebBrowser

The original idea from this post came from this another one. But I needed to have it extended to any other functions, and to deal with Gmail two steps login. To interact with HTML pages and its controls we need to add two references in our Excel VBA.
  1. Microsoft HTML Object Library (mshtml.tlb): This library is required to access all HTML controls which can be present on your HTML page.
  2. Microsoft Internet Controls (ieframe.dll): This reference is required to do operations on Internet Explorer because to open an HTML page we need to access Internet Explorer.

How to Add Reference in Excel

  1. Go to VB Editor Screen (Alt+F11)
  2. Tools –> References…
  3. From the List of Available References Select your Reference Name which you want to add.
  4. Note: If you are not able to find the Reference Name in the list then Click on browse and select the dll name which is given for those Reference Names.
  5. Click OK

The VBA WebBrowser project

It will look something like these: VBA_WebBrowser ToDo: detect where is the mouse pressing down and find object in that X,Y position, so inner HTML code can be recalled and copied to clipboard. Paste inside the UserForm code the following, and add the neccesary objects that warns when loaded.
'http://automatetheweb.net/vba-getelementsbytagname-method/

Option Explicit

Private bWebChange As Boolean

Private Type PointAPI
    X As Long
    Y As Long
End Type

Private oTmpHTMLDoc As HTMLDocument 'Reference to Microsoft HTML Object Library
Private oTmpHTML_Element As IHTMLElement
Private oTmpHTML_Element2 As IHTMLElement2
Private oTmpHTML_TagCol As IHTMLElementCollection
Private oTmpHTML_TagCol2 As IHTMLElementCollection2

Private Sub txtURL_Enter()
    Call cmdGo_Click
End Sub

Private Sub UserForm_Initialize()
'It's not the same to go from different webbrowser, site can change for each browser...
'Is is a multipage site?
    With Me
        .txtUsername.Text = ""
        .txtPassword.Text = ""
    End With
End Sub

Private Sub cmdGo_Click()
    With Me
        If .txtURL.Text = vbNullString Then Exit Sub
        
        If .txtURL.Text Like "http*//*" Then
            .txtURL.Text = .txtURL.Text
        ElseIf .txtURL.Text Like "www*" Then
            .txtURL.Text = "http://" & .txtURL.Text
        Else
            'count dot chars
            .txtURL.Text = "http://www." & .txtURL.Text
        End If
        .WebBrowser1.Navigate (.txtURL.Text)
    End With
End Sub

Private Sub cmdAction_Click()
' Always get "name" property
    With Me
        With .WebBrowser1
            If Not bWebChange Then Exit Sub 'Wait for completion
            
            ' Go to site and search <input for input box... get "name" property
            ' Go to site and search  "*button*" for buttons ...
            
            .Document.all("Email").Value = Me.txtUsername.Text
            
            bWebChange = False
            .Document.all("signIn").Click
                    
            If bWebChange Then
                'Wait until complete... go to next page
                .Document.all("Passwd").Value = Me.txtPassword.Text
                .Document.all("signIn").Click
            End If
        End With
    End With
End Sub

Private Sub cmdGetScreenCordinates_Click()
Call GetTable
Stop
    Dim oPoint As PointAPI
    
    With Me.WebBrowser1
        '.Silent = False
        '.Navigate Me.txtURL.Text
        '.Visible = True 'False
    
        'Call Browser_Complete
         
        Set oTmpHTMLDoc = .Document
        Set oTmpHTML_Element2 = oTmpHTMLDoc.getElementsByTagName("a")(0)
    
        oPoint = GetScreenCordinates(oTmpHTML_Element2, oTmpHTMLDoc)
    End With
End Sub

Private Function GetScreenCordinates(ByVal oHTML_Element As IHTMLElement, _
                                     ByVal oHTMLDoc As HTMLDocument) As PointAPI
    Dim oPoint As PointAPI
    
    oPoint.X = oHTML_Element.offsetLeft
    oPoint.Y = oHTML_Element.offsetTop
    Do While Not (oHTML_Element.offsetParent Is Nothing)
        oPoint.X = oPoint.X + oHTML_Element.offsetParent.offsetLeft
        oPoint.Y = oPoint.Y + oHTML_Element.offsetParent.offsetTop
        
        If (oHTML_Element = oHTMLDoc.getElementsByTagName("body")(0)) Then
            Exit Do
        Else
            Set oHTML_Element = oHTML_Element.offsetParent
        End If
    Loop
        
    GetScreenCordinates = oPoint
End Function

Private Sub ListAHRef_Click()
    On Error GoTo Err
    
    'Dim oBrowser As InternetExplorer
    Dim oHTMLDoc As HTMLDocument 'Reference to Microsoft HTML Object Library
    Dim oHTML_Element As IHTMLElement
    Dim oHTML_Element2 As IHTMLElement2
    Dim oHTML_Element3 As IHTMLElement3
    Dim oHTML_Element4 As IHTMLElement4
    Dim oHTML_TagCol As IHTMLElementCollection
    Dim oHTML_TagCol2 As IHTMLElementCollection2
    Dim oHTML_TagCol3 As IHTMLElementCollection3
    Dim oHTML_TagCol4 As IHTMLElementCollection4
    Dim lgItem As Long

    'Set oBrowser = Me.WebBrowser1 'New InternetExplorer
    'With oBrowser
    With Me.WebBrowser1
        '.Silent = False
        '.Navigate Me.txtURL.Text
        '.Visible = True 'False
    
        'Call Browser_Complete
         
        Set oHTMLDoc = .Document
        Set oHTML_TagCol2 = oHTMLDoc.getElementsByTagName("a")
        'N# items: oHTML_TagCol.length
        lgItem = lgItem + 1
        For Each oHTML_Element2 In oHTML_TagCol2
            lgItem = lgItem + 1

            With oHTML_Element2
                ActiveSheet.Cells(lgItem, 1).Value = .href
                ActiveSheet.Cells(lgItem, 2).Value = .innerText
                'ActiveSheet.Cells(lgItem, 3).Value = .offsetWidth
                'ActiveSheet.Cells(lgItem, 4).Value = .offsetHeight
                
                'Warning: If the element is in a frame, the coords are relative to the frame's origin, not the browsers.
                'ActiveSheet.Cells(lgItem, 5).Value = .getBoundingClientRect.Left
                'ActiveSheet.Cells(lgItem, 6).Value = .getBoundingClientRect.Top
                'ActiveSheet.Cells(lgItem, 7).Value = .getBoundingClientRect.Right
                'ActiveSheet.Cells(lgItem, 8).Value = .getBoundingClientRect.bottom
                
                'ActiveSheet.Cells(lgItem, 9).Value = .offsetParent.offsetLeft
                'ActiveSheet.Cells(lgItem, 10).Value = .offsetParent.offsetTop
            End With
        Next
        '.Visible = True
    End With
    
    'Debug.Print Me.WebBrowser1.Container.InsideHeight
    'Debug.Print Me.WebBrowser1.Container.InsideWidth
    
    'Debug.Print oHTMLDoc.body.ScrollWidth & "x" & oHTMLDoc.body.ScrollHeight
    'Debug.Print oHTMLDoc.body.ClientWidth & "x" & oHTMLDoc.body.ClientHeight
    
    'Set oHTML_Element2 = oHTMLDoc.elementFromPoint(444, 121)

ExitProc:
    Exit Sub

Err:
    MsgBox ("Error Occurred")
End Sub

Private Sub GetTable()
    Dim lgCell As Long
    Dim lgR As Long ' Row counter
    Dim lgC As Long ' Column counter
    Dim oHTML_ElementTable As IHTMLElement2
    Dim oHTML_ElementRow As IHTMLElement
    Dim oHTML_ElementCell As IHTMLElement

    'beeing Table HTML element with id 'myTable'
    'look at all the 'tr' elements in the 'table' ,
    'and evaluate each, one at a time, using 'ele' variable
    lgR = 1
    Set oHTML_ElementTable = Me.WebBrowser1.Document.getElementById("myTable")
    For Each oHTML_ElementRow In oHTML_ElementTable.getElementsByTagName("tr")
        'show the text content of 'tr' element being looked at
        'Debug.Print oHTML_ElementRow.innerText

        'each 'tr' (table row) element contains 4 children ('td') elements
        lgC = 0
        For lgCell = 0 To oHTML_ElementRow.Children.lenght
        'For Each oHTML_ElementCell In oHTML_ElementRow.Children
            'Debug.Print oHTML_ElementCell.innerText
            lgC = lgC + 1
            ActiveSheet.Cells(lgR, lgC).Value2 = oHTML_ElementCell.innerText
        'Next oHTML_ElementCell
        Next lgCell

        lgR = lgR + 1
    Next oHTML_ElementRow
End Sub

'Private Sub WebBrowser1_BeforeNavigate2(ByVal pDisp As Object, URL As Variant, Flags As Variant, TargetFrameName As Variant, PostData As Variant, Headers As Variant, Cancel As Boolean)
'' Get caller...
'Stop
'End Sub

Private Sub WebBrowser1_DocumentComplete(ByVal pDisp As Object, URL As Variant)
    If pDisp Is Me.WebBrowser1.Application Then
        bWebChange = True
        '
        ' Now you can poke around the Document Object
        '
    End If
End Sub

Private Sub URL_Navigate(Optional ByRef strURL As String = vbNullString)
    With Me.WebBrowser1
        strURL = VBA.Trim$(strURL)
        If strURL = vbNullString Then
            strURL = VBA.Trim$(Me.txtURL.Text)
            
            Do While strURL = vbNullString
                strURL = VBA.Trim$(VBA.InputBox(Prompt:="Type URL", Default:="http://www.site.com"))
                strURL = VBA.Trim$(strURL)
            Loop
        End If
        
        .Navigate strURL
        .Visible = True 'False
    
        Call Browser_Complete
    End With
End Sub

Private Sub Browser_Complete(Optional ByVal TimeWait As Long = 3)
'wait for page to load
    With Me.WebBrowser1
        'Do While .Busy = True Or .ReadyState  4: DoEvents: Loop
        
        Do
        ' Wait till the Browser is loaded
            DoEvents
        Loop Until .ReadyState = READYSTATE_COMPLETE
    
        ' Once browser is fully loaded give few seconds.
        ' This is because sometimes even though the Browser State is Complete
        ' but still some of the controls are not ready completely.
        ' In such case your script may fail.
        ' That's why I have given a waiting time of 3 seconds after the page is loaded completely.
        Application.Wait DateAdd("s", TimeWait, Now)
    End With
End Sub

Private Sub cmdLogin_Click()
    On Error GoTo Err
    
    'Dim oBrowser As InternetExplorer
    Dim oHTMLDoc As HTMLDocument 'Reference to Microsoft HTML Object Library
    Dim oHTML_Element As IHTMLElement
    Dim oHTML_TagCol As IHTMLElementCollection

    'Set oBrowser = Me.WebBrowser1 'New InternetExplorer
    With Me.WebBrowser1
        '.Height = 1000
        '.Width = 1000
        
        Call URL_Navigate(Me.txtURL.Text)
         
        'Once browser is open with Gmail URL
        'Now we need to pass ID and password at the right
        'field. For this right click on the Gmail page
        'and click on View Code. Here check the "id" of
        'User Name and Password Textboxes.
        'For example if ID of the User Name textbox is "username"
        'then syntax to pass User name in that field would be:
        'oHTMLDoc.all.username.Value="your user name".
        'Same way I have passed user name and password as below
        Set oHTMLDoc = .Document
        oHTMLDoc.all.Email.Value = Me.txtUsername.Text 'yourUserName
        oHTMLDoc.all.Passwd.Value = Me.txtPassword.Text 'YourPassword
         
        'after entering email id and password
        'we need to search the button to Sign in gmail.
        'For this also we need to check the ID or name of that
        'button by right clicking and seeing the code.
        'Once you get the Name of that button then use the
        'below code to click on that.
        'Here for loop is necessary because if it is not able
        'to find the control in first time then it will go
        'and look for another button on that page.
        'Whichever button it is finding with the name as
        '"signIn", it will click and for loop will end.
         
        For Each oHTML_Element In oHTMLDoc.getElementsByName("signIn")
            If oHTML_Element.Type = "submit" Then oHTML_Element.Click: Exit For
        Next
         
        Call Browser_Complete
        
        .Visible = True
    End With
    
ExitProc:
    Exit Sub

Err:
    MsgBox ("Error Occurred")
End Sub
  Some hints about how to deal with HTMLElements: Debug.Print objIE.document.getElementsByTagName(“p”)(0).innerHTML ‘ displays inner HTML of 1st p element on a page. Debug.Print objIE.document.getElementsByTagName(“p”)(4).textContent ‘ displays text content of 5th p element on a page. ‘clicks an input element that has a value equal to ‘Sign In’ For Each ele In objIE.document.getElementsByTagName(“input”) If ele.Value = “Sign In” then ele.Click: Exit For Next ‘gets the link element containing ‘wp-admin’ and navigates to it. For Each ele In objIE.document.getElementsByTagName(“a”) If InStr(ele.href, “wp-admin”) > 0 then objIE.navigate ele.href: Exit For Next    

Leave a Reply

Your email address will not be published. Required fields are marked *