How to scrape a phone number using Python when it is only shown after a click

烂漫一生 提交于 2020-03-12 06:46:08

问题


I want to scrape a phone number, but the phone number is only displayed after a click. Is it possible to scrape the phone number directly using Python? My code scrapes the phone number, but it comes back masked with stars (***). Here is the link I want to scrape the phone number from: https://hipages.com.au/connect/abcelectricservicespl/service/126298 Please guide me! Here is my code:

import requests
from bs4 import BeautifulSoup


def get_page(url):
    """Download *url* and parse the response body with BeautifulSoup.

    Args:
        url: Address of the page to fetch.

    Returns:
        The parsed ``BeautifulSoup`` document (built with the ``lxml``
        parser), or ``None`` when the server answers with a non-OK status.
    """
    response = requests.get(url)

    if not response.ok:
        print('server responded:', response.status_code)
        # On this path no document was ever parsed. Falling through to a
        # shared `return soup` raised UnboundLocalError, so report the
        # failure to the caller explicitly instead.
        return None

    return BeautifulSoup(response.text, 'lxml')

def get_detail_data(soup):
    """Print the business details found in a parsed listing page.

    Prints, one per line and in this order: title, contact person,
    location, cell number, phone number, ABN verification text and ABN.
    A field whose element cannot be located is printed as its
    ``'Empty ...'`` placeholder instead of raising.

    Args:
        soup: Parsed page exposing ``find`` / ``findAll`` (a
            ``BeautifulSoup`` document). ``None`` or a page lacking the
            expected elements yields only placeholders.
    """
    contact_class = "Contact__Item-sc-1giw2l4-2 kBpGee"

    def text_of(lookup, placeholder):
        # Only "element not on the page" is expected here: a failed lookup
        # gives None (AttributeError on use) and a short result list gives
        # IndexError. Anything else is a real bug and should surface.
        try:
            return lookup().text
        except (AttributeError, IndexError):
            return placeholder

    # The four contact fields share one CSS class and are told apart only
    # by position, so run the query once and index into the result.
    try:
        contact_items = soup.findAll('span', class_=contact_class, id=False)
    except AttributeError:
        contact_items = []

    fields = (
        (lambda: soup.find('h1', class_="sc-AykKI", id=False), 'Empty Title'),
        (lambda: contact_items[0], 'Empty Person'),
        (lambda: contact_items[1], 'Empty location'),
        (lambda: contact_items[2], 'Empty Cell No'),
        (lambda: contact_items[3], 'Empty Phone No'),
        (lambda: soup.find('p', class_="sc-AykKI"), 'Empty Verify_ABN'),
        # Use the anchor's text like every other field; printing the tag
        # object itself emitted raw markup (or None when no <a> existed).
        (lambda: soup.find('div', class_="box__Box-sc-1u3aqjl-0").find('a'),
         'Empty ABN'),
    )
    for lookup, placeholder in fields:
        print(text_of(lookup, placeholder))



def main():
    """Fetch the hard-coded detail page and print the data extracted from it."""
    detail_url = "https://hipages.com.au/connect/abcelectricservicespl/service/126298"
    parsed_page = get_page(detail_url)
    get_detail_data(parsed_page)



# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

回答1:


import requests
from bs4 import BeautifulSoup
import re


def Main():
    """Fetch the listing page and print its visible and embedded details.

    Prints the business name, the contact person and the address from the
    rendered markup, then the ``phone``, ``mobile``, ``abn`` and ``website``
    values pulled out of the raw response text with regular expressions.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    r = requests.get(
        "https://hipages.com.au/connect/abcelectricservicespl/service/126298")
    # Stop on an HTTP error instead of trying to parse an error page.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')

    print(soup.find("h1", {'class': 'sc-AykKI'}).text)

    # Contact person and address are the first two spans with this class;
    # look them up once rather than once per field.
    contact_spans = soup.findAll(
        "span", {'class': 'Contact__Item-sc-1giw2l4-2 kBpGee'})
    print(contact_spans[0].text.strip())
    print(contact_spans[1].text)

    # Each pattern matches key\":\"value\" in the raw text, i.e. quotes that
    # are preceded by a literal backslash, and captures the value.
    for key in ('phone', 'mobile', 'abn', 'website'):
        found = re.search(key + r'\\":\\"(.*?)\\"', r.text)
        # A key missing from the page used to crash with AttributeError on
        # `.group(1)`; report it and keep printing the remaining fields.
        print(found.group(1) if found else key + ' not found')


# Module-level call: the scraper runs as soon as this snippet is executed.
Main()

Output:

ABC Electric Services p/l
Mal
222 Henry Lawson DRV, Georges Hall NSW 2198
1800 801 828
0408 600 950
37137808989
www.abcelectricservices.com.au

Or if you would like to parse the full script:

import requests
from bs4 import BeautifulSoup
import pyjsparser
import json
import re


def Main():
    """Print the JSON held in the page's sixth ``<script>`` element.

    The script text is parsed with ``pyjsparser``; the string value on the
    right-hand side of its first statement's expression is then decoded
    with ``json.loads`` and printed.
    """
    response = requests.get(
        "https://hipages.com.au/connect/abcelectricservicespl/service/126298")
    document = BeautifulSoup(response.text, 'html.parser')
    script_tags = document.findAll("script")
    # Index 5 is hard-coded; presumably the script carrying the page state.
    state_script = script_tags[5]
    syntax_tree = pyjsparser.parse(state_script.text)
    first_expression = syntax_tree["body"][0]["expression"]
    json_text = first_expression["right"]["value"]
    print(json.loads(json_text))


# Module-level call: the scraper runs as soon as this snippet is executed.
Main()

Another version:

import requests
from bs4 import BeautifulSoup
import re
import json


def Main():
    """Extract ``__INITIAL_STATE__`` from the page and pretty-print it.

    Reads the text of the page's sixth ``<script>`` element, captures the
    brace-delimited payload of the quoted string assigned to
    ``__INITIAL_STATE__``, unescapes its quotes, decodes it as JSON and
    prints it with a four-space indent.
    """
    r = requests.get(
        "https://hipages.com.au/connect/abcelectricservicespl/service/126298")
    soup = BeautifulSoup(r.text, 'html.parser')
    data = soup.findAll("script")[5].text
    source = re.search(r'__INITIAL_STATE__\s*=\s*"({.*})', data).group(1)
    # Turn every \" that is not itself preceded by a backslash into a plain
    # quote. The pattern is a raw string: the earlier non-raw literal only
    # worked through the invalid escape "\)", which newer Python versions
    # warn about and will eventually reject.
    state = json.loads(re.sub(r'(?<!\\)\\"', '"', source))
    print(json.dumps(state, indent=4))


# Module-level call: the scraper runs as soon as this snippet is executed.
Main()



回答2:


The phone number already exists in the page source. There is a script in the page source starting with window.__INITIAL_STATE__; it contains an object holding data for multiple providers, so you can get the phone numbers for all of them from there, or simply load this object as JSON and, using the store as the key, look up the phone number for that store.



来源:https://stackoverflow.com/questions/60520118/how-to-scrape-phone-no-using-python-when-it-show-after-clicked

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!