问题
I want to scrape historical data for specific coins from the CoinMarketCap website using Python, and I want the data to be as granular as possible. I think the data comes from the following string:
 
I want to use BeautifulSoup to get this data and save it to a CSV file, but I have no clue how.
回答1:
It seems the page uses JavaScript, which requests the URL
https://graphs.coinmarketcap.com/currencies/verge/1513624444000/1514229244000/
to get the data as JSON, so you can easily load it as a Python dictionary.
EDIT: The URL uses timestamp*1000 (i.e. milliseconds) to select the historical range. The code below also writes the data to a CSV file.
import requests
import datetime
import csv
# Download one date range of Verge graph data in a single request, print it,
# and save it to 'output.csv'.

# Range boundaries as 'YYYY.MM.DD' strings.
date1 = '2016.01.01'
date2 = '2017.01.01'

date1 = datetime.datetime.strptime(date1, '%Y.%m.%d')
date2 = datetime.datetime.strptime(date2, '%Y.%m.%d')

# The URL expects the boundaries as timestamps multiplied by 1000.
# NOTE(review): these datetimes are naive, so timestamp() interprets them in
# the local time zone of the machine running the script.
date1 = int(date1.timestamp() * 1000)
date2 = int(date2.timestamp() * 1000)

print('dates:', date1, date2)
print('-----')

url = 'https://graphs.coinmarketcap.com/currencies/verge/{}/{}/'.format(date1, date2)

response = requests.get(url)
data = response.json()

# Show which data series the server returned.
for key in data:
    print('key:', key)
print('-----')

# newline='' is what the csv module requires of files it writes to; without
# it, extra blank rows appear on platforms that translate '\n' on write.
with open('output.csv', 'w', newline='') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(('date', 'price_btc', 'price_usd', 'volume_usd'))

    # Each series entry is indexed as [timestamp * 1000, value]. The three
    # series are walked in lockstep and assumed to line up entry by entry;
    # the timestamp is taken from the 'price_btc' entry.
    for btc, usd, vol in zip(data['price_btc'], data['price_usd'], data['volume_usd']):
        date = datetime.datetime.fromtimestamp(btc[0] // 1000)
        date = date.strftime('%Y.%m.%d %H:%M:%S')

        print('date:', date)
        print(' btc:', btc[1])
        print(' usd:', usd[1])
        print(' vol:', vol[1])
        print('-----')

        # Column order must match the header row: the USD price goes in the
        # third column and the volume in the fourth.
        csv_writer.writerow((date, btc[1], usd[1], vol[1]))
Part of result:
dates: 1451602800000 1483225200000
-----
key: market_cap_by_available_supply
key: price_btc
key: price_usd
key: volume_usd
-----
date: 2016.01.01 00:04:19
 btc: 3.00032e-08
 usd: 1.29262e-05
 vol: 548
-----
date: 2016.01.02 00:04:19
 btc: 2.1964e-08
 usd: 9.52678e-06
 vol: 246
-----
date: 2016.01.03 00:04:19
 btc: 2.67174e-08
 usd: 1.15926e-05
 vol: 2805
-----
EDIT: Read the data in a loop, one request per day:
import requests
import datetime
import csv
import webbrowser
def get_data(name, timestamp1, timestamp2, csv_writer):
    """Download one time window of graph data for a coin and write it as CSV rows.

    Args:
        name: Coin name as it appears in the graphs URL (e.g. 'verge').
        timestamp1: Start of the window, as a timestamp multiplied by 1000.
        timestamp2: End of the window, as a timestamp multiplied by 1000.
        csv_writer: Object with a ``writerow`` method; one row
            ``(date, price_btc, price_usd, volume_usd)`` is written per
            data point.

    Raises:
        SystemExit: If the response body cannot be decoded as JSON. The raw
            body is first saved to 'output.html' and opened in the browser.
    """
    url = 'https://graphs.coinmarketcap.com/currencies/{}/{}/{}/'.format(name, timestamp1, timestamp2)

    response = requests.get(url)

    try:
        data = response.json()
    except ValueError:
        # JSON decoding errors are ValueError subclasses. The server sent
        # something else (presumably an HTML error page), so save it and
        # display it for inspection before stopping.
        with open('output.html', 'w') as f:
            f.write(response.text)
        webbrowser.open('output.html')
        # Stop the whole script with a non-zero status so the failure is
        # visible to whatever launched it.
        raise SystemExit(1)

    # Each series entry is indexed as [timestamp * 1000, value]. The three
    # series are walked in lockstep and assumed to line up entry by entry;
    # the timestamp is taken from the 'price_btc' entry.
    for btc, usd, vol in zip(data['price_btc'], data['price_usd'], data['volume_usd']):
        date = datetime.datetime.fromtimestamp(btc[0] // 1000)
        date = date.strftime('%Y.%m.%d %H:%M:%S')

        # Row layout is (date, price_btc, price_usd, volume_usd): the USD
        # price goes in the third field and the volume in the fourth.
        csv_writer.writerow((date, btc[1], usd[1], vol[1]))
def scrape(name, start_date, finish_date):
    """Download a coin's graph data one day at a time into '<name>.csv'.

    Args:
        name: Coin name; used both in the request (via ``get_data``) and as
            the stem of the output file name.
        start_date: First day to download, as a 'YYYY.MM.DD' string.
        finish_date: Day at which to stop (exclusive), as a 'YYYY.MM.DD'
            string.

    Raises:
        ValueError: If either date string does not match 'YYYY.MM.DD'.
    """
    one_day = datetime.timedelta(days=1)

    # Parse the dates before touching the file system so that a malformed
    # date does not leave an empty CSV file behind.
    start_date = datetime.datetime.strptime(start_date, '%Y.%m.%d')
    finish_date = datetime.datetime.strptime(finish_date, '%Y.%m.%d')

    # The with-block closes the file even if a request fails part-way
    # through. newline='' is what the csv module requires of files it
    # writes to (otherwise blank rows appear on some platforms).
    with open(name + '.csv', 'w', newline='') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(('date', 'price_btc', 'price_usd', 'volume_usd'))

        date1 = start_date
        while date1 < finish_date:
            date2 = date1 + one_day
            print(name, date1, date2)

            # The request takes its boundaries as timestamps multiplied by 1000.
            date1_timestamp = int(date1.timestamp() * 1000)
            date2_timestamp = int(date2.timestamp() * 1000)

            get_data(name, date1_timestamp, date2_timestamp, csv_writer)

            date1 = date2
# --- main ---
# Run the scraper over the same date range for each coin in turn.
for coin in ('verge', 'bitcoin', 'ethereum'):
    scrape(coin, '2016.01.01', '2017.01.01')
来源:https://stackoverflow.com/questions/47971406/scrape-highcharts-data-to-csv-file