I made a little test case to compare the speed of YAML and JSON:
import json
import yaml
from datetime import datetime
from random import randint
NB_ROW=1024
pri
For reference, I compared a couple of human-readable formats, and indeed Python's YAML reader is by far the slowest. (Note the logarithmic scale in the plot below.) If you're looking for speed, you want one of the JSON loaders, e.g., orjson:
Code to reproduce the plot:
import numpy
import perfplot
import json
import ujson
import orjson
import toml
import yaml
from yaml import Loader, CLoader
import pandas
def setup(n):
    """Write one random ``n`` x 3 table to disk in every benchmarked format.

    Creates ``out.yml``, ``out.json``, ``out.dat`` and ``out.toml`` in the
    current working directory, all holding the same values. Returns ``None``.
    """
    # Fixed seed: every call with the same n serialises identical numbers.
    numpy.random.seed(0)
    table = numpy.random.rand(n, 3)
    # Convert once to nested Python lists for the yaml/json/toml dumpers.
    rows = table.tolist()

    with open("out.yml", "w") as yml_file:
        yaml.dump(rows, yml_file)

    with open("out.json", "w") as json_file:
        json.dump(rows, json_file, indent=4)

    # Plain whitespace-separated text; read back by loadtxt / pandas_read.
    with open("out.dat", "w") as dat_file:
        numpy.savetxt(dat_file, table)

    # The rows are stored under a "data" key in the TOML document.
    with open("out.toml", "w") as toml_file:
        toml.dump({"data": rows}, toml_file)
def yaml_python(arr):
    """Parse ``out.yml`` with PyYAML's ``Loader`` and return the result.

    ``arr`` is the value handed over by the benchmark harness; it is unused.
    """
    # NOTE: the full Loader must not be used on untrusted YAML; here the
    # file is the one written by setup(), so it is acceptable.
    with open("out.yml", "r") as yml_file:
        return yaml.load(yml_file, Loader=Loader)
def yaml_c(arr):
    """Parse ``out.yml`` with PyYAML's ``CLoader`` and return the result.

    ``arr`` is the value handed over by the benchmark harness; it is unused.
    """
    # NOTE: like Loader, CLoader must not be used on untrusted YAML; the
    # file read here is the one written by setup().
    with open("out.yml", "r") as yml_file:
        return yaml.load(yml_file, Loader=CLoader)
def json_load(arr):
    """Parse ``out.json`` with the standard-library ``json`` module.

    ``arr`` is the value handed over by the benchmark harness; it is unused.
    Returns whatever ``json.load`` produces for the file's contents.
    """
    with open("out.json", "r") as json_file:
        return json.load(json_file)
def ujson_load(arr):
    """Parse ``out.json`` with ``ujson`` and return the result.

    ``arr`` is the value handed over by the benchmark harness; it is unused.
    """
    with open("out.json", "r") as json_file:
        return ujson.load(json_file)
def orjson_load(arr):
    """Parse ``out.json`` with ``orjson`` and return the result.

    ``arr`` is the value handed over by the benchmark harness; it is unused.
    """
    # The file is opened in binary mode and the raw bytes are handed
    # straight to orjson.loads.
    with open("out.json", "rb") as json_file:
        raw = json_file.read()
    return orjson.loads(raw)
def loadtxt(arr):
    """Read ``out.dat`` with ``numpy.loadtxt`` and return a 2-D array.

    ``arr`` is the value handed over by the benchmark harness; it is unused.

    Returns:
        A ``numpy.ndarray`` with one row per line of the file.
    """
    with open("out.dat", "r") as dat_file:
        # ndmin=2 stops numpy from squeezing a single-line file into a 1-D
        # array, so the smallest benchmark size (one row) still comes back
        # as shape (1, 3) like the other loaders' results.
        return numpy.loadtxt(dat_file, ndmin=2)
def pandas_read(arr):
    """Read ``out.dat`` with ``pandas.read_csv`` and return a NumPy array.

    ``arr`` is the value handed over by the benchmark harness; it is unused.

    Returns:
        A ``numpy.ndarray`` holding the parsed table.
    """
    # The file has no header row and its columns are separated by single
    # spaces.
    frame = pandas.read_csv("out.dat", header=None, sep=" ")
    # to_numpy() is the accessor the pandas docs recommend over `.values`;
    # it always returns a NumPy array.
    return frame.to_numpy()
def toml_load(arr):
    """Parse ``out.toml`` with ``toml`` and return its ``"data"`` entry.

    ``arr`` is the value handed over by the benchmark harness; it is unused.
    """
    with open("out.toml", "r") as toml_file:
        document = toml.load(toml_file)
    # setup() stores the rows under the "data" key.
    return document["data"]
# Benchmark every loader over a range of table sizes and save the timing
# plot to out.png. setup(n) rewrites the data files before the kernels run.
perfplot.save(
    "out.png",
    setup=setup,
    # Row counts 1, 2, 4, ..., 2**17.
    n_range=[1 << k for k in range(18)],
    kernels=[
        yaml_python,
        yaml_c,
        json_load,
        loadtxt,
        pandas_read,
        toml_load,
        ujson_load,
        orjson_load,
    ],
)