Your data, converted to valid csv is saved in data.csv
:
PrimaryId,FirstName,LastName,City,CarName,DogName
100,John,Smith,NewYork,Toyota,Spike
100,John,Smith,NewYork,BMW,Spike
100,John,Smith,NewYork,Toyota,Rusty
100,John,Smith,NewYork,BMW,Rusty
101,Ben,Swan,Sydney,Volkswagen,Buddy
101,Ben,Swan,Sydney,Ford,Buddy
101,Ben,Swan,Sydney,Audi,Buddy
101,Ben,Swan,Sydney,Volkswagen,Max
101,Ben,Swan,Sydney,Ford,Max
101,Ben,Swan,Sydney,Audi,Max
102,Julia,Brown,London,Mini,Lucy
Using pandas to do the heavy lifting, and assuming this valid csv file, this is one way of doing what you want:
import json
import pandas as pd
df = pd.read_csv('data.csv')
def get_nested_rec(key, grp):
rec = {}
rec['PrimaryId'] = key[0]
rec['FirstName'] = key[1]
rec['LastName'] = key[2]
rec['City'] = key[3]
for field in ['CarName','DogName']:
rec[field] = list(grp[field].unique())
return rec
records = []
for key, grp in df.groupby(['PrimaryId','FirstName','LastName','City']):
rec = get_nested_rec(key, grp)
records.append(rec)
records = dict(data = records)
print(json.dumps(records, indent=4))
And the result:
{
"data": [
{
"City": "NewYork",
"FirstName": "John",
"PrimaryId": 100,
"LastName": "Smith",
"CarName": [
"Toyota",
"BMW"
],
"DogName": [
"Spike",
"Rusty"
]
},
{
"City": "Sydney",
"FirstName": "Ben",
"PrimaryId": 101,
"LastName": "Swan",
"CarName": [
"Volkswagen",
"Ford",
"Audi"
],
"DogName": [
"Buddy",
"Max"
]
},
{
"City": "London",
"FirstName": "Julia",
"PrimaryId": 102,
"LastName": "Brown",
"CarName": [
"Mini"
],
"DogName": [
"Lucy"
]
}
]
}