Given a list of people with their birth and death years (all between 1900 and 2000), find the year with the largest number of people alive.
Here
We can also use numpy slicing, which is quite neat, and should also be quite efficient:
import numpy as np
from collections import namedtuple
# Each person is a (birth, death) pair of years.
Person = namedtuple('Person', ('birth', 'death'))
people = [Person(1900, 2000), Person(1950, 1960), Person(1955, 1959)]

# Inclusive range of years covered by the head-count array.
START_YEAR = 1900
END_YEAR = 2000

# One integer slot per year from START_YEAR to END_YEAR: number of people alive.
people_alive = np.zeros(END_YEAR - START_YEAR + 1, dtype=int)
for p in people:
    a = p.birth - START_YEAR
    b = p.death - START_YEAR + 1  # slice end is exclusive, so +1 includes the year of death
    people_alive[a:b] += 1

# Every year whose head count equals the maximum, converted from index to year.
most_alive = np.flatnonzero(people_alive == people_alive.max()) + START_YEAR
EDIT: It seems the namedtuple adds a bit of overhead, so to speed things up a bit more, remove the namedtuple and do
for birth, death in people:
instead.
Without importing anything, and using a class for readability, here's my solution. Let me know what you think! I also made a separate getMaxBirthYear function in case you're at an interview and someone wants you to code that out rather than use built-in functions (I used them :) )
class Person:
    """A person described by a birth year and a death year.

    Both attributes default to None when not supplied.
    """

    def __init__(self, birth=None, death=None):
        self.birth = birth
        self.death = death

    def __repr__(self):
        return "Person(birth={!r}, death={!r})".format(self.birth, self.death)
def getPopulationPeak(people):
    """Return ``(year, count)`` for the first year with the highest head count.

    ``people`` is a collection of objects exposing ``birth`` and ``death``
    years.  The per-year population changes come from ``getDeltas``; they
    are accumulated in chronological order and the running total is
    compared against the best seen so far.

    Because the comparison is strict, ties resolve to the earliest year.
    An empty ``people`` raises ValueError (from the ``max`` call inside
    ``getMaxBirthYear``).
    """
    maxBirthYear = getMaxBirthYear(people)
    deltas = getDeltas(people, maxBirthYear)

    currentSum = 0
    maxSum = 0
    maxYear = 0
    # Walk the years in order, keeping a running count of people alive.
    for year in sorted(deltas):
        currentSum += deltas[year]
        if currentSum > maxSum:
            maxSum = currentSum
            maxYear = year
    return maxYear, maxSum
def getMaxBirthYear(people):
    """Return the latest ``birth`` year found among ``people``.

    Raises ValueError (from ``max``) when ``people`` is empty.
    """
    return max(person.birth for person in people)
def getDeltas(people, maxBirthYear):
    """Map each relevant year to the net change in the number of people alive.

    Every birth adds 1 at ``person.birth``.  Every death subtracts 1 at
    ``person.death + 1``, so a person still counts as alive during the
    year they die.

    A death is recorded only if its key already exists in the mapping or
    the death year is not later than ``maxBirthYear``: decrements after the
    last birth cannot raise the running total, so they are skipped.

    Returns a dict of ``{year: delta}``.
    """
    deltas = {}
    for person in people:
        deltas[person.birth] = deltas.get(person.birth, 0) + 1

        yearAfterDeath = person.death + 1
        if yearAfterDeath in deltas or person.death <= maxBirthYear:
            deltas[yearAfterDeath] = deltas.get(yearAfterDeath, 0) - 1
    return deltas
# Sample data for a quick manual check of getPopulationPeak.
testPeople = [
    Person(birth=1750, death=1802),
    Person(birth=2000, death=2010),
    Person(birth=1645, death=1760),
    Person(birth=1985, death=2002),
    Person(birth=2000, death=2050),
    Person(birth=2005, death=2080),
]

# Print the (year, count) pair reported for the sample data.
peak = getPopulationPeak(testPeople)
print(peak)
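I would go like this:
- Sort the people by birth year (the `unborn` list).
- For each year, starting from the first birth year, move everyone born that year from `unborn` into the `alive` list.
- Insert into the `alive` list so that it stays sorted by death year.
- Then, starting from the person in the `alive` list that dies first, remove it from the list.
- Store the size of the `alive` list in a dict, keyed by year.
- Move on to the next year, and loop until the `unborn` and `alive` lists are empty.

Complexity should be around O((m + n) * log(m)) (each year is considered only once, and each person only twice, multiplied by the insertion cost in the `alive` list).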
from bisect import insort
def most_populated(population, single=True):
    """Find the year(s) in which the most people were alive.

    ``population`` is a sequence of ``(birth_year, death_year)`` pairs.
    A person counts as alive in every year from birth through death,
    inclusive: they are removed only in the year after their death.

    With ``single=True`` (the default) the first peak year is returned.
    With ``single=False`` a list of every year that reaches the peak is
    returned.

    ``population`` must not be empty; the first birth year is read
    unconditionally and raises IndexError otherwise.
    """
    years = {}
    # Sorted by descending birth year so the next person to be born is
    # always at the end and can be popped cheaply.
    unborn = sorted(population, key=lambda x: -x[0])
    # Negated death years, kept sorted, so the earliest death is at the end.
    alive = []

    first_year = unborn[-1][0]
    last_year = max(person[1] for person in population)
    for year in range(first_year, last_year + 1):
        while unborn and unborn[-1][0] == year:
            insort(alive, -unborn.pop()[1])
        # Drop everyone whose death year was the previous year.
        while alive and alive[-1] == -(year - 1):
            alive.pop()
        years[year] = len(alive)

    if single:
        return max(years, key=years.get)
    peak = max(years.values())
    return [year for year, count in years.items() if count == peak]
Follow 'maxAlive' and 'theYear' to get the first year with the highest number of people alive.
# Net change in the number of people alive, keyed by year: +1 in a birth
# year and -1 in the year AFTER a death year, so that a person still counts
# as alive during the year they die (someone born and dying in the same
# year is counted for that year).
years = {}
for p in people:
    years[p.birth] = years.get(p.birth, 0) + 1
    years[p.death + 1] = years.get(p.death + 1, 0) - 1

# Sweep the years in order, tracking the running head count.  The strict
# comparison keeps the first year that reaches the maximum.
alive = 0
maxAlive = 0
theYear = people[0].birth
for year in sorted(years):
    alive += years[year]
    if alive > maxAlive:
        maxAlive = alive
        theYear = year
from collections import Counter
from itertools import chain


def most_pop(pop):
    """Count how many people were alive in each year, most populated first.

    ``pop`` is an iterable of ``(birth_year, death_year)`` pairs; each
    person is counted for every year from birth through death, inclusive.

    Returns the full ``Counter.most_common()`` list of ``(year, count)``
    pairs, so element ``[0]`` is the most populated year.  On Python 3.7+
    years with equal counts keep the order in which they were first seen.

    >>> most_pop([(1920, 1939), (1911, 1944), (1920, 1955), (1938, 1939)])[0]
    (1938, 4)
    """
    # Expand every lifespan into its individual years and tally them.
    pop_flat = chain.from_iterable(range(i, j + 1) for i, j in pop)
    return Counter(pop_flat).most_common()
I came across the following code, which is exactly what you need.
Let's say the range of years is 1900 - 2000
def year_with_max_population(people, first_year=1900, last_year=2000):
    """Return the first year in which the most people were alive.

    ``people`` is an iterable of objects exposing ``birth_year`` and
    ``death_year``.  All years are expected to lie between ``first_year``
    and ``last_year`` inclusive; they are not validated here.

    A person counts as alive from their birth year through their death
    year, inclusive.  If ``people`` is empty, ``first_year`` is returned.
    """
    # One slot per year in the range, plus one extra so that the decrement
    # for a death in ``last_year`` (applied the following year) has a slot.
    population_changes = [0] * (last_year - first_year + 2)
    for person in people:
        population_changes[person.birth_year - first_year] += 1
        # Decrement the year after death so the death year itself is counted.
        population_changes[person.death_year - first_year + 1] -= 1

    max_population = 0
    max_population_index = 0
    population = 0
    # Running sum of the changes gives the head count for each year.
    for index, population_change in enumerate(population_changes):
        population += population_change
        if population > max_population:
            max_population = population
            max_population_index = index
    return first_year + max_population_index
credit 'Brian Schmitz' here