fit (triple-) gauss to data python

让人想犯罪 __ 提交于 2019-12-05 14:13:06

Having a link to actual data would be helpful, but I can make a few recommendations without the data.

First, converting x to np.log(x) is so easy that it is probably worth the effort.

Second, the definition for Gaussian doesn't normally include 1./x -- it might be a small effect, but your values of x are changing by an order of magnitude, so maybe not.

Third, you're providing the same starting value for mu for all three Gaussians. This makes the fit much harder. Try to give starting points that are closer to the actual expected values, and if possible bounds on those values.

To help address these points, you might find lmfit (https://lmfit.github.io/lmfit-py/) helpful. It will definitely make your script shorter, perhaps something like

import numpy as np
import matplotlib.pyplot as plt
from lmfit.models import GaussianModel

# Histogram the densities on 32 logarithmically spaced bin edges between MIN and MAX.
log_edges = np.logspace(np.log10(MIN), np.log10(MAX), 32)
y, bin_edges = np.histogram(Density, bins=log_edges)

# Work in log space: x is the natural log of each bin's arithmetic midpoint.
lower_edges, upper_edges = bin_edges[:-1], bin_edges[1:]
x = np.log((lower_edges + upper_edges) / 2.0)

# The model is a sum of three Gaussian components; each prefix gives the
# component its own set of parameter names (g1_center, g2_center, ...).
g1 = GaussianModel(prefix='g1_')
g2 = GaussianModel(prefix='g2_')
g3 = GaussianModel(prefix='g3_')
model = g1 + g2 + g3

# Build Parameters with initial values: one (amplitude, center, sigma) set per
# component.  Every keyword must carry its own component prefix -- repeating a
# keyword (e.g. g1_sigma twice) is a SyntaxError and would leave the g2/g3
# parameters without starting values.
params = model.make_params(g1_amplitude=60, g1_center=-1.0, g1_sigma=1,
                           g2_amplitude=30, g2_center=0.0, g2_sigma=1,
                           g3_amplitude=10, g3_center=1.0, g3_sigma=1)

# Optional bounds on the component centers, so the three Gaussians keep their
# left-to-right order: g1 <= 0, -1 <= g2 <= 1, g3 >= 0.
center_bounds = (
    ('g1_center', 'max', 0),
    ('g2_center', 'min', -1.0),
    ('g2_center', 'max', 1.0),
    ('g3_center', 'min', 0),
)
for param_name, bound, value in center_bounds:
    setattr(params[param_name], bound, value)

# Fit the composite model to the histogram counts.
result = model.fit(y, params, x=x)

# Report the fit statistics together with the values and uncertainties
# of the variables.
print(result.fit_report())

# Evaluate the individual components ('g1_', 'g2_', 'g3_') using result.params.
comps = result.eval_components(result.params, x=x)

# Plot the data and the total best fit, then each component on its own.
plt.plot(x, y, label='data')
plt.plot(x, result.best_fit, label='best fit')
component_labels = (
    ('g1_', 'gaussian1'),
    ('g2_', 'gaussian2'),
    ('g3_', 'gaussian3'),
)
for prefix, label in component_labels:
    plt.plot(x, comps[prefix], label=label)
# other plt methods for axes and labels
plt.show()

If your model really needs (1/x) times a Gaussian, or you need a different functional form, you could use the built-in LognormalModel, one of the other built-in Models, or easily write your own model function and wrap that.

hope that helps.

For your specific case there is no difference between summing up three Gaussians and using the mixed model; the latter only takes care that the norm is kept. Basically, I just simplified and cleaned up your version. It runs nicely, but be aware that the results depend quite significantly on the number of bins.

import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize as so

data = np.loadtxt("data.txt")

# 35 logarithmically spaced bin edges spanning the full range of the data.
myBins = np.logspace(np.log10(data.min()), np.log10(data.max()), 35)

# As the bins are logarithmic, the bin 'centre' is calculated logarithmically
# as well: it is the geometric mean of the two neighbouring edges.  This is a
# single vectorized expression and avoids np.float, which was removed from
# NumPy in version 1.24.
xBins = np.sqrt(myBins[:-1] * myBins[1:])
vals, bins = np.histogram(data, myBins)

def chunks(l, n):
    """Generate consecutive slices of ``l`` holding ``n`` items each.

    The final slice is shorter when ``len(l)`` is not a multiple of ``n``.
    """
    for start in range(0, len(l), n):
        stop = start + n
        yield l[start:stop]


"""  I use a simplified version without the 1/x """
def my_gauss(x, c, mu, sig):
    """Evaluate a Gaussian in ln(x) with amplitude c, centre mu and width sig.

    Computes ``c * exp(-(ln(x) - mu)**2 / (2 * sig**2))``.  This is the
    simplified form: the full variant would additionally multiply the
    result by ``1 / x``.
    """
    deviation = np.log(x) - mu
    exponent = -deviation**2.0 / (2.0 * sig**2.0)
    return c * np.exp(exponent)


def triple_residuals(params, xData, yData):
    """Return the residuals between the data and a sum of my_gauss components.

    params -- flat sequence of (c, mu, sig) triples, one triple per component
    xData  -- x positions at which the model is evaluated
    yData  -- observed values at those positions

    Returns ``yData - model`` as a float array, the form expected by
    scipy.optimize.leastsq.
    """
    xData = np.asarray(xData, dtype=float)
    # Builtin float replaces np.float, which was removed in NumPy 1.24.
    yTh = np.zeros_like(yData, dtype=float)
    # A separate loop name avoids shadowing the ``params`` argument.
    for component in chunks(params, 3):
        # my_gauss only uses NumPy ufuncs, so it evaluates the whole array at once.
        yTh += my_gauss(xData, *component)
    return yData - yTh


# Starting guess: one (c, mu, sig) triple per Gaussian, flattened into one list.
start_values = [ 40, -2.1, 1.1, 10, -0.1, 1.1, 10, 2.1, 1.1 ]
# NOTE(review): without full_output the second return value of leastsq is
# presumably its integer status flag, not an error estimate -- confirm in the
# scipy documentation.
sol, err = so.leastsq( triple_residuals, start_values, args=( xBins, vals ) )


# Dense, logarithmically spaced x grid over the data range, used for plotting.
data_min, data_max = min( data ), max( data )
myxList = np.logspace( np.log10( data_min ), np.log10( data_max ), 150 )

# for guessing start values
#~ myg1List = my_gauss( myxList, 40, -2.1, 1.1 )
#~ myg2List = my_gauss( myxList, 20, -0.1, 1.2 )
#~ myg3List = my_gauss( myxList, 10, 2.1, 1.3 )


fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# NOTE(review): the histogram is drawn against the left bin edges, while the
# fit used the logarithmic bin centres (xBins) -- confirm this is intended.
ax.plot(bins[:-1], vals)

# for plotting start values
#~ ax.plot( myxList,  myg1List )
#~ ax.plot( myxList,  myg2List )
#~ ax.plot( myxList,  myg3List )

# Evaluate and draw every fitted component; my_gauss is vectorized, so it is
# called once per component on the whole x grid.
gs = []
for params in chunks(sol, 3):
    # print() as a function works on both Python 2 and Python 3.
    print(params)
    component = my_gauss(myxList, *params)
    gs.append(component)
    ax.plot(myxList, component, ls='--')

# Total model: sum over all components, however many were fitted.
gsAll = np.sum(gs, axis=0)
ax.plot(myxList, gsAll, lw=3)

ax.set_xscale('log')
plt.show()

and provides:

>>[58.91221784 -2.1544611   0.89842033]
>>[21.29816862  0.13135854  0.80339236]
>>[5.44419833 2.42596666 0.85324204]

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!