问题
I am trying to find the best ARIMA configuration faster than I currently do.
To do that, I iterate over all ARIMA order combinations and select the best one. For that I wrote the following functions:
def difference(dataset, interval=1):
    """Return the lag-``interval`` differences of ``dataset`` as a NumPy array.

    Element ``k`` of the result is ``dataset[k + interval] - dataset[k]``,
    so the result is empty when ``dataset`` is not longer than ``interval``.

    Args:
        dataset: Indexable sequence of numeric observations.
        interval: Lag between the two observations that are subtracted.

    Returns:
        ``numpy.ndarray`` holding the differenced values.
    """
    return np.array(
        [dataset[i] - dataset[i - interval] for i in range(interval, len(dataset))]
    )
# invert differenced value
def inverse_difference(history, yhat, interval=1):
    """Undo a lag-``interval`` differencing step for a single forecast.

    Args:
        history: Sequence of observations on the original scale; it is
            read at index ``-interval``.
        yhat: Forecast expressed on the differenced scale.
        interval: Lag that was used when differencing.

    Returns:
        ``yhat`` plus the observation ``interval`` positions from the end
        of ``history``.
    """
    return yhat + history[-interval]
# evaluate an ARIMA model for a given order (p,d,q) and return RMSE
def evaluate_arima_model(dataset, arima_order):
    """Walk-forward validate one ARIMA order and return its RMSE.

    The first half of ``dataset`` seeds the history. Each observation in
    the second half is forecast one step ahead by a model refitted on the
    differenced history, after which the true observation is appended to
    the history.

    Args:
        dataset: Array-like exposing ``astype`` and slicing (e.g. a NumPy
            array) that holds the observations in order.
        arima_order: ``(p, d, q)`` order forwarded to ``ARIMA``.

    Returns:
        Root mean squared error of the forecasts over the second half.
    """
    dataset = dataset.astype('float32')
    train_size = int(len(dataset) * 0.50)
    train, test = dataset[0:train_size], dataset[train_size:]
    history = list(train)

    # Differencing lag; read once because it does not change between steps.
    # NOTE(review): `maxlength` is not defined inside this function; it is
    # presumably a module-level setting - confirm it is set before calling.
    months_in_year = maxlength

    predictions = []
    for observed in test:
        diff = difference(history, months_in_year)
        model = ARIMA(diff, order=arima_order)
        # NOTE(review): the `trend`/`disp` keywords look like the legacy
        # statsmodels ARIMA API - confirm against the installed version.
        model_fit = model.fit(trend='nc', disp=0)
        yhat = model_fit.forecast()[0]
        # Map the forecast back from the differenced scale.
        predictions.append(inverse_difference(history, yhat, months_in_year))
        # Reveal the true value only after it has been predicted.
        history.append(observed)

    # Out-of-sample error over the whole second half.
    mse = mean_squared_error(test, predictions)
    return sqrt(mse)
This method currently takes minutes to run, which is too slow for the API where I am going to use this logic.
# evaluate combinations of p, d and q values for an ARIMA model
def evaluate_models(dataset, p_values, d_values, q_values):
    """Grid-search ARIMA orders and report the one with the lowest RMSE.

    Every ``(p, d, q)`` combination is scored with
    ``evaluate_arima_model``; combinations whose evaluation raises are
    skipped. The winner is stored in the module-level globals
    ``best_cfg`` and ``best_score`` and printed. Nothing is returned.

    Args:
        dataset: Array-like exposing ``astype`` (e.g. a NumPy array).
        p_values: Iterable of candidate ``p`` values.
        d_values: Iterable of candidate ``d`` values.
        q_values: Iterable of candidate ``q`` values.
    """
    global best_score, best_cfg
    dataset = dataset.astype('float32')
    best_score, best_cfg = float("inf"), None
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p, d, q)
                try:
                    rmse = evaluate_arima_model(dataset, order)
                except Exception:
                    # Best effort: an order that cannot be evaluated is
                    # skipped. KeyboardInterrupt/SystemExit still propagate.
                    continue
                if rmse < best_score:
                    best_score, best_cfg = rmse, order
                print('ARIMA%s RMSE=%.3f' % (order, rmse))
    print('Best ARIMA%s RMSE=%.3f' % (best_cfg, best_score))
# evaluate parameters
# Ignore all warnings for the duration of the search.
warnings.filterwarnings("ignore")
# Candidate grid: p and q run over 0..6, d over 0..2.
p_values, d_values, q_values = range(7), range(3), range(7)
evaluate_models(data_train.values, p_values, d_values, q_values)
To accelerate the process, I want to use multiprocessing to run the evaluate_arima_model function over all the orders in parallel. However, ProcessPoolExecutor does not seem to work: it does not print any results.
# evaluate combinations of p, d and q values for an ARIMA model
# Module-level list that collects every (p, d, q) order to evaluate.
orders = []


def fill_orders(p_values, d_values, q_values):
    """Append every ``(p, d, q)`` combination to the module-level ``orders``.

    Combinations are added with ``p`` varying slowest and ``q`` fastest.
    Existing entries of ``orders`` are kept, so calling this function
    twice appends the combinations twice.

    Args:
        p_values: Iterable of candidate ``p`` values.
        d_values: Iterable of candidate ``d`` values.
        q_values: Iterable of candidate ``q`` values.
    """
    # Function-scope import so the snippet needs no extra top-level import.
    from itertools import product

    orders.extend(product(p_values, d_values, q_values))
# fill orders array
# Candidate grid: p and q run over 0..6, d over 0..2.
p_values = range(0, 7)
d_values = range(0, 3)
q_values = range(0, 7)
warnings.filterwarnings("ignore")
fill_orders(p_values, d_values, q_values)

# Worker processes must be able to import this module without starting
# another pool, so the pool is only created when run as the main script.
if __name__ == "__main__":
    # NOTE(review): `dataset` is not defined in the visible script; the
    # sequential version passes `data_train.values` - confirm which is meant.
    with concurrent.futures.ProcessPoolExecutor() as executor:
        # submit() forwards positional arguments individually; wrapping them
        # in one tuple would call the function with a single argument.
        future_to_order = {
            executor.submit(evaluate_arima_model, dataset, order): order
            for order in orders
        }
        for future in concurrent.futures.as_completed(future_to_order):
            try:
                rmse = future.result()
            except Exception:
                # Best effort: orders whose evaluation fails are skipped.
                continue
            print('ARIMA%s RMSE=%.3f' % (future_to_order[future], rmse))
回答1:
I wouldn't expect the second block of code you've shown to do anything. For this code:
# evaluate combinations of p, d and q values for an ARIMA model
# Module-level list intended to hold every (p, d, q) order tuple.
orders = []
# Appends each (p, d, q) combination of the given values to `orders`.
# NOTE(review): nothing in this quoted snippet calls this function, so
# `orders` is still empty when the executor code that follows runs.
def evaluate_models( p_values, d_values, q_values):
for p in p_values:
for d in d_values:
for q in q_values:
order = (p,d,q)
orders.append(order)
# Submit one evaluate_arima_model task per entry of `orders` to a process pool.
with concurrent.futures.ProcessPoolExecutor() as executor:
# NOTE(review): submit() receives the tuple (dataset, order) as a single
# positional argument - confirm this matches evaluate_arima_model's signature.
results = [executor.submit(evaluate_arima_model, (dataset, order)) for order in orders]
# Futures are handled as they finish, not in submission order.
for f in concurrent.futures.as_completed(results):
# This call sits before the try below, so an exception raised inside the
# task is re-raised here and is not caught.
print(f.result())
try:
f.result()
except:
continue
else:
print(f.result())
orders will always be empty because you are declaring it so and then never calling evaluate_models, or anything else that could be putting objects into orders. Since orders is empty, no processes will be registered to run, and results will also be empty, and so this code won't do anything. Do you mean to call evaluate_models before you do with concurrent.futures....?
来源:https://stackoverflow.com/questions/65066091/processpoolexecutor-dont-execute