Introductory Paper ¶

↑ Click on “Open in Colab” to execute this notebook live on Google Colaboratory.

In this Google Colab notebook, we provide step-by-step instructions on how to reproduce the results and figures presented in our paper (click here to view the paper on arXiv). All the results were obtained using the NERSC/Cori supercomputer at Lawrence Berkeley National Laboratory.

Getting Started ¶

The following command will load the HYPPO package and include it in the Python path of this Google Colaboratory notebook. We will also load all the product files used in our paper, which are publicly available in this repository. While we will give instructions on how to redo the Hyperparameter Optimization from scratch, some data (those used for the CT reconstruction application) are proprietary and cannot be shared at the moment.

[1]:

            %%capture
!pip install PyYAML==5.1
!git clone https://gitlab.com/hpo-uq/hyppo
!pip install -r hyppo/requirements.txt
!ln -s /content/hyppo/hyppo $(python -c "import pip; print(pip.__path__[0].rstrip('/pip'))")/
!git clone https://gitlab.com/hpo-uq/publications/paper-1 data

           

[ ]:

            import hyppo
evaluation = hyppo.extract('data/evaluation_only/logs/*_01.log',target_loss='outer',target_unc='MAD')
surrogate = hyppo.extract('data/surrogate/logs/*_01.log',target_loss='outer',target_unc='MAD')

           

Fig. 1: Visualizing uncertainties ¶

For time series prediction ¶

In this first figure, we show how uncertainty can be visualized along with the predicted time series.

[ ]:

             import numpy
numpy.savetxt('samples.txt',[[30,40,5,20,1]],fmt='%i')

[ ]:

             import hyppo
config = {
    'trainer':'internal',
    'data' : {
        'dataset'     : 'generic',
        'data_path'   : 'temperature',
        'n_timestamp' : 100,
        'n_out'       : 1,
    },
    'model' : {
        'trial'    : 5,
        'library'  : 'pt',
        'dl_type'  : 'mlp',
        'update'   : True,
        'validate' : True,
    },
    'prms' : {
        'nevals' : 1,
        'names'  : ['epochs', 'batch','layers','nodes','dropout'],
        'mult'   : [       1,       5,       1,      5,     0.05],
        'xlow'   : [       5,      10,       1,      1,        1],
        'xup'    : [      30,      40,       6,     10,       19],
        'record' : 'samples.txt',
    },
    'uq' : {
        'uq_on'         : True,
        'uq_hpo'        : False,
        'uq_weights'    : [0.5, 0.5],
        'data_noise'    : 0.0,
        'data_type'     : 'ts',
        'dropout_masks' : 500,
    }
}
hyppo.inline_job(config)

            

[ ]:

             import hyppo
dicts = {**config['model'],**config['data']}
data = hyppo.get_data(**dicts)

            

2022-02-06 02:14:35,810 INFO ========================================
2022-02-06 02:14:35,818 INFO DATASET:
2022-02-06 02:14:35,843 INFO ----------------------------------------
2022-02-06 02:14:35,847 INFO    Loading dataset...
2022-02-06 02:14:35,888 INFO    Size of dataset: 3650
2022-02-06 02:14:35,894 INFO    Transform dataset...
2022-02-06 02:14:35,966 INFO    Data preparation completed!
2022-02-06 02:14:35,969 INFO ========================================

[ ]:

             import numpy
y_real_train  = data['train']['y_data'].squeeze()
y_real_valid  = data['valid']['y_data'].squeeze()
y_real_test   = data['test']['y_data'].squeeze()
y_real        = numpy.concatenate((y_real_train,y_real_valid))
test_indx     = numpy.size(y_real)
y_real        = numpy.concatenate((y_real, y_real_test))
x_pred        = numpy.asarray([i for i in range(test_indx,test_indx+numpy.size(y_real_test))])
out_path      = 'logs/output/evaluation_30_40_5_20_1'
pred_mean     = numpy.loadtxt('%s/pred_mean.txt' % out_path)
pred_var      = numpy.loadtxt('%s/pred_var.txt' % out_path)
uq_means      = numpy.loadtxt('%s/uq_means.txt' % out_path)
trained_means = numpy.loadtxt('%s/trained_means.txt' % out_path)

            

[ ]:

             import glob, numpy
import matplotlib.pyplot as plt
import matplotlib.patheffects as pe
plt.style.use('seaborn')
plt.figure(figsize=(6,4),dpi=200)
plt.plot(y_real,lw=0.5,zorder=1,label='Observed')
for i in range(uq_means.shape[1]):
    plt.plot(x_pred, uq_means[:,i],color='cyan',lw=0.1,alpha=0.1,zorder=1)
plt.fill_between(x_pred,
                 pred_mean-2*numpy.sqrt(pred_var),
                 pred_mean+2*numpy.sqrt(pred_var),color='green',alpha=0.4,zorder=2)
plt.fill_between(x_pred,
                 pred_mean.squeeze()-numpy.sqrt(pred_var.squeeze()),
                 pred_mean.squeeze()+numpy.sqrt(pred_var.squeeze()),color='yellow',alpha=0.5,zorder=3)
for i in range(trained_means.shape[1]):
    plt.plot(x_pred, trained_means[:,i],'k--',lw=0.5,zorder=4)
plt.plot(x_pred, pred_mean,'r-',lw=1,zorder=5)
plt.xlim([2600,3300])
plt.ylim([0,1])
plt.xlabel('Day')
plt.ylabel('Normalized Temperature')
plt.tight_layout()
plt.savefig('figure.pdf')

            

For image classification ¶

[ ]:

             classes = [ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]
loss = [0.55818635, 0.35699278, 0.6525674,  0.6731422,  0.61552846, 0.6370991,
 0.6203768,  0.7147306,  0.41948083, 0.53932667]
std = [0.23771548, 0.35699278, 0.24572021, 0.29641974, 0.31616145, 0.3629009,
 0.35618317, 0.2660244,  0.34038505, 0.2691552 ]

            

[ ]:

             import matplotlib.pyplot as plt
plt.style.use('seaborn')
plt.figure(figsize=(6,4),dpi=200)
plt.errorbar(classes,loss,yerr=std,color='indianred', fmt='.', ms=10, mfc='white', mew=1.5, lw=1, capsize=5,mec='black')
plt.axhline(max(loss),ls='dashed',color='black',lw=0.5)
plt.xlabel('Class Index')
plt.ylabel('Probability')
plt.xticks(numpy.arange(1,len(classes)+1))
plt.xlim(0.5,10.5)
plt.ylim(-0.1,1.1)
plt.tight_layout()
plt.savefig('classification')
plt.show()

            

Fig. 2 : HPO solutions ¶

[2]:

            import numpy
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
# Order the rows by the 'params' column before plotting.
evaluation = evaluation.sort_values(by=['params'])
# NOTE(review): recent Matplotlib releases renamed the 'seaborn' style --
# confirm against the installed version.
plt.style.use('seaborn')
plt.figure(figsize=(6,4),dpi=200)
# Keep the right margin free for the colorbar axes created further down.
plt.subplots_adjust(top=0.95,right=0.82)
# Loss against deviation, coloured by the 'params' column.
plt.scatter(evaluation.loss,evaluation.stdev,c=evaluation.params,lw=0.1,alpha=0.5,s=50,cmap='plasma')
plt.xscale('log')
plt.yscale('log')
plt.xlim(xmin=3e-2,xmax=3e-1)
plt.ylim(ymin=1e-4,ymax=1)
# Fixed x ticks printed with two decimals; minor ticks are switched off.
plt.xticks([0.03,0.1,0.3])
plt.gca().xaxis.set_major_formatter(FormatStrFormatter('%.2f'))
plt.minorticks_off()
plt.xlabel('Loss')
plt.ylabel(r'1-$\mathrm{\sigma}$ Deviation')
# Dedicated axes for the colorbar, placed in the freed right margin.
cax = plt.axes([0.83, 0.125, 0.03, 0.823])
cbar = plt.colorbar(cax=cax)
cbar.set_ticks([2e3,4e3,6e3,8e3,10e3,12e3])
cbar.set_ticklabels(['2k','4k','6k','8k','10k','12k'])
# Redraw the colorbar with alpha 1 (the scatter points use alpha=0.5).
# NOTE(review): Colorbar.draw_all is deprecated in recent Matplotlib
# releases -- confirm against the installed version.
cbar.set_alpha(1)
cbar.draw_all()
cbar.set_label('Number of trainable parameters')
plt.savefig('scatter_loss.pdf')
plt.show()

           

Fig. 3 : Convergence plot ¶

[ ]:

            import numpy
# Sort evaluations only array by loss
sort_idxs = numpy.argsort(evaluation[:,1])[::-1]
loss = evaluation[sort_idxs,1]
sdev = evaluation[sort_idxs,2]
sets = evaluation[sort_idxs,3:]
# Sort surrogate results by number of samples used
sort_idxs = numpy.argsort(surrogate[:,-1])
loss_surr, sdev_surr = [], []
for i in sort_idxs:
    if len(loss_surr)==0 or surrogate[i,1]<min(loss_surr):
        loss_surr.append(surrogate[i,1])
        sdev_surr.append(surrogate[i,2])
        print(surrogate[i,1],surrogate[i,2])
    else:
        loss_surr.append(min(loss_surr))
        sdev_surr.append(sdev_surr[-1])
loss_surr = numpy.array(loss_surr)
sdev_surr = numpy.array(sdev_surr)

           

0.14256 0.07209
0.08837 0.07267
0.04988 0.01938
0.04824 0.01294
0.04274 0.00858
0.04097 0.00361
0.03815 0.00242
0.03765 0.0018
0.03472 0.01358
0.03469 0.00053

Out of the evaluation-only run, we picked 10 bad evaluations, i.e. with high loss, from the first 5% of the list sorted by decreasing loss. The random selection was done as follows:

           import random
idxs = random.sample(range(len(evaluation)//20), 10)

where idxs corresponds to the list of indices of the 10 bad evaluations. The random indices used in our work were the following:

[ ]:

            idxs = [7, 12, 37, 17, 18, 24, 4, 5, 11, 20]  # fixed indices, so the marked points do not depend on a new random draw

           

Below we show the sample values along with the loss and standard deviation obtained for each hyperparameter set:

[ ]:

            for i in idxs:
    print(sets[i],loss[i],sdev[i])

[ 6. 38.  2.  2.  7.  0.] 0.18929 0.19847
[ 8. 15.  1.  5.  9.  0.] 0.16859 0.18847
[ 5. 33.  2.  8. 16.  0.] 0.10877 0.0513
[20. 26.  1.  1. 16.  0.] 0.13813 0.09657
[15. 37.  2.  1.  4.  0.] 0.13775 0.14956
[13. 35.  5.  1. 15.  0.] 0.12743 0.12746
[ 7. 39.  2.  2. 15.  0.] 0.2297 0.13342
[ 5. 25.  5.  2. 18.  0.] 0.22418 0.21816
[ 9. 19.  2.  1.  8.  0.] 0.17347 0.17497
[ 9. 25.  1.  5. 18.  0.] 0.13649 0.09254

[ ]:

            import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
plt.style.use('seaborn')
plt.figure(figsize=(6,4),dpi=200)
plt.plot(loss,color='slateblue',zorder=1,label='Random Sampling')
plt.fill_between(range(len(loss)),loss-sdev/2,loss+sdev/2,alpha=0.5,color='slateblue')
plt.plot(loss_surr,color='orange',zorder=1,label='Surrogate Modeling')
plt.fill_between(range(len(loss_surr)),loss_surr-sdev_surr/2,loss_surr+sdev_surr/2,alpha=0.5,color='orange')
plt.scatter(idxs,loss[idxs],color='white',s=20,zorder=2,edgecolors='tomato',lw=2)
plt.axhline(min(loss),ls='dashed',color='black',lw=1)
plt.scatter(len(loss),min(loss),s=20,color='black',zorder=2)
plt.scatter(numpy.argwhere(loss_surr<min(loss))[0,0],min(loss),s=20,color='black',zorder=2)
plt.xscale('log')
plt.yscale('log')
plt.yticks([0.03,0.1,0.3])
plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
plt.xlabel('Index of function evaluations')
plt.ylabel('Loss')
plt.legend(loc='upper right')
plt.tight_layout()
plt.savefig('compare.pdf')
plt.show()

           

Fig. 4: HYPPO vs. DeepHyper ¶

Fig. 7 : Performance plot ¶

[ ]:

            performance.min(),performance.max()

           

(236.876, 12826.283)

[ ]:

            import numpy
performance = numpy.loadtxt('data/performance.txt')
print('Maximum speedup: %.2f' % (performance.max()/performance.min()))

           

Maximum speedup: 54.15

[ ]:

            import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from matplotlib.ticker import FormatStrFormatter
import seaborn as sns
plt.style.use('seaborn')
plt.figure(figsize=(6,4),dpi=200)
plt.imshow(performance[0,0]/performance[::-1],extent=[1,6,1,16],
           interpolation='bicubic',cmap='icefire_r',aspect='auto',norm=LogNorm(),
           vmin=1,vmax=100)
plt.yticks([1,2,4,8,16])
cbar = plt.colorbar(ticks=[1,10,100],pad=0.01)
cbar.set_label('HPO Speedup')
plt.xlabel('SLURM tasks')
plt.ylabel('SLURM steps')
plt.tight_layout()
plt.savefig('speedup.pdf')
plt.show()

           

Fig. 8 : CT scatter plot ¶

[ ]:

            import numpy
results = numpy.loadtxt('data/ct_results.txt')[:,-3:]

[ ]:

            import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from matplotlib.colors import LogNorm
# Row order sorted by the first column of `results` (the colour value).
# `numpy` and `results` come from the preceding cell.
idxs = numpy.argsort(results[:,0])
# NOTE(review): recent Matplotlib releases renamed the 'seaborn' style --
# confirm against the installed version.
plt.style.use('seaborn')
plt.figure(figsize=(6,4),dpi=200)
# Keep the right margin free for the colorbar axes created further down.
plt.subplots_adjust(top=0.95,right=0.82)
# Second column on x, third column on y, first column as log-scaled colour.
plt.scatter(results[idxs,1],results[idxs,2],c=results[idxs,0],lw=0.1,alpha=0.7,s=100,cmap='plasma', norm=LogNorm())
plt.xscale('log')
plt.yscale('log')
plt.xlabel('Loss')
plt.ylabel(r'1-$\mathrm{\sigma}$ Deviation')
# Dedicated axes for the colorbar, placed in the freed right margin.
cax = plt.axes([0.83, 0.125, 0.03, 0.823])
cbar = plt.colorbar(cax=cax)
cbar.set_ticks([2e4,5e4,1e5,2e5,3e5,5e5])
cbar.set_ticklabels(['20k','50k','100k','200k','300k','500k'])
# Redraw the colorbar with alpha 1 (the scatter points use alpha=0.7).
# NOTE(review): Colorbar.draw_all is deprecated in recent Matplotlib
# releases -- confirm against the installed version.
cbar.set_alpha(1)
cbar.draw_all()
cbar.set_label('Number of trainable parameters')
plt.savefig('ct_scatterplot.pdf')
plt.show()