|
| 1 | +#!/bin/bash |
| 2 | +### Job Name (will be used as prefix later on!) |
| 3 | +#SBATCH --job-name="ICAR_tst" |
| 4 | +#SBATCH --nodes=1 |
| 5 | +#SBATCH --ntasks-per-node=32 |
| 6 | +#SBATCH --time=00:05:00 |
| 7 | +#SBATCH --constraint=haswell |
| 8 | +#SBATCH --qos=debug |
| 9 | +### Project code |
| 10 | +#SBATCH --account=m4062 |
| 11 | +### error and output files in separate folder, named with job name (%x) and job ID (%j) |
| 12 | +### N.B: create the job_output folder before submitting this job! |
| 13 | +#SBATCH --output=job_output/log-%x.%j.out |
| 14 | +#SBATCH --error=job_output/log-%x.%j.err |
| 15 | + |
| 16 | +# A python environment that provides xarray must be active: |
| 17 | +module load python |
| 18 | +conda activate myenv |
| 19 | + |
| 20 | +# OpenMP settings: a single OpenMP thread per process |
| 21 | +export OMP_NUM_THREADS="1" |
| 22 | +# export MP_TASK_AFFINITY=core:$OMP_NUM_THREADS |
| 23 | + |
| 24 | +# Simple alternative: run ICAR once, directly |
| 25 | +# icar icar_options.nml |
| 26 | + |
| 27 | +# Chained alternative (allows a continuous sequence of jobs) |
| 28 | +PREFIX="tst" ##$SBATCH_JOB_NAME |
| 29 | + |
| 30 | +# Naming the remaining files after $PREFIX is a convenience only; |
| 31 | +# it is not required anywhere |
| 32 | +OUTDIR="${PREFIX}" |
| 33 | +OPTFILE="options.nml" #${PREFIX}_options.nml |
| 34 | +BATCHFILE="batch_submit_SLURM.sh" #${PREFIX}_batch_submit.sh |
| 35 | +TEMPLATE="${PREFIX}_template.nml" |
| 36 | + |
| 37 | +# Path of the ICAR executable to launch |
| 38 | +EXE="${HOME}/bin/icar_dbs" |
| 39 | + |
| 40 | +# Source the (system dependent) environment settings ICAR needs to run: |
| 41 | +source /global/cfs/cdirs/m4062/env_scripts/UO-GNU-env.sh |
| 42 | + |
| 43 | + |
| 44 | +# Helper scripts used further down (SETUP_RUN is critical) |
| 45 | +SETUP_RUN="${HOME}/icar/helpers/setup_next_run.py" |
| 46 | +MAKE_TEMPLATE="${HOME}/icar/helpers/make_template.py" |
| 47 | +MKOUTDIR="mkdir" #<user_defined_path>/mkscratch.py # mkscratch creates the directory on scratch and links to it |
| 48 | + |
| 49 | + |
| 50 | + |
| 51 | +# -------------------------------------------------- |
| 52 | +# SHOULD NOT NEED TO MODIFY ANYTHING BELOW THIS LINE |
| 53 | +# -------------------------------------------------- |
| 54 | + |
| 55 | +# Create the template namelist from $OPTFILE if it is missing; helper output goes to job_output/py_mktemp.out |
| 56 | +if [[ ! -e "$TEMPLATE" ]]; then |
| 57 | +    $MAKE_TEMPLATE "$OPTFILE" "$TEMPLATE" > job_output/py_mktemp.out  # command var left unquoted so it may carry arguments |
| 58 | +fi |
| 59 | + |
| 60 | +# # if the output directory doesn't exist, create it |
| 61 | +# if [[ ! -e $OUTDIR ]]; then |
| 62 | +# $MKOUTDIR $OUTDIR |
| 63 | +# fi |
| 64 | + |
| 65 | +# Keep chaining jobs until an earlier job has created ${PREFIX}_finished. |
| 66 | +if [[ ! -e "${PREFIX}_finished" ]]; then |
| 67 | +    # Queue the follow-up job first, so it is already submitted if this job gets killed mid-run. |
| 68 | +    # afternotok: Slurm releases the follow-up only if this job ends in a failed state (e.g. timed out). |
| 69 | +    # (the earlier PBS version used: qsub -W depend=afterany:${PBS_JOBID} ${BATCHFILE}) |
| 70 | +    sbatch --dependency="afternotok:${SLURM_JOB_ID}" "${BATCHFILE}" |
| 71 | + |
| 72 | +    # A ${PREFIX}_running marker means an earlier job already started: set up the restart options |
| 73 | +    if [[ -e "${PREFIX}_running" ]]; then |
| 74 | +        # $SETUP_RUN is left unquoted so it may hold a command with arguments |
| 75 | +        $SETUP_RUN "$OPTFILE" "$TEMPLATE" > job_output/py_setup.out |
| 76 | +    fi |
| 77 | + |
| 78 | +    # Leave the marker so the next job in the chain knows a run has been started |
| 79 | +    touch "${PREFIX}_running" |
| 80 | + |
| 81 | +    # Launch one image per allocated Slurm task (36 if SLURM_NTASKS is unset); output is appended to one shared log. |
| 82 | +    # Per-job log alternative: replace the redirect with > "job_output/icar_${SLURM_JOB_ID}.out" |
| 83 | +    cafrun -n "${SLURM_NTASKS:-36}" $EXE "$OPTFILE" >> job_output/icar.out |
| 84 | + |
| 85 | +    # Typically the job is killed while ICAR is still running, so the lines below are not reached. |
| 86 | +    # Killing the job has nevertheless been seen to still touch _finished; this pause is meant |
| 87 | +    # to give the scheduler a chance to really terminate the script first. |
| 88 | +    sleep 10 |
| 89 | + |
| 90 | +    # Reached only after cafrun has returned: mark the run as finished so that any later job |
| 91 | +    # takes the else branch (cleanup) instead of restarting ICAR. |
| 92 | +    touch "${PREFIX}_finished" |
| 93 | +else |
| 94 | +    # An earlier job ran to completion: delete the inter-job communication files and exit |
| 95 | +    rm -f -- "${PREFIX}_running" |
| 96 | +    rm -f -- "${PREFIX}_finished" |
| 97 | +fi |
0 commit comments