SalvusFlow Example Site Configurations

This is a collection of example SalvusFlow site configurations for a few real sites around the world. The configurations originate either from us or from users who shared them.

Please keep in mind that we cannot continuously test these, so they might be slightly out of date. Nonetheless, they can still serve as guidance for configuring your own sites.
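
All of the examples below follow the same overall pattern. As a rough orientation, here is a stripped-down sketch assembled only from keys that already appear in the configurations on this page; the site name my_cluster, the host name, and all paths are placeholders you need to adapt, just like the curly-brace values such as {USER}, {ACCOUNT} or {PROJECT} elsewhere on this page.

[sites.my_cluster]
    # "slurm", "pbs" or "lsf", matching the cluster's scheduler.
    site_type = "slurm"
    # Default and maximum number of MPI ranks for jobs on this site.
    default_ranks = 24
    max_ranks = 240
    # Location of the Salvus binary and the directories jobs will use.
    salvus_binary = "/path/to/Salvus/bin/salvus"
    run_directory = "/path/to/salvus_flow/run"
    tmp_directory = "/path/to/salvus_flow/tmp"
    [sites.my_cluster.ssh_settings]
        hostname = "cluster.example.org"
        username = "{USER}"
    [sites.my_cluster.site_specific]
        # Scheduler-specific settings - see the full examples below.
        tasks_per_node = 24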

Slurm Example Sites

Piz Daint, CSCS, Switzerland

https://www.cscs.ch/computers/piz-daint/

[sites.piz_daint]
    site_type = "slurm"
    default_ranks = 12
    max_ranks = 10000
    salvus_binary = "/users/{USER}/Salvus/bin/salvus"
    run_directory = "/scratch/snx3000/{USER}/salvus_flow/run"
    tmp_directory = "/scratch/snx3000/{USER}/salvus_flow/tmp"
    # Daint for some reason does not appear to set the LD_LIBRARY_PATH
    # when loading a module.
    [[sites.piz_daint.environment_variable]]
        name = "LD_LIBRARY_PATH"
        value = "/opt/cray/pe/mpt/7.7.2/gni/mpich-gnu-abi/5.1/lib"
    [sites.piz_daint.ssh_settings]
        hostname = "daint.cscs.ch"
        username = "{USER}"
    [sites.piz_daint.site_specific]
        tasks_per_node = 12
        partition = "normal"
        debug_partition = "debug"
        path_to_slurm_binaries = "/opt/slurm/17.11.12.cscs/bin"
        # These are account/project dependent!
        [[sites.piz_daint.site_specific.additional_sbatch_arguments]]
            name = "constraint"
            value = "gpu"
        [[sites.piz_daint.site_specific.additional_sbatch_arguments]]
            name = "account"
            value = "{ACCOUT}"
        [[sites.piz_daint.site_specific.modules_to_switch]]
            old = "PrgEnv-cray"
            new = "PrgEnv-gnu"
        [[sites.piz_daint.site_specific.modules_to_switch]]
            old = "cray-mpich"
            new = "cray-mpich-abi"

Eejit, Geosciences, University of Utrecht, Netherlands

[sites.eejit]
    site_type = "slurm"
    default_ranks = 24
    max_ranks = 1008
    salvus_binary = "/quanta1/home/{USER}/Salvus/bin/salvus"
    run_directory = "/scratch/{USER}/SalvusFlow/run"
    tmp_directory = "/scratch/{USER}/SalvusFlow/tmp"
    # Use the MPI from the Mondaic downloader.
    [[sites.eejit.environment_variable]]
        name = "LD_LIBRARY_PATH"
        value = "/quanta1/home/{USER}/Salvus/lib"
    [sites.eejit.ssh_settings]
        hostname = "eejit.geo.uu.nl"
        username = "{USER}"
    [sites.eejit.site_specific]
        tasks_per_node = 24
        partition = "gpu"
        path_to_slurm_binaries = "/usr/bin"
        omit_default_srun_arguments = true
        # Use Salvus' MPI.
        replace_srun_with = "/quanta1/home/{USER}/Salvus/bin/mpirun"
        [[sites.eejit.site_specific.additional_srun_arguments]]
            name = "n"
            value = "$SLURM_NTASKS"

PBS Example Sites

Raijin, NCI, Australia

https://opus.nci.org.au/display/Help/Raijin+User+Guide

[sites.raijin]
    site_type = "pbs"
    # This depends on the chosen raijin queue.
    default_ranks = 16
    max_ranks = 800
    # Adapt these to your folders!
    salvus_binary = "/home/{NUMBER}/{USER}/Salvus/bin/salvus"
    run_directory = "/short/{PROJECT}/{USER}/salvus_flow/run"
    tmp_directory = "/short/{PROJECT}/{USER}/salvus_flow/tmp"
    # The compute nodes on raijin cannot access the internet, so license
    # tokens have to be used.
    use_license_tokens = true
    # Make sure to set up key-based SSH authentication beforehand as
    # described in the Salvus documentation.
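    # For example, using standard OpenSSH tooling (adapt the key type and the
    # host/user names to your own setup):
    #
    #     ssh-keygen -t ed25519
    #     ssh-copy-id {USER}@raijin.nci.org.au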
    [sites.raijin.ssh_settings]
        hostname = "raijin.nci.org.au"
        username = "{USER}"
    [sites.raijin.site_specific]
        # Depends on the chosen raijin queue.
        tasks_per_node = 16
        # This is a solid default and should be good for most applications.
        memory_per_rank_in_mb = 1024
        # On raijin the queue determines what processors and processor
        # architectures Salvus will run on. This in turn means that different
        # Salvus binaries could be used on different queues (as newer processors
        # offer more modern instruction sets). This can be chosen at the time
        # Salvus is downloaded.
        #
        # See "Queue Structure" here: https://opus.nci.org.au/display/Help/Raijin+User+Guide
        # More information: https://salvus.io/installation/installation/#instruction-sets
        queue = "normal"
        # Please be aware that the express queue is Sandy Bridge only and
        # thus will not work with Haswell binaries. Just comment out this line
        # if you use the Haswell binaries.
        debug_queue = "express"
        compute_resources_template = "ncpus={RANKS}"
        path_to_pbs_binaries = "/opt/pbs/default/bin"
        # Use the mpirun from the intel-mpi module.
        replace_pbsrun_with = "mpirun"
        # Salvus requires an ABI compatible MPI. This is the most recent
        # available one on raijin.
        modules_to_load = ["intel-mpi/5.1.3.210"]

LSF Example Sites

Euler, ETHZ, Zurich, Switzerland

https://scicomp.ethz.ch/wiki/Euler

[sites.euler]
    # Euler uses IBM's LSF system.
    site_type = "lsf"
    # Node size.
    default_ranks = 24
    # Specify this depending on your allowance on euler.
    max_ranks = 240
    # This is just the default place at which the downloader will put Salvus.
    salvus_binary = "/cluster/home/{USER}/Salvus/bin/salvus"
    # As always, make sure these directories are fine for you. You might have
    # access to more suitable folders on Euler, depending on your project, but
    # every Euler user has access to these.
    run_directory = "/cluster/home/{USER}/salvus_flow_run"
    tmp_directory = "/cluster/scratch/{USER}/salvus_flow_temp"
    # Compute nodes don't have internet access, so use license tokens.
    use_license_tokens = true
    [sites.euler.ssh_settings]
        hostname = "euler.ethz.ch"
        username = "{USER}"
    [sites.euler.site_specific]
        # First load the `new` module which gives access to a lot more modules.
        # Then load Intel's MPI which is an ABI compatible MPI that works with
        # Salvus.
        modules_to_load = ['new', 'impi']
        # `fullnode` is a special resource specifier on Euler which grants
        # exclusive access to a single node. If it is not given, jobs from other
        # users might run on the same node.
        #
        # This configuration only sets it for jobs with 18 or more ranks.
        [[sites.euler.site_specific.additional_bsub_argument]]
            name = "R"
            value = "fullnode"
            condition = "ranks >= 18"
        # Euler by default only grants 1024 MB of RAM per rank. This setting
        # doubles that.
        [[sites.euler.site_specific.additional_bsub_argument]]
            name = "R"
            value = "rusage[mem=2048]"