Тут показані розбіжності між вибраною ревізією та поточною версією сторінки.
Наступна ревізія | Попередня ревізія | ||
slurm [2015/04/28 23:00] wombat створено |
slurm [2015/06/19 12:42] wombat MUNGE |
||
---|---|---|---|
Рядок 2: | Рядок 2: | ||
===== Установка ===== | ===== Установка ===== | ||
+ | |||
+ | ==== MUNGE ==== | ||
+ | |||
+ | Скопируйте файл /etc/munge/munge.key на все компьютеры кластера. Доступ к этому файлу должен быть ограничен (владелец munge, права только на чтение для владельца, как в примере ниже), так как он содержит секретный ключ, по которому предоставляется доступ к вычислительным мощностям компьютеров. | ||
+ | |||
+ | user@Computer03:~$ ls -l /etc/munge/munge.key | ||
+ | -r-------- 1 munge munge 1024 кві 29 14:48 /etc/munge/munge.key | ||
==== Gentoo ==== | ==== Gentoo ==== | ||
Рядок 14: | Рядок 21: | ||
+ | ==== Debian ==== | ||
+ | apt-get install slurm-wlm | ||
===== Настройка ===== | ===== Настройка ===== | ||
- | file:///usr/share/doc/slurm-2.6.3/html/configurator.html | + | ''/usr/share/doc/slurm-2.6.3/html/configurator.html'' |
''/etc/slurm/cgroup.conf'' FIXME | ''/etc/slurm/cgroup.conf'' FIXME | ||
- | https://computing.llnl.gov/linux/slurm/quickstart.html | ||
+ | https://computing.llnl.gov/linux/slurm/quickstart.html | ||
+ | <file conf '/etc/slurm/slurm.conf'> | ||
+ | # slurm.conf file generated by configurator.html. | ||
+ | # Put this file on all nodes of your cluster. | ||
+ | # See the slurm.conf man page for more information. | ||
+ | # | ||
+ | ControlMachine=k517 | ||
+ | #ControlAddr= | ||
+ | #BackupController= | ||
+ | #BackupAddr= | ||
+ | # | ||
+ | AuthType=auth/munge | ||
+ | CacheGroups=0 | ||
+ | #CheckpointType=checkpoint/none | ||
+ | CryptoType=crypto/munge | ||
+ | #DisableRootJobs=NO | ||
+ | #EnforcePartLimits=NO | ||
+ | #Epilog= | ||
+ | #EpilogSlurmctld= | ||
+ | #FirstJobId=1 | ||
+ | #MaxJobId=999999 | ||
+ | #GresTypes= | ||
+ | #GroupUpdateForce=0 | ||
+ | #GroupUpdateTime=600 | ||
+ | #JobCheckpointDir=/var/slurm/checkpoint | ||
+ | #JobCredentialPrivateKey= | ||
+ | #JobCredentialPublicCertificate= | ||
+ | #JobFileAppend=0 | ||
+ | #JobRequeue=1 | ||
+ | #JobSubmitPlugins=1 | ||
+ | #KillOnBadExit=0 | ||
+ | #Licenses=foo*4,bar | ||
+ | #MailProg=/bin/mail | ||
+ | #MaxJobCount=5000 | ||
+ | #MaxStepCount=40000 | ||
+ | #MaxTasksPerNode=128 | ||
+ | MpiDefault=none | ||
+ | #MpiParams=ports=#-# | ||
+ | #PluginDir= | ||
+ | #PlugStackConfig= | ||
+ | #PrivateData=jobs | ||
+ | ProctrackType=proctrack/cgroup | ||
+ | #Prolog= | ||
+ | #PrologSlurmctld= | ||
+ | #PropagatePrioProcess=0 | ||
+ | #PropagateResourceLimits= | ||
+ | #PropagateResourceLimitsExcept= | ||
+ | ReturnToService=1 | ||
+ | #SallocDefaultCommand= | ||
+ | SlurmctldPidFile=/var/run/slurm/slurmctld.pid | ||
+ | SlurmctldPort=6817 | ||
+ | SlurmdPidFile=/var/run/slurm/slurmd.pid | ||
+ | SlurmdPort=6818 | ||
+ | SlurmdSpoolDir=/var/spool/slurmd | ||
+ | SlurmUser=slurm | ||
+ | #SlurmdUser=root | ||
+ | #SrunEpilog= | ||
+ | #SrunProlog= | ||
+ | StateSaveLocation=/tmp/slurm | ||
+ | SwitchType=switch/none | ||
+ | #TaskEpilog= | ||
+ | TaskPlugin=task/cgroup | ||
+ | #TaskPluginParam= | ||
+ | #TaskProlog= | ||
+ | #TopologyPlugin=topology/tree | ||
+ | #TmpFs=/tmp | ||
+ | #TrackWCKey=no | ||
+ | #TreeWidth= | ||
+ | #UnkillableStepProgram= | ||
+ | #UsePAM=0 | ||
+ | # | ||
+ | # | ||
+ | # TIMERS | ||
+ | #BatchStartTimeout=10 | ||
+ | #CompleteWait=0 | ||
+ | #EpilogMsgTime=2000 | ||
+ | #GetEnvTimeout=2 | ||
+ | #HealthCheckInterval=0 | ||
+ | #HealthCheckProgram= | ||
+ | InactiveLimit=0 | ||
+ | KillWait=30 | ||
+ | #MessageTimeout=10 | ||
+ | #ResvOverRun=0 | ||
+ | MinJobAge=300 | ||
+ | #OverTimeLimit=0 | ||
+ | SlurmctldTimeout=120 | ||
+ | SlurmdTimeout=300 | ||
+ | #UnkillableStepTimeout=60 | ||
+ | #VSizeFactor=0 | ||
+ | Waittime=0 | ||
+ | # | ||
+ | # | ||
+ | # SCHEDULING | ||
+ | #DefMemPerCPU=0 | ||
+ | FastSchedule=0 | ||
+ | #MaxMemPerCPU=0 | ||
+ | #SchedulerRootFilter=1 | ||
+ | #SchedulerTimeSlice=30 | ||
+ | SchedulerType=sched/backfill | ||
+ | SchedulerPort=7321 | ||
+ | SelectType=select/linear | ||
+ | #SelectTypeParameters= | ||
+ | # | ||
+ | # | ||
+ | # JOB PRIORITY | ||
+ | #PriorityType=priority/basic | ||
+ | #PriorityDecayHalfLife= | ||
+ | #PriorityCalcPeriod= | ||
+ | #PriorityFavorSmall= | ||
+ | #PriorityMaxAge= | ||
+ | #PriorityUsageResetPeriod= | ||
+ | #PriorityWeightAge= | ||
+ | #PriorityWeightFairshare= | ||
+ | #PriorityWeightJobSize= | ||
+ | #PriorityWeightPartition= | ||
+ | #PriorityWeightQOS= | ||
+ | # | ||
+ | # | ||
+ | # LOGGING AND ACCOUNTING | ||
+ | #AccountingStorageEnforce=0 | ||
+ | #AccountingStorageHost= | ||
+ | AccountingStorageLoc=/var/log/slurm | ||
+ | #AccountingStoragePass= | ||
+ | #AccountingStoragePort= | ||
+ | AccountingStorageType=accounting_storage/filetxt | ||
+ | #AccountingStorageUser= | ||
+ | AccountingStoreJobComment=YES | ||
+ | ClusterName=cluster | ||
+ | #DebugFlags= | ||
+ | #JobCompHost= | ||
+ | #JobCompLoc= | ||
+ | #JobCompPass= | ||
+ | #JobCompPort= | ||
+ | JobCompType=jobcomp/none | ||
+ | #JobCompUser= | ||
+ | JobAcctGatherFrequency=30 | ||
+ | JobAcctGatherType=jobacct_gather/linux | ||
+ | SlurmctldDebug=3 | ||
+ | #SlurmctldLogFile= | ||
+ | SlurmdDebug=3 | ||
+ | #SlurmdLogFile= | ||
+ | #SlurmSchedLogFile= | ||
+ | #SlurmSchedLogLevel= | ||
+ | # | ||
+ | # | ||
+ | # POWER SAVE SUPPORT FOR IDLE NODES (optional) | ||
+ | #SuspendProgram= | ||
+ | #ResumeProgram= | ||
+ | #SuspendTimeout= | ||
+ | #ResumeTimeout= | ||
+ | #ResumeRate= | ||
+ | #SuspendExcNodes= | ||
+ | #SuspendExcParts= | ||
+ | #SuspendRate= | ||
+ | #SuspendTime= | ||
+ | # | ||
+ | # | ||
+ | # COMPUTE NODES | ||
+ | NodeName=k517 CPUs=1 Sockets=1 CoresPerSocket=2 ThreadsPerCore=2 State=UNKNOWN | ||
+ | PartitionName=debug Nodes=k517 Default=YES MaxTime=INFINITE State=UP | ||
+ | </file> | ||