我正在使用 HTCondor 的 Python API 提交一个简单的并行（parallel universe）作业：
# Submit one parallel-universe cluster with two procs (one per itemdata
# entry), each running /bin/ping with its own arguments and initial_dir.
with schedd.transaction() as schedd_transaction:
    sub = htcondor.Submit(
        {
            "universe": "parallel",
            "executable": "/bin/ping",
            "machine_count": "1",
            "request_cpus": "0",
            "error": ".test.err",
            "output": ".test.out",
            "log": ".test.log",
            "should_transfer_files": "NO",
            "transfer_executable": "False",
            "run_as_owner": "True",
            # Values of "+" attributes are ClassAd expressions, so a string
            # value must carry its own double quotes inside the Python string.
            "+Owner": '"user"',
            # Without the embedded quotes, WAIT_FOR_ALL is an attribute
            # reference rather than a string, so the policy is presumably not
            # recognized and the job is torn down when the first node exits.
            "+ParallelShutdownPolicy": '"WAIT_FOR_ALL"',
        }
    )
    # Per-proc overrides; the count of 1 queues one proc per itemdata entry.
    res = sub.queue_with_itemdata(
        schedd_transaction,
        1,
        iter(
            [
                {
                    "arguments": "-c3 127.0.0.1",
                    "initial_dir": "/tmp/tmp1",
                },
                {
                    "arguments": "-c10 127.0.0.1",
                    "initial_dir": "/tmp/tmp2",
                },
            ]
        ),
    )
运行 `watch -n 0.5 condor_q -nobatch -verbose -allusers` 命令后，我看到：
ID 为 2.1 的作业提前结束了！为什么会这样？
作业执行期间 `condor_q -analyze` 的输出：
root@b0d6b2e00bc8:/# condor_q -analyze 2
007.000: Job is running.
Last successful match: Mon Jul 29 18:47:50 2019
007.000: Run analysis summary ignoring user priority. Of 3 machines,
0 are rejected by your job's requirements
0 reject your job because of their own requirements
2 match and are already running your jobs
0 match but are serving other users
1 are able to run your job
007.001: Job is running.
007.001: Run analysis summary ignoring user priority. Of 3 machines,
0 are rejected by your job's requirements
0 reject your job because of their own requirements
2 match and are already running your jobs
0 match but are serving other users
1 are able to run your job