diff --git a/cell_output.log b/cell_output.log new file mode 100644 index 00000000000..eaa23c5b56e --- /dev/null +++ b/cell_output.log @@ -0,0 +1,463 @@ +warning: `VIRTUAL_ENV=/tmp/builds/YQxxH4yPp/0/omniml/integration/nmm-sandbox/.venv-intern-agent` does not match the project environment path `.venv` and will be ignored; use `--active` to target the active environment instead +Using CPython 3.12.13 interpreter at: /usr/local/bin/python +Creating virtual environment at: .venv +warning: No `requires-python` value found in the workspace. Defaulting to `>=3.12`. + Updating https://github.com/NVIDIA-NeMo/Run (HEAD) + Updated https://github.com/NVIDIA-NeMo/Run (1e26b6a98a756575c10a9a0ea9661fac0c7ad776) +warning: Failed to hardlink files; falling back to full copy. This may lead to degraded performance. + If the cache and target directories are on different filesystems, hardlinking may not be supported. + If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning. 
+Installed 149 packages in 2.73s +Configuring global options +Dry run for task __main__:cicd +Resolved Arguments +┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ Argument Name ┃ Resolved Value ┃ +┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ detach │ True │ +│ hf_local │ None │ +│ identity │ '/.ssh/id_ed25519' │ +│ job_dir │ '/lustre/fsw/portfolios/coreai/users/chenhany/experiment… │ +│ job_name │ 'NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mt… │ +│ pipeline │ SandboxPipeline( │ +│ │ global_vars=GlobalVariables( │ +│ │ hf_model='/hf-local/nvidia/NVIDIA-Nemotron-3-Super-1… │ +│ │ task_0=SandboxTask0( │ +│ │ script='common/specdec_bench/run.sh', │ +│ │ slurm_config=SlurmConfig( │ +│ │ host='cw-dfw-cs-001-login-01.nvidia.com', │ +│ │ account='coreai_dlalgo_modelopt', │ +│ │ partition='batch', │ +│ │ container='vllm/vllm-openai:v0.22.1', │ +│ │ modelopt_install_path='/usr/local/lib/python3.12/d… │ +│ │ container_mounts=['/lustre/fsw/portfolios/coreai/p… │ +│ │ '/lustre:/lustre', '/cm:/cm', │ +│ │ '/var/run/munge:/var/run/munge'], │ +│ │ srun_args=['--no-container-mount-home'], │ +│ │ array=None, │ +│ │ nodes=1, │ +│ │ ntasks_per_node=1, │ +│ │ gpus_per_node=4), │ +│ │ args=['--dataset speed', '--dataset_path │ +│ │ /hf-local/nvidia/SPEED-Bench-Internal/qualitative', │ +│ │ '--engine VLLM', '--speculative_algorithm MTP', │ +│ │ '--draft_length 3', '--tp_size 4', '--ep_size 1', │ +│ │ '--concurrency 32', '--output_length 4096', │ +│ │ '--aa_timing', '--show_progress', '--save_dir │ +│ │ /scratchspace/{sweep_name_default}/qualitative', │ +│ │ '--temperature 0', '--max_seq_len 65536', '--save_dir │ +│ │ /scratchspace/nvidia-nvidia-nemotron-3-super-120b-a12b-b… │ +│ │ '--draft_length 3'], │ +│ │ environment=[{'HF_MODEL_CKPT': │ +│ │ '<>'}, {'HF_LOCAL': '/hf-local'}]), │ +│ │ task_1=SandboxTask1( │ +│ │ script='common/specdec_bench/run.sh', │ +│ │ slurm_config=SlurmConfig( │ +│ │ 
host='cw-dfw-cs-001-login-01.nvidia.com', │ +│ │ account='coreai_dlalgo_modelopt', │ +│ │ partition='batch', │ +│ │ container='vllm/vllm-openai:v0.22.1', │ +│ │ modelopt_install_path='/usr/local/lib/python3.12/d… │ +│ │ container_mounts=['/lustre/fsw/portfolios/coreai/p… │ +│ │ '/lustre:/lustre', '/cm:/cm', │ +│ │ '/var/run/munge:/var/run/munge'], │ +│ │ srun_args=['--no-container-mount-home'], │ +│ │ array=None, │ +│ │ nodes=1, │ +│ │ ntasks_per_node=1, │ +│ │ gpus_per_node=4), │ +│ │ args=['--dataset speed', '--dataset_path │ +│ │ /hf-local/nvidia/SPEED-Bench-Internal/throughput_32k', │ +│ │ '--engine VLLM', '--speculative_algorithm MTP', │ +│ │ '--draft_length 3', '--tp_size 4', '--ep_size 1', │ +│ │ '--concurrency 8', '--num_requests 80', '--runtime_params │ +│ │ common/specdec_bench/runtime_params_throughput_32k.yaml', │ +│ │ '--output_length 4096', '--aa_timing', '--show_progress', │ +│ │ '--save_dir │ +│ │ /scratchspace/{sweep_name_default}/throughput_32k', │ +│ │ '--temperature 0', '--max_seq_len 65536', '--save_dir │ +│ │ /scratchspace/nvidia-nvidia-nemotron-3-super-120b-a12b-b… │ +│ │ '--num_requests 80', '--draft_length 3'], │ +│ │ environment=[{'HF_MODEL_CKPT': │ +│ │ '<>'}, {'HF_LOCAL': '/hf-local'}])) │ +│ task │ None │ +│ test_level │ 0 │ +│ user │ 'chenhany' │ +└──────────────────┴───────────────────────────────────────────────────────────┘ +Launching cicd... 
+============================================================ +Version Report +============================================================ + Launcher 5c34b6a (main) + Model-Optimizer 9f37fe19 (pensieve-intern/OMNIML-5095/cell-t0-d3) +============================================================ +────────────── Entering Experiment cicd with id: cicd_1781404847 ─────────────── +job NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm task 0 slurm_config: SlurmConfig(host='cw-dfw-cs-001-login-01.nvidia.com', port=22, account='coreai_dlalgo_modelopt', partition='batch', qos=None, container='vllm/vllm-openai:v0.22.1', modelopt_install_path='/usr/local/lib/python3.12/dist-packages/modelopt', container_mounts=['/lustre/fsw/portfolios/coreai/projects/coreai_dlalgo_modelopt/hf-local:/hf-local', '/lustre:/lustre', '/cm:/cm', '/var/run/munge:/var/run/munge'], srun_args=['--no-container-mount-home'], array=None, nodes=1, ntasks_per_node=1, gpus_per_node=4, time='04:00:00', local=False, segment=None) +job NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm task 1 slurm_config: SlurmConfig(host='cw-dfw-cs-001-login-01.nvidia.com', port=22, account='coreai_dlalgo_modelopt', partition='batch', qos=None, container='vllm/vllm-openai:v0.22.1', modelopt_install_path='/usr/local/lib/python3.12/dist-packages/modelopt', container_mounts=['/lustre/fsw/portfolios/coreai/projects/coreai_dlalgo_modelopt/hf-local:/hf-local', '/lustre:/lustre', '/cm:/cm', '/var/run/munge:/var/run/munge'], srun_args=['--no-container-mount-home'], array=None, nodes=1, ntasks_per_node=1, gpus_per_node=4, time='04:00:00', local=False, segment=None) +find: ‘modules/Megatron-LM/megatron/*’: No such file or directory +find: ‘modules/Megatron-LM/examples/*’: No such file or directory +find: ‘modules/Megatron-LM/*.py’: No such file or directory +find: ‘modules/Model-Optimizer-Internal/**’: No such file or directory +find: ‘modules/Megatron-LM/megatron/*’: No such file or directory +find: 
‘modules/Megatron-LM/examples/*’: No such file or directory +find: ‘modules/Megatron-LM/*.py’: No such file or directory +find: ‘modules/Model-Optimizer-Internal/**’: No such file or directory +[02:40:53] Connecting to client.py:257 + chenhany@cw-dfw-cs-001-login-01.nvidia.com +[02:40:53] INFO Connected (version 2.0, client transport.py:1786 + OpenSSH_8.9p1) + INFO Authentication (publickey) successful! transport.py:1786 + INFO rsyncing rsync.py:37 + /tmp/pensieve-intern-agent-oyheszzz/workspace/ex + periments/cicd/cicd_1781404847 to + /lustre/fsw/portfolios/coreai/users/chenhany/exp + eriments/cicd ... +[02:41:19] INFO Successfully ran `rsync -pthrvz --rsh='ssh -i rsync.py:93 + /.ssh/id_ed25519 -p 22 ' + /tmp/pensieve-intern-agent-oyheszzz/workspace/ex + periments/cicd/cicd_1781404847 + chenhany@cw-dfw-cs-001-login-01.nvidia.com:/lust + re/fsw/portfolios/coreai/users/chenhany/experime + nts/cicd` +[02:41:19] Launching job experiment.py:800 + NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_benc + h_mtp_vllm_0 for experiment cicd +[02:41:20] INFO Launched app: launcher.py:116 + slurm_tunnel://nemo_run/12787753 +[02:41:20] Launching job experiment.py:800 + NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_benc + h_mtp_vllm_1 for experiment cicd +[SLURM] Job 12787753 - State: PENDING, Estimated start: N/A, Current time: 2026-06-14 02:41:20 + INFO Launched app: launcher.py:116 + slurm_tunnel://nemo_run/12787754 +────────────────── Detaching from Experiment cicd_1781404847. ────────────────── + Task specific cleanup won't be run. experiment.py:1212 + Ephemeral logs and artifacts may be lost. 
+[SLURM] Job 12787754 - State: PENDING, Estimated start: N/A, Current time: 2026-06-14 02:41:20 + +Experiment Status for cicd_1781404847 + +Task 0: NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0 +- Status: SUBMITTED +- Executor: SlurmExecutor on chenhany@cw-dfw-cs-001-login-01.nvidia.com +- Job id: 12787753 +- Local Directory: /tmp/pensieve-intern-agent-oyheszzz/workspace/experiments/cicd/cicd_1781404847/NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0 +- Remote Directory: /lustre/fsw/portfolios/coreai/users/chenhany/experiments/cicd/cicd_1781404847/NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0 + +Task 1: NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1 +- Status: SUBMITTED +- Executor: SlurmExecutor on chenhany@cw-dfw-cs-001-login-01.nvidia.com +- Job id: 12787754 +- Local Directory: /tmp/pensieve-intern-agent-oyheszzz/workspace/experiments/cicd/cicd_1781404847/NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1 +- Remote Directory: /lustre/fsw/portfolios/coreai/users/chenhany/experiments/cicd/cicd_1781404847/NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1 + + +# The experiment was run with the following tasks: ['NVIDIA-Nemotron-3-Super-120 +# You can inspect and reconstruct this experiment at a later point in time using +experiment = run.Experiment.from_id("cicd_1781404847") +experiment.status() # Gets the overall status +experiment.logs("NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0 +experiment.cancel("NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm + + +# You can inspect this experiment at a later point in time using the CLI as well +nemo experiment status cicd_1781404847 +nemo experiment logs cicd_1781404847 0 +nemo experiment cancel cicd_1781404847 0 + +Found 1 experiment(s): cicd_1781404847 + +=== [2026-06-14 02:41:26] Polling iteration 1/14400 === + cicd_1781404847 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0: 
RUNNING + cicd_1781404847 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1: PENDING + + Summary: 0 succeeded, 0 failed, 0 cancelled, 1 running, 1 pending +Waiting 180s before next poll... + +=== [2026-06-14 02:44:28] Polling iteration 2/14400 === + cicd_1781404847 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0: RUNNING + cicd_1781404847 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1: PENDING + + Summary: 0 succeeded, 0 failed, 0 cancelled, 1 running, 1 pending +Waiting 180s before next poll... + +=== [2026-06-14 02:47:31] Polling iteration 3/14400 === + cicd_1781404847 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0: RUNNING + cicd_1781404847 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1: PENDING + + Summary: 0 succeeded, 0 failed, 0 cancelled, 1 running, 1 pending +Waiting 180s before next poll... + +=== [2026-06-14 02:50:34] Polling iteration 4/14400 === + cicd_1781404847 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0: RUNNING + cicd_1781404847 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1: PENDING + + Summary: 0 succeeded, 0 failed, 0 cancelled, 1 running, 1 pending +Waiting 180s before next poll... + +=== [2026-06-14 02:53:36] Polling iteration 5/14400 === + cicd_1781404847 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0: RUNNING + cicd_1781404847 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1: PENDING + + Summary: 0 succeeded, 0 failed, 0 cancelled, 1 running, 1 pending +Waiting 180s before next poll... + +=== [2026-06-14 02:56:39] Polling iteration 6/14400 === + cicd_1781404847 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0: RUNNING + cicd_1781404847 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1: PENDING + + Summary: 0 succeeded, 0 failed, 0 cancelled, 1 running, 1 pending +Waiting 180s before next poll... 
+ +=== [2026-06-14 02:59:41] Polling iteration 7/14400 === + cicd_1781404847 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0: SUCCEEDED + cicd_1781404847 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1: FAILED + + Summary: 1 succeeded, 1 failed, 0 cancelled, 0 running, 0 pending + +All experiments complete. + SUCCEEDED: 1 + FAILED: 1 + CANCELLED: 0 + +=== Fetching experiment logs === +Fetching logs: cicd_1781404847 task 0 +Fetching logs: cicd_1781404847 task 1 +=== Done fetching logs === +warning: `VIRTUAL_ENV=/tmp/builds/YQxxH4yPp/0/omniml/integration/nmm-sandbox/.venv-intern-agent` does not match the project environment path `.venv` and will be ignored; use `--active` to target the active environment instead +warning: No `requires-python` value found in the workspace. Defaulting to `>=3.12`. +Configuring global options +Dry run for task __main__:cicd +Resolved Arguments +┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ Argument Name ┃ Resolved Value ┃ +┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ detach │ True │ +│ hf_local │ None │ +│ identity │ '/.ssh/id_ed25519' │ +│ job_dir │ '/lustre/fsw/portfolios/coreai/users/chenhany/experiment… │ +│ job_name │ 'NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mt… │ +│ pipeline │ SandboxPipeline( │ +│ │ global_vars=GlobalVariables( │ +│ │ hf_model='/hf-local/nvidia/NVIDIA-Nemotron-3-Super-1… │ +│ │ task_0=SandboxTask0( │ +│ │ script='common/specdec_bench/run.sh', │ +│ │ slurm_config=SlurmConfig( │ +│ │ host='cw-dfw-cs-001-login-01.nvidia.com', │ +│ │ account='coreai_dlalgo_modelopt', │ +│ │ partition='batch', │ +│ │ container='vllm/vllm-openai:v0.22.1', │ +│ │ modelopt_install_path='/usr/local/lib/python3.12/d… │ +│ │ container_mounts=['/lustre/fsw/portfolios/coreai/p… │ +│ │ '/lustre:/lustre', '/cm:/cm', │ +│ │ '/var/run/munge:/var/run/munge'], │ +│ │ srun_args=['--no-container-mount-home'], │ +│ │ 
array=None, │ +│ │ nodes=1, │ +│ │ ntasks_per_node=1, │ +│ │ gpus_per_node=4), │ +│ │ args=['--dataset speed', '--dataset_path │ +│ │ /hf-local/nvidia/SPEED-Bench-Internal/qualitative', │ +│ │ '--engine VLLM', '--speculative_algorithm MTP', │ +│ │ '--draft_length 3', '--tp_size 4', '--ep_size 1', │ +│ │ '--concurrency 32', '--output_length 4096', │ +│ │ '--aa_timing', '--show_progress', '--save_dir │ +│ │ /scratchspace/{sweep_name_default}/qualitative', │ +│ │ '--temperature 0', '--max_seq_len 65536', '--save_dir │ +│ │ /scratchspace/nvidia-nvidia-nemotron-3-super-120b-a12b-b… │ +│ │ '--draft_length 3'], │ +│ │ environment=[{'HF_MODEL_CKPT': │ +│ │ '<>'}, {'HF_LOCAL': '/hf-local'}]), │ +│ │ task_1=SandboxTask1( │ +│ │ script='common/specdec_bench/run.sh', │ +│ │ slurm_config=SlurmConfig( │ +│ │ host='cw-dfw-cs-001-login-01.nvidia.com', │ +│ │ account='coreai_dlalgo_modelopt', │ +│ │ partition='batch', │ +│ │ container='vllm/vllm-openai:v0.22.1', │ +│ │ modelopt_install_path='/usr/local/lib/python3.12/d… │ +│ │ container_mounts=['/lustre/fsw/portfolios/coreai/p… │ +│ │ '/lustre:/lustre', '/cm:/cm', │ +│ │ '/var/run/munge:/var/run/munge'], │ +│ │ srun_args=['--no-container-mount-home'], │ +│ │ array=None, │ +│ │ nodes=1, │ +│ │ ntasks_per_node=1, │ +│ │ gpus_per_node=4), │ +│ │ args=['--dataset speed', '--dataset_path │ +│ │ /hf-local/nvidia/SPEED-Bench-Internal/throughput_32k', │ +│ │ '--engine VLLM', '--speculative_algorithm MTP', │ +│ │ '--draft_length 3', '--tp_size 4', '--ep_size 1', │ +│ │ '--concurrency 8', '--num_requests 80', '--output_length │ +│ │ 4096', '--aa_timing', '--show_progress', '--save_dir │ +│ │ /scratchspace/{sweep_name_default}/throughput_32k', │ +│ │ '--temperature 0', '--max_seq_len 65536', '--save_dir │ +│ │ /scratchspace/nvidia-nvidia-nemotron-3-super-120b-a12b-b… │ +│ │ '--num_requests 80', '--draft_length 3'], │ +│ │ environment=[{'HF_MODEL_CKPT': │ +│ │ '<>'}, {'HF_LOCAL': '/hf-local'}])) │ +│ task │ None │ +│ test_level │ 0 │ +│ user │ 
'chenhany' │ +└──────────────────┴───────────────────────────────────────────────────────────┘ +Launching cicd... +============================================================ +Version Report +============================================================ + Launcher 5c34b6a (main) + Model-Optimizer 9f37fe19 (pensieve-intern/OMNIML-5095/cell-t0-d3) +============================================================ +────────────── Entering Experiment cicd with id: cicd_1781406008 ─────────────── +job NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm task 0 slurm_config: SlurmConfig(host='cw-dfw-cs-001-login-01.nvidia.com', port=22, account='coreai_dlalgo_modelopt', partition='batch', qos=None, container='vllm/vllm-openai:v0.22.1', modelopt_install_path='/usr/local/lib/python3.12/dist-packages/modelopt', container_mounts=['/lustre/fsw/portfolios/coreai/projects/coreai_dlalgo_modelopt/hf-local:/hf-local', '/lustre:/lustre', '/cm:/cm', '/var/run/munge:/var/run/munge'], srun_args=['--no-container-mount-home'], array=None, nodes=1, ntasks_per_node=1, gpus_per_node=4, time='04:00:00', local=False, segment=None) +job NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm task 1 slurm_config: SlurmConfig(host='cw-dfw-cs-001-login-01.nvidia.com', port=22, account='coreai_dlalgo_modelopt', partition='batch', qos=None, container='vllm/vllm-openai:v0.22.1', modelopt_install_path='/usr/local/lib/python3.12/dist-packages/modelopt', container_mounts=['/lustre/fsw/portfolios/coreai/projects/coreai_dlalgo_modelopt/hf-local:/hf-local', '/lustre:/lustre', '/cm:/cm', '/var/run/munge:/var/run/munge'], srun_args=['--no-container-mount-home'], array=None, nodes=1, ntasks_per_node=1, gpus_per_node=4, time='04:00:00', local=False, segment=None) +find: ‘modules/Megatron-LM/megatron/*’: No such file or directory +find: ‘modules/Megatron-LM/examples/*’: No such file or directory +find: ‘modules/Megatron-LM/*.py’: No such file or directory +find: 
‘modules/Model-Optimizer-Internal/**’: No such file or directory +find: ‘modules/Megatron-LM/megatron/*’: No such file or directory +find: ‘modules/Megatron-LM/examples/*’: No such file or directory +find: ‘modules/Megatron-LM/*.py’: No such file or directory +find: ‘modules/Model-Optimizer-Internal/**’: No such file or directory +[03:00:14] Connecting to client.py:257 + chenhany@cw-dfw-cs-001-login-01.nvidia.com +[03:00:14] INFO Connected (version 2.0, client transport.py:1786 + OpenSSH_8.9p1) + INFO Authentication (publickey) successful! transport.py:1786 + INFO rsyncing rsync.py:37 + /tmp/pensieve-intern-agent-oyheszzz/workspace/ex + periments/cicd/cicd_1781406008 to + /lustre/fsw/portfolios/coreai/users/chenhany/exp + eriments/cicd ... +[03:00:41] INFO Successfully ran `rsync -pthrvz --rsh='ssh -i rsync.py:93 + /.ssh/id_ed25519 -p 22 ' + /tmp/pensieve-intern-agent-oyheszzz/workspace/ex + periments/cicd/cicd_1781406008 + chenhany@cw-dfw-cs-001-login-01.nvidia.com:/lust + re/fsw/portfolios/coreai/users/chenhany/experime + nts/cicd` +[03:00:41] Launching job experiment.py:800 + NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_benc + h_mtp_vllm_0 for experiment cicd + INFO Launched app: launcher.py:116 + slurm_tunnel://nemo_run/12788161 + Launching job experiment.py:800 + NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_benc + h_mtp_vllm_1 for experiment cicd +[SLURM] Job 12788161 - State: PENDING, Estimated start: N/A, Current time: 2026-06-14 03:00:41 + INFO Launched app: launcher.py:116 + slurm_tunnel://nemo_run/12788162 +────────────────── Detaching from Experiment cicd_1781406008. ────────────────── + Task specific cleanup won't be run. experiment.py:1212 + Ephemeral logs and artifacts may be lost. 
+[SLURM] Job 12788162 - State: PENDING, Estimated start: N/A, Current time: 2026-06-14 03:00:42 + +Experiment Status for cicd_1781406008 + +Task 0: NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0 +- Status: SUBMITTED +- Executor: SlurmExecutor on chenhany@cw-dfw-cs-001-login-01.nvidia.com +- Job id: 12788161 +- Local Directory: /tmp/pensieve-intern-agent-oyheszzz/workspace/experiments/cicd/cicd_1781406008/NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0 +- Remote Directory: /lustre/fsw/portfolios/coreai/users/chenhany/experiments/cicd/cicd_1781406008/NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0 + +Task 1: NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1 +- Status: SUBMITTED +- Executor: SlurmExecutor on chenhany@cw-dfw-cs-001-login-01.nvidia.com +- Job id: 12788162 +- Local Directory: /tmp/pensieve-intern-agent-oyheszzz/workspace/experiments/cicd/cicd_1781406008/NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1 +- Remote Directory: /lustre/fsw/portfolios/coreai/users/chenhany/experiments/cicd/cicd_1781406008/NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1 + + +# The experiment was run with the following tasks: ['NVIDIA-Nemotron-3-Super-120 +# You can inspect and reconstruct this experiment at a later point in time using +experiment = run.Experiment.from_id("cicd_1781406008") +experiment.status() # Gets the overall status +experiment.logs("NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0 +experiment.cancel("NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm + + +# You can inspect this experiment at a later point in time using the CLI as well +nemo experiment status cicd_1781406008 +nemo experiment logs cicd_1781406008 0 +nemo experiment cancel cicd_1781406008 0 + +Found 1 experiment(s): cicd_1781406008 + +=== [2026-06-14 03:00:46] Polling iteration 1/14400 === + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0: 
RUNNING + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1: PENDING + + Summary: 0 succeeded, 0 failed, 0 cancelled, 1 running, 1 pending +Waiting 180s before next poll... + +=== [2026-06-14 03:03:49] Polling iteration 2/14400 === + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0: RUNNING + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1: PENDING + + Summary: 0 succeeded, 0 failed, 0 cancelled, 1 running, 1 pending +Waiting 180s before next poll... + +=== [2026-06-14 03:06:51] Polling iteration 3/14400 === + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0: RUNNING + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1: PENDING + + Summary: 0 succeeded, 0 failed, 0 cancelled, 1 running, 1 pending +Waiting 180s before next poll... + +=== [2026-06-14 03:09:54] Polling iteration 4/14400 === + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0: RUNNING + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1: PENDING + + Summary: 0 succeeded, 0 failed, 0 cancelled, 1 running, 1 pending +Waiting 180s before next poll... + +=== [2026-06-14 03:12:56] Polling iteration 5/14400 === + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0: RUNNING + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1: PENDING + + Summary: 0 succeeded, 0 failed, 0 cancelled, 1 running, 1 pending +Waiting 180s before next poll... + +=== [2026-06-14 03:15:59] Polling iteration 6/14400 === + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0: RUNNING + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1: PENDING + + Summary: 0 succeeded, 0 failed, 0 cancelled, 1 running, 1 pending +Waiting 180s before next poll... 
+ +=== [2026-06-14 03:19:02] Polling iteration 7/14400 === + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0: SUCCEEDED + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1: RUNNING + + Summary: 1 succeeded, 0 failed, 0 cancelled, 1 running, 0 pending +Waiting 180s before next poll... + +=== [2026-06-14 03:22:04] Polling iteration 8/14400 === + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0: SUCCEEDED + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1: RUNNING + + Summary: 1 succeeded, 0 failed, 0 cancelled, 1 running, 0 pending +Waiting 180s before next poll... + +=== [2026-06-14 03:25:07] Polling iteration 9/14400 === + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0: SUCCEEDED + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1: RUNNING + + Summary: 1 succeeded, 0 failed, 0 cancelled, 1 running, 0 pending +Waiting 180s before next poll... + +=== [2026-06-14 03:28:09] Polling iteration 10/14400 === + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_0: SUCCEEDED + cicd_1781406008 / NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm_1: SUCCEEDED + + Summary: 2 succeeded, 0 failed, 0 cancelled, 0 running, 0 pending + +All experiments complete. 
+ SUCCEEDED: 2 + FAILED: 0 + CANCELLED: 0 + +=== Fetching experiment logs === +Fetching logs: cicd_1781406008 task 0 +Fetching logs: cicd_1781406008 task 1 +=== Done fetching logs === diff --git a/metrics_output.txt b/metrics_output.txt new file mode 100644 index 00000000000..d05cbb94c9e --- /dev/null +++ b/metrics_output.txt @@ -0,0 +1,16 @@ +qualitative Average_AL= 2.968 +qualitative Category_AL coding = 3.1526 +qualitative Category_AL humanities = 2.8842 +qualitative Category_AL math = 3.097 +qualitative Category_AL multilingual = 3.2816 +qualitative Category_AL qa = 2.8204 +qualitative Category_AL rag = 3.1553 +qualitative Category_AL reasoning = 3.0415 +qualitative Category_AL roleplay = 2.5703 +qualitative Category_AL stem = 2.902 +qualitative Category_AL summarization = 3.0081 +qualitative Category_AL writing = 2.7351 +throughput_32k Average_AL= 3.0762 +throughput_32k Category_AL high_entropy = 2.6308 +throughput_32k Category_AL low_entropy = 3.4368 +throughput_32k Category_AL mixed = 3.1743 diff --git a/tools/launcher/examples/Nemotron-h/NVIDIA-Nemotron-3-Super-120B-A12B-BF16/specdec_bench_mtp_vllm.yaml b/tools/launcher/examples/Nemotron-h/NVIDIA-Nemotron-3-Super-120B-A12B-BF16/specdec_bench_mtp_vllm.yaml new file mode 100644 index 00000000000..471e96c35f2 --- /dev/null +++ b/tools/launcher/examples/Nemotron-h/NVIDIA-Nemotron-3-Super-120B-A12B-BF16/specdec_bench_mtp_vllm.yaml @@ -0,0 +1,68 @@ +# SPEED-bench MTP speculative-decoding run for NVIDIA-Nemotron-3-Super-120B-A12B-BF16 via vLLM. +# +# Nemotron-3-Super-120B-A12B is 120B total params (MoE; 12B active per +# token). BF16 weights = 240 GB total, so tp_size=4 minimum on 80 GB +# H100/A100. Match gpus_per_node to tp_size for this single-node run. +# +# Slurm run on cw_dfw — cells override per-cell --temperature, +# --max_seq_len, --save_dir, --draft_length, and --num_requests via +# pipeline.task_N.args+=[...]. 
+
+job_name: NVIDIA-Nemotron-3-Super-120B-A12B-BF16_specdec_bench_mtp_vllm
+
+pipeline:
+  global_vars:
+    hf_model: /hf-local/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16  # referenced by each task as <<global_vars.hf_model>>
+
+  # task_0: SPEED qualitative split
+  task_0:
+    script: common/specdec_bench/run.sh
+    args:
+      - --dataset speed
+      - --dataset_path /hf-local/nvidia/SPEED-Bench-Internal/qualitative
+      - --engine VLLM
+      - --speculative_algorithm MTP
+      - --draft_length 3
+      - --tp_size 4  # keep equal to slurm_config.gpus_per_node below
+      - --ep_size 1
+      - --concurrency 32
+      - --output_length 4096
+      - --aa_timing
+      - --show_progress
+      - --save_dir /scratchspace/{sweep_name_default}/qualitative
+    environment:
+      - HF_MODEL_CKPT: <<global_vars.hf_model>>  # was the literal string "<>"; must point at global_vars.hf_model
+      - HF_LOCAL: /hf-local
+    slurm_config:
+      _factory_: "slurm_factory"
+      nodes: 1
+      ntasks_per_node: 1
+      gpus_per_node: 4  # single-node run; matches --tp_size 4
+      container: vllm/vllm-openai:v0.22.1
+
+  # task_1: SPEED throughput_32k split
+  task_1:
+    script: common/specdec_bench/run.sh
+    args:
+      - --dataset speed
+      - --dataset_path /hf-local/nvidia/SPEED-Bench-Internal/throughput_32k
+      - --engine VLLM
+      - --speculative_algorithm MTP
+      - --draft_length 3
+      - --tp_size 4  # keep equal to slurm_config.gpus_per_node below
+      - --ep_size 1
+      - --concurrency 8
+      - --num_requests 80
+      - --output_length 4096
+      - --aa_timing
+      - --show_progress
+      - --save_dir /scratchspace/{sweep_name_default}/throughput_32k
+    environment:
+      - HF_MODEL_CKPT: <<global_vars.hf_model>>  # was the literal string "<>"; must point at global_vars.hf_model
+      - HF_LOCAL: /hf-local
+    slurm_config:
+      _factory_: "slurm_factory"
+      nodes: 1
+      ntasks_per_node: 1
+      gpus_per_node: 4  # single-node run; matches --tp_size 4
+      container: vllm/vllm-openai:v0.22.1