I'm reaching out from the ALS computing group. In a previous NERSC ticket, I was told to contact this channel for SFAPI issues.
I am integrating Prefect with sfapi. For now I am using the test script provided in one of the training sessions, modified for my project. The function below creates a client object from my version of the NerscClient (shown in the file above) and then submits the job:
def launch_nersc_jobs_tomography(
):
    """Submit a small demo Slurm job to Perlmutter and wait for it to finish.

    Creates a NERSC client, makes sure the job's output directory exists on
    the user's scratch space, submits a debug-queue batch script that prints
    a few normally distributed random numbers, and blocks until the job
    completes.

    The Slurm stdout/stderr files are written to the same directory that is
    created up front, so the job never targets a directory that is missing.
    """
    logger = get_run_logger()
    client = create_nersc_client()
    user = client.user()
    logger.info("Client created")

    scratch_path = f"/pscratch/sd/{user.name[0]}/{user.name}"
    # Single source of truth for the output location: the directory created
    # here is the one referenced by --output/--error in the script below.
    output_dir = f"{scratch_path}/prefect-recon-test"
    client.perlmutter.run(f"mkdir -p {output_dir}")

    n_samples = 5
    # The script is assembled from a list of lines rather than an indented
    # triple-quoted string so that "#!/bin/bash" and every "#SBATCH" directive
    # start at column 0 no matter how this function is indented.
    # NOTE(review): the reported "No architecture specified" sbatch error is
    # consistent with the "-C cpu" directive not being parsed, which is what
    # would be expected if the directive lines carried leading whitespace --
    # confirm against the sbatch documentation.
    script_lines = [
        "#!/bin/bash",
        "#SBATCH -q debug",
        "#SBATCH -A als",
        "#SBATCH -N 1",
        "#SBATCH -C cpu",
        "#SBATCH -t 00:10:00",
        "#SBATCH -J sfapi-demo",
        "#SBATCH --exclusive",
        f"#SBATCH --output={output_dir}/sfapi-demo-%j.out",
        f"#SBATCH --error={output_dir}/sfapi-demo-%j.error",
        "module load python",
        "# Prints N random numbers to form a normal disrobution",
        f'python -c "import numpy as np; numbers = np.random.normal(size={n_samples}); [print(n) for n in numbers]"',
    ]
    job_script = "\n".join(script_lines) + "\n"

    job = client.perlmutter.submit_job(job_script)
    # Wait for the submitted job before reporting completion.
    job.complete()
    logger.info(f"Job {job.id} completed")
The client object is created and the new directory is created, but the submit_job call fails. This is the full error message:
Traceback (most recent call last):
File "home/splash_flows_globus/orchestration/_tests/test_832_prefect_nersc_jobs.py", line 8, in <module>
test_launch_nersc_jobs_tomography()
File "home/splash_flows_globus/orchestration/_tests/test_832_prefect_nersc_jobs.py", line 4, in test_launch_nersc_jobs_tomography
launch_nersc_jobs_tomography()
File "home/splash_flows_globus/env/lib/python3.9/site-packages/prefect/flows.py", line 1231, in __call__
return enter_flow_run_engine_from_flow_call(
File "home/splash_flows_globus/env/lib/python3.9/site-packages/prefect/engine.py", line 293, in enter_flow_run_engine_from_flow_call
retval = from_sync.wait_for_call_in_loop_thread(
File "home/splash_flows_globus/env/lib/python3.9/site-packages/prefect/_internal/concurrency/api.py", line 218, in wait_for_call_in_loop_thread
return call.result()
File "home/splash_flows_globus/env/lib/python3.9/site-packages/prefect/_internal/concurrency/calls.py", line 318, in result
return self.future.result(timeout=timeout)
File "home/splash_flows_globus/env/lib/python3.9/site-packages/prefect/_internal/concurrency/calls.py", line 179, in result
return self.__get_result()
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/concurrent/futures/_base.py", line 390, in __get_result
raise self._exception
File "home/splash_flows_globus/env/lib/python3.9/site-packages/prefect/_internal/concurrency/calls.py", line 389, in _run_async
result = await coro
File "home/splash_flows_globus/env/lib/python3.9/site-packages/prefect/client/utilities.py", line 100, in with_injected_client
return await fn(*args, **kwargs)
File "home/splash_flows_globus/env/lib/python3.9/site-packages/prefect/engine.py", line 396, in create_then_begin_flow_run
return await state.result(fetch=True)
File "home/splash_flows_globus/env/lib/python3.9/site-packages/prefect/states.py", line 91, in _get_state_result
raise await get_state_exception(state)
File "home/splash_flows_globus/env/lib/python3.9/site-packages/prefect/engine.py", line 877, in orchestrate_flow_run
result = await flow_call.aresult()
File "home/splash_flows_globus/env/lib/python3.9/site-packages/prefect/_internal/concurrency/calls.py", line 327, in aresult
return await asyncio.wrap_future(self.future)
File "home/splash_flows_globus/env/lib/python3.9/site-packages/prefect/_internal/concurrency/calls.py", line 352, in _run_sync
result = self.fn(*self.args, **self.kwargs)
File "home/splash_flows_globus/orchestration/flows/bl832/move.py", line 280, in launch_nersc_jobs_tomography
job = client.perlmutter.submit_job(job_script)
File "home/splash_flows_globus/env/lib/python3.9/site-packages/sfapi_client/_sync/compute.py", line 34, in wrapper
return method(self, *args, **kwargs)
File "home/splash_flows_globus/env/lib/python3.9/site-packages/sfapi_client/_sync/compute.py", line 111, in submit_job
raise SfApiError(result["error"])
sfapi_client.exceptions.SfApiError: sbatch: error: No architecture specified, cannot estimate job costs.
sbatch: error: Batch job submission failed: Unspecified error
Is this a problem with the job script? The same script works when submitted from a notebook, so I haven't had issues with it before, and I have never seen this error. Could you point me to what could have caused it?