diff --git a/mkconcore.py b/mkconcore.py index 3b20f8e4..db9d7f15 100644 --- a/mkconcore.py +++ b/mkconcore.py @@ -72,7 +72,8 @@ import stat import copy_with_port_portname import numpy as np -import shlex # Added for POSIX shell escaping +import shlex # Added for POSIX shell escaping +import posixpath # input validation helper def safe_name(value, context, allow_path=False): @@ -92,6 +93,36 @@ def safe_name(value, context, allow_path=False): if re.search(pattern, value): raise ValueError(f"Unsafe {context}: '{value}' contains illegal characters.") return value + +def _normalize_relpath(value): + return value.replace("\\", "/") + +def safe_relpath(value, context): + """ + Validates a relative path for node source files. + Allows subdirectories, but blocks traversal, absolute paths, and drive roots. + """ + if not value: + raise ValueError(f"{context} cannot be empty") + normalized = _normalize_relpath(value) + safe_name(normalized, context, allow_path=True) + if normalized.startswith("/") or normalized.startswith("~"): + raise ValueError(f"Unsafe {context}: absolute paths are not allowed.") + if re.match(r"^[A-Za-z]:", normalized): + raise ValueError(f"Unsafe {context}: drive paths are not allowed.") + if ":" in normalized: + raise ValueError(f"Unsafe {context}: ':' is not allowed in relative paths.") + parts = normalized.split("/") + if any(part == "" for part in parts): + raise ValueError(f"Unsafe {context}: empty path segment is not allowed.") + if any(part == ".." for part in parts): + raise ValueError(f"Unsafe {context}: path traversal ('..') is not allowed.") + return normalized + +def ensure_parent_dir(path): + parent = os.path.dirname(path) + if parent: + os.makedirs(parent, exist_ok=True) MKCONCORE_VER = "22-09-18" @@ -248,14 +279,15 @@ def cleanup_script_files(): node_label = re.sub(r'(\s+|\n)', ' ', node_label) #Validate node labels - if ':' in node_label: - container_part, source_part = node_label.split(':', 1) - safe_name(container_part, f"Node container name '{container_part}'") - safe_name(source_part, f"Node source file '{source_part}'") - else: - safe_name(node_label, f"Node label '{node_label}'") - # Explicitly reject incorrect format to prevent later crashes and ambiguity - raise ValueError(f"Invalid node label '{node_label}': expected format 'container:source' with a ':' separator.") + if ':' in node_label: + container_part, source_part = node_label.split(':', 1) + safe_name(container_part, f"Node container name '{container_part}'") + normalized_source = safe_relpath(source_part, f"Node source file '{source_part}'") + node_label = f"{container_part}:{normalized_source}" + else: + safe_name(node_label, f"Node label '{node_label}'") + # Explicitly reject incorrect format to prevent later crashes and ambiguity + raise ValueError(f"Invalid node label '{node_label}': expected format 'container:source' with a ':' separator.") nodes_dict[node['id']] = node_label node_id_to_label_map[node['id']] = node_label.split(':')[0] @@ -373,10 +405,10 @@ def cleanup_script_files(): logging.warning(f"Error processing edge for parameter aggregation: {e}") # --- Now, run the specialization for each node that has aggregated parameters --- -if node_edge_params: - logging.info("Running script specialization process...") - specialized_scripts_output_dir = os.path.abspath(os.path.join(outdir, "src")) - os.makedirs(specialized_scripts_output_dir, exist_ok=True) +if node_edge_params: + logging.info("Running script specialization process...") + specialized_scripts_output_dir = os.path.abspath(os.path.join(outdir, "src")) + os.makedirs(specialized_scripts_output_dir, exist_ok=True) for node_id, params_list in node_edge_params.items(): current_node_full_label = nodes_dict[node_id] @@ -388,23 +420,33 @@ def cleanup_script_files(): if not original_script or "." not in original_script: continue # Skip if not a script file - template_script_full_path = os.path.join(sourcedir, original_script) - if not os.path.exists(template_script_full_path): - logging.error(f"Cannot specialize: Original script '{template_script_full_path}' not found in '{sourcedir}'.") - continue - - new_script_basename = copy_with_port_portname.run_specialization_script( - template_script_full_path, - specialized_scripts_output_dir, - params_list, - python_executable, - copy_script_py_path - ) - - if new_script_basename: - # Update nodes_dict to point to the new comprehensive specialized script - nodes_dict[node_id] = f"{container_name}:{new_script_basename}" - logging.info(f"Node ID '{node_id}' ('{container_name}') updated to use specialized script '{new_script_basename}'.") + template_script_full_path = os.path.join(sourcedir, original_script) + if not os.path.exists(template_script_full_path): + logging.error(f"Cannot specialize: Original script '{template_script_full_path}' not found in '{sourcedir}'.") + continue + + script_subdir = posixpath.dirname(original_script) + node_output_dir = specialized_scripts_output_dir + if script_subdir: + node_output_dir = os.path.join(specialized_scripts_output_dir, script_subdir) + os.makedirs(node_output_dir, exist_ok=True) + + new_script_basename = copy_with_port_portname.run_specialization_script( + template_script_full_path, + node_output_dir, + params_list, + python_executable, + copy_script_py_path + ) + + if new_script_basename: + # Update nodes_dict to point to the new comprehensive specialized script + if script_subdir: + new_script_relpath = posixpath.join(script_subdir, new_script_basename) + else: + new_script_relpath = new_script_basename + nodes_dict[node_id] = f"{container_name}:{new_script_relpath}" + logging.info(f"Node ID '{node_id}' ('{container_name}') updated to use specialized script '{new_script_relpath}'.") else: logging.error(f"Failed to generate specialized script for node ID '{node_id}'. It will retain its original script.") @@ -446,22 +488,25 @@ def cleanup_script_files(): else: dockername, langext = sourcecode, "" - script_target_path = os.path.join(outdir, "src", sourcecode) + script_target_path = os.path.join(outdir, "src", sourcecode) + ensure_parent_dir(script_target_path) # If the script was specialized, it's already in outdir/src. If not, copy from sourcedir. if node_id_key not in node_edge_params: script_source_path = os.path.join(sourcedir, sourcecode) - if os.path.exists(script_source_path): - shutil.copy2(script_source_path, script_target_path) - else: - logging.error(f"Script '{sourcecode}' not found in sourcedir '{sourcedir}'") + if os.path.exists(script_source_path): + shutil.copy2(script_source_path, script_target_path) + else: + logging.error(f"Script '{sourcecode}' not found in sourcedir '{sourcedir}'") # The rest of the file handling (Dockerfiles, .dir) uses 'dockername', # which is now derived from the specialized script name, maintaining consistency. if concoretype == "docker": custom_dockerfile = f"Dockerfile.{dockername}" - if os.path.exists(os.path.join(sourcedir, custom_dockerfile)): - shutil.copy2(os.path.join(sourcedir, custom_dockerfile), os.path.join(outdir, "src", custom_dockerfile)) + if os.path.exists(os.path.join(sourcedir, custom_dockerfile)): + dest_dockerfile = os.path.join(outdir, "src", custom_dockerfile) + ensure_parent_dir(dest_dockerfile) + shutil.copy2(os.path.join(sourcedir, custom_dockerfile), dest_dockerfile) dir_for_node = f"{dockername}.dir" if os.path.isdir(os.path.join(sourcedir, dir_for_node)): @@ -640,11 +685,15 @@ def cleanup_script_files(): try: containername, sourcecode = node_label.split(':', 1) if not sourcecode or "." not in sourcecode: continue - dockername = os.path.splitext(sourcecode)[0] - with open(os.path.join(outdir, "src", f"{dockername}.iport"), "w") as fport: - fport.write(str(ports['iport']).replace("'" + prefixedgenode, "'")) - with open(os.path.join(outdir, "src", f"{dockername}.oport"), "w") as fport: - fport.write(str(ports['oport']).replace("'" + prefixedgenode, "'")) + dockername = os.path.splitext(sourcecode)[0] + iport_path = os.path.join(outdir, "src", f"{dockername}.iport") + oport_path = os.path.join(outdir, "src", f"{dockername}.oport") + ensure_parent_dir(iport_path) + ensure_parent_dir(oport_path) + with open(iport_path, "w") as fport: + fport.write(str(ports['iport']).replace("'" + prefixedgenode, "'")) + with open(oport_path, "w") as fport: + fport.write(str(ports['oport']).replace("'" + prefixedgenode, "'")) except ValueError: continue @@ -653,10 +702,11 @@ def cleanup_script_files(): if (concoretype=="docker"): for node in nodes_dict: containername,sourcecode = nodes_dict[node].split(':') - if len(sourcecode)!=0 and sourcecode.find(".")!=-1: #3/28/21 - dockername,langext = sourcecode.split(".") - if not os.path.exists(outdir+"/src/Dockerfile."+dockername): # 3/30/21 - try: + if len(sourcecode)!=0 and sourcecode.find(".")!=-1: #3/28/21 + dockername,langext = sourcecode.split(".") + dockerfile_path = os.path.join(outdir, "src", f"Dockerfile.{dockername}") + if not os.path.exists(dockerfile_path): # 3/30/21 + try: if langext=="py": src_path = CONCOREPATH+"/Dockerfile.py" logging.info("assuming .py extension for Dockerfile") @@ -677,8 +727,9 @@ def cleanup_script_files(): except: logging.error(f"{CONCOREPATH} is not correct path to concore") quit() - with open(outdir+"/src/Dockerfile."+dockername,"w") as fcopy: - fcopy.write(source_content) + ensure_parent_dir(dockerfile_path) + with open(dockerfile_path,"w") as fcopy: + fcopy.write(source_content) if langext=="py": fcopy.write('CMD ["python", "-i", "'+sourcecode+'"]\n') if langext=="m": @@ -695,7 +746,7 @@ def cleanup_script_files(): containername,sourcecode = nodes_dict[node].split(':') if len(sourcecode)!=0 and sourcecode.find(".")!=-1: #3/28/21 dockername,langext = sourcecode.split(".") - fbuild.write("mkdir docker-"+dockername+"\n") + fbuild.write("mkdir -p docker-"+dockername+"\n") fbuild.write("cd docker-"+dockername+"\n") fbuild.write("cp ../src/Dockerfile."+dockername+" Dockerfile\n") #copy sourcefiles from ./src into corresponding directories @@ -922,36 +973,48 @@ def cleanup_script_files(): if concoretype=="posix": fbuild.write('#!/bin/bash' + "\n") -for node in nodes_dict: - containername,sourcecode = nodes_dict[node].split(':') - if len(sourcecode)!=0: - if sourcecode.find(".")==-1: - logging.error("cannot pull container "+sourcecode+" with control core type "+concoretype) #3/28/21 - quit() - dockername,langext = sourcecode.split(".") - fbuild.write('mkdir '+containername+"\n") - if concoretype == "windows": - fbuild.write("copy .\\src\\"+sourcecode+" .\\"+containername+"\\"+sourcecode+"\n") - if langext == "py": - fbuild.write("copy .\\src\\concore.py .\\" + containername + "\\concore.py\n") - elif langext == "cpp": - # 6/22/21 - fbuild.write("copy .\\src\\concore.hpp .\\" + containername + "\\concore.hpp\n") - elif langext == "v": - # 6/25/21 - fbuild.write("copy .\\src\\concore.v .\\" + containername + "\\concore.v\n") - elif langext == "m": # 4/2/21 - fbuild.write("copy .\\src\\concore_*.m .\\" + containername + "\\\n") - fbuild.write("copy .\\src\\import_concore.m .\\" + containername + "\\\n") - fbuild.write("copy .\\src\\"+dockername+".iport .\\"+containername+"\\concore.iport\n") - fbuild.write("copy .\\src\\"+dockername+".oport .\\"+containername+"\\concore.oport\n") - #include data files in here if they exist - if os.path.isdir(sourcedir+"/"+dockername+".dir"): - fbuild.write("copy .\\src\\"+dockername+".dir\\*.* .\\"+containername+"\n") - else: - fbuild.write("cp ./src/"+sourcecode+" ./"+containername+"/"+sourcecode+"\n") - if langext == "py": - fbuild.write("cp ./src/concore.py ./"+containername+"/concore.py\n") +for node in nodes_dict: + containername,sourcecode = nodes_dict[node].split(':') + if len(sourcecode)!=0: + if sourcecode.find(".")==-1: + logging.error("cannot pull container "+sourcecode+" with control core type "+concoretype) #3/28/21 + quit() + dockername,langext = sourcecode.split(".") + if concoretype == "windows": + fbuild.write('mkdir '+containername+"\n") + else: + fbuild.write("mkdir -p ./"+containername+"\n") + source_subdir = posixpath.dirname(sourcecode) + if source_subdir: + if concoretype == "windows": + source_subdir_win = source_subdir.replace("/", "\\") + fbuild.write("mkdir .\\"+containername+"\\"+source_subdir_win+"\n") + else: + fbuild.write("mkdir -p ./"+containername+"/"+source_subdir+"\n") + if concoretype == "windows": + sourcecode_win = sourcecode.replace("/", "\\") + dockername_win = dockername.replace("/", "\\") + fbuild.write("copy .\\src\\"+sourcecode_win+" .\\"+containername+"\\"+sourcecode_win+"\n") + if langext == "py": + fbuild.write("copy .\\src\\concore.py .\\" + containername + "\\concore.py\n") + elif langext == "cpp": + # 6/22/21 + fbuild.write("copy .\\src\\concore.hpp .\\" + containername + "\\concore.hpp\n") + elif langext == "v": + # 6/25/21 + fbuild.write("copy .\\src\\concore.v .\\" + containername + "\\concore.v\n") + elif langext == "m": # 4/2/21 + fbuild.write("copy .\\src\\concore_*.m .\\" + containername + "\\\n") + fbuild.write("copy .\\src\\import_concore.m .\\" + containername + "\\\n") + fbuild.write("copy .\\src\\"+dockername_win+".iport .\\"+containername+"\\concore.iport\n") + fbuild.write("copy .\\src\\"+dockername_win+".oport .\\"+containername+"\\concore.oport\n") + #include data files in here if they exist + if os.path.isdir(sourcedir+"/"+dockername+".dir"): + fbuild.write("copy .\\src\\"+dockername_win+".dir\\*.* .\\"+containername+"\n") + else: + fbuild.write("cp ./src/"+sourcecode+" ./"+containername+"/"+sourcecode+"\n") + if langext == "py": + fbuild.write("cp ./src/concore.py ./"+containername+"/concore.py\n") elif langext == "cpp": fbuild.write("cp ./src/concore.hpp ./"+containername+"/concore.hpp\n") elif langext == "v": diff --git a/tests/test_cli.py b/tests/test_cli.py index 6aa78109..a835f8df 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -101,6 +101,30 @@ def test_run_command_default_type(self): else: self.assertTrue(Path('out/build').exists()) + def test_run_command_subdir_source(self): + with self.runner.isolated_filesystem(temp_dir=self.temp_dir): + result = self.runner.invoke(cli, ['init', 'test-project']) + self.assertEqual(result.exit_code, 0) + + subdir = Path('test-project/src/subdir') + subdir.mkdir(parents=True, exist_ok=True) + shutil.move('test-project/src/script.py', subdir / 'script.py') + + workflow_path = Path('test-project/workflow.graphml') + content = workflow_path.read_text() + content = content.replace('N1:script.py', 'N1:subdir/script.py') + workflow_path.write_text(content) + + result = self.runner.invoke(cli, [ + 'run', + 'test-project/workflow.graphml', + '--source', 'test-project/src', + '--output', 'out', + '--type', 'posix' + ]) + self.assertEqual(result.exit_code, 0) + self.assertTrue(Path('out/src/subdir/script.py').exists()) + def test_run_command_existing_output(self): with self.runner.isolated_filesystem(temp_dir=self.temp_dir): result = self.runner.invoke(cli, ['init', 'test-project'])