diff --git a/src/DS/sds.c b/src/DS/sds.c index 15f5079c39..10083b0d54 100644 --- a/src/DS/sds.c +++ b/src/DS/sds.c @@ -244,12 +244,30 @@ static int ds_sds_register_xmlDoc(struct ds_sds_session *session, xmlDoc* doc, x return -1; } - struct oscap_source *component_source = oscap_source_new_from_xmlDoc(new_doc, relative_filepath); + xmlChar *xml_buf = NULL; + int buf_size = 0; + xmlDocDumpMemory(new_doc, &xml_buf, &buf_size); + xmlFreeDoc(new_doc); + if (xml_buf == NULL || buf_size <= 0) { + oscap_seterr(OSCAP_EFAMILY_XML, "Failed to serialize extracted component '%s'", relative_filepath); + xmlFree(xml_buf); + return -1; + } + + char *buf = malloc((size_t)buf_size); + if (buf == NULL) { + xmlFree(xml_buf); + return -1; + } + memcpy(buf, xml_buf, (size_t)buf_size); + xmlFree(xml_buf); + + struct oscap_source *component_source = oscap_source_new_take_memory(buf, (size_t)buf_size, relative_filepath); if (ds_sds_session_register_component_source(session, relative_filepath, component_source) != 0) { oscap_source_free(component_source); } - return 0; // TODO: Return value of ds_sds_session_register_component_source(). 
(commit message) + return 0; } static int ds_sds_register_component(struct ds_sds_session *session, xmlDoc* doc, xmlNodePtr component_inner_root, const char* component_id, const char* target_filename_dirname, const char* relative_filepath) diff --git a/src/OVAL/oval_defModel.c b/src/OVAL/oval_defModel.c index 00b9043524..7320faf41d 100644 --- a/src/OVAL/oval_defModel.c +++ b/src/OVAL/oval_defModel.c @@ -226,7 +226,7 @@ static inline int _oval_definition_model_merge_source(struct oval_definition_mod { /* setup context */ struct oval_parser_context context; - context.reader = oscap_source_get_xmlTextReader(source); + context.reader = oscap_source_get_streaming_xmlTextReader(source); if (context.reader == NULL) { return -1; } diff --git a/src/OVAL/oval_directives.c b/src/OVAL/oval_directives.c index 55f110be9d..ffb528c38d 100644 --- a/src/OVAL/oval_directives.c +++ b/src/OVAL/oval_directives.c @@ -111,7 +111,7 @@ int oval_directives_model_import_source(struct oval_directives_model *model, str /* setup context */ struct oval_parser_context context; - context.reader = oscap_source_get_xmlTextReader(source); + context.reader = oscap_source_get_streaming_xmlTextReader(source); if (context.reader == NULL) { return -1; } diff --git a/src/OVAL/oval_sysModel.c b/src/OVAL/oval_sysModel.c index e5a1bb525d..95f858893b 100644 --- a/src/OVAL/oval_sysModel.c +++ b/src/OVAL/oval_sysModel.c @@ -235,7 +235,7 @@ int oval_syschar_model_import_source(struct oval_syschar_model *model, struct os int ret = 0; /* setup context */ struct oval_parser_context context; - context.reader = oscap_source_get_xmlTextReader(source); + context.reader = oscap_source_get_streaming_xmlTextReader(source); if (context.reader == NULL) { return -1; } diff --git a/src/OVAL/oval_varModel.c b/src/OVAL/oval_varModel.c index e9c025bedd..29cef3ac12 100644 --- a/src/OVAL/oval_varModel.c +++ b/src/OVAL/oval_varModel.c @@ -302,7 +302,7 @@ static int _oval_variable_model_parse(struct oval_variable_model *model, xmlText 
struct oval_variable_model *oval_variable_model_import_source(struct oscap_source *source) { int ret; - xmlTextReader *reader = oscap_source_get_xmlTextReader(source); + xmlTextReader *reader = oscap_source_get_streaming_xmlTextReader(source); if (reader == NULL) { return NULL; } diff --git a/src/OVAL/results/oval_resModel.c b/src/OVAL/results/oval_resModel.c index e8bcf22841..d2339a06f8 100644 --- a/src/OVAL/results/oval_resModel.c +++ b/src/OVAL/results/oval_resModel.c @@ -200,7 +200,7 @@ int oval_results_model_import_source(struct oval_results_model *model, struct os /* setup context */ struct oval_parser_context context; - context.reader = oscap_source_get_xmlTextReader(source); + context.reader = oscap_source_get_streaming_xmlTextReader(source); if (context.reader == NULL) { return -1; } diff --git a/src/XCCDF/xccdf_session.c b/src/XCCDF/xccdf_session.c index 5466402cae..da2e0e5bbf 100644 --- a/src/XCCDF/xccdf_session.c +++ b/src/XCCDF/xccdf_session.c @@ -33,6 +33,7 @@ #include #include "oscap_source.h" +#include "source/oscap_source_priv.h" #include #include #include @@ -792,6 +793,7 @@ static inline int _xccdf_session_load_xccdf_benchmark(struct xccdf_session *sess if (benchmark == NULL) { return 1; } + oscap_source_free_xmlDoc(session->xccdf.source); /* create the policy model */ session->xccdf.policy_model = xccdf_policy_model_new(benchmark); @@ -1216,6 +1218,7 @@ int xccdf_session_load_oval(struct xccdf_session *session) oscap_source_readable_origin(contents[idx]->source)); return 1; } + oscap_source_free_xmlDoc(contents[idx]->source); /* def_model -> session */ struct oval_agent_session *tmp_sess = oval_agent_new_session(tmp_def_model, contents[idx]->href); diff --git a/src/source/oscap_source.c b/src/source/oscap_source.c index 2acbe88b27..f578991cd4 100644 --- a/src/source/oscap_source.c +++ b/src/source/oscap_source.c @@ -25,6 +25,7 @@ #endif #include +#include #include #ifdef OS_WINDOWS #include @@ -163,6 +164,15 @@ void 
oscap_source_free_xmlDoc(struct oscap_source *source)
 	}
 }
 
+void oscap_source_free_memory(struct oscap_source *source)
+{
+	if (source != NULL) {
+		free(source->origin.memory);
+		source->origin.memory = NULL;
+		source->origin.memory_size = 0;
+	}
+}
+
 /**
  * Returns human readable description of oscap_source origin
  */
@@ -187,17 +197,121 @@ xmlTextReader *oscap_source_get_xmlTextReader(struct oscap_source *source)
 	return reader;
 }
 
+/*
+ * Tells whether a buffer of the given size can be handed to
+ * xmlReaderForMemory(), which takes the size as an int. The limit is
+ * spelled as UINT_MAX / 2 (equal to INT_MAX on common ABIs) so that no
+ * additional header is required.
+ */
+static int streaming_size_is_supported(unsigned long long size)
+{
+	return size <= (unsigned long long)(~0u >> 1);
+}
+
+/*
+ * Reads exactly size bytes from fd into a newly allocated buffer.
+ * Returns NULL when the allocation fails or when the file ends (or a read
+ * error occurs) before size bytes were obtained; the caller owns the result.
+ */
+static char *streaming_read_exactly(int fd, size_t size)
+{
+	char *buffer = malloc(size);
+	if (buffer == NULL) {
+		return NULL;
+	}
+	size_t total_read = 0;
+	while (total_read < size) {
+		ssize_t n = read(fd, buffer + total_read, size - total_read);
+		if (n <= 0) {
+			free(buffer);
+			return NULL;
+		}
+		total_read += (size_t)n;
+	}
+	return buffer;
+}
+
+/*
+ * Creates a reader over source->origin.memory; url is passed to libxml2 as
+ * the document URL and may be NULL. Sets the oscap error on failure.
+ */
+static xmlTextReader *streaming_reader_for_memory(struct oscap_source *source, const char *url)
+{
+	xmlTextReader *reader = xmlReaderForMemory(source->origin.memory,
+			(int)source->origin.memory_size, url, NULL, 0);
+	if (reader == NULL) {
+		oscap_seterr(OSCAP_EFAMILY_XML, "Unable to create streaming xmlTextReader for %s",
+				oscap_source_readable_origin(source));
+		oscap_setxmlerr(xmlGetLastError());
+	}
+	return reader;
+}
+
+/*
+ * Returns a reader that parses the raw bytes of the source instead of
+ * walking a DOM. Falls back to oscap_source_get_xmlTextReader() when a DOM
+ * is already cached, when bz2_memory_is_bzip()/bz2_fd_is_bzip() flag the
+ * content, or when a file cannot be loaded completely (empty or oversized
+ * file, failed allocation, short read).
+ *
+ * For a file-based source the whole file is loaded into
+ * source->origin.memory and stays attached to the source (see
+ * oscap_source_free_memory()). The buffer is attached only after a complete
+ * read, so a truncated copy is never cached.
+ */
+xmlTextReader *oscap_source_get_streaming_xmlTextReader(struct oscap_source *source)
+{
+	if (source->xml.doc != NULL) {
+		return oscap_source_get_xmlTextReader(source);
+	}
+
+	if (source->origin.memory != NULL) {
+		if (!streaming_size_is_supported(source->origin.memory_size) ||
+				bz2_memory_is_bzip(source->origin.memory, source->origin.memory_size)) {
+			return oscap_source_get_xmlTextReader(source);
+		}
+		return streaming_reader_for_memory(source, NULL);
+	}
+
+	if (source->origin.filepath != NULL) {
+		int fd = open(source->origin.filepath, O_RDONLY);
+		if (fd == -1) {
+			oscap_seterr(OSCAP_EFAMILY_XML, "Unable to open file for streaming xmlTextReader: %s",
+					oscap_source_readable_origin(source));
+			return NULL;
+		}
+		size_t file_size = 0;
+		char *buffer = NULL;
+		struct stat st;
+		if (!bz2_fd_is_bzip(fd) && fstat(fd, &st) == 0 && st.st_size > 0 &&
+				streaming_size_is_supported((unsigned long long)st.st_size)) {
+			file_size = (size_t)st.st_size;
+			buffer = streaming_read_exactly(fd, file_size);
+		}
+		close(fd);
+		if (buffer == NULL) {
+			/* Nothing was cached; let the DOM-based reader handle the file. */
+			return oscap_source_get_xmlTextReader(source);
+		}
+		source->origin.memory = buffer;
+		source->origin.memory_size = file_size;
+		return streaming_reader_for_memory(source, source->origin.filepath);
+	}
+
+	oscap_seterr(OSCAP_EFAMILY_XML, "Unable to create streaming xmlTextReader for %s",
+			oscap_source_readable_origin(source));
+	return NULL;
+}
+
 oscap_document_type_t oscap_source_get_scap_type(struct oscap_source *source)
 {
 	if (source->scap_type == OSCAP_DOCUMENT_UNKNOWN) {
-		xmlTextReader *reader = oscap_source_get_xmlTextReader(source);
+		xmlTextReader *reader = oscap_source_get_streaming_xmlTextReader(source);
 		if (reader == NULL) {
-			// the oscap error is already set
 			return OSCAP_DOCUMENT_UNKNOWN;
 		}
 		if (oscap_determine_document_type_reader(reader, &(source->scap_type)) == -1) {
 			oscap_seterr(OSCAP_EFAMILY_XML, "Unknown document type: '%s'", oscap_source_readable_origin(source));
-			// in case of error scap_type must remain UNKNOWN
 			assert(source->scap_type == OSCAP_DOCUMENT_UNKNOWN);
 		}
 		xmlFreeTextReader(reader);
@@ -385,7 +499,7 @@ int oscap_source_validate_schematron(struct oscap_source *source)
 const char *oscap_source_get_schema_version(struct oscap_source *source)
 {
 	if (source->origin.version == NULL) {
-		xmlTextReader *reader = oscap_source_get_xmlTextReader(source);
+		xmlTextReader *reader = oscap_source_get_streaming_xmlTextReader(source);
 		if (reader == NULL) {
 			return NULL;
 		}
diff --git a/src/source/oscap_source_priv.h b/src/source/oscap_source_priv.h
index 4c4aa3d424..7492b96843 100644
--- a/src/source/oscap_source_priv.h
+++ b/src/source/oscap_source_priv.h
@@ -60,13 +60,30 @@ struct oscap_source *oscap_source_new_from_xmlDoc(xmlDoc *doc, const char *filep
 
 /**
  * Get an xmlTextReader assigned with this resource. The reader needs to be
- * disposed by caller.
+ * disposed by caller. This variant walks over the in-memory DOM (loading + * it first if necessary). * @memberof oscap_source * @param source Resource to read the content * @returns xmlTextReader structure to read the content */ xmlTextReader *oscap_source_get_xmlTextReader(struct oscap_source *source); +/** + * Get a streaming xmlTextReader that does NOT require building the full DOM. + * For file-based sources, the whole file is read into a memory buffer kept by + * the source and parsed from there. Memory-based sources are parsed from their + * buffer. Sources with a cached DOM, or flagged as bzip2, use get_xmlTextReader. + * + * This should be preferred over oscap_source_get_xmlTextReader() when the + * caller only needs sequential read access and does not need the DOM to + * persist after reading. + * + * @memberof oscap_source + * @param source Resource to read the content + * @returns xmlTextReader structure to read the content, or NULL on failure + */ +xmlTextReader *oscap_source_get_streaming_xmlTextReader(struct oscap_source *source); + /** * Get a DOM representation of this resource. The document ins still owned * by oscap_source. @@ -85,4 +102,14 @@ xmlDoc *oscap_source_get_xmlDoc(struct oscap_source *source); */ xmlDoc *oscap_source_pop_xmlDoc(struct oscap_source *source); +/** + * Release the memory buffer held by this source. After this call, the + * source can no longer be parsed from its raw memory. This is useful + * to reduce memory after the source has been fully consumed. Do not call it + * while an xmlTextReader created from that buffer is still in use. + * @memberof oscap_source + * @param source Resource to release memory buffer from (NULL is ignored) + */ +void oscap_source_free_memory(struct oscap_source *source); + #endif