From abe5ffbbfec5fc7f1cb2e1ebe93a7c17df4b176c Mon Sep 17 00:00:00 2001 From: Marius Bakke Date: Mon, 19 Jul 2021 14:11:12 +0200 Subject: gnu: libxml2: Update to 2.9.12. * gnu/packages/patches/libxml2-parent-pointers.patch, gnu/packages/patches/libxml2-terminating-newline.patch, gnu/packages/patches/libxml2-xpath-recursion-limit.patch: New files. * gnu/packages/patches/python-libxml2-python39-compat.patch: Delete file. * gnu/local.mk (dist_patch_DATA): Adjust accordingly. * gnu/packages/xml.scm (libxml2): Update to 2.9.12. (python-lxml)[source](modules, snippet): New fields. --- gnu/packages/patches/libxml2-parent-pointers.patch | 228 +++++++++++++++++++++ .../patches/libxml2-terminating-newline.patch | 33 +++ .../patches/libxml2-xpath-recursion-limit.patch | 20 ++ .../patches/python-libxml2-python39-compat.patch | 94 --------- gnu/packages/xml.scm | 25 ++- 5 files changed, 297 insertions(+), 103 deletions(-) create mode 100644 gnu/packages/patches/libxml2-parent-pointers.patch create mode 100644 gnu/packages/patches/libxml2-terminating-newline.patch create mode 100644 gnu/packages/patches/libxml2-xpath-recursion-limit.patch delete mode 100644 gnu/packages/patches/python-libxml2-python39-compat.patch (limited to 'gnu/packages') diff --git a/gnu/packages/patches/libxml2-parent-pointers.patch b/gnu/packages/patches/libxml2-parent-pointers.patch new file mode 100644 index 0000000000..1f0615c512 --- /dev/null +++ b/gnu/packages/patches/libxml2-parent-pointers.patch @@ -0,0 +1,228 @@ +Fix a regression in 2.9.12 where some corrupt XML structures were handled +incorrectly: + + https://gitlab.gnome.org/GNOME/libxml2/-/issues/255 + +This is an amalgamation of these upstream commits: + + https://gitlab.gnome.org/GNOME/libxml2/-/commit/85b1792e37b131e7a51af98a37f92472e8de5f3f + https://gitlab.gnome.org/GNOME/libxml2/-/commit/13ad8736d294536da4cbcd70a96b0a2fbf47070c + +diff --git a/HTMLtree.c b/HTMLtree.c +--- a/HTMLtree.c ++++ b/HTMLtree.c +@@ -744,7 +744,7 @@ void + htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED, + int format) { +- xmlNodePtr root; ++ xmlNodePtr root, parent; + xmlAttrPtr attr; + const htmlElemDesc * info; + +@@ -755,6 +755,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + } + + root = cur; ++ parent = cur->parent; + while (1) { + switch (cur->type) { + case XML_HTML_DOCUMENT_NODE: +@@ -762,13 +763,25 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + if (((xmlDocPtr) cur)->intSubset != NULL) { + htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL); + } +- if (cur->children != NULL) { ++ /* Always validate cur->parent when descending. */ ++ if ((cur->parent == parent) && (cur->children != NULL)) { ++ parent = cur; + cur = cur->children; + continue; + } + break; + + case XML_ELEMENT_NODE: ++ /* ++ * Some users like lxml are known to pass nodes with a corrupted ++ * tree structure. Fall back to a recursive call to handle this ++ * case. ++ */ ++ if ((cur->parent != parent) && (cur->children != NULL)) { ++ htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); ++ break; ++ } ++ + /* + * Get specific HTML info for that node. + */ +@@ -817,6 +830,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + (cur->name != NULL) && + (cur->name[0] != 'p')) /* p, pre, param */ + xmlOutputBufferWriteString(buf, "\n"); ++ parent = cur; + cur = cur->children; + continue; + } +@@ -825,9 +839,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + (info != NULL) && (!info->isinline)) { + if ((cur->next->type != HTML_TEXT_NODE) && + (cur->next->type != HTML_ENTITY_REF_NODE) && +- (cur->parent != NULL) && +- (cur->parent->name != NULL) && +- (cur->parent->name[0] != 'p')) /* p, pre, param */ ++ (parent != NULL) && ++ (parent->name != NULL) && ++ (parent->name[0] != 'p')) /* p, pre, param */ + xmlOutputBufferWriteString(buf, "\n"); + } + +@@ -842,9 +856,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + break; + if (((cur->name == (const xmlChar *)xmlStringText) || + (cur->name != (const xmlChar *)xmlStringTextNoenc)) && +- ((cur->parent == NULL) || +- ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) && +- (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) { ++ ((parent == NULL) || ++ ((xmlStrcasecmp(parent->name, BAD_CAST "script")) && ++ (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) { + xmlChar *buffer; + + buffer = xmlEncodeEntitiesReentrant(doc, cur->content); +@@ -902,13 +916,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + break; + } + +- /* +- * The parent should never be NULL here but we want to handle +- * corrupted documents gracefully. +- */ +- if (cur->parent == NULL) +- return; +- cur = cur->parent; ++ cur = parent; ++ /* cur->parent was validated when descending. */ ++ parent = cur->parent; + + if ((cur->type == XML_HTML_DOCUMENT_NODE) || + (cur->type == XML_DOCUMENT_NODE)) { +@@ -939,9 +949,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + (cur->next != NULL)) { + if ((cur->next->type != HTML_TEXT_NODE) && + (cur->next->type != HTML_ENTITY_REF_NODE) && +- (cur->parent != NULL) && +- (cur->parent->name != NULL) && +- (cur->parent->name[0] != 'p')) /* p, pre, param */ ++ (parent != NULL) && ++ (parent->name != NULL) && ++ (parent->name[0] != 'p')) /* p, pre, param */ + xmlOutputBufferWriteString(buf, "\n"); + } + } +diff --git a/xmlsave.c b/xmlsave.c +--- a/xmlsave.c ++++ b/xmlsave.c +@@ -847,7 +847,7 @@ htmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + static void + xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + int format = ctxt->format; +- xmlNodePtr tmp, root, unformattedNode = NULL; ++ xmlNodePtr tmp, root, unformattedNode = NULL, parent; + xmlAttrPtr attr; + xmlChar *start, *end; + xmlOutputBufferPtr buf; +@@ -856,6 +856,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + buf = ctxt->buf; + + root = cur; ++ parent = cur->parent; + while (1) { + switch (cur->type) { + case XML_DOCUMENT_NODE: +@@ -868,7 +869,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + break; + + case XML_DOCUMENT_FRAG_NODE: +- if (cur->children != NULL) { ++ /* Always validate cur->parent when descending. */ ++ if ((cur->parent == parent) && (cur->children != NULL)) { ++ parent = cur; + cur = cur->children; + continue; + } +@@ -887,7 +890,18 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + break; + + case XML_ELEMENT_NODE: +- if ((cur != root) && (ctxt->format == 1) && (xmlIndentTreeOutput)) ++ /* ++ * Some users like lxml are known to pass nodes with a corrupted ++ * tree structure. Fall back to a recursive call to handle this ++ * case. ++ */ ++ if ((cur->parent != parent) && (cur->children != NULL)) { ++ xmlNodeDumpOutputInternal(ctxt, cur); ++ break; ++ } ++ ++ if ((ctxt->level > 0) && (ctxt->format == 1) && ++ (xmlIndentTreeOutput)) + xmlOutputBufferWrite(buf, ctxt->indent_size * + (ctxt->level > ctxt->indent_nr ? + ctxt->indent_nr : ctxt->level), +@@ -942,6 +956,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + xmlOutputBufferWrite(buf, 1, ">"); + if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n"); + if (ctxt->level >= 0) ctxt->level++; ++ parent = cur; + cur = cur->children; + continue; + } +@@ -1058,13 +1073,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + break; + } + +- /* +- * The parent should never be NULL here but we want to handle +- * corrupted documents gracefully. +- */ +- if (cur->parent == NULL) +- return; +- cur = cur->parent; ++ cur = parent; ++ /* cur->parent was validated when descending. */ ++ parent = cur->parent; + + if (cur->type == XML_ELEMENT_NODE) { + if (ctxt->level > 0) ctxt->level--; +diff --git a/xmlsave.c b/xmlsave.c +--- a/xmlsave.c ++++ b/xmlsave.c +@@ -890,6 +890,13 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + break; + + case XML_ELEMENT_NODE: ++ if ((cur != root) && (ctxt->format == 1) && ++ (xmlIndentTreeOutput)) ++ xmlOutputBufferWrite(buf, ctxt->indent_size * ++ (ctxt->level > ctxt->indent_nr ? ++ ctxt->indent_nr : ctxt->level), ++ ctxt->indent); ++ + /* + * Some users like lxml are known to pass nodes with a corrupted + * tree structure. Fall back to a recursive call to handle this +@@ -900,13 +907,6 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + break; + } + +- if ((ctxt->level > 0) && (ctxt->format == 1) && +- (xmlIndentTreeOutput)) +- xmlOutputBufferWrite(buf, ctxt->indent_size * +- (ctxt->level > ctxt->indent_nr ? +- ctxt->indent_nr : ctxt->level), +- ctxt->indent); +- + xmlOutputBufferWrite(buf, 1, "<"); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); diff --git a/gnu/packages/patches/libxml2-terminating-newline.patch b/gnu/packages/patches/libxml2-terminating-newline.patch new file mode 100644 index 0000000000..3f5c88dd4e --- /dev/null +++ b/gnu/packages/patches/libxml2-terminating-newline.patch @@ -0,0 +1,33 @@ +Fix a regression in 2.9.12 where serializing empty HTML documents would +not add a terminating newline. + + https://gitlab.gnome.org/GNOME/libxml2/-/issues/266 + +Taken from upstream: + + https://gitlab.gnome.org/GNOME/libxml2/-/commit/92d9ab4c28842a09ca2b76d3ff2f933e01b6cd6f + +diff --git a/HTMLtree.c b/HTMLtree.c +--- a/HTMLtree.c ++++ b/HTMLtree.c +@@ -763,11 +763,15 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + if (((xmlDocPtr) cur)->intSubset != NULL) { + htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL); + } +- /* Always validate cur->parent when descending. */ +- if ((cur->parent == parent) && (cur->children != NULL)) { +- parent = cur; +- cur = cur->children; +- continue; ++ if (cur->children != NULL) { ++ /* Always validate cur->parent when descending. */ ++ if (cur->parent == parent) { ++ parent = cur; ++ cur = cur->children; ++ continue; ++ } ++ } else { ++ xmlOutputBufferWriteString(buf, "\n"); + } + break; + diff --git a/gnu/packages/patches/libxml2-xpath-recursion-limit.patch b/gnu/packages/patches/libxml2-xpath-recursion-limit.patch new file mode 100644 index 0000000000..051196c635 --- /dev/null +++ b/gnu/packages/patches/libxml2-xpath-recursion-limit.patch @@ -0,0 +1,20 @@ +Fix recursion accounting in XPath expressions: + + https://gitlab.gnome.org/GNOME/libxml2/-/issues/264 + +Taken from upstream: + + https://gitlab.gnome.org/GNOME/libxml2/-/commit/3e1aad4fe584747fd7d17cc7b2863a78e2d21a77 + +diff --git a/xpath.c b/xpath.c +--- a/xpath.c ++++ b/xpath.c +@@ -10983,7 +10983,7 @@ xmlXPathCompileExpr(xmlXPathParserContextPtr ctxt, int sort) { + } + + if (xpctxt != NULL) +- xpctxt->depth -= 1; ++ xpctxt->depth -= 10; + } + + /** diff --git a/gnu/packages/patches/python-libxml2-python39-compat.patch b/gnu/packages/patches/python-libxml2-python39-compat.patch deleted file mode 100644 index a707ea3434..0000000000 --- a/gnu/packages/patches/python-libxml2-python39-compat.patch +++ /dev/null @@ -1,94 +0,0 @@ -https://gitlab.gnome.org/GNOME/libxml2/-/commit/e4fb36841800038c289997432ca547c9bfef9db1.patch - -From e4fb36841800038c289997432ca547c9bfef9db1 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= -Date: Fri, 28 Feb 2020 12:48:14 +0100 -Subject: [PATCH] Parenthesize Py_Check() in ifs - -In C, if expressions should be parenthesized. -PyLong_Check, PyUnicode_Check etc. happened to expand to a parenthesized -expression before, but that's not API to rely on. - -Since Python 3.9.0a4 it needs to be parenthesized explicitly. - -Fixes https://gitlab.gnome.org/GNOME/libxml2/issues/149 ---- - python/libxml.c | 4 ++-- - python/types.c | 12 ++++++------ - 2 files changed, 8 insertions(+), 8 deletions(-) - -diff --git a/python/libxml.c b/python/libxml.c -index bc676c4e..81e709f3 100644 ---- a/python/libxml.c -+++ b/python/libxml.c -@@ -294,7 +294,7 @@ xmlPythonFileReadRaw (void * context, char * buffer, int len) { - lenread = PyBytes_Size(ret); - data = PyBytes_AsString(ret); - #ifdef PyUnicode_Check -- } else if PyUnicode_Check (ret) { -+ } else if (PyUnicode_Check (ret)) { - #if PY_VERSION_HEX >= 0x03030000 - Py_ssize_t size; - const char *tmp; -@@ -359,7 +359,7 @@ xmlPythonFileRead (void * context, char * buffer, int len) { - lenread = PyBytes_Size(ret); - data = PyBytes_AsString(ret); - #ifdef PyUnicode_Check -- } else if PyUnicode_Check (ret) { -+ } else if (PyUnicode_Check (ret)) { - #if PY_VERSION_HEX >= 0x03030000 - Py_ssize_t size; - const char *tmp; -diff --git a/python/types.c b/python/types.c -index c2bafeb1..ed284ec7 100644 ---- a/python/types.c -+++ b/python/types.c -@@ -602,16 +602,16 @@ libxml_xmlXPathObjectPtrConvert(PyObject *obj) - if (obj == NULL) { - return (NULL); - } -- if PyFloat_Check (obj) { -+ if (PyFloat_Check (obj)) { - ret = xmlXPathNewFloat((double) PyFloat_AS_DOUBLE(obj)); -- } else if PyLong_Check(obj) { -+ } else if (PyLong_Check(obj)) { - #ifdef PyLong_AS_LONG - ret = xmlXPathNewFloat((double) PyLong_AS_LONG(obj)); - #else - ret = xmlXPathNewFloat((double) PyInt_AS_LONG(obj)); - #endif - #ifdef PyBool_Check -- } else if PyBool_Check (obj) { -+ } else if (PyBool_Check (obj)) { - - if (obj == Py_True) { - ret = xmlXPathNewBoolean(1); -@@ -620,14 +620,14 @@ libxml_xmlXPathObjectPtrConvert(PyObject *obj) - ret = xmlXPathNewBoolean(0); - } - #endif -- } else if PyBytes_Check (obj) { -+ } else if (PyBytes_Check (obj)) { - xmlChar *str; - - str = xmlStrndup((const xmlChar *) PyBytes_AS_STRING(obj), - PyBytes_GET_SIZE(obj)); - ret = xmlXPathWrapString(str); - #ifdef PyUnicode_Check -- } else if PyUnicode_Check (obj) { -+ } else if (PyUnicode_Check (obj)) { - #if PY_VERSION_HEX >= 0x03030000 - xmlChar *str; - const char *tmp; -@@ -650,7 +650,7 @@ libxml_xmlXPathObjectPtrConvert(PyObject *obj) - ret = xmlXPathWrapString(str); - #endif - #endif -- } else if PyList_Check (obj) { -+ } else if (PyList_Check (obj)) { - int i; - PyObject *node; - xmlNodePtr cur; --- -GitLab - diff --git a/gnu/packages/xml.scm b/gnu/packages/xml.scm index f07f9b91b1..092d238626 100644 --- a/gnu/packages/xml.scm +++ b/gnu/packages/xml.scm @@ -184,14 +184,17 @@ hierarchical form with variable field lengths.") (define-public libxml2 (package (name "libxml2") - (version "2.9.10") + (version "2.9.12") (source (origin (method url-fetch) (uri (string-append "ftp://xmlsoft.org/libxml2/libxml2-" version ".tar.gz")) (sha256 (base32 - "07xynh8hcxb2yb1fs051xrgszjvj37wnxvxgsj10rzmqzy9y3zma")))) + "14hxwzmf5xqppx77z7i0ni9lpzg1a84dqpf8j8l1fvy570g6imn8")) + (patches (search-patches "libxml2-parent-pointers.patch" + "libxml2-terminating-newline.patch" + "libxml2-xpath-recursion-limit.patch")))) (build-system gnu-build-system) (outputs '("out" "static" "doc")) (arguments @@ -217,8 +220,7 @@ hierarchical form with variable field lengths.") ;; file such that Libtool does the right thing when both ;; the shared and static variants are available. (substitute* (string-append src "/lib/libxml2.la") - (("^old_library='libxml2.a'") "old_library=''")) - #t)))))) + (("^old_library='libxml2.a'") "old_library=''")))))))) (home-page "http://www.xmlsoft.org/") (synopsis "C parser for XML") (inputs `(("xz" ,xz))) @@ -374,8 +376,7 @@ It uses libxml2 to access the XML files.") (source (origin (inherit (package-source libxml2)) (patches - (append (search-patches "python-libxml2-python39-compat.patch" - "python-libxml2-utf8.patch") + (append (search-patches "python-libxml2-utf8.patch") (origin-patches (package-source libxml2)))))) (build-system python-build-system) (outputs '("out")) @@ -397,8 +398,7 @@ It uses libxml2 to access the XML files.") (format #f "ROOT = r'~a'" libxml2)) ;; For 'iconv.h'. (("/opt/include") - (string-append glibc "/include")))) - #t))))) + (string-append glibc "/include"))))))))) (inputs `(("libxml2" ,libxml2))) (synopsis "Python bindings for the libxml2 library"))) @@ -2603,7 +2603,14 @@ because lxml.etree already has its own implementation of XPath 1.0.") (method url-fetch) (uri (pypi-uri "lxml" version)) (sha256 - (base32 "0s14r1w2x9sdlcsw8mxiqgw4rz5zs5lpqpxrfyn4a1mkndqqbdrr")))) + (base32 "0s14r1w2x9sdlcsw8mxiqgw4rz5zs5lpqpxrfyn4a1mkndqqbdrr")) + ;; Adapt a test to libxml2 2.9.12, taken from this commit: + ;; https://github.com/lxml/lxml/commit/852ed1092bd80b6b9a51db24371047e + (modules '((guix build utils))) + (snippet + '(substitute* "src/lxml/tests/test_etree.py" + (("self\\.assertEqual\\(\\{'hha': None\\}, el\\.nsmap\\)") + "self.assertEqual({}, el.nsmap)"))))) (build-system python-build-system) (arguments `(#:phases (modify-phases %standard-phases -- cgit v1.2.3