summaryrefslogtreecommitdiff
path: root/gnu/packages/patches/libxml2-parent-pointers.patch
diff options
context:
space:
mode:
authorMarius Bakke <marius@gnu.org>2021-07-19 14:11:12 +0200
committerMarius Bakke <marius@gnu.org>2021-07-23 17:26:46 +0200
commitabe5ffbbfec5fc7f1cb2e1ebe93a7c17df4b176c (patch)
tree7b53201cc4054bb3f3dade9151dd4dca9a426e61 /gnu/packages/patches/libxml2-parent-pointers.patch
parent11fb5f45a847f44833d56c7a733c411edc2d3a37 (diff)
downloadguix-patches-abe5ffbbfec5fc7f1cb2e1ebe93a7c17df4b176c.tar
guix-patches-abe5ffbbfec5fc7f1cb2e1ebe93a7c17df4b176c.tar.gz
gnu: libxml2: Update to 2.9.12.
* gnu/packages/patches/libxml2-parent-pointers.patch, gnu/packages/patches/libxml2-terminating-newline.patch, gnu/packages/patches/libxml2-xpath-recursion-limit.patch: New files. * gnu/packages/patches/python-libxml2-python39-compat.patch: Delete file. * gnu/local.mk (dist_patch_DATA): Adjust accordingly. * gnu/packages/xml.scm (libxml2): Update to 2.9.12. (python-lxml)[source](modules, snippet): New fields.
Diffstat (limited to 'gnu/packages/patches/libxml2-parent-pointers.patch')
-rw-r--r--gnu/packages/patches/libxml2-parent-pointers.patch228
1 files changed, 228 insertions, 0 deletions
diff --git a/gnu/packages/patches/libxml2-parent-pointers.patch b/gnu/packages/patches/libxml2-parent-pointers.patch
new file mode 100644
index 0000000000..1f0615c512
--- /dev/null
+++ b/gnu/packages/patches/libxml2-parent-pointers.patch
@@ -0,0 +1,228 @@
+Fix a regression in 2.9.12 where some corrupt XML structures were handled
+incorrectly:
+
+ https://gitlab.gnome.org/GNOME/libxml2/-/issues/255
+
+This is an amalgamation of these upstream commits:
+
+ https://gitlab.gnome.org/GNOME/libxml2/-/commit/85b1792e37b131e7a51af98a37f92472e8de5f3f
+ https://gitlab.gnome.org/GNOME/libxml2/-/commit/13ad8736d294536da4cbcd70a96b0a2fbf47070c
+
+diff --git a/HTMLtree.c b/HTMLtree.c
+--- a/HTMLtree.c
++++ b/HTMLtree.c
+@@ -744,7 +744,7 @@ void
+ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+ xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
+ int format) {
+- xmlNodePtr root;
++ xmlNodePtr root, parent;
+ xmlAttrPtr attr;
+ const htmlElemDesc * info;
+
+@@ -755,6 +755,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+ }
+
+ root = cur;
++ parent = cur->parent;
+ while (1) {
+ switch (cur->type) {
+ case XML_HTML_DOCUMENT_NODE:
+@@ -762,13 +763,25 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+ if (((xmlDocPtr) cur)->intSubset != NULL) {
+ htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
+ }
+- if (cur->children != NULL) {
++ /* Always validate cur->parent when descending. */
++ if ((cur->parent == parent) && (cur->children != NULL)) {
++ parent = cur;
+ cur = cur->children;
+ continue;
+ }
+ break;
+
+ case XML_ELEMENT_NODE:
++ /*
++ * Some users like lxml are known to pass nodes with a corrupted
++ * tree structure. Fall back to a recursive call to handle this
++ * case.
++ */
++ if ((cur->parent != parent) && (cur->children != NULL)) {
++ htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
++ break;
++ }
++
+ /*
+ * Get specific HTML info for that node.
+ */
+@@ -817,6 +830,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+ (cur->name != NULL) &&
+ (cur->name[0] != 'p')) /* p, pre, param */
+ xmlOutputBufferWriteString(buf, "\n");
++ parent = cur;
+ cur = cur->children;
+ continue;
+ }
+@@ -825,9 +839,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+ (info != NULL) && (!info->isinline)) {
+ if ((cur->next->type != HTML_TEXT_NODE) &&
+ (cur->next->type != HTML_ENTITY_REF_NODE) &&
+- (cur->parent != NULL) &&
+- (cur->parent->name != NULL) &&
+- (cur->parent->name[0] != 'p')) /* p, pre, param */
++ (parent != NULL) &&
++ (parent->name != NULL) &&
++ (parent->name[0] != 'p')) /* p, pre, param */
+ xmlOutputBufferWriteString(buf, "\n");
+ }
+
+@@ -842,9 +856,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+ break;
+ if (((cur->name == (const xmlChar *)xmlStringText) ||
+ (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
+- ((cur->parent == NULL) ||
+- ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
+- (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
++ ((parent == NULL) ||
++ ((xmlStrcasecmp(parent->name, BAD_CAST "script")) &&
++ (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
+ xmlChar *buffer;
+
+ buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
+@@ -902,13 +916,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+ break;
+ }
+
+- /*
+- * The parent should never be NULL here but we want to handle
+- * corrupted documents gracefully.
+- */
+- if (cur->parent == NULL)
+- return;
+- cur = cur->parent;
++ cur = parent;
++ /* cur->parent was validated when descending. */
++ parent = cur->parent;
+
+ if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
+ (cur->type == XML_DOCUMENT_NODE)) {
+@@ -939,9 +949,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+ (cur->next != NULL)) {
+ if ((cur->next->type != HTML_TEXT_NODE) &&
+ (cur->next->type != HTML_ENTITY_REF_NODE) &&
+- (cur->parent != NULL) &&
+- (cur->parent->name != NULL) &&
+- (cur->parent->name[0] != 'p')) /* p, pre, param */
++ (parent != NULL) &&
++ (parent->name != NULL) &&
++ (parent->name[0] != 'p')) /* p, pre, param */
+ xmlOutputBufferWriteString(buf, "\n");
+ }
+ }
+diff --git a/xmlsave.c b/xmlsave.c
+--- a/xmlsave.c
++++ b/xmlsave.c
+@@ -847,7 +847,7 @@ htmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+ static void
+ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+ int format = ctxt->format;
+- xmlNodePtr tmp, root, unformattedNode = NULL;
++ xmlNodePtr tmp, root, unformattedNode = NULL, parent;
+ xmlAttrPtr attr;
+ xmlChar *start, *end;
+ xmlOutputBufferPtr buf;
+@@ -856,6 +856,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+ buf = ctxt->buf;
+
+ root = cur;
++ parent = cur->parent;
+ while (1) {
+ switch (cur->type) {
+ case XML_DOCUMENT_NODE:
+@@ -868,7 +869,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+ break;
+
+ case XML_DOCUMENT_FRAG_NODE:
+- if (cur->children != NULL) {
++ /* Always validate cur->parent when descending. */
++ if ((cur->parent == parent) && (cur->children != NULL)) {
++ parent = cur;
+ cur = cur->children;
+ continue;
+ }
+@@ -887,7 +890,18 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+ break;
+
+ case XML_ELEMENT_NODE:
+- if ((cur != root) && (ctxt->format == 1) && (xmlIndentTreeOutput))
++ /*
++ * Some users like lxml are known to pass nodes with a corrupted
++ * tree structure. Fall back to a recursive call to handle this
++ * case.
++ */
++ if ((cur->parent != parent) && (cur->children != NULL)) {
++ xmlNodeDumpOutputInternal(ctxt, cur);
++ break;
++ }
++
++ if ((ctxt->level > 0) && (ctxt->format == 1) &&
++ (xmlIndentTreeOutput))
+ xmlOutputBufferWrite(buf, ctxt->indent_size *
+ (ctxt->level > ctxt->indent_nr ?
+ ctxt->indent_nr : ctxt->level),
+@@ -942,6 +956,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+ xmlOutputBufferWrite(buf, 1, ">");
+ if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n");
+ if (ctxt->level >= 0) ctxt->level++;
++ parent = cur;
+ cur = cur->children;
+ continue;
+ }
+@@ -1058,13 +1073,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+ break;
+ }
+
+- /*
+- * The parent should never be NULL here but we want to handle
+- * corrupted documents gracefully.
+- */
+- if (cur->parent == NULL)
+- return;
+- cur = cur->parent;
++ cur = parent;
++ /* cur->parent was validated when descending. */
++ parent = cur->parent;
+
+ if (cur->type == XML_ELEMENT_NODE) {
+ if (ctxt->level > 0) ctxt->level--;
+diff --git a/xmlsave.c b/xmlsave.c
+--- a/xmlsave.c
++++ b/xmlsave.c
+@@ -890,6 +890,13 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+ break;
+
+ case XML_ELEMENT_NODE:
++ if ((cur != root) && (ctxt->format == 1) &&
++ (xmlIndentTreeOutput))
++ xmlOutputBufferWrite(buf, ctxt->indent_size *
++ (ctxt->level > ctxt->indent_nr ?
++ ctxt->indent_nr : ctxt->level),
++ ctxt->indent);
++
+ /*
+ * Some users like lxml are known to pass nodes with a corrupted
+ * tree structure. Fall back to a recursive call to handle this
+@@ -900,13 +907,6 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+ break;
+ }
+
+- if ((ctxt->level > 0) && (ctxt->format == 1) &&
+- (xmlIndentTreeOutput))
+- xmlOutputBufferWrite(buf, ctxt->indent_size *
+- (ctxt->level > ctxt->indent_nr ?
+- ctxt->indent_nr : ctxt->level),
+- ctxt->indent);
+-
+ xmlOutputBufferWrite(buf, 1, "<");
+ if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
+ xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);