Index: lxml-4.9.3/doc/FAQ.txt
===================================================================
--- lxml-4.9.3.orig/doc/FAQ.txt
+++ lxml-4.9.3/doc/FAQ.txt
@@ -1105,9 +1105,9 @@ useless for the data commonly sent throu
 can simply be disabled, which rules out several types of
 denial of service attacks at once.  This also involves an attack
 that reads local files from the server, as XML entities can be
-defined to expand into their content.   Consequently, version
-1.2 of the SOAP standard explicitly disallows entity references
-in the XML stream.
+defined to expand into the content of external resources.
+Consequently, version 1.2 of the SOAP standard explicitly
+disallows entity references in the XML stream.
 
 To disable entity expansion, use an XML parser that is configured
 with the option ``resolve_entities=False``.  Then, after (or
@@ -1115,7 +1115,11 @@ while) parsing the document, use ``root.
 recursively search for entity references.  If it contains any,
 reject the entire input document with a suitable error response.
 In lxml 3.x, you can also use the new DTD introspection API to
-apply your own restrictions on input documents.
+apply your own restrictions on input documents.  Since version 5.0,
+lxml disables the expansion of external entities (XXE) by default.
+If you really want to allow loading external files into XML documents
+using this functionality, you have to explicitly set
+``resolve_entities=True``.
 
 Another attack to consider is compression bombs.  If you allow
 compressed input into your web service, attackers can try to send
Index: lxml-4.9.3/src/lxml/includes/xmlparser.pxd
===================================================================
--- lxml-4.9.3.orig/src/lxml/includes/xmlparser.pxd
+++ lxml-4.9.3/src/lxml/includes/xmlparser.pxd
@@ -1,9 +1,9 @@
 from libc.string cimport const_char
 
 from lxml.includes.tree cimport (
-    xmlDoc, xmlNode, xmlDict, xmlDtd, xmlChar, const_xmlChar)
+    xmlDoc, xmlNode, xmlEntity, xmlDict, xmlDtd, xmlChar, const_xmlChar)
 from lxml.includes.tree cimport xmlInputReadCallback, xmlInputCloseCallback
-from lxml.includes.xmlerror cimport xmlError, xmlStructuredErrorFunc
+from lxml.includes.xmlerror cimport xmlError, xmlStructuredErrorFunc, xmlErrorLevel
 
 
 cdef extern from "libxml/parser.h":
@@ -47,11 +47,15 @@ cdef extern from "libxml/parser.h":
 
     ctypedef void (*referenceSAXFunc)(void * ctx, const_xmlChar* name)
 
+    ctypedef xmlEntity* (*getEntitySAXFunc)(void* ctx, const_xmlChar* name) noexcept
+
+
     cdef int XML_SAX2_MAGIC
 
 cdef extern from "libxml/tree.h":
     ctypedef struct xmlParserInput:
         int line
+        int col
         int length
         const_xmlChar* base
         const_xmlChar* cur
@@ -76,6 +80,7 @@ cdef extern from "libxml/tree.h":
         charactersSAXFunc               characters
         cdataBlockSAXFunc               cdataBlock
         referenceSAXFunc                reference
+        getEntitySAXFunc                getEntity
         commentSAXFunc                  comment
         processingInstructionSAXFunc	processingInstruction
         startDocumentSAXFunc            startDocument
@@ -150,6 +155,8 @@ cdef extern from "libxml/parser.h":
         int inSubset
         int charset
         xmlParserInput* input
+        int inputNr
+        xmlParserInput** inputTab
 
     ctypedef enum xmlParserOption:
         XML_PARSE_RECOVER = 1 # recover on errors
@@ -212,6 +219,12 @@ cdef extern from "libxml/parser.h":
                                    char* filename, const_char* encoding,
                                    int options) nogil
 
+    cdef void xmlErrParser(xmlParserCtxt* ctxt, xmlNode* node,
+                           int domain, int code, xmlErrorLevel level,
+                           const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
+                           int int1, const char *msg, ...)
+
+
 # iterparse:
 
     cdef xmlParserCtxt* xmlCreatePushParserCtxt(xmlSAXHandler* sax,
@@ -233,6 +246,8 @@ cdef extern from "libxml/parser.h":
     cdef xmlExternalEntityLoader xmlGetExternalEntityLoader() nogil
     cdef void xmlSetExternalEntityLoader(xmlExternalEntityLoader f) nogil
 
+    cdef xmlEntity* xmlSAX2GetEntity(void* ctxt, const_xmlChar* name) noexcept
+
 # DTDs:
 
     cdef xmlDtd* xmlParseDTD(const_xmlChar* ExternalID, const_xmlChar* SystemID) nogil
Index: lxml-4.9.3/src/lxml/parser.pxi
===================================================================
--- lxml-4.9.3.orig/src/lxml/parser.pxi
+++ lxml-4.9.3/src/lxml/parser.pxi
@@ -794,6 +794,7 @@ cdef inline int _fixHtmlDictNodeNames(tr
         c_attr = c_attr.next
     return 0
 
+
 @cython.internal
 cdef class _BaseParser:
     cdef ElementClassLookup _class_lookup
@@ -806,6 +807,7 @@ cdef class _BaseParser:
     cdef bint _remove_pis
     cdef bint _strip_cdata
     cdef bint _collect_ids
+    cdef bint _resolve_external_entities
     cdef XMLSchema _schema
     cdef bytes _filename
     cdef readonly object target
@@ -814,7 +816,7 @@ cdef class _BaseParser:
 
     def __init__(self, int parse_options, bint for_html, XMLSchema schema,
                  remove_comments, remove_pis, strip_cdata, collect_ids,
-                 target, encoding):
+                 target, encoding, bint resolve_external_entities=True):
         cdef tree.xmlCharEncodingHandler* enchandler
         cdef int c_encoding
         if not isinstance(self, (XMLParser, HTMLParser)):
@@ -827,6 +829,7 @@ cdef class _BaseParser:
         self._remove_pis = remove_pis
         self._strip_cdata = strip_cdata
         self._collect_ids = collect_ids
+        self._resolve_external_entities = resolve_external_entities
         self._schema = schema
 
         self._resolvers = _ResolverRegistry()
@@ -906,6 +909,8 @@ cdef class _BaseParser:
         if self._strip_cdata:
             # hard switch-off for CDATA nodes => makes them plain text
             pctxt.sax.cdataBlock = NULL
+        if not self._resolve_external_entities:
+            pctxt.sax.getEntity = _getInternalEntityOnly
 
     cdef int _registerHtmlErrorHandler(self, xmlparser.xmlParserCtxt* c_ctxt) except -1:
         cdef xmlparser.xmlSAXHandler* sax = c_ctxt.sax
@@ -1207,6 +1212,58 @@ cdef class _BaseParser:
             context.cleanup()
 
 
+cdef tree.xmlEntity* _getInternalEntityOnly(void* ctxt, const_xmlChar* name) noexcept:
+    """SAX ``getEntity`` callback used when external entity loading is disabled.
+    Returns non-external entities as looked up; for external ones, reports a fatal error and returns NULL.
+    """
+    cdef tree.xmlEntity* entity = xmlparser.xmlSAX2GetEntity(ctxt, name)
+    if not entity:
+        return NULL
+    if entity.etype not in (
+            tree.xmlEntityType.XML_EXTERNAL_GENERAL_PARSED_ENTITY,
+            tree.xmlEntityType.XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
+            tree.xmlEntityType.XML_EXTERNAL_PARAMETER_ENTITY):
+        return entity
+
+    # Reject all external entities and fail the parsing instead. There is currently
+    # no way in libxml2 to just prevent the entity resolution in this case.
+    cdef xmlerror.xmlError c_error
+    cdef xmlerror.xmlStructuredErrorFunc err_func
+    cdef xmlparser.xmlParserInput* parser_input
+    cdef void* err_context
+
+    c_ctxt = <xmlparser.xmlParserCtxt *> ctxt
+    err_func = xmlerror.xmlStructuredError
+    if err_func:
+        parser_input = c_ctxt.input
+        # Copied from xmlVErrParser() in libxml2: get current input from stack.
+        if parser_input and parser_input.filename is NULL and c_ctxt.inputNr > 1:
+            parser_input = c_ctxt.inputTab[c_ctxt.inputNr - 2]
+
+        c_error = xmlerror.xmlError(
+            domain=xmlerror.xmlErrorDomain.XML_FROM_PARSER,
+            code=xmlerror.xmlParserErrors.XML_ERR_EXT_ENTITY_STANDALONE,
+            level=xmlerror.xmlErrorLevel.XML_ERR_FATAL,
+            message=b"External entity resolution is disabled for security reasons "
+                    b"when resolving '&%s;'. Use 'XMLParser(resolve_entities=True)' "
+                    b"if you consider it safe to enable it.",
+            file=parser_input.filename if parser_input else NULL,  # same NULL guard as 'line' and 'int2'
+            node=entity,
+            str1=<char*> name,  # name of the rejected entity
+            str2=NULL,
+            str3=NULL,
+            line=parser_input.line if parser_input else 0,
+            int1=0,
+            int2=parser_input.col if parser_input else 0,
+        )
+        err_context = xmlerror.xmlStructuredErrorContext
+        err_func(err_context, &c_error)
+
+    c_ctxt.wellFormed = 0
+    # The entity was looked up and does not need to be freed.
+    return NULL
+
+
 cdef void _initSaxDocument(void* ctxt) with gil:
     xmlparser.xmlSAX2StartDocument(ctxt)
     c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
@@ -1508,12 +1565,15 @@ cdef class XMLParser(_FeedParser):
     - strip_cdata        - replace CDATA sections by normal text content (default: True)
     - compact            - save memory for short text content (default: True)
     - collect_ids        - use a hash table of XML IDs for fast access (default: True, always True with DTD validation)
-    - resolve_entities   - replace entities by their text value (default: True)
     - huge_tree          - disable security restrictions and support very deep trees
                            and very long text content (only affects libxml2 2.7+)
 
     Other keyword arguments:
 
+    - resolve_entities - replace entities by their text value: False for keeping the
+          entity references, True for resolving them, and 'internal' for resolving
+          internal definitions only (no external file/URL access).
+          The default used to be True and was changed to 'internal' in lxml 5.0.
     - encoding - override the document encoding
     - target   - a parser target object that will receive the parse events
     - schema   - an XMLSchema to validate against
@@ -1525,10 +1585,11 @@ cdef class XMLParser(_FeedParser):
     def __init__(self, *, encoding=None, attribute_defaults=False,
                  dtd_validation=False, load_dtd=False, no_network=True,
                  ns_clean=False, recover=False, XMLSchema schema=None,
-                 huge_tree=False, remove_blank_text=False, resolve_entities=True,
+                 huge_tree=False, remove_blank_text=False, resolve_entities='internal',
                  remove_comments=False, remove_pis=False, strip_cdata=True,
                  collect_ids=True, target=None, compact=True):
         cdef int parse_options
+        cdef bint resolve_external = True
         parse_options = _XML_DEFAULT_PARSE_OPTIONS
         if load_dtd:
             parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD
@@ -1553,12 +1614,14 @@ cdef class XMLParser(_FeedParser):
             parse_options = parse_options ^ xmlparser.XML_PARSE_COMPACT
         if not resolve_entities:
             parse_options = parse_options ^ xmlparser.XML_PARSE_NOENT
+        elif resolve_entities == 'internal':
+            resolve_external = False
         if not strip_cdata:
             parse_options = parse_options ^ xmlparser.XML_PARSE_NOCDATA
 
         _BaseParser.__init__(self, parse_options, 0, schema,
                              remove_comments, remove_pis, strip_cdata,
-                             collect_ids, target, encoding)
+                             collect_ids, target, encoding, resolve_external)
 
 
 cdef class XMLPullParser(XMLParser):
Index: lxml-4.9.3/src/lxml/tests/test_etree.py
===================================================================
--- lxml-4.9.3.orig/src/lxml/tests/test_etree.py
+++ lxml-4.9.3/src/lxml/tests/test_etree.py
@@ -12,11 +12,14 @@ from __future__ import absolute_import
 from collections import OrderedDict
 import os.path
 import unittest
+import contextlib
 import copy
 import sys
 import re
 import gc
 import operator
+import shutil
+import tempfile
 import textwrap
 import tempfile
 import zlib
@@ -1691,6 +1694,84 @@ class ETreeOnlyTestCase(HelperTestCase):
         self.assertEqual(_bytes('<doc>&myentity;</doc>'),
                           tostring(root))
 
+    @contextlib.contextmanager
+    def _xml_test_file(self, name, content=b'<evil>XML</evil>'):  # context manager yielding a temp file path
+        temp_dir = tempfile.mkdtemp()  # fresh directory so that 'name' can be used verbatim
+        try:
+            xml_file = os.path.join(temp_dir, name)
+            with open(xml_file, 'wb') as tmpfile:
+                tmpfile.write(content)
+            yield xml_file  # the file is closed and fully written before the caller sees its path
+        finally:
+            shutil.rmtree(temp_dir)  # removes directory and file, also when the caller's block raised
+
+    def test_entity_parse_external(self):  # resolve_entities=True expands an external entity
+        fromstring = self.etree.fromstring
+        tostring = self.etree.tostring
+        parser = self.etree.XMLParser(resolve_entities=True)  # explicit opt-in
+
+        with self._xml_test_file("entity.xml") as entity_file:
+            xml = '''
+            <!DOCTYPE doc [
+                <!ENTITY my_external_entity SYSTEM "%s">
+            ]>
+            <doc>&my_external_entity;</doc>
+            ''' % path2url(entity_file)  # presumably a URL for the temp file; path2url is defined elsewhere
+            root = fromstring(xml, parser)  # parsed inside the ``with`` block, while the file still exists
+
+        self.assertEqual(_bytes('<doc><evil>XML</evil></doc>'),  # reference replaced by the file's XML
+                          tostring(root))
+        self.assertEqual(root.tag, 'doc')
+        self.assertEqual(root[0].tag, 'evil')
+        self.assertEqual(root[0].text, 'XML')
+        self.assertEqual(root[0].tail, None)
+
+    def test_entity_parse_external_no_resolve(self):  # resolve_entities=False keeps the reference
+        fromstring = self.etree.fromstring
+        parser = self.etree.XMLParser(resolve_entities=False)
+        Entity = self.etree.Entity
+
+        with self._xml_test_file("entity.xml") as entity_file:
+            xml = '''
+            <!DOCTYPE doc [
+                <!ENTITY my_external_entity SYSTEM "%s">
+            ]>
+            <doc>&my_external_entity;</doc>
+            ''' % path2url(entity_file)  # presumably a URL for the temp file; path2url is defined elsewhere
+            root = fromstring(xml, parser)
+
+        self.assertEqual(root[0].tag, Entity)  # the child is expected to be an entity node
+        self.assertEqual(root[0].text, "&my_external_entity;")  # reference kept, not the file's content
+
+    def test_entity_parse_no_external_default(self):  # without a parser argument, external entities must fail
+        fromstring = self.etree.fromstring
+
+        with self._xml_test_file("entity.xml") as entity_file:
+            xml = '''
+            <!DOCTYPE doc [
+                <!ENTITY my_failing_external_entity SYSTEM "%s">
+            ]>
+            <doc>&my_failing_external_entity;</doc>
+            ''' % path2url(entity_file)  # presumably a URL for the temp file; path2url is defined elsewhere
+
+            try:
+                fromstring(xml)  # no parser passed: exercises the default settings
+            except self.etree.XMLSyntaxError as exc:
+                exception = exc  # keep a reference; 'exc' is unbound after the except block on Python 3
+            else:
+                self.fail("XMLSyntaxError was not raised")
+
+        self.assertIn("my_failing_external_entity", str(exception))
+        self.assertTrue(exception.error_log)
+        # Depending on the libxml2 version, we get different errors here,
+        # not necessarily the one that lxml produced. But it should fail either way.
+        for error in exception.error_log:
+            if "my_failing_external_entity" in error.message:
+                self.assertEqual(5, error.line)  # the reference sits on line 5 of the literal above
+                break
+        else:
+            self.fail("entity error not found in parser error log")
+
     def test_entity_restructure(self):
         xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp "&#160;"> ]>
             <root>
