from lxml import etree

# Path to the MediaWiki XML dump to read and the text file to write
xml_file = 'dumpfile.xml'
output_file = 'outputfile.txt'

# Stream the dump one <page> at a time so the whole document never has to be
# held in memory. The input is opened in binary mode so the XML parser can
# honour the encoding declared in the document itself.
with open(xml_file, 'rb') as f, open(output_file, 'w', encoding='utf-8') as out:
    # Match <page> in any namespace. The export schema version is part of the
    # namespace URI (export-0.10, export-0.11, ...); pinning a single version
    # makes the script silently write an empty file for every other dump.
    context = etree.iterparse(f, events=('end',), tag='{*}page')

    for _event, elem in context:
        # Reuse whatever namespace this <page> actually carries when looking
        # up its children, instead of assuming a fixed schema version.
        namespace = etree.QName(elem).namespace
        ns = f'{{{namespace}}}' if namespace else ''

        # <title> is looked up as a direct child; <text> is searched at any
        # depth below the page and the first match is used.
        title = elem.findtext(f'{ns}title')
        text = elem.findtext(f'.//{ns}text')

        # Pages whose text is missing or empty are skipped.
        if text:
            out.write(f'== {title} ==\n{text}\n\n')

        # Free the processed page, then drop the already-handled preceding
        # siblings that the parent still references; otherwise the tree keeps
        # growing as parsing proceeds.
        elem.clear()
        while elem.getprevious() is not None:
            del elem.getparent()[0]
