#!/usr/bin/python3
# Copyright 2017, 2022 by Steve Litt
# Expat license: https://directory.fsf.org/wiki/License:Expat
"""Report whether a file is well-formed XML.

The file named on the command line is read, its own DOCTYPE line (if any)
is dropped, a DOCTYPE that declares the HTML named character entities is
put in front of it, and the result is handed to ElementTree's parser.  The
file on disk is never modified.

NOTE(review): the copy of this script that was reviewed had its line
breaks collapsed and every "<...>" span stripped out of its string
literals.  The entity declarations, the "<!DOCTYPE" fragments of the
user-facing messages, the end of abort_on_crazy_doctype() and the start of
eligible() are therefore reconstructions -- confirm them against a
pristine copy of the original.
"""

import sys
import re
import xml.etree.ElementTree as ET
from html.entities import name2codepoint

ERRCOUNT = 0

# XML predefines these entities; redeclaring them in the DTD is unnecessary.
_XML_PREDEFINED = frozenset(('amp', 'lt', 'gt', 'quot', 'apos'))

# A line is treated as the DOCTYPE line when, stripped, it starts with this.
# NOTE(review): the match is case-sensitive, mirroring the "<!DOCTYPE"
# wording of the error text -- confirm against the original.
_DOCTYPE_RE = re.compile(r'<!DOCTYPE')


def string_to_enable_special_chars() -> str:
    """Return a one-line DOCTYPE declaring the HTML named entities.

    The result has the form ``<!DOCTYPE html [<!ENTITY ...>...]>`` and
    contains no newline.  Entity names and code points come from
    ``html.entities.name2codepoint``; XML's predefined entities are
    skipped.

    NOTE(review): the original hand-written entity list was lost in the
    reviewed copy; this table-driven list is a stand-in for it.
    """
    declarations = [
        '<!ENTITY {} "&#{};">'.format(name, codepoint)
        for name, codepoint in sorted(name2codepoint.items())
        if name not in _XML_PREDEFINED
    ]
    return '<!DOCTYPE html [' + ''.join(declarations) + ']>'


def printit(thestring) -> None:
    """Print *thestring* followed by a newline on standard output."""
    # sys.stdout is looked up at call time so redirection is honoured.
    print(thestring, file=sys.stdout)


def disclaimer(fname) -> None:
    """Print the notice that *fname*'s DOCTYPE line was swapped in memory."""
    printit('\n=======================================')
    st = 'Disclaimer: This program replaced file {}\'s'
    printit(st.format(fname))
    # NOTE(review): the "<!DOCTYPE>" prefix of the next line is reconstructed.
    printit('<!DOCTYPE> line with a special html5')
    printit('DOCTYPE line while evaluating. The original')
    printit('file has not been changed. It\'s possible')
    printit('this program might be inaccurate if the')
    printit('original file had a non-html5 DOCTYPE line.')
    printit('=======================================\n')


def abort_on_crazy_doctype(thestring) -> None:
    """Explain why the DOCTYPE line *thestring* is unsupported, then exit.

    Raises:
        SystemExit: always, with exit status 1.
    """
    printit('')
    st = 'This file\'s doctype line is: {}'
    printit(st.format(thestring))
    st = 'The XML Checker program works '
    st += 'only on doctypes that:'
    printit(st)
    # NOTE(review): the wording of the next two lines is partly reconstructed.
    printit(' Begin with "<!DOCTYPE"')
    printit(' Contain exactly one ">" character')
    printit(' That ">" character is at the end')
    printit('This error might not indicate mal-formed XML,')
    printit('It might just mean that the XML Checker program')
    printit('can\'t be run on this file with its current')
    # NOTE(review): this line and the exit below are reconstructed.
    printit('<!DOCTYPE line.')
    sys.exit(1)


def eligible(thestring) -> bool:
    """Tell whether input line *thestring* should be kept for parsing.

    Returns:
        True for any line that is not a DOCTYPE line; False for a DOCTYPE
        line that contains exactly one ">" and ends with it (such a line
        is dropped, because a replacement DOCTYPE is prepended instead).

    Raises:
        SystemExit: via abort_on_crazy_doctype() for a DOCTYPE line of any
            other shape.
    """
    st = thestring.strip()
    if not _DOCTYPE_RE.match(st):
        return True
    if st.count('>') != 1 or st[-1] != '>':
        abort_on_crazy_doctype(thestring)
    return False


def file2string(prepender, fname) -> str:
    """Return *prepender*, a newline, then every eligible line of *fname*.

    Raises:
        OSError: if *fname* cannot be opened or read.
        SystemExit: if eligible() rejects the file's DOCTYPE line.
    """
    # The context manager closes the file even if eligible() aborts.
    with open(fname, "r") as inf:
        kept = [line for line in inf if eligible(line)]
    return prepender + "\n" + ''.join(kept)


def main(argv=None) -> int:
    """Check the file named in *argv* and return the process exit status.

    Args:
        argv: argument list including the program name; defaults to
            ``sys.argv``.

    Returns:
        0 if the file parsed, 1 on a parse error, 2 if no file was named.
    """
    global ERRCOUNT
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        prog = argv[0] if argv else 'xmlchecker'
        print('Usage: {} FILE'.format(prog), file=sys.stderr)
        return 2

    fname = argv[1]
    print('\nTesting for well formedness {} ...\n'.format(fname))
    htmlstring = file2string(string_to_enable_special_chars(), fname)
    try:
        ET.fromstring(htmlstring)
    except ET.ParseError as err:
        # Line numbers in the message refer to the text built by
        # file2string(), whose first line is the injected DOCTYPE.
        printit('ERROR: {} \n!!!!!!'.format(str(err)))
        ERRCOUNT += 1
        disclaimer(fname)
        return 1

    # Format exactly once: re-formatting the finished message would choke
    # on a file name that itself contains "{" or "}".
    st = 'CONGRATULATIONS: {} is well formed XML'.format(fname)
    printit(st + '!!!!!!')
    disclaimer(fname)
    return 0


if __name__ == '__main__':
    sys.exit(main())