/* Kweelt - a framework to query XML documents * Copyright (C) 2000, Arnaud Sahuguet (Arnaud.Sahuguet@polytechnique.org) * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ package xacute.util; import java.io.PrintWriter; import java.util.StringTokenizer; import org.xml.sax.AttributeList; import org.xml.sax.DocumentHandler; import org.xml.sax.Locator; import org.xml.sax.InputSource; import org.apache.xerces.parsers.SAXParser; import java.io.FileReader; import java.util.Iterator; import java.util.TreeMap; import java.security.MessageDigest; /** * This class computes the hash of an XML document. * Two equivalent documents should have the same hash. * issues: * - blank spaces * - order of attributes * We deliberately ignore DTD -- default values, etc. 
-- **/ public class HashHandler implements DocumentHandler { MessageDigest md; TreeMap sorted; public HashHandler() throws Exception { this.md = MessageDigest.getInstance("MD5"); this.sorted = new TreeMap(); } public String toString() { byte[] hash = md.digest(); StringBuffer buf = new StringBuffer(hash.length * 2); for (int i = 0; i < hash.length; i++) { if (((int) hash[i] & 0xff) < 0x10) buf.append("0"); buf.append(Long.toString((int) hash[i] & 0xff, 16)); } return buf.toString(); } public void characters(char[] ch, int start, int length) { String data = new String(ch, start, length).replace('\n',' ').replace('\r',' ').replace('\t',' ').trim(); if (data.length() > 0) md.update( data.getBytes()); return; } public void endDocument() { return; } public void endElement(String name) { return; } public void ignorableWhitespace(char[] ch, int start, int length) { return; } public void processingInstruction(String target, String data) { return; } public void setDocumentLocator(Locator locator) { return; } public void startDocument() { return; } public void startElement(String name, AttributeList atts) { md.update(name.getBytes()); if (atts.getLength() > 0) { sorted.clear(); for (int i=0; i < atts.getLength(); i++) { String attName = atts.getName(i); String attValue = atts.getValue(i); sorted.put(attName, attValue); } Iterator keyIter = sorted.keySet().iterator(); for( ; keyIter.hasNext(); ) { String attName = (String) keyIter.next(); md.update(attName.getBytes()); String attValue = (String) sorted.get(attName); md.update(attValue.getBytes()); } } return; } public static void main(String args[]) throws Exception { SAXParser parser = new SAXParser(); HashHandler hasher = new HashHandler(); parser.setDocumentHandler(hasher); parser.parse( new InputSource( new FileReader(args[0]) ) ); System.out.println( hasher.toString()); } }