1   
2   
3   
4   
5   
6   
7   
8   package org.dom4j.io;
9   
10  import java.io.File;
11  import java.io.InputStream;
12  import java.io.Reader;
13  import java.net.URL;
14  import java.util.HashMap;
15  import java.util.Iterator;
16  import java.util.Map;
17  
18  import org.dom4j.Document;
19  import org.dom4j.DocumentException;
20  import org.dom4j.DocumentFactory;
21  
22  import org.xml.sax.InputSource;
23  import org.xml.sax.SAXException;
24  import org.xml.sax.XMLReader;
25  
26  /***
27   * The SAXModifier reads, modifies and writes XML documents using SAX.
28   * 
29   * <p>
30   * Registered {@link ElementModifier}objects can provide modifications to (part
31   * of) the xml tree, while the document is still being processed. This makes it
32   * possible to change large xml documents without having them in memory.
33   * </p>
34   * 
35   * <p>
36   * The modified document is written when the {@link XMLWriter}is specified.
37   * </p>
38   * 
39   * @author Wonne Keysers (Realsoftware.be)
40   * 
41   * @see org.dom4j.io.SAXReader
42   * @see org.dom4j.io.XMLWriter
43   */
44  public class SAXModifier {
45      private XMLWriter xmlWriter;
46  
47      private XMLReader xmlReader;
48  
49      private boolean pruneElements;
50  
51      private SAXModifyReader modifyReader;
52  
53      private HashMap modifiers = new HashMap();
54  
55      /***
56       * Creates a new modifier. <br>
57       * The XMLReader to parse the source will be created via the
58       * org.xml.sax.driver system property or JAXP if the system property is not
59       * set.
60       */
61      public SAXModifier() {
62      }
63  
64      /***
65       * Creates a new modifier. <br>
66       * The XMLReader to parse the source will be created via the
67       * org.xml.sax.driver system property or JAXP if the system property is not
68       * set.
69       * 
70       * @param pruneElements
71       *            Set to true when the modified document must NOT be kept in
72       *            memory.
73       */
74      public SAXModifier(boolean pruneElements) {
75          this.pruneElements = pruneElements;
76      }
77  
78      /***
79       * Creates a new modifier that will the specified {@link
80       * org.xml.sax.XMLReader} to parse the source.
81       * 
82       * @param xmlReader
83       *            The XMLReader to use
84       */
85      public SAXModifier(XMLReader xmlReader) {
86          this.xmlReader = xmlReader;
87      }
88  
89      /***
90       * Creates a new modifier that will the specified {@link
91       * org.xml.sax.XMLReader} to parse the source.
92       * 
93       * @param xmlReader
94       *            The XMLReader to use
95       * @param pruneElements
96       *            Set to true when the modified document must NOT be kept in
97       *            memory.
98       */
99      public SAXModifier(XMLReader xmlReader, boolean pruneElements) {
100         this.xmlReader = xmlReader;
101     }
102 
103     /***
104      * Reads a Document from the given {@link java.io.File}and writes it to the
105      * specified {@link XMLWriter}using SAX. Registered {@linkElementModifier}
106      * objects are invoked on the fly.
107      * 
108      * @param source
109      *            is the <code>File</code> to read from.
110      * 
111      * @return the newly created Document instance
112      * 
113      * @throws DocumentException
114      *             DocumentException org.dom4j.DocumentException} if an error
115      *             occurs during parsing.
116      */
117     public Document modify(File source) throws DocumentException {
118         try {
119             return installModifyReader().read(source);
120         } catch (SAXModifyException ex) {
121             Throwable cause = ex.getCause();
122             throw new DocumentException(cause.getMessage(), cause);
123         }
124     }
125 
126     /***
127      * Reads a Document from the given {@link org.xml.sax.InputSource}and
128      * writes it to the specified {@link XMLWriter}using SAX. Registered
129      * {@link ElementModifier}objects are invoked on the fly.
130      * 
131      * @param source
132      *            is the <code>org.xml.sax.InputSource</code> to read from.
133      * 
134      * @return the newly created Document instance
135      * 
136      * @throws DocumentException
137      *             DocumentException org.dom4j.DocumentException} if an error
138      *             occurs during parsing.
139      */
140     public Document modify(InputSource source) throws DocumentException {
141         try {
142             return installModifyReader().read(source);
143         } catch (SAXModifyException ex) {
144             Throwable cause = ex.getCause();
145             throw new DocumentException(cause.getMessage(), cause);
146         }
147     }
148 
149     /***
150      * Reads a Document from the given {@link java.io.InputStream}and writes it
151      * to the specified {@link XMLWriter}using SAX. Registered {@link
152      * ElementModifier} objects are invoked on the fly.
153      * 
154      * @param source
155      *            is the <code>java.io.InputStream</code> to read from.
156      * 
157      * @return the newly created Document instance
158      * 
159      * @throws DocumentException
160      *             DocumentException org.dom4j.DocumentException} if an error
161      *             occurs during parsing.
162      */
163     public Document modify(InputStream source) throws DocumentException {
164         try {
165             return installModifyReader().read(source);
166         } catch (SAXModifyException ex) {
167             Throwable cause = ex.getCause();
168             throw new DocumentException(cause.getMessage(), cause);
169         }
170     }
171 
172     /***
173      * Reads a Document from the given {@link java.io.InputStream}and writes it
174      * to the specified {@link XMLWriter}using SAX. Registered {@link
175      * ElementModifier} objects are invoked on the fly.
176      * 
177      * @param source
178      *            is the <code>java.io.InputStream</code> to read from.
179      * @param systemId
180      *            DOCUMENT ME!
181      * 
182      * @return the newly created Document instance
183      * 
184      * @throws DocumentException
185      *             DocumentException org.dom4j.DocumentException} if an error
186      *             occurs during parsing.
187      */
188     public Document modify(InputStream source, String systemId)
189             throws DocumentException {
190         try {
191             return installModifyReader().read(source);
192         } catch (SAXModifyException ex) {
193             Throwable cause = ex.getCause();
194             throw new DocumentException(cause.getMessage(), cause);
195         }
196     }
197 
198     /***
199      * Reads a Document from the given {@link java.io.Reader}and writes it to
200      * the specified {@link XMLWriter}using SAX. Registered {@link
201      * ElementModifier} objects are invoked on the fly.
202      * 
203      * @param source
204      *            is the <code>java.io.Reader</code> to read from.
205      * 
206      * @return the newly created Document instance
207      * 
208      * @throws DocumentException
209      *             DocumentException org.dom4j.DocumentException} if an error
210      *             occurs during parsing.
211      */
212     public Document modify(Reader source) throws DocumentException {
213         try {
214             return installModifyReader().read(source);
215         } catch (SAXModifyException ex) {
216             Throwable cause = ex.getCause();
217             throw new DocumentException(cause.getMessage(), cause);
218         }
219     }
220 
221     /***
222      * Reads a Document from the given {@link java.io.Reader}and writes it to
223      * the specified {@link XMLWriter}using SAX. Registered {@link
224      * ElementModifier} objects are invoked on the fly.
225      * 
226      * @param source
227      *            is the <code>java.io.Reader</code> to read from.
228      * @param systemId
229      *            DOCUMENT ME!
230      * 
231      * @return the newly created Document instance
232      * 
233      * @throws DocumentException
234      *             DocumentException org.dom4j.DocumentException} if an error
235      *             occurs during parsing.
236      */
237     public Document modify(Reader source, String systemId)
238             throws DocumentException {
239         try {
240             return installModifyReader().read(source);
241         } catch (SAXModifyException ex) {
242             Throwable cause = ex.getCause();
243             throw new DocumentException(cause.getMessage(), cause);
244         }
245     }
246 
247     /***
248      * Reads a Document from the given {@link java.net.URL}and writes it to the
249      * specified {@link XMLWriter}using SAX. Registered {@linkElementModifier}
250      * objects are invoked on the fly.
251      * 
252      * @param source
253      *            is the <code>java.net.URL</code> to read from.
254      * 
255      * @return the newly created Document instance
256      * 
257      * @throws DocumentException
258      *             DocumentException org.dom4j.DocumentException} if an error
259      *             occurs during parsing.
260      */
261     public Document modify(URL source) throws DocumentException {
262         try {
263             return installModifyReader().read(source);
264         } catch (SAXModifyException ex) {
265             Throwable cause = ex.getCause();
266             throw new DocumentException(cause.getMessage(), cause);
267         }
268     }
269 
270     /***
271      * Reads a Document from the given URL or filename and writes it to the
272      * specified {@link XMLWriter}using SAX. Registered {@linkElementModifier}
273      * objects are invoked on the fly.
274      * 
275      * @param source
276      *            is the URL or filename to read from.
277      * 
278      * @return the newly created Document instance
279      * 
280      * @throws DocumentException
281      *             DocumentException org.dom4j.DocumentException} if an error
282      *             occurs during parsing.
283      */
284     public Document modify(String source) throws DocumentException {
285         try {
286             return installModifyReader().read(source);
287         } catch (SAXModifyException ex) {
288             Throwable cause = ex.getCause();
289             throw new DocumentException(cause.getMessage(), cause);
290         }
291     }
292 
293     /***
294      * Adds the {@link ElementModifier}to be called when the specified element
295      * path is encounted while parsing the source.
296      * 
297      * @param path
298      *            The element path to be handled
299      * @param modifier
300      *            The {@link ElementModifier}to be called by the event based
301      *            processor.
302      */
303     public void addModifier(String path, ElementModifier modifier) {
304         this.modifiers.put(path, modifier);
305     }
306 
307     /***
308      * Removes all registered {@link ElementModifier}instances from the event
309      * based processor.
310      */
311     public void resetModifiers() {
312         this.modifiers.clear();
313         getSAXModifyReader().resetHandlers();
314     }
315 
316     /***
317      * Removes the {@link ElementModifier}from the event based processor, for
318      * the specified element path.
319      * 
320      * @param path
321      *            The path to remove the {@link ElementModifier}for.
322      */
323     public void removeModifier(String path) {
324         this.modifiers.remove(path);
325         getSAXModifyReader().removeHandler(path);
326     }
327 
328     /***
329      * Get the {@link org.dom4j.DocumentFactory}used to create the DOM4J
330      * document structure
331      * 
332      * @return <code>DocumentFactory</code> that will be used
333      */
334     public DocumentFactory getDocumentFactory() {
335         return getSAXModifyReader().getDocumentFactory();
336     }
337 
338     /***
339      * Sets the {@link org.dom4j.DocumentFactory}used to create the DOM4J
340      * document tree.
341      * 
342      * @param factory
343      *            <code>DocumentFactory</code> to be used
344      */
345     public void setDocumentFactory(DocumentFactory factory) {
346         getSAXModifyReader().setDocumentFactory(factory);
347     }
348 
349     /***
350      * Returns the current {@link XMLWriter}.
351      * 
352      * @return XMLWriter
353      */
354     public XMLWriter getXMLWriter() {
355         return this.xmlWriter;
356     }
357 
358     /***
359      * Sets the {@link XMLWriter}used to write the modified document.
360      * 
361      * @param writer
362      *            The writer to use.
363      */
364     public void setXMLWriter(XMLWriter writer) {
365         this.xmlWriter = writer;
366     }
367 
368     /***
369      * Returns true when xml elements are not kept in memory while parsing. The
370      * {@link org.dom4j.Document}returned by the modify methods will be null.
371      * 
372      * @return Returns the pruneElements.
373      */
374     public boolean isPruneElements() {
375         return pruneElements;
376     }
377 
378     private SAXReader installModifyReader() throws DocumentException {
379         try {
380             SAXModifyReader reader = getSAXModifyReader();
381 
382             if (isPruneElements()) {
383                 modifyReader.setDispatchHandler(new PruningDispatchHandler());
384             }
385 
386             reader.resetHandlers();
387 
388             Iterator modifierIt = this.modifiers.entrySet().iterator();
389 
390             while (modifierIt.hasNext()) {
391                 Map.Entry entry = (Map.Entry) modifierIt.next();
392 
393                 SAXModifyElementHandler handler = new SAXModifyElementHandler(
394                         (ElementModifier) entry.getValue());
395                 reader.addHandler((String) entry.getKey(), handler);
396             }
397 
398             reader.setXMLWriter(getXMLWriter());
399             reader.setXMLReader(getXMLReader());
400 
401             return reader;
402         } catch (SAXException ex) {
403             throw new DocumentException(ex.getMessage(), ex);
404         }
405     }
406 
407     private XMLReader getXMLReader() throws SAXException {
408         if (this.xmlReader == null) {
409             xmlReader = SAXHelper.createXMLReader(false);
410         }
411 
412         return this.xmlReader;
413     }
414 
415     private SAXModifyReader getSAXModifyReader() {
416         if (modifyReader == null) {
417             modifyReader = new SAXModifyReader();
418         }
419 
420         return modifyReader;
421     }
422 }
423 
424 
425 
426 
427 
428 
429 
430 
431 
432 
433 
434 
435 
436 
437 
438 
439 
440 
441 
442 
443 
444 
445 
446 
447 
448 
449 
450 
451 
452 
453 
454 
455 
456 
457 
458 
459