View Javadoc

1   package uk.ac.ebi.intenz.tools.sib.comparator;
2   
3   import org.apache.log4j.Logger;
4   import uk.ac.ebi.intenz.tools.sib.exceptions.EnzymeEntryValidationException;
5   import uk.ac.ebi.intenz.tools.sib.validator.EnzymeEntryValidator;
6   import uk.ac.ebi.intenz.tools.sib.writer.LineType;
7   import uk.ac.ebi.intenz.domain.enzyme.EnzymeCommissionNumber;
8   import uk.ac.ebi.intenz.domain.exceptions.EcException;
9   
10  import java.io.*;
11  import java.util.regex.Pattern;
12  import java.util.regex.Matcher;
13  import java.util.List;
14  import java.util.ArrayList;
15  
16  /**
17   * This class compares two ENZYME.dat flat file formats.<br/>
18   *
19   * @author Michael Darsow
20   * @version $Revision: 1.2 $ $Date: 2008/01/28 11:43:23 $
21   */
22  public class FlatFileComparator {
23  
24     private static final Logger LOGGER =
25  	   Logger.getLogger(FlatFileComparator.class.getName());
26     private static final Logger LOGGER_ID = Logger.getLogger("ID");
27     private static final Logger LOGGER_DE = Logger.getLogger("DE");
28     private static final Logger LOGGER_AN = Logger.getLogger("AN");
29     private static final Logger LOGGER_CA = Logger.getLogger("CA");
30     private static final Logger LOGGER_CC = Logger.getLogger("CC");
31     private static final Logger LOGGER_CF = Logger.getLogger("CF");
32     private static final Logger LOGGER_DI = Logger.getLogger("DI");
33     private static final Logger LOGGER_DR = Logger.getLogger("DR");
34     private static final Logger LOGGER_PR = Logger.getLogger("PR");
35  
36     /**
37      * This number indicates the amount of lines which contain the header of ENZYME.dat.
38      */
39     private static final int START_LINE_NO = 27;
40     private static final String NEW_LINE = "\n";
41  
42     private static final String END_OF_ENTRY = new String("\\/\\/.*?");
43  
44     private static final String MISSING_ENZYME_ENTRY = new String("MISSING ENZYME ENTRY");
45     private static final String MISSING_INTENZ_ENTRY = new String("MISSING INTENZ ENTRY");
46  
47     private static int idCounter = 0;
48     private static int deCounter = 0;
49     private static int anCounter = 0;
50     private static int caCounter = 0;
51     private static int ccCounter = 0;
52     private static int cfCounter = 0;
53     private static int diCounter = 0;
54     private static int prCounter = 0;
55     private static int drCounter = 0;
56  
57     public static void compare (File flatFile1, File flatFile2) {
58        BufferedReader flatFile1Reader = null;
59        BufferedReader flatFile2Reader = null;
60        try {
61           flatFile1Reader = new BufferedReader(new FileReader(flatFile1));
62           flatFile2Reader = new BufferedReader(new FileReader(flatFile2));
63           // This is the line number used to count where we are in the flat file
64           int lineNo = 1;
65  
66           String lineOfLoadedFF = flatFile1Reader.readLine();
67           lineOfLoadedFF = addNewLine(lineOfLoadedFF);
68           String lineOfGeneratedFF = flatFile2Reader.readLine();
69           lineOfGeneratedFF = addNewLine(lineOfGeneratedFF);
70  
71           StringBuffer entryA = new StringBuffer();
72           StringBuffer entryB = new StringBuffer();
73           String missingEntry = "";
74           // Compare lines and return a message in case of a difference between the line's content.
75           while ( lineOfLoadedFF != null && lineOfGeneratedFF != null ) {
76              // If lineNo is greater than the start number start appending.
77              if ( lineNo > START_LINE_NO ) {
78                 if (missingEntry != MISSING_INTENZ_ENTRY) entryA.append(lineOfLoadedFF); // FIXME: maintain missingEntry flag
79                 if (missingEntry != MISSING_ENZYME_ENTRY) entryB.append(lineOfGeneratedFF); // FIXME: maintain missingEntry flag
80                 try {
81                    // There are two scenarios.
82                    // Either one of the files can finish first in which case it has to iterate through the second
83                    // file to find the end of entry of the second before it can compare entries.
84                    if ( !isNotEndOfEntry(lineOfLoadedFF) ) {
85                       // If its not the end of entry of the second file - then find it before proceeding.
86                       if (missingEntry != MISSING_ENZYME_ENTRY)
87                           findEndOfEntry(lineOfGeneratedFF, flatFile2Reader, entryB);
88  
89                       String outputResult = compareEntries(entryA.toString(), entryB.toString());
90                       if ( outputResult.length() != 0 ){
91                           missingEntry = checkForMissingEntries(outputResult);
92                           LOGGER.info(outputResult);
93                       } else {
94                           missingEntry = "";
95                       }
96                       if (missingEntry != MISSING_ENZYME_ENTRY) entryB.delete(0, entryB.length());
97                       if (missingEntry != MISSING_INTENZ_ENTRY) entryA.delete(0, entryA.length());
98  
99                       // Else the second file has finished first and it must check the first file for its
100                      // end of entry pattern
101                   } else if ( !isNotEndOfEntry(lineOfGeneratedFF) ) {
102                      if (missingEntry != MISSING_INTENZ_ENTRY)
103                          findEndOfEntry(lineOfLoadedFF, flatFile1Reader, entryA);
104 
105                      String outputResult = compareEntries(entryA.toString(), entryB.toString());
106                      if ( outputResult.length() != 0 ){
107                          missingEntry = checkForMissingEntries(outputResult);
108                          LOGGER.info(outputResult);
109                      } else {
110                          missingEntry = "";
111                      }
112                      if (missingEntry != MISSING_ENZYME_ENTRY) entryB.delete(0, entryB.length());
113                      if (missingEntry != MISSING_INTENZ_ENTRY) entryA.delete(0, entryA.length());
114                   }
115                } catch ( EnzymeEntryValidationException e ) {
116                   LOGGER.error(e);
117                   clearBuffers(entryA, entryB);
118                }
119             }
120 
121             lineNo++;
122             if (missingEntry != MISSING_INTENZ_ENTRY) lineOfLoadedFF = nextLine(flatFile1Reader);
123             if (missingEntry != MISSING_ENZYME_ENTRY) lineOfGeneratedFF = nextLine(flatFile2Reader);
124             if ( lineOfLoadedFF.equals("null" + NEW_LINE) || lineOfGeneratedFF.equals("null" + NEW_LINE) )
125                break;
126          }
127 
128          logTotals();
129 
130       } catch ( IOException e ) {
131          LOGGER.error("Error while reading a flat file.", e);
132       } finally {
133          try {
134             if ( flatFile1Reader != null ) flatFile1Reader.close();
135             if ( flatFile2Reader != null ) flatFile2Reader.close();
136          } catch ( IOException e ) {
137             LOGGER.error("Error while closing a reader.", e);
138          }
139       }
140    }
141    
142    /**
143     * Called only for unmatching entries.
144     * @param outputResult
145     * @return MISSING_ENZYME_ENTRY or MISSING_INTENZ_ENTRY when the entry ID is different,
146     *   empty string otherwise.
147     */
148    private static String checkForMissingEntries(String outputResult) {
149        String missingEntry = "";
150        if (outputResult.startsWith(MISSING_ENZYME_ENTRY)){
151            missingEntry = MISSING_ENZYME_ENTRY;
152        } else if (outputResult.startsWith(MISSING_INTENZ_ENTRY)){
153            missingEntry = MISSING_INTENZ_ENTRY;
154        }
155        return missingEntry;
156    }
157 
158     private static String nextLine(BufferedReader reader) throws IOException {
159          String line = reader.readLine();
160          return addNewLine(line);
161     }
162 
163 
164    /**
165     * Compares two entries by checking their data line by line.
166     *
167     * @param entryA The first entry...
168     * @param entryB ...and the second one.
169     * @return the differences (if any)
170     * @throws EnzymeEntryValidationException if an enzyme does not comply to the ENZYME format.
171     */
172    public static String compareEntries (String entryA, String entryB) throws EnzymeEntryValidationException {
173       if ( entryA == null ) throw new NullPointerException("Parameter 'entryA' must not be null.");
174       if ( entryB == null ) throw new NullPointerException("Parameter 'entryB' must not be null.");
175       LOGGER.debug(entryA);
176       LOGGER.debug(entryB);
177       entryA = reconstructSentences(entryA);
178       entryB = reconstructSentences(entryB);
179       // PreParse entries to reconstruct lines
180       // Check if both entries contain values in the correct ENZYME entry format.
181       try {
182          EnzymeEntryValidator.validate(entryA);
183       } catch ( EnzymeEntryValidationException e ) {
184          throw new EnzymeEntryValidationException("Error in entryA: "
185         		 + e.getMessage()
186         		 + ((entryA.indexOf('\n') > -1)? entryA.substring(0, entryA.indexOf('\n')) : entryA));
187       }
188       try {
189          EnzymeEntryValidator.validate(entryB);
190       } catch ( EnzymeEntryValidationException e ) {
191          throw new EnzymeEntryValidationException("Error in entryB: "
192         		 + e.getMessage()
193         		 + ((entryB.indexOf('\n') > -1)? entryB.substring(0, entryB.indexOf('\n')) : entryB));
194       }
195 
196       StringBuffer differences = new StringBuffer();
197 
198       // Return no difference if both entries are identical.
199       if ( entryA.equals(entryB) )
200          return differences.toString();
201 
202       if ( lineIsDifferent(entryA, entryB, LineType.ID) ) {
203           // Check for missing entries:
204           String idLine = null;
205           String ecString = null;
206           try {
207               idLine = addNewLine(getLine(entryA, LineType.ID));
208               ecString = getEC(idLine);
209               EnzymeCommissionNumber ecA = EnzymeCommissionNumber.valueOf(ecString);
210               idLine = addNewLine(getLine(entryB, LineType.ID));
211               ecString = getEC(idLine);
212               EnzymeCommissionNumber ecB = EnzymeCommissionNumber.valueOf(ecString);
213               int ecDiff = ecA.compareTo(ecB);
214               if (ecDiff != 0){
215                   if (ecDiff < 0){
216                       differences.append(MISSING_INTENZ_ENTRY);
217                       differences.append(": ");
218                       differences.append(ecB.toString());
219                       differences.append(" does not exist in ENZYME");
220                   } else if (ecDiff > 0){
221                       differences.append(MISSING_ENZYME_ENTRY);
222                       differences.append(": ");
223                       differences.append(ecA.toString());
224                       differences.append(" does not exist in IntEnz");
225                   }
226                   LOGGER_ID.warn(differences.toString());
227                   idCounter++;
228                   return differences.toString();
229               }
230           } catch (EcException e) {
231               throw new EnzymeEntryValidationException("Bad EC number: " + ecString);
232           }
233 //          String diffString = getDifferenceMessage(LineType.ID, entryA, entryB, LOGGER_ID);
234 //         if ( diffString != null && !diffString.equals("") ) {
235 //            idCounter++;
236 //            differences.append(diffString);
237 //         }
238       }
239       if ( lineIsDifferent(entryA, entryB, LineType.DE) ) {
240          String diffString = getDifferenceMessage(LineType.DE, entryA, entryB, LOGGER_DE);
241          if ( diffString != null && !diffString.equals("") ) {
242             deCounter++;
243             differences.append(diffString);
244          }
245       }
246 
247       int lineIndex = linesAreDifferent(entryA, entryB, LineType.AN);
248       if ( lineIndex != -1 ) {
249          String diffString = getDifferenceMessage(LineType.AN, entryA, entryB, lineIndex, LOGGER_AN);
250          if ( diffString != null && !diffString.equals("") ) {
251             anCounter++;
252             differences.append(diffString);
253          }
254       }
255 
256       lineIndex = linesAreDifferent(entryA, entryB, LineType.CA);
257       if ( lineIndex != -1 ) {
258          String diffString = getDifferenceMessage(LineType.CA, entryA, entryB, lineIndex, LOGGER_CA);
259          if ( diffString != null && !diffString.equals("") ) {
260             caCounter++;
261             differences.append(diffString);
262          }
263       }
264 
265       lineIndex = linesAreDifferent(entryA, entryB, LineType.CF);
266       if ( lineIndex != -1 ) {
267          String diffString = getDifferenceMessage(LineType.CF, entryA, entryB, lineIndex, LOGGER_CF);
268          if ( diffString != null && !diffString.equals("") ) {
269             cfCounter++;
270             differences.append(diffString);
271          }
272       }
273 
274       lineIndex = linesAreDifferent(entryA, entryB, LineType.CC);
275       if ( lineIndex != -1 ) {
276          String diffString = getDifferenceMessage(LineType.CC, entryA, entryB, lineIndex, LOGGER_CC);
277          if ( diffString != null && !diffString.equals("") ) {
278             ccCounter++;
279             differences.append(diffString);
280          }
281       }
282 
283       lineIndex = linesAreDifferent(entryA, entryB, LineType.DI);
284       if ( lineIndex != -1 ) {
285          String diffString = getDifferenceMessage(LineType.DI, entryA, entryB, lineIndex, LOGGER_DI);
286          if ( diffString != null && !diffString.equals("") ) {
287             diCounter++;
288             differences.append(diffString);
289          }
290       }
291 
292       lineIndex = linesAreDifferent(entryA, entryB, LineType.PR);
293       if ( lineIndex != -1 ) {
294          String diffString = getDifferenceMessage(LineType.PR, entryA, entryB, lineIndex, LOGGER_PR);
295          if ( diffString != null && !diffString.equals("") ) {
296             prCounter++;
297             differences.append(diffString);
298          }
299          //differences.append(getDifferenceMessage(LineType.PR, getEC(entryA), lineIndex));
300       }
301 
302       lineIndex = linesAreDifferent(entryA, entryB, LineType.DR);
303       if ( lineIndex != -1 ) {
304          String diffString = getDifferenceMessage(LineType.DR, entryA, entryB, lineIndex, LOGGER_DR);
305          if ( diffString != null && !diffString.equals("") ) {
306             drCounter++;
307             differences.append(diffString);
308          }
309          // differences.append(getDifferenceMessage(LineType.DR, getEC(entryA), lineIndex));
310       }
311 
312       if ( differences.length() == 0 || differences.length() == 1 ) {
313          //    LOGGER.info("PROBLEM HERE for entryA: " + entryA.toString());
314          //    LOGGER.info("PROBLEM HERE for entryB: " + entryB.toString());
315       }
316 
317       return differences.toString();
318    }
319 
320    //----------------------------- PRIVATE METHODS --------------------------//
321 
322    /**
323     * This method preparses the entry from the flat file and tries to reconstruct
324     * sentences. If first looks for hyphens and then appends them without a space
325     * and then looks for lines not ending with full stops and appends them with a
326     * space.<br/>
327     * @param entry
328     * @return
329     */
330    private static String reconstructSentences (String entry) {
331       // Variable definitions
332       StringBuffer finalPreParsed = new StringBuffer();
333       StringBuffer hypenParsed = new StringBuffer();
334       Pattern idPattern = Pattern.compile("(ID   \\d+?\\.\\d+?\\.\\d+?\\.\\d+?\n)");
335       Matcher idMatcher = idPattern.matcher(entry);
336 
337       // Remove identifier
338       if ( idMatcher.find() ) {
339          finalPreParsed.append(idMatcher.group());
340          entry = entry.substring(finalPreParsed.length(), entry.length());
341       }
342 
343       // Match hyphens first lines
344       Pattern hyphenPattern = Pattern.compile("\\-\n(\\p{Upper}{2}\\s{3,}+)");
345       Matcher hyphenMatcher = hyphenPattern.matcher(entry);
346       boolean foundHyphen = hyphenMatcher.find();
347       if ( foundHyphen ) {
348          String whatFollows = entry.substring(hyphenMatcher.end());
349          if (whatFollows.startsWith("or ")
350                  || whatFollows.startsWith("and ")
351                  || whatFollows.startsWith("bonds ")){
352              hypenParsed.append(hyphenMatcher.replaceAll("- "));
353          } else {
354              hypenParsed.append(hyphenMatcher.replaceAll("-"));
355          }
356       } else {
357          hypenParsed.append(entry);
358       }
359 
360       // Match new lines first lines
361 //      Pattern newLinePattern = Pattern.compile("(.[^\\.;\\/])(\n(\\p{Upper}{2}\\s{3,}+))");
362       removeSpareText(hypenParsed, "(.[^\\.;\\/]|sp\\.|e\\.g\\.|cf\\.)(\n(\\p{Upper}{2}\\s{3,}+))");
363       
364       removeSpareText(hypenParsed, "(CA   .+?)\nCA   ");
365       
366       removeSpareText(hypenParsed, "(CC   .+?)\nCC       ");
367       
368       finalPreParsed.append(hypenParsed.toString());
369 
370       return finalPreParsed.toString();
371    }
372     
373     /**
374      * Removes not needed text
375      * @param text
376      * @param regex A regex with at least one group in parenthesis,
377      *      which will be kept in the text. The rest of the pattern will be
378      *      discarded. A space will be added at the end.
379      */
380     private static void removeSpareText(StringBuffer text, String regex) {
381         Pattern pattern = Pattern.compile(regex);
382         Matcher matcher = pattern.matcher(text.toString());
383         boolean found = matcher.find();
384         while (found) {
385             String subString = matcher.group(0);
386             String replacement = matcher.group(1);
387             text.replace(text.indexOf(subString), text.indexOf(subString)
388                     + subString.length(), replacement + " ");
389             found = matcher.find();
390         }
391     }
392 
393    /**
394     * Helper method to find the end of an entry being read from the flat file.
395     * It also appends the data to the StringBuffer provided.
396     *
397     * @param lineOfLoadedFF
398     * @param flatFile1Reader
399     * @param entryA
400     * @throws IOException
401     */
402    private static void findEndOfEntry (String lineOfLoadedFF, BufferedReader flatFile1Reader,
403                                        StringBuffer entryA) throws IOException {
404       if ( isNotEndOfEntry(lineOfLoadedFF) ) {
405          String line = readLineAndAppend(flatFile1Reader, entryA);
406          while ( isNotEndOfEntry(line) ) {
407             line = readLineAndAppend(flatFile1Reader, entryA);
408          }
409       }
410    }
411 
412    private static boolean isNotEndOfEntry (String line) {
413       return (line != null && !line.trim().matches(END_OF_ENTRY));
414    }
415 
416    /**
417     * This helper reads the line from the Reader and appends a new line character as well the line
418     * being read.
419     *
420     * @param flatFile2Reader
421     * @param entryB
422     * @return True if its the end of entry else false if it isn't
423     * @throws IOException
424     */
425    private static String readLineAndAppend (BufferedReader flatFile2Reader, StringBuffer entryB) throws IOException {
426       String lineOfGeneratedFF;
427       lineOfGeneratedFF = flatFile2Reader.readLine();
428       lineOfGeneratedFF = addNewLine(lineOfGeneratedFF);
429       entryB.append(lineOfGeneratedFF);
430       return lineOfGeneratedFF;
431    }
432 
433    /**
434     * Convenience method to clear the buffers.
435     *
436     * @param entryA
437     * @param entryB
438     */
439    private static void clearBuffers (StringBuffer entryA, StringBuffer entryB) {
440       entryA.delete(0, entryA.length());
441       entryB.delete(0, entryB.length());
442    }
443 
444    /**
445     * Logs the totals after the files have been compared.
446     */
447    private static void logTotals () {
448       int totalCounter = idCounter + deCounter + anCounter + caCounter + ccCounter
449             + cfCounter + diCounter + drCounter + prCounter;
450       LOGGER.info("-------------------------------------------------");
451       LOGGER.info("ID: " + idCounter);
452       LOGGER.info("DE: " + deCounter);
453       LOGGER.info("AN: " + anCounter);
454       LOGGER.info("CA: " + caCounter);
455       LOGGER.info("CC: " + ccCounter);
456       LOGGER.info("CF: " + cfCounter);
457       LOGGER.info("DI: " + diCounter);
458       LOGGER.info("DR: " + drCounter);
459       LOGGER.info("PR: " + prCounter);
460       LOGGER.info("-------------------------------------------------");
461       LOGGER.info("Total Errors: " + totalCounter);
462       LOGGER.info("-------------------------------------------------");
463    }
464 
465    /**
466     * This method is a convenience method to add a new line character.
467     *
468     * @param lineToAdd
469     */
470    private static String addNewLine (String lineToAdd) {
471       return lineToAdd += NEW_LINE;
472    }
473 
474    /**
475     * Compares the lines with the given line type of both entries with each other.
476     *
477     * @param entryA   The first entry to be compared with...
478     * @param entryB   ...the second entry.
479     * @param lineType The line type of the lines to be compared.
480     * @return <code>true</code> if both lines with the same line type of the given entries are identical.
481     */
482    private static boolean lineIsDifferent (String entryA, String entryB, LineType lineType) {
483       assert entryA != null : "Parameter 'entryA' must not be null.";
484       assert entryB != null : "Parameter 'entryB' must not be null.";
485       assert lineType != null : "Parameter 'lineType' must not be null.";
486       String lineEntryA = getLine(entryA, lineType);
487       String lineEntryB = getLine(entryB, lineType);
488       return !lineEntryA.equals(lineEntryB);
489    }
490 
491    /**
492     * Compares the lines with the given line type of both entries with each other.
493     *
494     * @param entryA   The first entry to be compared with...
495     * @param entryB   ...the second entry.
496     * @param lineType The line type of the lines to be compared.
497     * @return the line index where the difference has been spotted, or -1 if all lines with the given line type of the
498     *         given entries are identical.
499     */
500    private static int linesAreDifferent (String entryA, String entryB, LineType lineType) {
501       assert entryA != null : "Parameter 'entryA' must not be null.";
502       assert entryB != null : "Parameter 'entryB' must not be null.";
503       assert lineType != null : "Parameter 'lineType' must not be null.";
504       List linesEntryA = getLines(entryA, lineType);
505       List linesEntryB = getLines(entryB, lineType);
506       for ( int lineIndex = 0; lineIndex < linesEntryA.size(); lineIndex++ ) {
507          String lineEntryA = (String) linesEntryA.get(lineIndex);
508          if ( linesEntryB.size() - 1 < lineIndex ) //something missing
509             return lineIndex + 1;
510          String lineEntryB = (String) linesEntryB.get(lineIndex);
511          if ( !lineEntryA.equals(lineEntryB) )
512             return lineIndex + 1;
513       }
514       return -1;
515    }
516 
517    /**
518     * Creates a message describing the difference that has been found.
519     *
520     * @param lineType The line type of the line where the difference has been spotted.
521     * @param entryA   The first entry.
522     * @param entryB   The second entry.
523     * @param logger
524     * @return the message describing the difference.
525     */
526    private static String getDifferenceMessage (LineType lineType, String entryA, String entryB, Logger logger) {
527       assert lineType != null : "Parameter 'lineType' must not be null.";
528       assert entryA != null : "Parameter 'entryA' must not be null.";
529       assert entryB != null : "Parameter 'entryB' must not be null.";
530       StringBuffer differences = new StringBuffer();
531       differences.append(lineType.toString());
532       differences.append(" line is different for entry: ");
533       differences.append(getEC(entryA));
534       differences.append(" (EC of ENZYME.dat)");
535       logger.info(differences);
536       StringBuffer enzymeDat = new StringBuffer("ENZYME.DAT: ").append(getLine(entryA, lineType));
537       differences.append(enzymeDat);
538       logger.info(enzymeDat);
539       StringBuffer intenzDat = new StringBuffer("INTENZ.DAT: ").append(getLine(entryB, lineType));
540       differences.append(intenzDat);
541       logger.info(intenzDat);
542       return differences.toString();
543    }
544 
545    /**
546     * Creates a message describing the difference that has been found.
547     * <p/>
548     * This method also gives the line number when line types have been checked which can have multiple lines.
549     *
550     * @param lineType  The line type of the line where the difference has been spotted.
551     * @param entryA    The EC number of the first entry.
552     * @param lineIndex The number of the line in the group of lines.
553     * @param logger
554     * @return the message describing the difference.
555     */
556    private static String getDifferenceMessage (LineType lineType, String entryA, String entryB, int lineIndex, Logger logger) {
557       assert lineType != null : "Parameter 'lineType' must not be null.";
558       assert entryA != null : "Parameter 'entryA' must not be null.";
559       StringBuffer differences = new StringBuffer();
560       differences.append(lineType.toString());
561       differences.append(" line ");
562       differences.append(lineIndex);
563       differences.append(" is different for entry: ");
564       differences.append(getEC(entryA));
565       differences.append(" (EC of enzyme.dat)");
566       logger.info(differences.toString());
567       StringBuffer enzymeDat = new StringBuffer("ENZYME.DAT: ").append(getLineOnLineIndex(entryA, lineType, lineIndex));
568       differences.append(enzymeDat);
569       logger.info(enzymeDat);
570       StringBuffer intenzDat = new StringBuffer("INTENZ.DAT: ").append(getLineOnLineIndex(entryB, lineType, lineIndex));
571       differences.append(intenzDat);
572       logger.info(intenzDat);
573       return differences.toString();
574    }
575 
576    /**
577     * Extracts the EC number from the ID line.
578     *
579     * @param entryA The entry containing the ID line.
580     * @return the EC number.
581     */
582    private static String getEC (String entryA) {
583       assert entryA != null : "Parameter 'entryA' must not be null.";
584       Pattern IDLinePattern = Pattern.compile("ID   (\\d+?\\.\\d+?\\.\\d+?\\.\\d+?)\n");
585       Matcher IDLinePatternMatcher = IDLinePattern.matcher(entryA);
586       if ( IDLinePatternMatcher.find() ) return IDLinePatternMatcher.group(1);
587       return "";
588    }
589 
590    /**
591     * Extracts the line of the given line type from the given entry.
592     *
593     * @param entry    The entry from which the line will be exctracted.
594     * @param lineType The line type of the line to be extracted.
595     * @return the line or an empty string if no ID line could be found.
596     */
597    private static String getLine (String entry, LineType lineType) {
598       assert entry != null : "Parameter 'entry' must not be null.";
599       Pattern linePattern = Pattern.compile("(" + lineType.toString() + "   .+?)\n");
600       Matcher linePatternMatcher = linePattern.matcher(entry);
601       if ( linePatternMatcher.find() ) return linePatternMatcher.group(1);
602       return "";
603    }
604 
605    /**
606     * Extracts the all lines of the given line type from the given entry.
607     *
608     * @param entry    The entry from which the lines will be exctracted.
609     * @param lineType The line type of the lines to be extracted.
610     * @return the lines stored in an {@link java.util.ArrayList}.
611     */
612    private static List getLines (String entry, LineType lineType) {
613       assert entry != null : "Parameter 'entry' must not be null.";
614       assert lineType != null : "Parameter 'lineType' must not be null.";
615       Pattern linePattern = Pattern.compile("(" + lineType.toString() + "   .+?)\n");
616       Matcher linePatternMatcher = linePattern.matcher(entry);
617       List lines = new ArrayList();
618       while ( linePatternMatcher.find() ) lines.add(linePatternMatcher.group(1));
619       return lines;
620    }
621 
622    /**
623     * Static method which returns the line based on the lineIndex provided.
624     *
625     * @param entry
626     * @param lineType
627     * @param lineIndex
628     * @return
629     * @throws IndexOutOfBoundsException Will be thrown if the index is larger than the size
630     *                                   of the list generated.
631     */
632    private static String getLineOnLineIndex (String entry, LineType lineType, int lineIndex) {
633       --lineIndex;
634       List lines = getLines(entry, lineType);
635       if ( lines.size() == 0 )
636          return new String("");
637       else if ( lineIndex < lines.size() )
638          return (String) lines.get(lineIndex);
639       else
640          return (String) lines.get(lines.size() - 1);
641    }
642 
643 }